diff mbox series

[1/3] KVM: arm64: add a hypercall for disowning pages

Message ID 20220623021926.3443240-2-pcc@google.com (mailing list archive)
State New, archived
Headers show
Series KVM: arm64: support MTE in protected VMs | expand

Commit Message

Peter Collingbourne June 23, 2022, 2:19 a.m. UTC
Currently we only deny the host access to hyp and guest pages. However,
there may be other pages that could potentially be used to indirectly
compromise the hypervisor or the other guests. Therefore introduce a
__pkvm_disown_pages hypercall that the host kernel may use to deny its
future self access to those pages before deprivileging itself.

Signed-off-by: Peter Collingbourne <pcc@google.com>
---
 arch/arm64/include/asm/kvm_asm.h              |  1 +
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |  1 +
 arch/arm64/kvm/hyp/include/nvhe/pkvm.h        |  1 +
 arch/arm64/kvm/hyp/nvhe/hyp-main.c            |  9 +++++++++
 arch/arm64/kvm/hyp/nvhe/mem_protect.c         | 11 +++++++++++
 arch/arm64/kvm/hyp/pgtable.c                  |  5 +++--
 6 files changed, 26 insertions(+), 2 deletions(-)

Comments

Quentin Perret June 23, 2022, 1:11 p.m. UTC | #1
Hi Peter,

On Wednesday 22 Jun 2022 at 19:19:24 (-0700), Peter Collingbourne wrote:
> @@ -677,9 +678,9 @@ static bool stage2_pte_is_counted(kvm_pte_t pte)
>  	/*
>  	 * The refcount tracks valid entries as well as invalid entries if they
>  	 * encode ownership of a page to another entity than the page-table
> -	 * owner, whose id is 0.
> +	 * owner, whose id is 0, or NOBODY, which does not correspond to a page-table.
>  	 */
> -	return !!pte;
> +	return !!pte && pte != kvm_init_invalid_leaf_owner(PKVM_ID_NOBODY);
>  }

I'm not sure I understand this part. By not refcounting the PTEs that
are annotated with PKVM_ID_NOBODY, the page-table page that contains
them may be freed at some point. And when that happens, I don't see how
the hypervisor will remember to block host accesses to the disowned
pages.

Cheers,
Quentin
Peter Collingbourne June 23, 2022, 6:12 p.m. UTC | #2
On Thu, Jun 23, 2022 at 6:12 AM Quentin Perret <qperret@google.com> wrote:
>
> Hi Peter,
>
> On Wednesday 22 Jun 2022 at 19:19:24 (-0700), Peter Collingbourne wrote:
> > @@ -677,9 +678,9 @@ static bool stage2_pte_is_counted(kvm_pte_t pte)
> >       /*
> >        * The refcount tracks valid entries as well as invalid entries if they
> >        * encode ownership of a page to another entity than the page-table
> > -      * owner, whose id is 0.
> > +      * owner, whose id is 0, or NOBODY, which does not correspond to a page-table.
> >        */
> > -     return !!pte;
> > +     return !!pte && pte != kvm_init_invalid_leaf_owner(PKVM_ID_NOBODY);
> >  }
>
> I'm not sure I understand this part. By not refcounting the PTEs that
> are annotated with PKVM_ID_NOBODY, the page-table page that contains
> them may be freed at some point. And when that happens, I don't see how
> the hypervisor will remember to block host accesses to the disowned
> pages.

This was because I misunderstood the code and thought that the refcount
was for maintaining a count of references from PTEs to the pages that
they reference (which would make the refcounting unnecessary for pages
owned by nobody). Reading the code more carefully, my understanding is
now that the refcounts are instead for tracking the number of non-zero
PTEs in the page table, so that the hypervisor knows that it can free up
a page table when it becomes all zeros, i.e. no longer contains any
information that needs to be preserved (so the "references" that are
being counted are implicit in the location of the PTE). So it doesn't
make sense to exclude PTEs owned by nobody from the count here. I'll
remove this part in v2.

Peter
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 411cfbe3ebbd..1a177d9ed517 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -63,6 +63,7 @@  enum __kvm_host_smccc_func {
 	__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
 	__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
 	__KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
+	__KVM_HOST_SMCCC_FUNC___pkvm_disown_pages,
 	__KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize,
 
 	/* Hypercalls available after pKVM finalisation */
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index e0bbb1726fa3..e88a9dab9cd5 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -58,6 +58,7 @@  enum pkvm_component_id {
 	PKVM_ID_HOST,
 	PKVM_ID_HYP,
 	PKVM_ID_GUEST,
+	PKVM_ID_NOBODY,
 };
 
 extern unsigned long hyp_nr_cpus;
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index c1987115b217..fbd991a46ab3 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -98,6 +98,7 @@  int __pkvm_init_shadow(struct kvm *kvm,
 		       unsigned long pgd_hva,
 		       unsigned long last_ran_hva, size_t last_ran_size);
 int __pkvm_teardown_shadow(unsigned int shadow_handle);
+int __pkvm_disown_pages(phys_addr_t phys, size_t size);
 
 struct kvm_shadow_vcpu_state *
 pkvm_load_shadow_vcpu_state(unsigned int shadow_handle, unsigned int vcpu_idx);
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index ddb36d172b60..b81908ef13e2 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -1055,6 +1055,14 @@  static void handle___pkvm_teardown_shadow(struct kvm_cpu_context *host_ctxt)
 	cpu_reg(host_ctxt, 1) = __pkvm_teardown_shadow(shadow_handle);
 }
 
+static void handle___pkvm_disown_pages(struct kvm_cpu_context *host_ctxt)
+{
+	DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
+	DECLARE_REG(size_t, size, host_ctxt, 2);
+
+	cpu_reg(host_ctxt, 1) = __pkvm_disown_pages(phys, size);
+}
+
 typedef void (*hcall_t)(struct kvm_cpu_context *);
 
 #define HANDLE_FUNC(x)	[__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
@@ -1072,6 +1080,7 @@  static const hcall_t host_hcall[] = {
 	HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
 	HANDLE_FUNC(__kvm_tlb_flush_vmid),
 	HANDLE_FUNC(__kvm_flush_cpu_context),
+	HANDLE_FUNC(__pkvm_disown_pages),
 	HANDLE_FUNC(__pkvm_prot_finalize),
 
 	HANDLE_FUNC(__pkvm_host_share_hyp),
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index d839bb573b49..b3a2ad8454cc 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -1756,3 +1756,14 @@  int __pkvm_host_reclaim_page(u64 pfn)
 
 	return ret;
 }
+
+int __pkvm_disown_pages(phys_addr_t phys, size_t size)
+{
+	int ret;
+
+	host_lock_component();
+	ret = host_stage2_set_owner_locked(phys, size, PKVM_ID_NOBODY);
+	host_unlock_component();
+
+	return ret;
+}
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 756bbb15c1f3..e1ecddd43885 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -10,6 +10,7 @@ 
 #include <linux/bitfield.h>
 #include <asm/kvm_pgtable.h>
 #include <asm/stage2_pgtable.h>
+#include <nvhe/mem_protect.h>
 
 
 #define KVM_PTE_TYPE			BIT(1)
@@ -677,9 +678,9 @@  static bool stage2_pte_is_counted(kvm_pte_t pte)
 	/*
 	 * The refcount tracks valid entries as well as invalid entries if they
 	 * encode ownership of a page to another entity than the page-table
-	 * owner, whose id is 0.
+	 * owner, whose id is 0, or NOBODY, which does not correspond to a page-table.
 	 */
-	return !!pte;
+	return !!pte && pte != kvm_init_invalid_leaf_owner(PKVM_ID_NOBODY);
 }
 
 static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr,