
[03/16] KVM: x86/tdp_mmu: Add a helper function to walk down the TDP MMU

Message ID 20240515005952.3410568-4-rick.p.edgecombe@intel.com (mailing list archive)
State New, archived
Series TDX MMU prep series part 1

Commit Message

Edgecombe, Rick P May 15, 2024, 12:59 a.m. UTC
From: Isaku Yamahata <isaku.yamahata@intel.com>

Export a function to walk down the TDP without modifying it.

Future changes will support pre-populating TDX private memory. In order to
implement this, KVM will need to check whether a given GFN is already
pre-populated in the mirrored EPT and verify that the populated private
memory PFN matches the current one [1].

There is already a TDP MMU walker, kvm_tdp_mmu_get_walk() for use within
the KVM MMU that almost does what is required. However, to make sense of
the results, MMU internal PTE helpers are needed. Refactor the code to
provide a helper that can be used outside of the KVM MMU code.

Refactoring the KVM page fault handler to support this lookup usage was
also considered, but it was an awkward fit.

Link: https://lore.kernel.org/kvm/ZfBkle1eZFfjPI8l@google.com/ [1]
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
---
This helper will be used in the future change that implements
KVM_TDX_INIT_MEM_REGION. Please refer to the following commit for the
usage:
https://github.com/intel/tdx/commit/2832c6d87a4e6a46828b193173550e80b31240d4
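
For illustration, a rough sketch of what such a caller could look like is
below. It is not taken from that commit; the function name, the expected_pfn
parameter, and the error codes are hypothetical. The idea is simply to walk
the mirrored EPT for the GFN and verify that the pre-populated PFN matches
the page about to be added.

static int tdx_check_prepopulated_gfn(struct kvm_vcpu *vcpu, gpa_t gpa,
				      kvm_pfn_t expected_pfn)
{
	kvm_pfn_t pfn;
	int level;

	/* The helper asserts mmu_lock, so the caller must already hold it. */
	level = kvm_tdp_mmu_get_walk_private_pfn(vcpu, gpa, &pfn);
	if (level < 0)
		return -ENOENT;	/* GFN is not pre-populated in the mirrored EPT */

	if (pfn != expected_pfn)
		return -EIO;	/* populated PFN does not match the current one */

	return 0;
}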

TDX MMU Part 1:
 - New patch
---
 arch/x86/kvm/mmu.h         |  3 +++
 arch/x86/kvm/mmu/tdp_mmu.c | 37 +++++++++++++++++++++++++++++++++----
 2 files changed, 36 insertions(+), 4 deletions(-)

Comments

Chao Gao May 17, 2024, 7:44 a.m. UTC | #1
On Tue, May 14, 2024 at 05:59:39PM -0700, Rick Edgecombe wrote:
>From: Isaku Yamahata <isaku.yamahata@intel.com>
>
>Export a function to walk down the TDP without modifying it.
>
>Future changes will support pre-populating TDX private memory. In order to
>implement this KVM will need to check if a given GFN is already
>pre-populated in the mirrored EPT, and verify the populated private memory
>PFN matches the current one.[1]
>
>There is already a TDP MMU walker, kvm_tdp_mmu_get_walk() for use within
>the KVM MMU that almost does what is required. However, to make sense of
>the results, MMU internal PTE helpers are needed. Refactor the code to
>provide a helper that can be used outside of the KVM MMU code.
>
>Refactoring the KVM page fault handler to support this lookup usage was
>also considered, but it was an awkward fit.
>
>Link: https://lore.kernel.org/kvm/ZfBkle1eZFfjPI8l@google.com/ [1]
>Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
>Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
>---
>This helper will be used in the future change that implements
>KVM_TDX_INIT_MEM_REGION. Please refer to the following commit for the
>usage:
>https://github.com/intel/tdx/commit/2832c6d87a4e6a46828b193173550e80b31240d4
>
>TDX MMU Part 1:
> - New patch
>---
> arch/x86/kvm/mmu.h         |  3 +++
> arch/x86/kvm/mmu/tdp_mmu.c | 37 +++++++++++++++++++++++++++++++++----
> 2 files changed, 36 insertions(+), 4 deletions(-)
>
>diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
>index dc80e72e4848..3c7a88400cbb 100644
>--- a/arch/x86/kvm/mmu.h
>+++ b/arch/x86/kvm/mmu.h
>@@ -275,6 +275,9 @@ extern bool tdp_mmu_enabled;
> #define tdp_mmu_enabled false
> #endif
> 
>+int kvm_tdp_mmu_get_walk_private_pfn(struct kvm_vcpu *vcpu, u64 gpa,
>+				     kvm_pfn_t *pfn);
>+
> static inline bool kvm_memslots_have_rmaps(struct kvm *kvm)
> {
> 	return !tdp_mmu_enabled || kvm_shadow_root_allocated(kvm);
>diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
>index 1259dd63defc..1086e3b2aa5c 100644
>--- a/arch/x86/kvm/mmu/tdp_mmu.c
>+++ b/arch/x86/kvm/mmu/tdp_mmu.c
>@@ -1772,16 +1772,14 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
>  *
>  * Must be called between kvm_tdp_mmu_walk_lockless_{begin,end}.
>  */
>-int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
>-			 int *root_level)
>+static int __kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
>+				  bool is_private)

is_private isn't used.

> {
> 	struct tdp_iter iter;
> 	struct kvm_mmu *mmu = vcpu->arch.mmu;
> 	gfn_t gfn = addr >> PAGE_SHIFT;
> 	int leaf = -1;
> 
>-	*root_level = vcpu->arch.mmu->root_role.level;
>-
> 	tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
> 		leaf = iter.level;
> 		sptes[leaf] = iter.old_spte;
>@@ -1790,6 +1788,37 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
> 	return leaf;
> }
> 
>+int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
>+			 int *root_level)
>+{
>+	*root_level = vcpu->arch.mmu->root_role.level;
>+
>+	return __kvm_tdp_mmu_get_walk(vcpu, addr, sptes, false);
>+}
>+
>+int kvm_tdp_mmu_get_walk_private_pfn(struct kvm_vcpu *vcpu, u64 gpa,
>+				     kvm_pfn_t *pfn)

private_pfn is probably a misnomer: shared/private is an attribute of the
GPA rather than of the PFN. Since the function gets a PFN from a GPA, how
about kvm_tdp_mmu_gpa_to_pfn()?

And the function is limited to handling private GPAs only. That is an
artificial limitation we can get rid of easily, e.g. by making the function
take an "is_private" boolean and relaying it to __kvm_tdp_mmu_get_walk().
I know TDX just calls the function to convert private GPAs, but having a
generic API can accommodate future use cases (e.g. getting the HPA for a
shared GPA) without the need for refactoring.

>+{
>+	u64 sptes[PT64_ROOT_MAX_LEVEL + 1], spte;
>+	int leaf;
>+
>+	lockdep_assert_held(&vcpu->kvm->mmu_lock);
>+
>+	rcu_read_lock();
>+	leaf = __kvm_tdp_mmu_get_walk(vcpu, gpa, sptes, true);
>+	rcu_read_unlock();
>+	if (leaf < 0)
>+		return -ENOENT;
>+
>+	spte = sptes[leaf];
>+	if (!(is_shadow_present_pte(spte) && is_last_spte(spte, leaf)))
>+		return -ENOENT;
>+
>+	*pfn = spte_to_pfn(spte);
>+	return leaf;
>+}
>+EXPORT_SYMBOL_GPL(kvm_tdp_mmu_get_walk_private_pfn);
>+
> /*
>  * Returns the last level spte pointer of the shadow page walk for the given
>  * gpa, and sets *spte to the spte value. This spte may be non-preset. If no
>-- 
>2.34.1
>
>
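
As a rough sketch of the generalized helper suggested above (the name
kvm_tdp_mmu_gpa_to_pfn() comes from the comment; the body simply mirrors the
patch with the is_private flag relayed to the internal walker), it could look
something like:

int kvm_tdp_mmu_gpa_to_pfn(struct kvm_vcpu *vcpu, u64 gpa, bool is_private,
			   kvm_pfn_t *pfn)
{
	u64 sptes[PT64_ROOT_MAX_LEVEL + 1], spte;
	int leaf;

	lockdep_assert_held(&vcpu->kvm->mmu_lock);

	rcu_read_lock();
	/* Relay is_private so the walker can pick the matching root. */
	leaf = __kvm_tdp_mmu_get_walk(vcpu, gpa, sptes, is_private);
	rcu_read_unlock();
	if (leaf < 0)
		return -ENOENT;

	spte = sptes[leaf];
	if (!(is_shadow_present_pte(spte) && is_last_spte(spte, leaf)))
		return -ENOENT;

	*pfn = spte_to_pfn(spte);
	return leaf;
}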
Isaku Yamahata May 17, 2024, 9:08 a.m. UTC | #2
On Fri, May 17, 2024 at 03:44:27PM +0800,
Chao Gao <chao.gao@intel.com> wrote:

> On Tue, May 14, 2024 at 05:59:39PM -0700, Rick Edgecombe wrote:
> >+int kvm_tdp_mmu_get_walk_private_pfn(struct kvm_vcpu *vcpu, u64 gpa,
> >+				     kvm_pfn_t *pfn)
> 
> private_pfn is probably a misnomer: shared/private is an attribute of the
> GPA rather than of the PFN. Since the function gets a PFN from a GPA, how
> about kvm_tdp_mmu_gpa_to_pfn()?
> 
> And the function is limited to handling private GPAs only. That is an
> artificial limitation we can get rid of easily, e.g. by making the function
> take an "is_private" boolean and relaying it to __kvm_tdp_mmu_get_walk().
> I know TDX just calls the function to convert private GPAs, but having a
> generic API can accommodate future use cases (e.g. getting the HPA for a
> shared GPA) without the need for refactoring.

Agreed.  Based on a patch at [1], we can have something like
int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 gpa,
                         enum kvm_tdp_mmu_root_types root_type,
                         kvm_pfn_t *pfn);


[1] https://lore.kernel.org/kvm/55c24448fdf42d383d45601ff6c0b07f44f61787.camel@intel.com/
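
Purely as an illustration of how a call site would change under that
signature (the enum and the KVM_MIRROR_ROOTS value are assumptions based on
[1] and may not match the final series):

	kvm_pfn_t pfn;
	int level;

	/* Walk the mirror (private) root for this GPA without modifying it. */
	level = kvm_tdp_mmu_get_walk(vcpu, gpa, KVM_MIRROR_ROOTS, &pfn);
	if (level < 0)
		return -ENOENT;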

Patch

diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index dc80e72e4848..3c7a88400cbb 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -275,6 +275,9 @@  extern bool tdp_mmu_enabled;
 #define tdp_mmu_enabled false
 #endif
 
+int kvm_tdp_mmu_get_walk_private_pfn(struct kvm_vcpu *vcpu, u64 gpa,
+				     kvm_pfn_t *pfn);
+
 static inline bool kvm_memslots_have_rmaps(struct kvm *kvm)
 {
 	return !tdp_mmu_enabled || kvm_shadow_root_allocated(kvm);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 1259dd63defc..1086e3b2aa5c 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1772,16 +1772,14 @@  bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
  *
  * Must be called between kvm_tdp_mmu_walk_lockless_{begin,end}.
  */
-int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
-			 int *root_level)
+static int __kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+				  bool is_private)
 {
 	struct tdp_iter iter;
 	struct kvm_mmu *mmu = vcpu->arch.mmu;
 	gfn_t gfn = addr >> PAGE_SHIFT;
 	int leaf = -1;
 
-	*root_level = vcpu->arch.mmu->root_role.level;
-
 	tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
 		leaf = iter.level;
 		sptes[leaf] = iter.old_spte;
@@ -1790,6 +1788,37 @@  int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
 	return leaf;
 }
 
+int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+			 int *root_level)
+{
+	*root_level = vcpu->arch.mmu->root_role.level;
+
+	return __kvm_tdp_mmu_get_walk(vcpu, addr, sptes, false);
+}
+
+int kvm_tdp_mmu_get_walk_private_pfn(struct kvm_vcpu *vcpu, u64 gpa,
+				     kvm_pfn_t *pfn)
+{
+	u64 sptes[PT64_ROOT_MAX_LEVEL + 1], spte;
+	int leaf;
+
+	lockdep_assert_held(&vcpu->kvm->mmu_lock);
+
+	rcu_read_lock();
+	leaf = __kvm_tdp_mmu_get_walk(vcpu, gpa, sptes, true);
+	rcu_read_unlock();
+	if (leaf < 0)
+		return -ENOENT;
+
+	spte = sptes[leaf];
+	if (!(is_shadow_present_pte(spte) && is_last_spte(spte, leaf)))
+		return -ENOENT;
+
+	*pfn = spte_to_pfn(spte);
+	return leaf;
+}
+EXPORT_SYMBOL_GPL(kvm_tdp_mmu_get_walk_private_pfn);
+
 /*
  * Returns the last level spte pointer of the shadow page walk for the given
  * gpa, and sets *spte to the spte value. This spte may be non-preset. If no