
[v2,3/6] KVM: x86/mmu: Make walk_shadow_page_lockless_{begin,end} interoperate with the TDP MMU

Message ID 20210630214802.1902448-4-dmatlack@google.com (mailing list archive)
State New, archived
Series KVM: x86/mmu: Fast page fault support for the TDP MMU

Commit Message

David Matlack June 30, 2021, 9:47 p.m. UTC
Acquire the RCU read lock in walk_shadow_page_lockless_begin and release
it in walk_shadow_page_lockless_end when the TDP MMU is enabled.  This
should not introduce any functional changes but is used in the following
commit to make fast_page_fault interoperate with the TDP MMU.

Signed-off-by: David Matlack <dmatlack@google.com>
---
 arch/x86/kvm/mmu/mmu.c     | 24 ++++++++++++++++++------
 arch/x86/kvm/mmu/tdp_mmu.c | 20 ++++++++++++++------
 arch/x86/kvm/mmu/tdp_mmu.h |  6 ++++--
 3 files changed, 36 insertions(+), 14 deletions(-)

Comments

Ben Gardon July 12, 2021, 5:02 p.m. UTC | #1
On Wed, Jun 30, 2021 at 2:48 PM David Matlack <dmatlack@google.com> wrote:
>
> Acquire the RCU read lock in walk_shadow_page_lockless_begin and release
> it in walk_shadow_page_lockless_end when the TDP MMU is enabled.  This
> should not introduce any functional changes but is used in the following
> commit to make fast_page_fault interoperate with the TDP MMU.
>
> Signed-off-by: David Matlack <dmatlack@google.com>

Reviewed-by: Ben Gardon <bgardon@google.com>

As I understand this, we're just lifting the rcu_read_lock/unlock out
of kvm_tdp_mmu_get_walk, and then putting all the TDP-MMU-specific
handling down a level, under walk_shadow_page_lockless_begin/end and
get_walk.

Instead of moving kvm_tdp_mmu_get_walk_lockless into get_walk, it
could also be open-coded as:

walk_shadow_page_lockless_begin
 if (is_tdp_mmu(vcpu->arch.mmu))
               leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
 else
               leaf = get_walk(vcpu, addr, sptes, &root);
walk_shadow_page_lockless_end

in get_mmio_spte, since get_walk isn't used anywhere else. Then
walk_shadow_page_lockless_begin/end could also be moved up out of
get_walk instead of having to add a goto to that function.
I don't have a strong preference either way, but the above feels like
a slightly simpler refactor.
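
For reference, a rough sketch of what get_mmio_spte() might look like with
that open-coded approach. This is only a sketch: it assumes
kvm_tdp_mmu_get_walk() has its rcu_read_lock/unlock lifted out as in this
patch, that the local declarations mirror the existing function, and the
reserved-bits handling that follows is unchanged and omitted:

static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
{
        u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
        int root, leaf, level;
        bool reserved = false;

        /* RCU for the TDP MMU, vcpu->mode-based protection for the legacy MMU. */
        walk_shadow_page_lockless_begin(vcpu);

        if (is_tdp_mmu(vcpu->arch.mmu))
                leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
        else
                leaf = get_walk(vcpu, addr, sptes, &root);

        walk_shadow_page_lockless_end(vcpu);

        if (unlikely(leaf < 0)) {
                *sptep = 0ull;
                return reserved;
        }

        /* ... rest of get_mmio_spte() unchanged ... */
}

This keeps the TDP MMU vs. legacy MMU dispatch in one place in
get_mmio_spte() rather than buried inside get_walk().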


> ---
>  arch/x86/kvm/mmu/mmu.c     | 24 ++++++++++++++++++------
>  arch/x86/kvm/mmu/tdp_mmu.c | 20 ++++++++++++++------
>  arch/x86/kvm/mmu/tdp_mmu.h |  6 ++++--
>  3 files changed, 36 insertions(+), 14 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 45274436d3c0..88c71a8a55f1 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -686,6 +686,11 @@ static bool mmu_spte_age(u64 *sptep)
>
>  static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
>  {
> +       if (is_tdp_mmu(vcpu->arch.mmu)) {
> +               kvm_tdp_mmu_walk_lockless_begin();
> +               return;
> +       }
> +
>         /*
>          * Prevent page table teardown by making any free-er wait during
>          * kvm_flush_remote_tlbs() IPI to all active vcpus.
> @@ -701,6 +706,11 @@ static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
>
>  static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
>  {
> +       if (is_tdp_mmu(vcpu->arch.mmu)) {
> +               kvm_tdp_mmu_walk_lockless_end();
> +               return;
> +       }
> +
>         /*
>          * Make sure the write to vcpu->mode is not reordered in front of
>          * reads to sptes.  If it does, kvm_mmu_commit_zap_page() can see us
> @@ -3621,6 +3631,12 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level
>
>         walk_shadow_page_lockless_begin(vcpu);
>
> +       if (is_tdp_mmu(vcpu->arch.mmu)) {
> +               leaf = kvm_tdp_mmu_get_walk_lockless(vcpu, addr, sptes,
> +                                                    root_level);
> +               goto out;
> +       }
> +
>         for (shadow_walk_init(&iterator, vcpu, addr),
>              *root_level = iterator.level;
>              shadow_walk_okay(&iterator);
> @@ -3634,8 +3650,8 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level
>                         break;
>         }
>
> +out:
>         walk_shadow_page_lockless_end(vcpu);
> -
>         return leaf;
>  }
>
> @@ -3647,11 +3663,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
>         int root, leaf, level;
>         bool reserved = false;
>
> -       if (is_tdp_mmu(vcpu->arch.mmu))
> -               leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
> -       else
> -               leaf = get_walk(vcpu, addr, sptes, &root);
> -
> +       leaf = get_walk(vcpu, addr, sptes, &root);
>         if (unlikely(leaf < 0)) {
>                 *sptep = 0ull;
>                 return reserved;
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index caac4ddb46df..c6fa8d00bf9f 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -1513,12 +1513,24 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
>         return spte_set;
>  }
>
> +void kvm_tdp_mmu_walk_lockless_begin(void)
> +{
> +       rcu_read_lock();
> +}
> +
> +void kvm_tdp_mmu_walk_lockless_end(void)
> +{
> +       rcu_read_unlock();
> +}
> +
>  /*
>   * Return the level of the lowest level SPTE added to sptes.
>   * That SPTE may be non-present.
> + *
> + * Must be called between kvm_tdp_mmu_walk_lockless_{begin,end}.
>   */
> -int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
> -                        int *root_level)
> +int kvm_tdp_mmu_get_walk_lockless(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
> +                                 int *root_level)
>  {
>         struct tdp_iter iter;
>         struct kvm_mmu *mmu = vcpu->arch.mmu;
> @@ -1527,14 +1539,10 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
>
>         *root_level = vcpu->arch.mmu->shadow_root_level;
>
> -       rcu_read_lock();
> -
>         tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
>                 leaf = iter.level;
>                 sptes[leaf] = iter.old_spte;
>         }
>
> -       rcu_read_unlock();
> -
>         return leaf;
>  }
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
> index 1cae4485b3bc..e9dde5f9c0ef 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.h
> +++ b/arch/x86/kvm/mmu/tdp_mmu.h
> @@ -77,8 +77,10 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
>                                    struct kvm_memory_slot *slot, gfn_t gfn,
>                                    int min_level);
>
> -int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
> -                        int *root_level);
> +void kvm_tdp_mmu_walk_lockless_begin(void);
> +void kvm_tdp_mmu_walk_lockless_end(void);
> +int kvm_tdp_mmu_get_walk_lockless(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
> +                                 int *root_level);
>
>  #ifdef CONFIG_X86_64
>  bool kvm_mmu_init_tdp_mmu(struct kvm *kvm);
> --
> 2.32.0.93.g670b81a890-goog
>
David Matlack July 12, 2021, 6:11 p.m. UTC | #2
On Mon, Jul 12, 2021 at 10:02:31AM -0700, Ben Gardon wrote:
> On Wed, Jun 30, 2021 at 2:48 PM David Matlack <dmatlack@google.com> wrote:
> >
> > Acquire the RCU read lock in walk_shadow_page_lockless_begin and release
> > it in walk_shadow_page_lockless_end when the TDP MMU is enabled.  This
> > should not introduce any functional changes but is used in the following
> > commit to make fast_page_fault interoperate with the TDP MMU.
> >
> > Signed-off-by: David Matlack <dmatlack@google.com>
> 
> Reviewed-by: Ben Gardon <bgardon@google.com>
> 
> As I understand this, we're just lifting the rcu_read_lock/unlock out
> of kvm_tdp_mmu_get_walk, and then putting all the TDP-MMU-specific
> handling down a level, under walk_shadow_page_lockless_begin/end and
> get_walk.
> 
> Instead of moving kvm_tdp_mmu_get_walk_lockless into get_walk, it
> could also be open-coded as:
> 
> walk_shadow_page_lockless_begin
>  if (is_tdp_mmu(vcpu->arch.mmu))
>                leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
>  else
>                leaf = get_walk(vcpu, addr, sptes, &root);
> walk_shadow_page_lockless_end
> 
> in get_mmio_spte, since get_walk isn't used anywhere else. Then
> walk_shadow_page_lockless_begin/end could also be moved up out of
> get_walk instead of having to add a goto to that function.
> I don't have a strong preference either way, but the above feels like
> a slightly simpler refactor.

Likewise, I don't have a strong preference either way. I'd be happy to
switch to your suggestion in v3.
Sean Christopherson July 12, 2021, 8:20 p.m. UTC | #3
On Mon, Jul 12, 2021, David Matlack wrote:
> On Mon, Jul 12, 2021 at 10:02:31AM -0700, Ben Gardon wrote:
> > On Wed, Jun 30, 2021 at 2:48 PM David Matlack <dmatlack@google.com> wrote:
> > >
> > > Acquire the RCU read lock in walk_shadow_page_lockless_begin and release
> > > it in walk_shadow_page_lockless_end when the TDP MMU is enabled.  This
> > > should not introduce any functional changes but is used in the following
> > > commit to make fast_page_fault interoperate with the TDP MMU.
> > >
> > > Signed-off-by: David Matlack <dmatlack@google.com>
> > 
> > Reviewed-by: Ben Gardon <bgardon@google.com>
> > 
> > As I understand this, we're just lifting the rcu_read_lock/unlock out
> > of kvm_tdp_mmu_get_walk, and then putting all the TDP-MMU-specific
> > handling down a level, under walk_shadow_page_lockless_begin/end and
> > get_walk.
> > 
> > Instead of moving kvm_tdp_mmu_get_walk_lockless into get_walk, it
> > could also be open-coded as:
> > 
> > walk_shadow_page_lockless_begin
> >  if (is_tdp_mmu(vcpu->arch.mmu))
> >                leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
> >  else
> >                leaf = get_walk(vcpu, addr, sptes, &root);
> > walk_shadow_page_lockless_end
> > 
> > in get_mmio_spte, since get_walk isn't used anywhere else. Then
> > walk_shadow_page_lockless_begin/end could also be moved up out of
> > get_walk instead of having to add a goto to that function.
> > I don't have a strong preference either way, but the above feels like
> > a slightly simpler refactor.
> 
> Likewise, I don't have a strong preference either way. I'd be happy to
> switch to your suggestion in v3.

I vote for Ben's suggestion.  As is, I think it would be too easy to overlook the
TDP MMU path in get_walk() and focus only on the for-loop.
Sean Christopherson July 12, 2021, 8:23 p.m. UTC | #4
On Wed, Jun 30, 2021, David Matlack wrote:
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index caac4ddb46df..c6fa8d00bf9f 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -1513,12 +1513,24 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
>  	return spte_set;
>  }
>  
> +void kvm_tdp_mmu_walk_lockless_begin(void)
> +{
> +	rcu_read_lock();
> +}
> +
> +void kvm_tdp_mmu_walk_lockless_end(void)
> +{
> +	rcu_read_unlock();
> +}

I vote to make these static inlines.  They're nops in a non-debug build, and it's
not like it's a secret that the TDP MMU relies on RCU to protect its page tables.
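
For illustration, a minimal sketch of that variant, with the helpers defined
as static inlines in tdp_mmu.h instead of out-of-line in tdp_mmu.c (same
names and bodies as in this patch):

/* arch/x86/kvm/mmu/tdp_mmu.h */
static inline void kvm_tdp_mmu_walk_lockless_begin(void)
{
        rcu_read_lock();
}

static inline void kvm_tdp_mmu_walk_lockless_end(void)
{
        rcu_read_unlock();
}

The out-of-line definitions in tdp_mmu.c and the separate declarations in
tdp_mmu.h would then be dropped, matching the observation above that these
helpers are no-ops in a non-debug build.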

Patch

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 45274436d3c0..88c71a8a55f1 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -686,6 +686,11 @@  static bool mmu_spte_age(u64 *sptep)
 
 static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
 {
+	if (is_tdp_mmu(vcpu->arch.mmu)) {
+		kvm_tdp_mmu_walk_lockless_begin();
+		return;
+	}
+
 	/*
 	 * Prevent page table teardown by making any free-er wait during
 	 * kvm_flush_remote_tlbs() IPI to all active vcpus.
@@ -701,6 +706,11 @@  static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
 
 static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
 {
+	if (is_tdp_mmu(vcpu->arch.mmu)) {
+		kvm_tdp_mmu_walk_lockless_end();
+		return;
+	}
+
 	/*
 	 * Make sure the write to vcpu->mode is not reordered in front of
 	 * reads to sptes.  If it does, kvm_mmu_commit_zap_page() can see us
@@ -3621,6 +3631,12 @@  static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level
 
 	walk_shadow_page_lockless_begin(vcpu);
 
+	if (is_tdp_mmu(vcpu->arch.mmu)) {
+		leaf = kvm_tdp_mmu_get_walk_lockless(vcpu, addr, sptes,
+						     root_level);
+		goto out;
+	}
+
 	for (shadow_walk_init(&iterator, vcpu, addr),
 	     *root_level = iterator.level;
 	     shadow_walk_okay(&iterator);
@@ -3634,8 +3650,8 @@  static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level
 			break;
 	}
 
+out:
 	walk_shadow_page_lockless_end(vcpu);
-
 	return leaf;
 }
 
@@ -3647,11 +3663,7 @@  static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 	int root, leaf, level;
 	bool reserved = false;
 
-	if (is_tdp_mmu(vcpu->arch.mmu))
-		leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
-	else
-		leaf = get_walk(vcpu, addr, sptes, &root);
-
+	leaf = get_walk(vcpu, addr, sptes, &root);
 	if (unlikely(leaf < 0)) {
 		*sptep = 0ull;
 		return reserved;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index caac4ddb46df..c6fa8d00bf9f 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1513,12 +1513,24 @@  bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
 	return spte_set;
 }
 
+void kvm_tdp_mmu_walk_lockless_begin(void)
+{
+	rcu_read_lock();
+}
+
+void kvm_tdp_mmu_walk_lockless_end(void)
+{
+	rcu_read_unlock();
+}
+
 /*
  * Return the level of the lowest level SPTE added to sptes.
  * That SPTE may be non-present.
+ *
+ * Must be called between kvm_tdp_mmu_walk_lockless_{begin,end}.
  */
-int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
-			 int *root_level)
+int kvm_tdp_mmu_get_walk_lockless(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+				  int *root_level)
 {
 	struct tdp_iter iter;
 	struct kvm_mmu *mmu = vcpu->arch.mmu;
@@ -1527,14 +1539,10 @@  int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
 
 	*root_level = vcpu->arch.mmu->shadow_root_level;
 
-	rcu_read_lock();
-
 	tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
 		leaf = iter.level;
 		sptes[leaf] = iter.old_spte;
 	}
 
-	rcu_read_unlock();
-
 	return leaf;
 }
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 1cae4485b3bc..e9dde5f9c0ef 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -77,8 +77,10 @@  bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
 				   struct kvm_memory_slot *slot, gfn_t gfn,
 				   int min_level);
 
-int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
-			 int *root_level);
+void kvm_tdp_mmu_walk_lockless_begin(void);
+void kvm_tdp_mmu_walk_lockless_end(void);
+int kvm_tdp_mmu_get_walk_lockless(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+				  int *root_level);
 
 #ifdef CONFIG_X86_64
 bool kvm_mmu_init_tdp_mmu(struct kvm *kvm);