diff mbox series

[RFC,03/15] KVM: x86/mmu: Automatically update iter->old_spte if cmpxchg fails

Message ID 20211119235759.1304274-4-dmatlack@google.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86/mmu: Eager Page Splitting for the TDP MMU | expand

Commit Message

David Matlack Nov. 19, 2021, 11:57 p.m. UTC
Consolidate the code at several call sites that manually re-reads the
SPTE when the cmpxchg fails by having tdp_mmu_set_spte_atomic() update
iter->old_spte itself. This adds no extra cost because the cmpxchg
already returns the current SPTE value (in fact, it eliminates the
separate re-read of the SPTE), and none of the call sites depend on
iter->old_spte retaining the stale SPTE value.

Signed-off-by: David Matlack <dmatlack@google.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 56 ++++++++++++--------------------------
 1 file changed, 18 insertions(+), 38 deletions(-)

Comments

Ben Gardon Nov. 22, 2021, 6:52 p.m. UTC | #1
On Fri, Nov 19, 2021 at 3:58 PM David Matlack <dmatlack@google.com> wrote:
>
> Consolidate a bunch of code that was manually re-reading the spte if the
> cmpxchg fails. There is no extra cost of doing this because we already
> have the spte value as a result of the cmpxchg (and in fact this
> eliminates re-reading the spte), and none of the call sites depend on
> iter->old_spte retaining the stale spte value.
>
> Signed-off-by: David Matlack <dmatlack@google.com>
> ---
>  arch/x86/kvm/mmu/tdp_mmu.c | 56 ++++++++++++--------------------------
>  1 file changed, 18 insertions(+), 38 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index 377a96718a2e..cc9fe33c9b36 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -492,16 +492,22 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
>   * and handle the associated bookkeeping.  Do not mark the page dirty
>   * in KVM's dirty bitmaps.
>   *
> + * If setting the SPTE fails because it has changed, iter->old_spte will be
> + * updated with the updated value of the spte.
> + *
>   * @kvm: kvm instance
>   * @iter: a tdp_iter instance currently on the SPTE that should be set
>   * @new_spte: The value the SPTE should be set to
>   * Returns: true if the SPTE was set, false if it was not. If false is returned,
> - *         this function will have no side-effects.
> + *          this function will have no side-effects other than updating
> + *          iter->old_spte to the latest value of spte.
>   */
>  static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
>                                            struct tdp_iter *iter,
>                                            u64 new_spte)
>  {
> +       u64 old_spte;
> +
>         lockdep_assert_held_read(&kvm->mmu_lock);
>
>         /*
> @@ -515,9 +521,11 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
>          * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
>          * does not hold the mmu_lock.
>          */
> -       if (cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte,
> -                     new_spte) != iter->old_spte)
> +       old_spte = cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte, new_spte);

This probably deserves a comment:

/*
 * If the old_spte values differ, the cmpxchg failed. Update
 * iter->old_spte with the value inserted by another thread.
 */

> +       if (old_spte != iter->old_spte) {
> +               iter->old_spte = old_spte;
>                 return false;
> +       }
>
>         __handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
>                               new_spte, iter->level, true);
> @@ -747,14 +755,8 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
>                 if (!shared) {
>                         tdp_mmu_set_spte(kvm, &iter, 0);
>                         flush = true;
> -               } else if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) {
> -                       /*
> -                        * The iter must explicitly re-read the SPTE because
> -                        * the atomic cmpxchg failed.
> -                        */
> -                       iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));

I think kernel style is to keep the curly braces on the else-if branch
when the preceding if branch has them.


> +               } else if (!tdp_mmu_zap_spte_atomic(kvm, &iter))
>                         goto retry;
> -               }
>         }
>
>         rcu_read_unlock();
> @@ -978,13 +980,6 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>                     is_large_pte(iter.old_spte)) {
>                         if (!tdp_mmu_zap_spte_atomic(vcpu->kvm, &iter))
>                                 break;
> -
> -                       /*
> -                        * The iter must explicitly re-read the spte here
> -                        * because the new value informs the !present
> -                        * path below.
> -                        */
> -                       iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
>                 }
>
>                 if (!is_shadow_present_pte(iter.old_spte)) {
> @@ -1190,14 +1185,9 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
>
>                 new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
>
> -               if (!tdp_mmu_set_spte_atomic(kvm, &iter, new_spte)) {
> -                       /*
> -                        * The iter must explicitly re-read the SPTE because
> -                        * the atomic cmpxchg failed.
> -                        */
> -                       iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
> +               if (!tdp_mmu_set_spte_atomic(kvm, &iter, new_spte))
>                         goto retry;
> -               }
> +
>                 spte_set = true;
>         }
>
> @@ -1258,14 +1248,9 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
>                                 continue;
>                 }
>
> -               if (!tdp_mmu_set_spte_atomic(kvm, &iter, new_spte)) {
> -                       /*
> -                        * The iter must explicitly re-read the SPTE because
> -                        * the atomic cmpxchg failed.
> -                        */
> -                       iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
> +               if (!tdp_mmu_set_spte_atomic(kvm, &iter, new_spte))
>                         goto retry;
> -               }
> +
>                 spte_set = true;
>         }
>
> @@ -1391,14 +1376,9 @@ static bool zap_collapsible_spte_range(struct kvm *kvm,
>                                                             pfn, PG_LEVEL_NUM))
>                         continue;
>
> -               if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) {
> -                       /*
> -                        * The iter must explicitly re-read the SPTE because
> -                        * the atomic cmpxchg failed.
> -                        */
> -                       iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
> +               if (!tdp_mmu_zap_spte_atomic(kvm, &iter))
>                         goto retry;
> -               }
> +
>                 flush = true;
>         }
>
> --
> 2.34.0.rc2.393.gf8c9666880-goog
>
David Matlack Nov. 30, 2021, 11:25 p.m. UTC | #2
On Mon, Nov 22, 2021 at 10:52 AM Ben Gardon <bgardon@google.com> wrote:
>
> On Fri, Nov 19, 2021 at 3:58 PM David Matlack <dmatlack@google.com> wrote:
> >
> > Consolidate a bunch of code that was manually re-reading the spte if the
> > cmpxchg fails. There is no extra cost of doing this because we already
> > have the spte value as a result of the cmpxchg (and in fact this
> > eliminates re-reading the spte), and none of the call sites depend on
> > iter->old_spte retaining the stale spte value.
> >
> > Signed-off-by: David Matlack <dmatlack@google.com>
> > ---
> >  arch/x86/kvm/mmu/tdp_mmu.c | 56 ++++++++++++--------------------------
> >  1 file changed, 18 insertions(+), 38 deletions(-)
> >
> > diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> > index 377a96718a2e..cc9fe33c9b36 100644
> > --- a/arch/x86/kvm/mmu/tdp_mmu.c
> > +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> > @@ -492,16 +492,22 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
> >   * and handle the associated bookkeeping.  Do not mark the page dirty
> >   * in KVM's dirty bitmaps.
> >   *
> > + * If setting the SPTE fails because it has changed, iter->old_spte will be
> > + * updated with the updated value of the spte.
> > + *
> >   * @kvm: kvm instance
> >   * @iter: a tdp_iter instance currently on the SPTE that should be set
> >   * @new_spte: The value the SPTE should be set to
> >   * Returns: true if the SPTE was set, false if it was not. If false is returned,
> > - *         this function will have no side-effects.
> > + *          this function will have no side-effects other than updating
> > + *          iter->old_spte to the latest value of spte.
> >   */
> >  static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
> >                                            struct tdp_iter *iter,
> >                                            u64 new_spte)
> >  {
> > +       u64 old_spte;
> > +
> >         lockdep_assert_held_read(&kvm->mmu_lock);
> >
> >         /*
> > @@ -515,9 +521,11 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
> >          * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
> >          * does not hold the mmu_lock.
> >          */
> > -       if (cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte,
> > -                     new_spte) != iter->old_spte)
> > +       old_spte = cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte, new_spte);
>
> This probably deserves a comment:
>
> /*
>  * If the old_spte values differ, the cmpxchg failed. Update
> iter->old_spte with the value inserted by
>  * another thread.
>  */

Will do.

>
> > +       if (old_spte != iter->old_spte) {
> > +               iter->old_spte = old_spte;
> >                 return false;
> > +       }
> >
> >         __handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
> >                               new_spte, iter->level, true);
> > @@ -747,14 +755,8 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
> >                 if (!shared) {
> >                         tdp_mmu_set_spte(kvm, &iter, 0);
> >                         flush = true;
> > -               } else if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) {
> > -                       /*
> > -                        * The iter must explicitly re-read the SPTE because
> > -                        * the atomic cmpxchg failed.
> > -                        */
> > -                       iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
>
> I think kernel style is to include the curly braces on the else if, if
> the if had them.

You are correct! Will fix in v1.

>
>
> > +               } else if (!tdp_mmu_zap_spte_atomic(kvm, &iter))
> >                         goto retry;
> > -               }
> >         }
> >
> >         rcu_read_unlock();
> > @@ -978,13 +980,6 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> >                     is_large_pte(iter.old_spte)) {
> >                         if (!tdp_mmu_zap_spte_atomic(vcpu->kvm, &iter))
> >                                 break;
> > -
> > -                       /*
> > -                        * The iter must explicitly re-read the spte here
> > -                        * because the new value informs the !present
> > -                        * path below.
> > -                        */
> > -                       iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
> >                 }
> >
> >                 if (!is_shadow_present_pte(iter.old_spte)) {
> > @@ -1190,14 +1185,9 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
> >
> >                 new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
> >
> > -               if (!tdp_mmu_set_spte_atomic(kvm, &iter, new_spte)) {
> > -                       /*
> > -                        * The iter must explicitly re-read the SPTE because
> > -                        * the atomic cmpxchg failed.
> > -                        */
> > -                       iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
> > +               if (!tdp_mmu_set_spte_atomic(kvm, &iter, new_spte))
> >                         goto retry;
> > -               }
> > +
> >                 spte_set = true;
> >         }
> >
> > @@ -1258,14 +1248,9 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
> >                                 continue;
> >                 }
> >
> > -               if (!tdp_mmu_set_spte_atomic(kvm, &iter, new_spte)) {
> > -                       /*
> > -                        * The iter must explicitly re-read the SPTE because
> > -                        * the atomic cmpxchg failed.
> > -                        */
> > -                       iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
> > +               if (!tdp_mmu_set_spte_atomic(kvm, &iter, new_spte))
> >                         goto retry;
> > -               }
> > +
> >                 spte_set = true;
> >         }
> >
> > @@ -1391,14 +1376,9 @@ static bool zap_collapsible_spte_range(struct kvm *kvm,
> >                                                             pfn, PG_LEVEL_NUM))
> >                         continue;
> >
> > -               if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) {
> > -                       /*
> > -                        * The iter must explicitly re-read the SPTE because
> > -                        * the atomic cmpxchg failed.
> > -                        */
> > -                       iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
> > +               if (!tdp_mmu_zap_spte_atomic(kvm, &iter))
> >                         goto retry;
> > -               }
> > +
> >                 flush = true;
> >         }
> >
> > --
> > 2.34.0.rc2.393.gf8c9666880-goog
> >
diff mbox series

Patch

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 377a96718a2e..cc9fe33c9b36 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -492,16 +492,22 @@  static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
  * and handle the associated bookkeeping.  Do not mark the page dirty
  * in KVM's dirty bitmaps.
  *
+ * If setting the SPTE fails because it has changed, iter->old_spte will be
+ * updated with the updated value of the spte.
+ *
  * @kvm: kvm instance
  * @iter: a tdp_iter instance currently on the SPTE that should be set
  * @new_spte: The value the SPTE should be set to
  * Returns: true if the SPTE was set, false if it was not. If false is returned,
- *	    this function will have no side-effects.
+ *          this function will have no side-effects other than updating
+ *          iter->old_spte to the latest value of spte.
  */
 static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
 					   struct tdp_iter *iter,
 					   u64 new_spte)
 {
+	u64 old_spte;
+
 	lockdep_assert_held_read(&kvm->mmu_lock);
 
 	/*
@@ -515,9 +521,11 @@  static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
 	 * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
 	 * does not hold the mmu_lock.
 	 */
-	if (cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte,
-		      new_spte) != iter->old_spte)
+	old_spte = cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte, new_spte);
+	if (old_spte != iter->old_spte) {
+		iter->old_spte = old_spte;
 		return false;
+	}
 
 	__handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
 			      new_spte, iter->level, true);
@@ -747,14 +755,8 @@  static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 		if (!shared) {
 			tdp_mmu_set_spte(kvm, &iter, 0);
 			flush = true;
-		} else if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) {
-			/*
-			 * The iter must explicitly re-read the SPTE because
-			 * the atomic cmpxchg failed.
-			 */
-			iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
+		} else if (!tdp_mmu_zap_spte_atomic(kvm, &iter))
 			goto retry;
-		}
 	}
 
 	rcu_read_unlock();
@@ -978,13 +980,6 @@  int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		    is_large_pte(iter.old_spte)) {
 			if (!tdp_mmu_zap_spte_atomic(vcpu->kvm, &iter))
 				break;
-
-			/*
-			 * The iter must explicitly re-read the spte here
-			 * because the new value informs the !present
-			 * path below.
-			 */
-			iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
 		}
 
 		if (!is_shadow_present_pte(iter.old_spte)) {
@@ -1190,14 +1185,9 @@  static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 
 		new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
 
-		if (!tdp_mmu_set_spte_atomic(kvm, &iter, new_spte)) {
-			/*
-			 * The iter must explicitly re-read the SPTE because
-			 * the atomic cmpxchg failed.
-			 */
-			iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
+		if (!tdp_mmu_set_spte_atomic(kvm, &iter, new_spte))
 			goto retry;
-		}
+
 		spte_set = true;
 	}
 
@@ -1258,14 +1248,9 @@  static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 				continue;
 		}
 
-		if (!tdp_mmu_set_spte_atomic(kvm, &iter, new_spte)) {
-			/*
-			 * The iter must explicitly re-read the SPTE because
-			 * the atomic cmpxchg failed.
-			 */
-			iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
+		if (!tdp_mmu_set_spte_atomic(kvm, &iter, new_spte))
 			goto retry;
-		}
+
 		spte_set = true;
 	}
 
@@ -1391,14 +1376,9 @@  static bool zap_collapsible_spte_range(struct kvm *kvm,
 							    pfn, PG_LEVEL_NUM))
 			continue;
 
-		if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) {
-			/*
-			 * The iter must explicitly re-read the SPTE because
-			 * the atomic cmpxchg failed.
-			 */
-			iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
+		if (!tdp_mmu_zap_spte_atomic(kvm, &iter))
 			goto retry;
-		}
+
 		flush = true;
 	}