[v1,03/13] KVM: x86/mmu: Automatically update iter->old_spte if cmpxchg fails

Message ID 20211213225918.672507-4-dmatlack@google.com
State New, archived
Series KVM: x86/mmu: Eager Page Splitting for the TDP MMU

Commit Message

David Matlack Dec. 13, 2021, 10:59 p.m. UTC
Consolidate a bunch of code that manually re-reads the spte when the
cmpxchg fails. There is no extra cost to doing this because we already
have the spte value as a result of the cmpxchg (and in fact this
eliminates re-reading the spte), and none of the call sites depend on
iter->old_spte retaining the stale spte value.

Signed-off-by: David Matlack <dmatlack@google.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 50 ++++++++++++++++----------------------
 1 file changed, 21 insertions(+), 29 deletions(-)
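
In outline, each call site's failure path collapses from a manual
re-read into a bare retry. A simplified before/after sketch of the
pattern (details elided; see the full diff below):

	/* Before: every caller re-read the SPTE by hand on failure. */
	if (!tdp_mmu_set_spte_atomic(kvm, &iter, new_spte)) {
		iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
		goto retry;
	}

	/* After: the helper refreshes iter.old_spte itself. */
	if (!tdp_mmu_set_spte_atomic(kvm, &iter, new_spte))
		goto retry;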

Comments

Peter Xu Jan. 4, 2022, 10:13 a.m. UTC | #1
On Mon, Dec 13, 2021 at 10:59:08PM +0000, David Matlack wrote:
> @@ -985,6 +992,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>  			 * path below.
>  			 */
>  			iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
> +

Useless empty line?

Other than that:

Reviewed-by: Peter Xu <peterx@redhat.com>
Ben Gardon Jan. 4, 2022, 5:29 p.m. UTC | #2
On Tue, Jan 4, 2022 at 2:13 AM Peter Xu <peterx@redhat.com> wrote:
>
> On Mon, Dec 13, 2021 at 10:59:08PM +0000, David Matlack wrote:
> > @@ -985,6 +992,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> >                        * path below.
> >                        */
> >                       iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
> > +
>
> Useless empty line?
>
> Other than that:
>
> Reviewed-by: Peter Xu <peterx@redhat.com>
>
> --
> Peter Xu
>

Looks good to me too.

Reviewed-by: Ben Gardon <bgardon@google.com>
Sean Christopherson Jan. 6, 2022, 12:54 a.m. UTC | #3
On Mon, Dec 13, 2021, David Matlack wrote:
> Consolidate a bunch of code that manually re-reads the spte when the
> cmpxchg fails. There is no extra cost to doing this because we already
> have the spte value as a result of the cmpxchg (and in fact this
> eliminates re-reading the spte), and none of the call sites depend on
> iter->old_spte retaining the stale spte value.
> 
> Signed-off-by: David Matlack <dmatlack@google.com>
> ---
>  arch/x86/kvm/mmu/tdp_mmu.c | 50 ++++++++++++++++----------------------
>  1 file changed, 21 insertions(+), 29 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index b69e47e68307..656ebf5b20dc 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -492,16 +492,22 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
>   * and handle the associated bookkeeping.  Do not mark the page dirty
>   * in KVM's dirty bitmaps.
>   *
> + * If setting the SPTE fails because it has changed, iter->old_spte will be
> + * updated with the updated value of the spte.

First updated=>refreshed, second updated=>current?  More below.

> + *
>   * @kvm: kvm instance
>   * @iter: a tdp_iter instance currently on the SPTE that should be set
>   * @new_spte: The value the SPTE should be set to
>   * Returns: true if the SPTE was set, false if it was not. If false is returned,
> - *	    this function will have no side-effects.
> + *          this function will have no side-effects other than updating

s/updating/setting

> + *          iter->old_spte to the latest value of spte.

Strictly speaking, "latest" may not be true if yet another thread modifies the
SPTE.  Maybe this?

		iter->old_spte to the last known value of the SPTE.

>   */
>  static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
>  					   struct tdp_iter *iter,
>  					   u64 new_spte)
>  {
> +	u64 old_spte;
> +
>  	lockdep_assert_held_read(&kvm->mmu_lock);
>  
>  	/*
> @@ -515,9 +521,15 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
>  	 * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
>  	 * does not hold the mmu_lock.
>  	 */
> -	if (cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte,
> -		      new_spte) != iter->old_spte)
> +	old_spte = cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte, new_spte);

To make this a bit easier to read, and to stay under 80 chars, how about
opportunistically doing this as well?

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 656ebf5b20dc..64f1369f8638 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -506,6 +506,7 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
                                           struct tdp_iter *iter,
                                           u64 new_spte)
 {
+       u64 *sptep = rcu_dereference(iter->sptep);
        u64 old_spte;
 
        lockdep_assert_held_read(&kvm->mmu_lock);
@@ -521,7 +522,7 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
         * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
         * does not hold the mmu_lock.
         */
-       old_spte = cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte, new_spte);
+       old_spte = cmpxchg64(sptep, iter->old_spte, new_spte);
        if (old_spte != iter->old_spte) {
                /*
                 * The cmpxchg failed because the spte was updated by another

> +	if (old_spte != iter->old_spte) {
> +		/*
> +		 * The cmpxchg failed because the spte was updated by another
> +		 * thread so record the updated spte in old_spte.
> +		 */

Hmm, this is a bit awkward.  I think it's the double use of "updated" and the
somewhat ambiguous reference to "old_spte".  I'd also avoid "thread", as this
requires interference from not only a different task, but a different logical CPU
since iter->old_spte is refreshed if mmu_lock is dropped and reacquired.  And
"record" is an odd choice of word since it sounds like storing the current value
is only for logging/debugging.

Something like this?

		/*
		 * The entry was modified by a different logical CPU, refresh
		 * iter->old_spte with the current value so the caller operates
		 * on fresh data, e.g. if it retries tdp_mmu_set_spte_atomic().
		 */

Nits aside,

Reviewed-by: Sean Christopherson <seanjc@google.com>

> +		iter->old_spte = old_spte;
>  		return false;
> +	}
>  
>  	__handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
>  			      new_spte, iter->level, true);
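
The refresh falls out of compare-and-exchange semantics: the primitive
returns the value it actually observed in memory, whether or not the
exchange succeeded. A standalone userspace analogue of the idiom, using
the GCC/Clang __atomic builtin (illustration only, not kernel code; the
names are made up to mirror the patch):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* On failure, the CAS writes the observed value back into
	 * *old_spte -- mirroring how the patch refreshes iter->old_spte
	 * from the cmpxchg64() return value. */
	static bool set_spte_atomic(uint64_t *sptep, uint64_t *old_spte,
				    uint64_t new_spte)
	{
		return __atomic_compare_exchange_n(sptep, old_spte, new_spte,
						   false, __ATOMIC_SEQ_CST,
						   __ATOMIC_SEQ_CST);
	}

	int main(void)
	{
		uint64_t spte = 42, expected = 41;

		if (!set_spte_atomic(&spte, &expected, 7))
			printf("failed, refreshed expected = %llu\n",
			       (unsigned long long)expected); /* prints 42 */
		return 0;
	}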
David Matlack Jan. 6, 2022, 6:04 p.m. UTC | #4
On Wed, Jan 5, 2022 at 4:54 PM Sean Christopherson <seanjc@google.com> wrote:
>
> On Mon, Dec 13, 2021, David Matlack wrote:
> > Consolidate a bunch of code that manually re-reads the spte when the
> > cmpxchg fails. There is no extra cost to doing this because we already
> > have the spte value as a result of the cmpxchg (and in fact this
> > eliminates re-reading the spte), and none of the call sites depend on
> > iter->old_spte retaining the stale spte value.
> >
> > Signed-off-by: David Matlack <dmatlack@google.com>
> > ---
> >  arch/x86/kvm/mmu/tdp_mmu.c | 50 ++++++++++++++++----------------------
> >  1 file changed, 21 insertions(+), 29 deletions(-)
> >
> > diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> > index b69e47e68307..656ebf5b20dc 100644
> > --- a/arch/x86/kvm/mmu/tdp_mmu.c
> > +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> > @@ -492,16 +492,22 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
> >   * and handle the associated bookkeeping.  Do not mark the page dirty
> >   * in KVM's dirty bitmaps.
> >   *
> > + * If setting the SPTE fails because it has changed, iter->old_spte will be
> > + * updated with the updated value of the spte.
>
> First updated=>refreshed, second updated=>current?  More below.
>
> > + *
> >   * @kvm: kvm instance
> >   * @iter: a tdp_iter instance currently on the SPTE that should be set
> >   * @new_spte: The value the SPTE should be set to
> >   * Returns: true if the SPTE was set, false if it was not. If false is returned,
> > - *       this function will have no side-effects.
> > + *          this function will have no side-effects other than updating
>
> s/updating/setting
>
> > + *          iter->old_spte to the latest value of spte.
>
> Strictly speaking, "latest" may not be true if yet another thread modifies the
> SPTE.  Maybe this?
>
>                 iter->old_spte to the last known value of the SPTE.
>
> >   */
> >  static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
> >                                          struct tdp_iter *iter,
> >                                          u64 new_spte)
> >  {
> > +     u64 old_spte;
> > +
> >       lockdep_assert_held_read(&kvm->mmu_lock);
> >
> >       /*
> > @@ -515,9 +521,15 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
> >        * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
> >        * does not hold the mmu_lock.
> >        */
> > -     if (cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte,
> > -                   new_spte) != iter->old_spte)
> > +     old_spte = cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte, new_spte);
>
> To make this a bit easier to read, and to stay under 80 chars, how about
> opportunistically doing this as well?
>
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index 656ebf5b20dc..64f1369f8638 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -506,6 +506,7 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
>                                            struct tdp_iter *iter,
>                                            u64 new_spte)
>  {
> +       u64 *sptep = rcu_dereference(iter->sptep);
>         u64 old_spte;
>
>         lockdep_assert_held_read(&kvm->mmu_lock);
> @@ -521,7 +522,7 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
>          * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
>          * does not hold the mmu_lock.
>          */
> -       old_spte = cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte, new_spte);
> +       old_spte = cmpxchg64(sptep, iter->old_spte, new_spte);
>         if (old_spte != iter->old_spte) {
>                 /*
>                  * The cmpxchg failed because the spte was updated by another
>
> > +     if (old_spte != iter->old_spte) {
> > +             /*
> > +              * The cmpxchg failed because the spte was updated by another
> > +              * thread so record the updated spte in old_spte.
> > +              */
>
> Hmm, this is a bit awkward.  I think it's the double use of "updated" and the
> somewhat ambiguous reference to "old_spte".  I'd also avoid "thread", as this
> requires interference from not only a different task, but a different logical CPU
> since iter->old_spte is refreshed if mmu_lock is dropped and reacquired.  And
> "record" is an odd choice of word since it sounds like storing the current value
> is only for logging/debugging.
>
> Something like this?
>
>                 /*
>                  * The entry was modified by a different logical CPU, refresh
>                  * iter->old_spte with the current value so the caller operates
>                  * on fresh data, e.g. if it retries tdp_mmu_set_spte_atomic().
>                  */
>
> Nits aside,
>
> Reviewed-by: Sean Christopherson <seanjc@google.com>

Thanks for the review. I'll incorporate these into v2 (which I'm
holding off on until you have a chance to finish reviewing v1).

>
> > +             iter->old_spte = old_spte;
> >               return false;
> > +     }
> >
> >       __handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
> >                             new_spte, iter->level, true);
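
Note that the call sites using tdp_mmu_zap_spte_atomic() (zap_gfn_range()
and zap_collapsible_spte_range() in the diff below) pick up the new
behavior for free, because that function freezes the SPTE through the
same helper. A sketch of the relationship, simplified from the kernel of
this era (illustration only, not part of this patch):

	static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
						   struct tdp_iter *iter)
	{
		/*
		 * Freeze the SPTE via the common helper; if the cmpxchg
		 * loses the race, iter->old_spte comes back refreshed.
		 */
		if (!tdp_mmu_set_spte_atomic(kvm, iter, REMOVED_SPTE))
			return false;

		/* ... remote TLB flush and final zeroing elided ... */
		return true;
	}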

Patch

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index b69e47e68307..656ebf5b20dc 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -492,16 +492,22 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
  * and handle the associated bookkeeping.  Do not mark the page dirty
  * in KVM's dirty bitmaps.
  *
+ * If setting the SPTE fails because it has changed, iter->old_spte will be
+ * updated with the updated value of the spte.
+ *
  * @kvm: kvm instance
  * @iter: a tdp_iter instance currently on the SPTE that should be set
  * @new_spte: The value the SPTE should be set to
  * Returns: true if the SPTE was set, false if it was not. If false is returned,
- *	    this function will have no side-effects.
+ *          this function will have no side-effects other than updating
+ *          iter->old_spte to the latest value of spte.
  */
 static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
 					   struct tdp_iter *iter,
 					   u64 new_spte)
 {
+	u64 old_spte;
+
 	lockdep_assert_held_read(&kvm->mmu_lock);
 
 	/*
@@ -515,9 +521,15 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
 	 * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
 	 * does not hold the mmu_lock.
 	 */
-	if (cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte,
-		      new_spte) != iter->old_spte)
+	old_spte = cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte, new_spte);
+	if (old_spte != iter->old_spte) {
+		/*
+		 * The cmpxchg failed because the spte was updated by another
+		 * thread so record the updated spte in old_spte.
+		 */
+		iter->old_spte = old_spte;
 		return false;
+	}
 
 	__handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
 			      new_spte, iter->level, true);
@@ -748,11 +760,6 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 			tdp_mmu_set_spte(kvm, &iter, 0);
 			flush = true;
 		} else if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) {
-			/*
-			 * The iter must explicitly re-read the SPTE because
-			 * the atomic cmpxchg failed.
-			 */
-			iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
 			goto retry;
 		}
 	}
@@ -985,6 +992,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 			 * path below.
 			 */
 			iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
+
 		}
 
 		if (!is_shadow_present_pte(iter.old_spte)) {
@@ -1190,14 +1198,9 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 
 		new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
 
-		if (!tdp_mmu_set_spte_atomic(kvm, &iter, new_spte)) {
-			/*
-			 * The iter must explicitly re-read the SPTE because
-			 * the atomic cmpxchg failed.
-			 */
-			iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
+		if (!tdp_mmu_set_spte_atomic(kvm, &iter, new_spte))
 			goto retry;
-		}
+
 		spte_set = true;
 	}
 
@@ -1258,14 +1261,9 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 				continue;
 		}
 
-		if (!tdp_mmu_set_spte_atomic(kvm, &iter, new_spte)) {
-			/*
-			 * The iter must explicitly re-read the SPTE because
-			 * the atomic cmpxchg failed.
-			 */
-			iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
+		if (!tdp_mmu_set_spte_atomic(kvm, &iter, new_spte))
 			goto retry;
-		}
+
 		spte_set = true;
 	}
 
@@ -1389,14 +1387,8 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
 			continue;
 
 		/* Note, a successful atomic zap also does a remote TLB flush. */
-		if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) {
-			/*
-			 * The iter must explicitly re-read the SPTE because
-			 * the atomic cmpxchg failed.
-			 */
-			iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
+		if (!tdp_mmu_zap_spte_atomic(kvm, &iter))
 			goto retry;
-		}
 	}
 
 	rcu_read_unlock();
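
For reference, folding Sean's two suggestions (the sptep local and the
reworded comment) into the patched helper would plausibly give the
following. This is a reconstruction from the thread, not the posted v2;
the REMOVED_SPTE/freeze check and the tail-end bookkeeping are elided:

	static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
						   struct tdp_iter *iter,
						   u64 new_spte)
	{
		u64 *sptep = rcu_dereference(iter->sptep);
		u64 old_spte;

		lockdep_assert_held_read(&kvm->mmu_lock);

		/* ... REMOVED_SPTE check elided ... */

		old_spte = cmpxchg64(sptep, iter->old_spte, new_spte);
		if (old_spte != iter->old_spte) {
			/*
			 * The entry was modified by a different logical CPU,
			 * refresh iter->old_spte with the current value so the
			 * caller operates on fresh data, e.g. if it retries
			 * tdp_mmu_set_spte_atomic().
			 */
			iter->old_spte = old_spte;
			return false;
		}

		__handle_changed_spte(kvm, iter->as_id, iter->gfn,
				      iter->old_spte, new_spte, iter->level,
				      true);
		/* ... remaining bookkeeping elided ... */
		return true;
	}

On a false return, callers such as wrprot_gfn_range() simply goto their
retry label and operate on the value another CPU wrote, which
iter.old_spte now already holds.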