
[RFC,03/19] KVM: x86/mmu: Factor flush and free up when zapping under MMU write lock

Message ID 20211110223010.1392399-4-bgardon@google.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86/mmu: Optimize disabling dirty logging

Commit Message

Ben Gardon Nov. 10, 2021, 10:29 p.m. UTC
When zapping a GFN range under the MMU write lock, there is no need to
flush the TLBs for every zap. Instead, follow the lead of the legacy MMU
and collect disconnected sps to be freed after a flush at the end of
the routine.
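
Roughly, the resulting flow in zap_gfn_range() looks like the following
(illustrative sketch only, eliding the shared/atomic path and yield
handling):

	LIST_HEAD(disconnected_sps);

	tdp_root_for_each_pte(iter, root, start, end) {
		/* Zap without flushing; disconnected SPs accumulate on the list. */
		tdp_mmu_zap_spte(kvm, &iter, &disconnected_sps);
		flush = true;
	}

	if (!list_empty(&disconnected_sps)) {
		/* One flush for the whole range, then free the collected SPs. */
		kvm_flush_remote_tlbs(kvm);
		handle_disconnected_sps(kvm, &disconnected_sps);
		flush = false;
	}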


Signed-off-by: Ben Gardon <bgardon@google.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

Comments

David Matlack Nov. 11, 2021, 6:31 p.m. UTC | #1
On Wed, Nov 10, 2021 at 02:29:54PM -0800, Ben Gardon wrote:
> When zapping a GFN range under the MMU write lock, there is no need to
> flush the TLBs for every zap. Instead, follow the lead of the legacy MMU
> and collect disconnected sps to be freed after a flush at the end of
> the routine.
> 
> 
> Signed-off-by: Ben Gardon <bgardon@google.com>

Reviewed-by: David Matlack <dmatlack@google.com>

> ---
>  arch/x86/kvm/mmu/tdp_mmu.c | 28 +++++++++++++++++++---------
>  1 file changed, 19 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index 5b31d046df78..a448f0f2d993 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -623,10 +623,9 @@ static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
>   */
>  static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
>  				      u64 new_spte, bool record_acc_track,
> -				      bool record_dirty_log)
> +				      bool record_dirty_log,
> +				      struct list_head *disconnected_sps)
>  {
> -	LIST_HEAD(disconnected_sps);
> -
>  	lockdep_assert_held_write(&kvm->mmu_lock);
>  
>  	/*
> @@ -641,7 +640,7 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
>  	WRITE_ONCE(*rcu_dereference(iter->sptep), new_spte);
>  
>  	__handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
> -			      new_spte, iter->level, false, &disconnected_sps);
> +			      new_spte, iter->level, false, disconnected_sps);
>  	if (record_acc_track)
>  		handle_changed_spte_acc_track(iter->old_spte, new_spte,
>  					      iter->level);
> @@ -649,28 +648,32 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
>  		handle_changed_spte_dirty_log(kvm, iter->as_id, iter->gfn,
>  					      iter->old_spte, new_spte,
>  					      iter->level);
> +}
>  
> -	handle_disconnected_sps(kvm, &disconnected_sps);
> +static inline void tdp_mmu_zap_spte(struct kvm *kvm, struct tdp_iter *iter,
> +				    struct list_head *disconnected_sps)
> +{
> +	__tdp_mmu_set_spte(kvm, iter, 0, true, true, disconnected_sps);
>  }
>  
>  static inline void tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
>  				    u64 new_spte)
>  {
> -	__tdp_mmu_set_spte(kvm, iter, new_spte, true, true);
> +	__tdp_mmu_set_spte(kvm, iter, new_spte, true, true, NULL);
>  }
>  
>  static inline void tdp_mmu_set_spte_no_acc_track(struct kvm *kvm,
>  						 struct tdp_iter *iter,
>  						 u64 new_spte)
>  {
> -	__tdp_mmu_set_spte(kvm, iter, new_spte, false, true);
> +	__tdp_mmu_set_spte(kvm, iter, new_spte, false, true, NULL);
>  }
>  
>  static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm,
>  						 struct tdp_iter *iter,
>  						 u64 new_spte)
>  {
> -	__tdp_mmu_set_spte(kvm, iter, new_spte, true, false);
> +	__tdp_mmu_set_spte(kvm, iter, new_spte, true, false, NULL);
>  }
>  
>  #define tdp_root_for_each_pte(_iter, _root, _start, _end) \
> @@ -757,6 +760,7 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
>  	gfn_t max_gfn_host = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
>  	bool zap_all = (start == 0 && end >= max_gfn_host);
>  	struct tdp_iter iter;
> +	LIST_HEAD(disconnected_sps);
>  
>  	/*
>  	 * No need to try to step down in the iterator when zapping all SPTEs,
> @@ -799,7 +803,7 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
>  			continue;
>  
>  		if (!shared) {
> -			tdp_mmu_set_spte(kvm, &iter, 0);
> +			tdp_mmu_zap_spte(kvm, &iter, &disconnected_sps);
>  			flush = true;
>  		} else if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) {
>  			/*
> @@ -811,6 +815,12 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
>  		}
>  	}
>  
> +	if (!list_empty(&disconnected_sps)) {
> +		kvm_flush_remote_tlbs(kvm);
> +		handle_disconnected_sps(kvm, &disconnected_sps);

It might be worth adding a comment that we purposely do not process
disconnected_sps at the cond_resched point earlier in the loop, because
handling them is an expensive operation that itself needs to
cond_resched (next patch).
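
Something along these lines, perhaps (illustrative only):

	if (!list_empty(&disconnected_sps)) {
		/*
		 * Defer freeing the disconnected SPs until after the loop
		 * rather than doing it at each cond_resched point: handling
		 * them is expensive and will itself need to cond_resched
		 * (see the next patch).
		 */
		kvm_flush_remote_tlbs(kvm);
		handle_disconnected_sps(kvm, &disconnected_sps);
		flush = false;
	}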

> +		flush = false;
> +	}
> +
>  	rcu_read_unlock();
>  	return flush;
>  }
> -- 
> 2.34.0.rc0.344.g81b53c2807-goog
>

Patch

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 5b31d046df78..a448f0f2d993 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -623,10 +623,9 @@  static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
  */
 static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
 				      u64 new_spte, bool record_acc_track,
-				      bool record_dirty_log)
+				      bool record_dirty_log,
+				      struct list_head *disconnected_sps)
 {
-	LIST_HEAD(disconnected_sps);
-
 	lockdep_assert_held_write(&kvm->mmu_lock);
 
 	/*
@@ -641,7 +640,7 @@  static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
 	WRITE_ONCE(*rcu_dereference(iter->sptep), new_spte);
 
 	__handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
-			      new_spte, iter->level, false, &disconnected_sps);
+			      new_spte, iter->level, false, disconnected_sps);
 	if (record_acc_track)
 		handle_changed_spte_acc_track(iter->old_spte, new_spte,
 					      iter->level);
@@ -649,28 +648,32 @@  static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
 		handle_changed_spte_dirty_log(kvm, iter->as_id, iter->gfn,
 					      iter->old_spte, new_spte,
 					      iter->level);
+}
 
-	handle_disconnected_sps(kvm, &disconnected_sps);
+static inline void tdp_mmu_zap_spte(struct kvm *kvm, struct tdp_iter *iter,
+				    struct list_head *disconnected_sps)
+{
+	__tdp_mmu_set_spte(kvm, iter, 0, true, true, disconnected_sps);
 }
 
 static inline void tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
 				    u64 new_spte)
 {
-	__tdp_mmu_set_spte(kvm, iter, new_spte, true, true);
+	__tdp_mmu_set_spte(kvm, iter, new_spte, true, true, NULL);
 }
 
 static inline void tdp_mmu_set_spte_no_acc_track(struct kvm *kvm,
 						 struct tdp_iter *iter,
 						 u64 new_spte)
 {
-	__tdp_mmu_set_spte(kvm, iter, new_spte, false, true);
+	__tdp_mmu_set_spte(kvm, iter, new_spte, false, true, NULL);
 }
 
 static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm,
 						 struct tdp_iter *iter,
 						 u64 new_spte)
 {
-	__tdp_mmu_set_spte(kvm, iter, new_spte, true, false);
+	__tdp_mmu_set_spte(kvm, iter, new_spte, true, false, NULL);
 }
 
 #define tdp_root_for_each_pte(_iter, _root, _start, _end) \
@@ -757,6 +760,7 @@  static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 	gfn_t max_gfn_host = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
 	bool zap_all = (start == 0 && end >= max_gfn_host);
 	struct tdp_iter iter;
+	LIST_HEAD(disconnected_sps);
 
 	/*
 	 * No need to try to step down in the iterator when zapping all SPTEs,
@@ -799,7 +803,7 @@  static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 			continue;
 
 		if (!shared) {
-			tdp_mmu_set_spte(kvm, &iter, 0);
+			tdp_mmu_zap_spte(kvm, &iter, &disconnected_sps);
 			flush = true;
 		} else if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) {
 			/*
@@ -811,6 +815,12 @@  static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 		}
 	}
 
+	if (!list_empty(&disconnected_sps)) {
+		kvm_flush_remote_tlbs(kvm);
+		handle_disconnected_sps(kvm, &disconnected_sps);
+		flush = false;
+	}
+
 	rcu_read_unlock();
 	return flush;
 }