diff mbox series

[v2,3/6] KVM: x86/mmu: Set disallowed_nx_huge_page in TDP MMU before setting SPTE

Message ID 20220723012325.1715714-4-seanjc@google.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86: Apply NX mitigation more precisely | expand

Commit Message

Sean Christopherson July 23, 2022, 1:23 a.m. UTC
Set nx_huge_page_disallowed in TDP MMU shadow pages before making the SP
visible to other readers, i.e. before setting its SPTE.  This will allow
KVM to query the flag when determining if a shadow page can be replaced
by a NX huge page without violating the rules of the mitigation.

Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/mmu/mmu.c          | 12 +++++-------
 arch/x86/kvm/mmu/mmu_internal.h |  5 ++---
 arch/x86/kvm/mmu/tdp_mmu.c      | 30 +++++++++++++++++-------------
 3 files changed, 24 insertions(+), 23 deletions(-)

Comments

David Matlack July 25, 2022, 11:16 p.m. UTC | #1
On Sat, Jul 23, 2022 at 01:23:22AM +0000, Sean Christopherson wrote:
> Set nx_huge_page_disallowed in TDP MMU shadow pages before making the SP
> visible to other readers, i.e. before setting its SPTE.  This will allow
> KVM to query the flag when determining if a shadow page can be replaced
> by a NX huge page without violating the rules of the mitigation.

It took me a minute to figure out why the same change isn't needed in
the shadow MMU (it always holds the write-lock so it's impossible for
another CPU to see an SP without a correct nx_huge_page_disallowed.
If you send a v2 can you add a short blurb to that effect here?

Otherwise,

Reviewed-by: David Matlack <dmatlack@google.com>

> 
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>  arch/x86/kvm/mmu/mmu.c          | 12 +++++-------
>  arch/x86/kvm/mmu/mmu_internal.h |  5 ++---
>  arch/x86/kvm/mmu/tdp_mmu.c      | 30 +++++++++++++++++-------------
>  3 files changed, 24 insertions(+), 23 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 493cdf1c29ff..e9252e7cd5a2 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -802,8 +802,7 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
>  		kvm_flush_remote_tlbs_with_address(kvm, gfn, 1);
>  }
>  
> -static void untrack_possible_nx_huge_page(struct kvm *kvm,
> -					  struct kvm_mmu_page *sp)
> +void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
>  {
>  	if (list_empty(&sp->possible_nx_huge_page_link))
>  		return;
> @@ -812,15 +811,14 @@ static void untrack_possible_nx_huge_page(struct kvm *kvm,
>  	list_del_init(&sp->possible_nx_huge_page_link);
>  }
>  
> -void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
> +static void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
>  {
>  	sp->nx_huge_page_disallowed = false;
>  
>  	untrack_possible_nx_huge_page(kvm, sp);
>  }
>  
> -static void track_possible_nx_huge_page(struct kvm *kvm,
> -					struct kvm_mmu_page *sp)
> +void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
>  {
>  	if (!list_empty(&sp->possible_nx_huge_page_link))
>  		return;
> @@ -830,8 +828,8 @@ static void track_possible_nx_huge_page(struct kvm *kvm,
>  		      &kvm->arch.possible_nx_huge_pages);
>  }
>  
> -void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
> -			  bool nx_huge_page_possible)
> +static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
> +				 bool nx_huge_page_possible)
>  {
>  	sp->nx_huge_page_disallowed = true;
>  
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index 83644a0167ab..2a887d08b722 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -336,8 +336,7 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_
>  
>  void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
>  
> -void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
> -			  bool nx_huge_page_possible);
> -void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
> +void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
> +void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
>  
>  #endif /* __KVM_X86_MMU_INTERNAL_H */
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index a30983947fee..626c40ec2af9 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -392,8 +392,10 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp,
>  		lockdep_assert_held_write(&kvm->mmu_lock);
>  
>  	list_del(&sp->link);
> -	if (sp->nx_huge_page_disallowed)
> -		unaccount_nx_huge_page(kvm, sp);
> +	if (sp->nx_huge_page_disallowed) {
> +		sp->nx_huge_page_disallowed = false;
> +		untrack_possible_nx_huge_page(kvm, sp);
> +	}
>  
>  	if (shared)
>  		spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
> @@ -1111,16 +1113,13 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
>   * @kvm: kvm instance
>   * @iter: a tdp_iter instance currently on the SPTE that should be set
>   * @sp: The new TDP page table to install.
> - * @account_nx: True if this page table is being installed to split a
> - *              non-executable huge page.
>   * @shared: This operation is running under the MMU lock in read mode.
>   *
>   * Returns: 0 if the new page table was installed. Non-0 if the page table
>   *          could not be installed (e.g. the atomic compare-exchange failed).
>   */
>  static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
> -			   struct kvm_mmu_page *sp, bool account_nx,
> -			   bool shared)
> +			   struct kvm_mmu_page *sp, bool shared)
>  {
>  	u64 spte = make_nonleaf_spte(sp->spt, !kvm_ad_enabled());
>  	int ret = 0;
> @@ -1135,8 +1134,6 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
>  
>  	spin_lock(&kvm->arch.tdp_mmu_pages_lock);
>  	list_add(&sp->link, &kvm->arch.tdp_mmu_pages);
> -	if (account_nx)
> -		account_nx_huge_page(kvm, sp, true);
>  	spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
>  
>  	return 0;
> @@ -1149,6 +1146,7 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
>  int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>  {
>  	struct kvm_mmu *mmu = vcpu->arch.mmu;
> +	struct kvm *kvm = vcpu->kvm;
>  	struct tdp_iter iter;
>  	struct kvm_mmu_page *sp;
>  	int ret;
> @@ -1185,9 +1183,6 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>  		}
>  
>  		if (!is_shadow_present_pte(iter.old_spte)) {
> -			bool account_nx = fault->huge_page_disallowed &&
> -					  fault->req_level >= iter.level;
> -
>  			/*
>  			 * If SPTE has been frozen by another thread, just
>  			 * give up and retry, avoiding unnecessary page table
> @@ -1199,10 +1194,19 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>  			sp = tdp_mmu_alloc_sp(vcpu);
>  			tdp_mmu_init_child_sp(sp, &iter);
>  
> -			if (tdp_mmu_link_sp(vcpu->kvm, &iter, sp, account_nx, true)) {
> +			sp->nx_huge_page_disallowed = fault->huge_page_disallowed;
> +
> +			if (tdp_mmu_link_sp(kvm, &iter, sp, true)) {
>  				tdp_mmu_free_sp(sp);
>  				break;
>  			}
> +
> +			if (fault->huge_page_disallowed &&
> +			    fault->req_level >= iter.level) {
> +				spin_lock(&kvm->arch.tdp_mmu_pages_lock);
> +				track_possible_nx_huge_page(kvm, sp);
> +				spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
> +			}
>  		}
>  	}
>  
> @@ -1490,7 +1494,7 @@ static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter,
>  	 * correctness standpoint since the translation will be the same either
>  	 * way.
>  	 */
> -	ret = tdp_mmu_link_sp(kvm, iter, sp, false, shared);
> +	ret = tdp_mmu_link_sp(kvm, iter, sp, shared);
>  	if (ret)
>  		goto out;
>  
> -- 
> 2.37.1.359.gd136c6c3e2-goog
>
diff mbox series

Patch

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 493cdf1c29ff..e9252e7cd5a2 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -802,8 +802,7 @@  static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 		kvm_flush_remote_tlbs_with_address(kvm, gfn, 1);
 }
 
-static void untrack_possible_nx_huge_page(struct kvm *kvm,
-					  struct kvm_mmu_page *sp)
+void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	if (list_empty(&sp->possible_nx_huge_page_link))
 		return;
@@ -812,15 +811,14 @@  static void untrack_possible_nx_huge_page(struct kvm *kvm,
 	list_del_init(&sp->possible_nx_huge_page_link);
 }
 
-void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+static void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	sp->nx_huge_page_disallowed = false;
 
 	untrack_possible_nx_huge_page(kvm, sp);
 }
 
-static void track_possible_nx_huge_page(struct kvm *kvm,
-					struct kvm_mmu_page *sp)
+void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	if (!list_empty(&sp->possible_nx_huge_page_link))
 		return;
@@ -830,8 +828,8 @@  static void track_possible_nx_huge_page(struct kvm *kvm,
 		      &kvm->arch.possible_nx_huge_pages);
 }
 
-void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
-			  bool nx_huge_page_possible)
+static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+				 bool nx_huge_page_possible)
 {
 	sp->nx_huge_page_disallowed = true;
 
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 83644a0167ab..2a887d08b722 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -336,8 +336,7 @@  void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_
 
 void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
 
-void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
-			  bool nx_huge_page_possible);
-void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
+void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
+void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
 
 #endif /* __KVM_X86_MMU_INTERNAL_H */
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index a30983947fee..626c40ec2af9 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -392,8 +392,10 @@  static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp,
 		lockdep_assert_held_write(&kvm->mmu_lock);
 
 	list_del(&sp->link);
-	if (sp->nx_huge_page_disallowed)
-		unaccount_nx_huge_page(kvm, sp);
+	if (sp->nx_huge_page_disallowed) {
+		sp->nx_huge_page_disallowed = false;
+		untrack_possible_nx_huge_page(kvm, sp);
+	}
 
 	if (shared)
 		spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
@@ -1111,16 +1113,13 @@  static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
  * @kvm: kvm instance
  * @iter: a tdp_iter instance currently on the SPTE that should be set
  * @sp: The new TDP page table to install.
- * @account_nx: True if this page table is being installed to split a
- *              non-executable huge page.
  * @shared: This operation is running under the MMU lock in read mode.
  *
  * Returns: 0 if the new page table was installed. Non-0 if the page table
  *          could not be installed (e.g. the atomic compare-exchange failed).
  */
 static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
-			   struct kvm_mmu_page *sp, bool account_nx,
-			   bool shared)
+			   struct kvm_mmu_page *sp, bool shared)
 {
 	u64 spte = make_nonleaf_spte(sp->spt, !kvm_ad_enabled());
 	int ret = 0;
@@ -1135,8 +1134,6 @@  static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
 
 	spin_lock(&kvm->arch.tdp_mmu_pages_lock);
 	list_add(&sp->link, &kvm->arch.tdp_mmu_pages);
-	if (account_nx)
-		account_nx_huge_page(kvm, sp, true);
 	spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
 
 	return 0;
@@ -1149,6 +1146,7 @@  static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
 int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 {
 	struct kvm_mmu *mmu = vcpu->arch.mmu;
+	struct kvm *kvm = vcpu->kvm;
 	struct tdp_iter iter;
 	struct kvm_mmu_page *sp;
 	int ret;
@@ -1185,9 +1183,6 @@  int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		}
 
 		if (!is_shadow_present_pte(iter.old_spte)) {
-			bool account_nx = fault->huge_page_disallowed &&
-					  fault->req_level >= iter.level;
-
 			/*
 			 * If SPTE has been frozen by another thread, just
 			 * give up and retry, avoiding unnecessary page table
@@ -1199,10 +1194,19 @@  int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 			sp = tdp_mmu_alloc_sp(vcpu);
 			tdp_mmu_init_child_sp(sp, &iter);
 
-			if (tdp_mmu_link_sp(vcpu->kvm, &iter, sp, account_nx, true)) {
+			sp->nx_huge_page_disallowed = fault->huge_page_disallowed;
+
+			if (tdp_mmu_link_sp(kvm, &iter, sp, true)) {
 				tdp_mmu_free_sp(sp);
 				break;
 			}
+
+			if (fault->huge_page_disallowed &&
+			    fault->req_level >= iter.level) {
+				spin_lock(&kvm->arch.tdp_mmu_pages_lock);
+				track_possible_nx_huge_page(kvm, sp);
+				spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
+			}
 		}
 	}
 
@@ -1490,7 +1494,7 @@  static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter,
 	 * correctness standpoint since the translation will be the same either
 	 * way.
 	 */
-	ret = tdp_mmu_link_sp(kvm, iter, sp, false, shared);
+	ret = tdp_mmu_link_sp(kvm, iter, sp, shared);
 	if (ret)
 		goto out;