[v2,6/6] KVM: x86/mmu: explicitly check nx_hugepage in disallowed_hugepage_adjust()

Message ID 20220723012325.1715714-7-seanjc@google.com (mailing list archive)
State New, archived
Series KVM: x86: Apply NX mitigation more precisely

Commit Message

Sean Christopherson July 23, 2022, 1:23 a.m. UTC
From: Mingwei Zhang <mizhang@google.com>

Explicitly check if an NX huge page is disallowed when determining
whether a page fault needs to be forced to use a smaller page size.
KVM incorrectly assumes that the NX huge page mitigation is the only
scenario in which KVM will create a shadow page instead of a huge
page. Any scenario that causes KVM to zap leaf SPTEs may result in an
SP that can be made huge without violating the NX huge page
mitigation, e.g. disabling dirty logging, zapping from the
mmu_notifier due to page migration, guest MTRR changes that affect
the viability of a huge page, etc.

Fixes: b8e8c8303ff2 ("kvm: mmu: ITLB_MULTIHIT mitigation")
Reviewed-by: Ben Gardon <bgardon@google.com>
Signed-off-by: Mingwei Zhang <mizhang@google.com>
[sean: add barrier comments, use spte_to_sp()]
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/mmu/mmu.c     | 17 +++++++++++++++--
 arch/x86/kvm/mmu/tdp_mmu.c |  6 ++++++
 2 files changed, 21 insertions(+), 2 deletions(-)
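
The smp_rmb()/smp_wmb() pairing added by this patch is the classic
publish/subscribe pattern: the writer initializes
nx_huge_page_disallowed and issues a write barrier before making the
shadow page visible; the reader observes a present shadow page, issues
a read barrier, and only then reads the flag. Below is a minimal
userspace sketch of that pattern, using C11 fences as conservative
stand-ins for the kernel's smp_wmb()/smp_rmb(); the types and function
names are illustrative, not KVM's.

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct shadow_page {
	bool nx_huge_page_disallowed;
	/* ... other shadow page metadata ... */
};

/* Stand-in for the "present SPTE": NULL until the page is published. */
static _Atomic(struct shadow_page *) installed_sp;

/*
 * Writer, analogous to kvm_tdp_mmu_map(): set the flag, then make the
 * page visible.  The release fence orders the flag write before the
 * publishing store.
 */
static void publish_sp(struct shadow_page *sp, bool disallowed)
{
	sp->nx_huge_page_disallowed = disallowed;
	atomic_thread_fence(memory_order_release);	/* ~smp_wmb() */
	atomic_store_explicit(&installed_sp, sp, memory_order_relaxed);
}

/*
 * Reader, analogous to disallowed_hugepage_adjust(): read the flag
 * only after observing the published page.  The acquire fence orders
 * the flag read after the observing load.
 */
static bool sp_disallows_huge_page(void)
{
	struct shadow_page *sp =
		atomic_load_explicit(&installed_sp, memory_order_relaxed);

	if (!sp)
		return false;
	atomic_thread_fence(memory_order_acquire);	/* ~smp_rmb() */
	return sp->nx_huge_page_disallowed;
}

In the patch itself the "publishing store" is tdp_mmu_link_sp() making
the SPTE present, and the "observing load" is the
is_shadow_present_pte() check, which is why the smp_rmb() must sit
between that check and the nx_huge_page_disallowed read.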

Comments

David Matlack July 25, 2022, 11:28 p.m. UTC | #1
On Sat, Jul 23, 2022 at 01:23:25AM +0000, Sean Christopherson wrote:
> From: Mingwei Zhang <mizhang@google.com>
> 
> Explicitly check if an NX huge page is disallowed when determining
> whether a page fault needs to be forced to use a smaller page size.
> KVM incorrectly assumes that the NX huge page mitigation is the only
> scenario in which KVM will create a shadow page instead of a huge
> page. Any scenario that causes KVM to zap leaf SPTEs may result in an
> SP that can be made huge without violating the NX huge page
> mitigation, e.g. disabling dirty logging, zapping from the
> mmu_notifier due to page migration, guest MTRR changes that affect
> the viability of a huge page, etc.
> 
> Fixes: b8e8c8303ff2 ("kvm: mmu: ITLB_MULTIHIT mitigation")
> Reviewed-by: Ben Gardon <bgardon@google.com>
> Signed-off-by: Mingwei Zhang <mizhang@google.com>
> [sean: add barrier comments, use spte_to_sp()]
> Signed-off-by: Sean Christopherson <seanjc@google.com>

Reviewed-by: David Matlack <dmatlack@google.com>


Patch

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index ed3cfb31853b..97980528bf4a 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3092,6 +3092,19 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_
 	    cur_level == fault->goal_level &&
 	    is_shadow_present_pte(spte) &&
 	    !is_large_pte(spte)) {
+		u64 page_mask;
+
+		/*
+		 * Ensure nx_huge_page_disallowed is read after checking for a
+		 * present shadow page.  A different vCPU may be concurrently
+		 * installing the shadow page if mmu_lock is held for read.
+		 * Pairs with the smp_wmb() in kvm_tdp_mmu_map().
+		 */
+		smp_rmb();
+
+		if (!spte_to_sp(spte)->nx_huge_page_disallowed)
+			return;
+
 		/*
 		 * A small SPTE exists for this pfn, but FNAME(fetch)
 		 * and __direct_map would like to create a large PTE
@@ -3099,8 +3112,8 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_
 		 * patching back for them into pfn the next 9 bits of
 		 * the address.
 		 */
-		u64 page_mask = KVM_PAGES_PER_HPAGE(cur_level) -
-				KVM_PAGES_PER_HPAGE(cur_level - 1);
+		page_mask = KVM_PAGES_PER_HPAGE(cur_level) -
+			    KVM_PAGES_PER_HPAGE(cur_level - 1);
 		fault->pfn |= fault->gfn & page_mask;
 		fault->goal_level--;
 	}
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index fea22dc481a0..313092d4931a 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1194,6 +1194,12 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 			tdp_mmu_init_child_sp(sp, &iter);
 
 			sp->nx_huge_page_disallowed = fault->huge_page_disallowed;
+			/*
+			 * Ensure nx_huge_page_disallowed is visible before the
+			 * SP is marked present, as mmu_lock is held for read.
+			 * Pairs with the smp_rmb() in disallowed_hugepage_adjust().
+			 */
+			smp_wmb();
 
 			if (tdp_mmu_link_sp(kvm, &iter, sp, true)) {
 				tdp_mmu_free_sp(sp);