diff mbox series

[1/4] KVM: x86/mmu: Check kvm_mmu_page_ad_need_write_protect() when clearing TDP MMU dirty bits

Message ID 20240315230541.1635322-2-dmatlack@google.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86/mmu: Fix TDP MMU dirty logging bug L2 running with EPT disabled | expand

Commit Message

David Matlack March 15, 2024, 11:05 p.m. UTC
Check kvm_mmu_page_ad_need_write_protect() when deciding whether to
write-protect or clear D-bits on TDP MMU SPTEs.

TDP MMU SPTEs must be write-protected when the TDP MMU is being used to
run an L2 (i.e. L1 has disabled EPT) and PML is enabled. KVM always
disables the PML hardware when running L2, so failing to write-protect
TDP MMU SPTEs will cause writes made by L2 to not be reflected in the
dirty log.

Reported-by: syzbot+900d58a45dcaab9e4821@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=900d58a45dcaab9e4821
Fixes: 5982a5392663 ("KVM: x86/mmu: Use kvm_ad_enabled() to determine if TDP MMU SPTEs need wrprot")
Cc: stable@vger.kernel.org
Cc: Vipin Sharma <vipinsh@google.com>
Cc: Sean Christopherson <seanjc@google.com>
Signed-off-by: David Matlack <dmatlack@google.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

Comments

Sean Christopherson April 9, 2024, 11:13 p.m. UTC | #1
The shortlog is a bit too literal, e.g. without intimate knowledge of what
kvm_mmu_page_ad_need_write_protect() does, it's impossible to know what this
patch actually fixes.

In this case, since it's a bug fix, I think it makes sense to explicitly call
out the L2 SPTEs angle, at the cost of not capturing the more general gist of
the patch.

On Fri, Mar 15, 2024, David Matlack wrote:
> Check kvm_mmu_page_ad_need_write_protect() when deciding whether to
> write-protect or clear D-bits on TDP MMU SPTEs.
> 
> TDP MMU SPTEs must be write-protected when the TDP MMU is being used to
> run an L2 (i.e. L1 has disabled EPT) and PML is enabled. KVM always
> disables the PML hardware when running L2, so failing to write-protect
> TDP MMU SPTEs will cause writes made by L2 to not be reflected in the
> dirty log.

I massaged this slightly to explain what kvm_mmu_page_ad_need_write_protect()
does at a high level, at least as far as this patch is concerned.

    KVM: x86/mmu: Write-protect L2 SPTEs in TDP MMU when clearing dirty status
    
    Check kvm_mmu_page_ad_need_write_protect() when deciding whether to
    write-protect or clear D-bits on TDP MMU SPTEs, so that the TDP MMU
    accounts for any role-specific reasons for disabling D-bit dirty logging.
    
    Specifically, TDP MMU SPTEs must be write-protected when the TDP MMU is
    being used to run an L2 (i.e. L1 has disabled EPT) and PML is enabled.
    KVM always disables PML when running L2, even when L1 and L2 GPAs are in
    the same domain, so failing to write-protect TDP MMU SPTEs will cause
    writes made by L2 to not be reflected in the dirty log.
 
> Reported-by: syzbot+900d58a45dcaab9e4821@syzkaller.appspotmail.com
> Closes: https://syzkaller.appspot.com/bug?extid=900d58a45dcaab9e4821
> Fixes: 5982a5392663 ("KVM: x86/mmu: Use kvm_ad_enabled() to determine if TDP MMU SPTEs need wrprot")
> Cc: stable@vger.kernel.org
> Cc: Vipin Sharma <vipinsh@google.com>
> Cc: Sean Christopherson <seanjc@google.com>
> Signed-off-by: David Matlack <dmatlack@google.com>
> ---
>  arch/x86/kvm/mmu/tdp_mmu.c | 21 ++++++++++++++++-----
>  1 file changed, 16 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index 6ae19b4ee5b1..c3c1a8f430ef 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -1498,6 +1498,16 @@ void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm,
>  	}
>  }
>  
> +static bool tdp_mmu_need_write_protect(struct kvm_mmu_page *sp)
> +{
> +	/*
> +	 * All TDP MMU shadow pages share the same role as their root, aside
> +	 * from level, so it is valid to key off any shadow page to determine if
> +	 * write protection is needed for an entire tree.
> +	 */
> +	return kvm_mmu_page_ad_need_write_protect(sp) || !kvm_ad_enabled();
> +}
> +
>  /*
>   * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If
>   * AD bits are enabled, this will involve clearing the dirty bit on each SPTE.
> @@ -1508,7 +1518,8 @@ void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm,
>  static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
>  			   gfn_t start, gfn_t end)
>  {
> -	u64 dbit = kvm_ad_enabled() ? shadow_dirty_mask : PT_WRITABLE_MASK;
> +	const u64 dbit = tdp_mmu_need_write_protect(root)
> +		? PT_WRITABLE_MASK : shadow_dirty_mask;

I would much prefer to keep the '?' and the first clause on the previous line.
Putting operators on a newline is frowned upon in general, and having the
PT_WRITABLE_MASK on the same line as tdp_mmu_need_write_protect() makes it quite
easy to understand the logic.

>  	struct tdp_iter iter;
>  	bool spte_set = false;
>  
> @@ -1523,7 +1534,7 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
>  		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
>  			continue;
>  
> -		KVM_MMU_WARN_ON(kvm_ad_enabled() &&
> +		KVM_MMU_WARN_ON(dbit == shadow_dirty_mask &&
>  				spte_ad_need_write_protect(iter.old_spte));
>  
>  		if (!(iter.old_spte & dbit))
> @@ -1570,8 +1581,8 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
>  static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
>  				  gfn_t gfn, unsigned long mask, bool wrprot)
>  {
> -	u64 dbit = (wrprot || !kvm_ad_enabled()) ? PT_WRITABLE_MASK :
> -						   shadow_dirty_mask;
> +	const u64 dbit = (wrprot || tdp_mmu_need_write_protect(root))
> +		? PT_WRITABLE_MASK : shadow_dirty_mask;

Same here.

>  	struct tdp_iter iter;
>  
>  	lockdep_assert_held_write(&kvm->mmu_lock);
> @@ -1583,7 +1594,7 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
>  		if (!mask)
>  			break;
>  
> -		KVM_MMU_WARN_ON(kvm_ad_enabled() &&
> +		KVM_MMU_WARN_ON(dbit == shadow_dirty_mask &&
>  				spte_ad_need_write_protect(iter.old_spte));
>  
>  		if (iter.level > PG_LEVEL_4K ||
> -- 
> 2.44.0.291.gc1ea87d7ee-goog
>
diff mbox series

Patch

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 6ae19b4ee5b1..c3c1a8f430ef 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1498,6 +1498,16 @@  void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm,
 	}
 }
 
+static bool tdp_mmu_need_write_protect(struct kvm_mmu_page *sp)
+{
+	/*
+	 * All TDP MMU shadow pages share the same role as their root, aside
+	 * from level, so it is valid to key off any shadow page to determine if
+	 * write protection is needed for an entire tree.
+	 */
+	return kvm_mmu_page_ad_need_write_protect(sp) || !kvm_ad_enabled();
+}
+
 /*
  * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If
  * AD bits are enabled, this will involve clearing the dirty bit on each SPTE.
@@ -1508,7 +1518,8 @@  void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm,
 static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 			   gfn_t start, gfn_t end)
 {
-	u64 dbit = kvm_ad_enabled() ? shadow_dirty_mask : PT_WRITABLE_MASK;
+	const u64 dbit = tdp_mmu_need_write_protect(root)
+		? PT_WRITABLE_MASK : shadow_dirty_mask;
 	struct tdp_iter iter;
 	bool spte_set = false;
 
@@ -1523,7 +1534,7 @@  static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
 			continue;
 
-		KVM_MMU_WARN_ON(kvm_ad_enabled() &&
+		KVM_MMU_WARN_ON(dbit == shadow_dirty_mask &&
 				spte_ad_need_write_protect(iter.old_spte));
 
 		if (!(iter.old_spte & dbit))
@@ -1570,8 +1581,8 @@  bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
 static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
 				  gfn_t gfn, unsigned long mask, bool wrprot)
 {
-	u64 dbit = (wrprot || !kvm_ad_enabled()) ? PT_WRITABLE_MASK :
-						   shadow_dirty_mask;
+	const u64 dbit = (wrprot || tdp_mmu_need_write_protect(root))
+		? PT_WRITABLE_MASK : shadow_dirty_mask;
 	struct tdp_iter iter;
 
 	lockdep_assert_held_write(&kvm->mmu_lock);
@@ -1583,7 +1594,7 @@  static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
 		if (!mask)
 			break;
 
-		KVM_MMU_WARN_ON(kvm_ad_enabled() &&
+		KVM_MMU_WARN_ON(dbit == shadow_dirty_mask &&
 				spte_ad_need_write_protect(iter.old_spte));
 
 		if (iter.level > PG_LEVEL_4K ||