diff mbox series

[V2] KVM: X86: Move PTE present check from loop body to __shadow_walk_next()

Message ID 20210813031629.78670-1-jiangshanlai@gmail.com (mailing list archive)
State New, archived
Headers show
Series [V2] KVM: X86: Move PTE present check from loop body to __shadow_walk_next() | expand

Commit Message

Lai Jiangshan Aug. 13, 2021, 3:16 a.m. UTC
From: Lai Jiangshan <laijs@linux.alibaba.com>

So far, the loop bodies already ensure the PTE is present before calling
__shadow_walk_next():  Some loop bodies simply exit with a !PRESENT
directly and some other loop bodies, i.e. FNAME(fetch) and __direct_map()
do not currently terminate their walks with a !PRESENT, but they get away
with it because they install present non-leaf SPTEs in the loop itself.

But checking pte present in __shadow_walk_next() is a more prudent way of
programing and loop bodies will not need to always check it. It allows us
removing unneded is_shadow_present_pte() in the loop bodies.

Terminating on !is_shadow_present_pte() is 100% the correct behavior, as
walking past a !PRESENT SPTE would lead to attempting to read a the next
level SPTE from a garbage iter->shadow_addr.  Even some paths that do _not_
currently have a !is_shadow_present_pte() in the loop body is Ok since
they will install present non-leaf SPTEs and the additinal present check
is just an NOP.

The checking result in __shadow_walk_next() will be propagated to
shadow_walk_okay() for being used in any for(;;) loop.

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
Changed from V1:
	Merge the two patches
	Update changelog
	Remove !is_shadow_present_pte() in FNAME(invlpg)
 arch/x86/kvm/mmu/mmu.c         | 13 ++-----------
 arch/x86/kvm/mmu/paging_tmpl.h |  2 +-
 2 files changed, 3 insertions(+), 12 deletions(-)

Comments

Lai Jiangshan Aug. 24, 2021, 8:30 a.m. UTC | #1
Hello, Paolo

Could you have a review please.

Thanks
Lai

On Fri, Aug 13, 2021 at 9:01 PM Lai Jiangshan <jiangshanlai@gmail.com> wrote:
>
> From: Lai Jiangshan <laijs@linux.alibaba.com>
>
> So far, the loop bodies already ensure the PTE is present before calling
> __shadow_walk_next():  Some loop bodies simply exit with a !PRESENT
> directly and some other loop bodies, i.e. FNAME(fetch) and __direct_map()
> do not currently terminate their walks with a !PRESENT, but they get away
> with it because they install present non-leaf SPTEs in the loop itself.
>
> But checking pte present in __shadow_walk_next() is a more prudent way of
> programing and loop bodies will not need to always check it. It allows us
> removing unneded is_shadow_present_pte() in the loop bodies.
>
> Terminating on !is_shadow_present_pte() is 100% the correct behavior, as
> walking past a !PRESENT SPTE would lead to attempting to read a the next
> level SPTE from a garbage iter->shadow_addr.  Even some paths that do _not_
> currently have a !is_shadow_present_pte() in the loop body is Ok since
> they will install present non-leaf SPTEs and the additinal present check
> is just an NOP.
>
> The checking result in __shadow_walk_next() will be propagated to
> shadow_walk_okay() for being used in any for(;;) loop.
>
> Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
> ---
> Changed from V1:
>         Merge the two patches
>         Update changelog
>         Remove !is_shadow_present_pte() in FNAME(invlpg)
>  arch/x86/kvm/mmu/mmu.c         | 13 ++-----------
>  arch/x86/kvm/mmu/paging_tmpl.h |  2 +-
>  2 files changed, 3 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index a272ccbddfa1..42eebba6782e 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -2231,7 +2231,7 @@ static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
>  static void __shadow_walk_next(struct kvm_shadow_walk_iterator *iterator,
>                                u64 spte)
>  {
> -       if (is_last_spte(spte, iterator->level)) {
> +       if (!is_shadow_present_pte(spte) || is_last_spte(spte, iterator->level)) {
>                 iterator->level = 0;
>                 return;
>         }
> @@ -3152,9 +3152,6 @@ static u64 *fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, gpa_t gpa, u64 *spte)
>         for_each_shadow_entry_lockless(vcpu, gpa, iterator, old_spte) {
>                 sptep = iterator.sptep;
>                 *spte = old_spte;
> -
> -               if (!is_shadow_present_pte(old_spte))
> -                       break;
>         }
>
>         return sptep;
> @@ -3694,9 +3691,6 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level
>                 spte = mmu_spte_get_lockless(iterator.sptep);
>
>                 sptes[leaf] = spte;
> -
> -               if (!is_shadow_present_pte(spte))
> -                       break;
>         }
>
>         return leaf;
> @@ -3811,11 +3805,8 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
>         u64 spte;
>
>         walk_shadow_page_lockless_begin(vcpu);
> -       for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
> +       for_each_shadow_entry_lockless(vcpu, addr, iterator, spte)
>                 clear_sp_write_flooding_count(iterator.sptep);
> -               if (!is_shadow_present_pte(spte))
> -                       break;
> -       }
>         walk_shadow_page_lockless_end(vcpu);
>  }
>
> diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
> index f70afecbf3a2..13138b03cc69 100644
> --- a/arch/x86/kvm/mmu/paging_tmpl.h
> +++ b/arch/x86/kvm/mmu/paging_tmpl.h
> @@ -977,7 +977,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
>                         FNAME(update_pte)(vcpu, sp, sptep, &gpte);
>                 }
>
> -               if (!is_shadow_present_pte(*sptep) || !sp->unsync_children)
> +               if (!sp->unsync_children)
>                         break;
>         }
>         write_unlock(&vcpu->kvm->mmu_lock);
> --
> 2.19.1.6.gb485710b
>
Sean Christopherson Sept. 2, 2021, 8:43 p.m. UTC | #2
On Fri, Aug 13, 2021, Lai Jiangshan wrote:
> From: Lai Jiangshan <laijs@linux.alibaba.com>
> 
> So far, the loop bodies already ensure the PTE is present before calling
> __shadow_walk_next():  Some loop bodies simply exit with a !PRESENT
> directly and some other loop bodies, i.e. FNAME(fetch) and __direct_map()
> do not currently terminate their walks with a !PRESENT, but they get away
> with it because they install present non-leaf SPTEs in the loop itself.
> 
> But checking pte present in __shadow_walk_next() is a more prudent way of
> programing and loop bodies will not need to always check it. It allows us
> removing unneded is_shadow_present_pte() in the loop bodies.
           ^^^^^^^
	   unneeded

> 
> Terminating on !is_shadow_present_pte() is 100% the correct behavior, as
> walking past a !PRESENT SPTE would lead to attempting to read a the next
> level SPTE from a garbage iter->shadow_addr.  Even some paths that do _not_
> currently have a !is_shadow_present_pte() in the loop body is Ok since
> they will install present non-leaf SPTEs and the additinal present check
                                                   ^^^^^^^^^
						   additional
> is just an NOP.
> 
> The checking result in __shadow_walk_next() will be propagated to
> shadow_walk_okay() for being used in any for(;;) loop.
> 
> Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
> ---

Nits aside,

Reviewed-by: Sean Christopherson <seanjc@google.com>
diff mbox series

Patch

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index a272ccbddfa1..42eebba6782e 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2231,7 +2231,7 @@  static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
 static void __shadow_walk_next(struct kvm_shadow_walk_iterator *iterator,
 			       u64 spte)
 {
-	if (is_last_spte(spte, iterator->level)) {
+	if (!is_shadow_present_pte(spte) || is_last_spte(spte, iterator->level)) {
 		iterator->level = 0;
 		return;
 	}
@@ -3152,9 +3152,6 @@  static u64 *fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, gpa_t gpa, u64 *spte)
 	for_each_shadow_entry_lockless(vcpu, gpa, iterator, old_spte) {
 		sptep = iterator.sptep;
 		*spte = old_spte;
-
-		if (!is_shadow_present_pte(old_spte))
-			break;
 	}
 
 	return sptep;
@@ -3694,9 +3691,6 @@  static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level
 		spte = mmu_spte_get_lockless(iterator.sptep);
 
 		sptes[leaf] = spte;
-
-		if (!is_shadow_present_pte(spte))
-			break;
 	}
 
 	return leaf;
@@ -3811,11 +3805,8 @@  static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
 	u64 spte;
 
 	walk_shadow_page_lockless_begin(vcpu);
-	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
+	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte)
 		clear_sp_write_flooding_count(iterator.sptep);
-		if (!is_shadow_present_pte(spte))
-			break;
-	}
 	walk_shadow_page_lockless_end(vcpu);
 }
 
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index f70afecbf3a2..13138b03cc69 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -977,7 +977,7 @@  static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
 			FNAME(update_pte)(vcpu, sp, sptep, &gpte);
 		}
 
-		if (!is_shadow_present_pte(*sptep) || !sp->unsync_children)
+		if (!sp->unsync_children)
 			break;
 	}
 	write_unlock(&vcpu->kvm->mmu_lock);