
[V3,2/2] KVM: X86: Move PTE present check from loop body to __shadow_walk_next()

Message ID 20210906122547.263316-2-jiangshanlai@gmail.com (mailing list archive)
State New, archived
Series [V3,1/2] KVM: x86/mmu: Verify shadow walk doesn't terminate early in page faults

Commit Message

Lai Jiangshan Sept. 6, 2021, 12:25 p.m. UTC
From: Lai Jiangshan <laijs@linux.alibaba.com>

So far, the loop bodies already ensure the PTE is present before calling
__shadow_walk_next(): some loop bodies simply exit directly on a !PRESENT
SPTE, while others, i.e. FNAME(fetch) and __direct_map(), do not currently
terminate their walks on a !PRESENT SPTE, but they get away with it because
they install present non-leaf SPTEs in the loop itself.

But checking PTE presence in __shadow_walk_next() is a more prudent way of
programming, as loop bodies will then no longer need to check it themselves.
It allows the now-unneeded is_shadow_present_pte() checks to be removed from
the loop bodies.

Terminating on !is_shadow_present_pte() is 100% the correct behavior, as
walking past a !PRESENT SPTE would lead to attempting to read the next
level SPTE from a garbage iter->shadow_addr.  Even the paths that do _not_
currently have a !is_shadow_present_pte() check in the loop body are OK,
since they install present non-leaf SPTEs and the additional present check
is just a NOP.
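
For illustration, the advance step in __shadow_walk_next() looks roughly
like this (a paraphrase of arch/x86/kvm/mmu/mmu.c, not the verbatim source):

	/*
	 * Advance to the next level: pull the child page-table address
	 * out of the SPTE.  A !PRESENT SPTE carries no valid address,
	 * so without the new check shadow_addr would be garbage here.
	 */
	iterator->shadow_addr = spte & PT64_BASE_ADDR_MASK;
	iterator->level--;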

The result of the check in __shadow_walk_next() is propagated to
shadow_walk_okay(), so it takes effect in every for(;;) walk loop.
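
For context, the walk machinery looks roughly as below (a sketch paraphrased
from arch/x86/kvm/mmu/mmu.c, not the verbatim source): once __shadow_walk_next()
sets iterator->level to 0, the next shadow_walk_okay() returns false and any
for_each_shadow_entry*() loop terminates.

#define for_each_shadow_entry(_vcpu, _addr, _walker)		\
	for (shadow_walk_init(&(_walker), _vcpu, _addr);	\
	     shadow_walk_okay(&(_walker));			\
	     shadow_walk_next(&(_walker)))

static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
{
	/* level 0 (below PG_LEVEL_4K) means the walk is over */
	if (iterator->level < PG_LEVEL_4K)
		return false;

	iterator->index = SHADOW_PT_INDEX(iterator->addr, iterator->level);
	iterator->sptep = ((u64 *)__va(iterator->shadow_addr)) + iterator->index;
	return true;
}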

Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
Changed from V2:
	Fix typo in the changelog reported by Sean
	Add Reviewed-by from Sean
Changed from V1:
	Merge the two patches
	Update changelog
	Remove !is_shadow_present_pte() in FNAME(invlpg)
 arch/x86/kvm/mmu/mmu.c         | 13 ++-----------
 arch/x86/kvm/mmu/paging_tmpl.h |  2 +-
 2 files changed, 3 insertions(+), 12 deletions(-)

Comments

Paolo Bonzini Sept. 24, 2021, 9:56 a.m. UTC | #1
On 06/09/21 14:25, Lai Jiangshan wrote:
> But checking PTE presence in __shadow_walk_next() is a more prudent way of
> programming, as loop bodies will then no longer need to check it themselves.
> It allows the now-unneeded is_shadow_present_pte() checks to be removed from
> the loop bodies.
> 
> Terminating on !is_shadow_present_pte() is 100% the correct behavior, as
> walking past a !PRESENT SPTE would lead to attempting to read the next
> level SPTE from a garbage iter->shadow_addr.  Even the paths that do _not_
> currently have a !is_shadow_present_pte() check in the loop body are OK,
> since they install present non-leaf SPTEs and the additional present check
> is just a NOP.
> 
> The result of the check in __shadow_walk_next() is propagated to
> shadow_walk_okay(), so it takes effect in every for(;;) walk loop.
> 
> Reviewed-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
> ---
> Changed from V2:
> 	Fix typo in the changelog reported by Sean
> 	Add Reviewed-by from Sean
> Changed from V1:
> 	Merge the two patches
> 	Update changelog
> 	Remove !is_shadow_present_pte() in FNAME(invlpg)
>   arch/x86/kvm/mmu/mmu.c         | 13 ++-----------
>   arch/x86/kvm/mmu/paging_tmpl.h |  2 +-
>   2 files changed, 3 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 538be037549d..26f6bd238a77 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -2223,7 +2223,7 @@ static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
>   static void __shadow_walk_next(struct kvm_shadow_walk_iterator *iterator,
>   			       u64 spte)
>   {
> -	if (is_last_spte(spte, iterator->level)) {
> +	if (!is_shadow_present_pte(spte) || is_last_spte(spte, iterator->level)) {
>   		iterator->level = 0;
>   		return;
>   	}
> @@ -3159,9 +3159,6 @@ static u64 *fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, gpa_t gpa, u64 *spte)
>   	for_each_shadow_entry_lockless(vcpu, gpa, iterator, old_spte) {
>   		sptep = iterator.sptep;
>   		*spte = old_spte;
> -
> -		if (!is_shadow_present_pte(old_spte))
> -			break;
>   	}
>   
>   	return sptep;
> @@ -3721,9 +3718,6 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level
>   		spte = mmu_spte_get_lockless(iterator.sptep);
>   
>   		sptes[leaf] = spte;
> -
> -		if (!is_shadow_present_pte(spte))
> -			break;
>   	}
>   
>   	return leaf;
> @@ -3838,11 +3832,8 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
>   	u64 spte;
>   
>   	walk_shadow_page_lockless_begin(vcpu);
> -	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
> +	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte)
>   		clear_sp_write_flooding_count(iterator.sptep);
> -		if (!is_shadow_present_pte(spte))
> -			break;
> -	}
>   	walk_shadow_page_lockless_end(vcpu);
>   }
>   
> diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
> index 4d559d2d4d66..72f358613786 100644
> --- a/arch/x86/kvm/mmu/paging_tmpl.h
> +++ b/arch/x86/kvm/mmu/paging_tmpl.h
> @@ -982,7 +982,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
>   			FNAME(update_pte)(vcpu, sp, sptep, &gpte);
>   		}
>   
> -		if (!is_shadow_present_pte(*sptep) || !sp->unsync_children)
> +		if (!sp->unsync_children)

Queued both, thanks.

Paolo

Patch

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 538be037549d..26f6bd238a77 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2223,7 +2223,7 @@  static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
 static void __shadow_walk_next(struct kvm_shadow_walk_iterator *iterator,
 			       u64 spte)
 {
-	if (is_last_spte(spte, iterator->level)) {
+	if (!is_shadow_present_pte(spte) || is_last_spte(spte, iterator->level)) {
 		iterator->level = 0;
 		return;
 	}
@@ -3159,9 +3159,6 @@  static u64 *fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, gpa_t gpa, u64 *spte)
 	for_each_shadow_entry_lockless(vcpu, gpa, iterator, old_spte) {
 		sptep = iterator.sptep;
 		*spte = old_spte;
-
-		if (!is_shadow_present_pte(old_spte))
-			break;
 	}
 
 	return sptep;
@@ -3721,9 +3718,6 @@  static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level
 		spte = mmu_spte_get_lockless(iterator.sptep);
 
 		sptes[leaf] = spte;
-
-		if (!is_shadow_present_pte(spte))
-			break;
 	}
 
 	return leaf;
@@ -3838,11 +3832,8 @@  static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
 	u64 spte;
 
 	walk_shadow_page_lockless_begin(vcpu);
-	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
+	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte)
 		clear_sp_write_flooding_count(iterator.sptep);
-		if (!is_shadow_present_pte(spte))
-			break;
-	}
 	walk_shadow_page_lockless_end(vcpu);
 }
 
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 4d559d2d4d66..72f358613786 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -982,7 +982,7 @@  static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
 			FNAME(update_pte)(vcpu, sp, sptep, &gpte);
 		}
 
-		if (!is_shadow_present_pte(*sptep) || !sp->unsync_children)
+		if (!sp->unsync_children)
 			break;
 	}
 	write_unlock(&vcpu->kvm->mmu_lock);