diff mbox

[v2,5/5] KVM: x86: improve reexecute_instruction

Message ID 50C8DB11.5050204@linux.vnet.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Xiao Guangrong Dec. 12, 2012, 7:29 p.m. UTC
On 12/12/2012 09:09 AM, Marcelo Tosatti wrote:
> On Mon, Dec 10, 2012 at 05:14:47PM +0800, Xiao Guangrong wrote:
>> The current reexecute_instruction can not well detect the failed instruction
>> emulation. It allows guest to retry all the instructions except it accesses
>> on error pfn
>>
>> For example, some cases are nested-write-protect - if the page we want to
>> write is used as PDE but it chains to itself. Under this case, we should
>> stop the emulation and report the case to userspace
>>
>> Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
>> ---
>>  arch/x86/include/asm/kvm_host.h |    2 +
>>  arch/x86/kvm/paging_tmpl.h      |    2 +
>>  arch/x86/kvm/x86.c              |   82 +++++++++++++++++++++++++++++----------
>>  3 files changed, 65 insertions(+), 21 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index dc87b65..8d01c02 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -575,6 +575,8 @@ struct kvm_arch {
>>  	u64 hv_guest_os_id;
>>  	u64 hv_hypercall;
>>
>> +	/* synchronizing reexecute_instruction and page fault path. */
>> +	u64 page_fault_count;
>>  	#ifdef CONFIG_KVM_MMU_AUDIT
>>  	int audit_point;
>>  	#endif
>> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
>> index 32d77ff..85b8e0e 100644
>> --- a/arch/x86/kvm/paging_tmpl.h
>> +++ b/arch/x86/kvm/paging_tmpl.h
>> @@ -614,6 +614,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
>>  	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
>>  		goto out_unlock;
>>
>> +	vcpu->kvm->arch.page_fault_count++;
>> +
>>  	kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
>>  	kvm_mmu_free_some_pages(vcpu);
>>  	if (!force_pt_level)
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 3796f8c..5677869 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -4756,29 +4756,27 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
>>  static bool reexecute_instruction(struct kvm_vcpu *vcpu, unsigned long cr2)
>>  {
>>  	gpa_t gpa = cr2;
>> +	gfn_t gfn;
>>  	pfn_t pfn;
>> -	unsigned int indirect_shadow_pages;
>> -
>> -	spin_lock(&vcpu->kvm->mmu_lock);
>> -	indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
>> -	spin_unlock(&vcpu->kvm->mmu_lock);
>> -
>> -	if (!indirect_shadow_pages)
>> -		return false;
>> +	u64 page_fault_count;
>> +	int emulate;
>>
>>  	if (!vcpu->arch.mmu.direct_map) {
>> -		gpa = kvm_mmu_gva_to_gpa_read(vcpu, cr2, NULL);
>> +		/*
>> +		 * Write permission should be allowed since only
>> +		 * write access need to be emulated.
>> +		 */
>> +		gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
>> +
>> +		/*
>> +		 * If the mapping is invalid in guest, let cpu retry
>> +		 * it to generate fault.
>> +		 */
>>  		if (gpa == UNMAPPED_GVA)
>> -			return true; /* let cpu generate fault */
>> +			return true;
>>  	}
>>
>> -	/*
>> -	 * if emulation was due to access to shadowed page table
>> -	 * and it failed try to unshadow page and re-enter the
>> -	 * guest to let CPU execute the instruction.
>> -	 */
>> -	if (kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)))
>> -		return true;
>> +	gfn = gpa_to_gfn(gpa);
>>
>>  	/*
>>  	 * Do not retry the unhandleable instruction if it faults on the
>> @@ -4786,13 +4784,55 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, unsigned long cr2)
>>  	 * retry instruction -> write #PF -> emulation fail -> retry
>>  	 * instruction -> ...
>>  	 */
>> -	pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
>> -	if (!is_error_noslot_pfn(pfn)) {
>> -		kvm_release_pfn_clean(pfn);
>> +	pfn = gfn_to_pfn(vcpu->kvm, gfn);
>> +
>> +	/*
>> +	 * If the instruction failed on the error pfn, it can not be fixed,
>> +	 * report the error to userspace.
>> +	 */
>> +	if (is_error_noslot_pfn(pfn))
>> +		return false;
>> +
>> +	kvm_release_pfn_clean(pfn);
>> +
>> +	/* The instructions are well-emulated on direct mmu. */
>> +	if (vcpu->arch.mmu.direct_map) {
>> +		unsigned int indirect_shadow_pages;
>> +
>> +		spin_lock(&vcpu->kvm->mmu_lock);
>> +		indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
>> +		spin_unlock(&vcpu->kvm->mmu_lock);
>> +
>> +		if (indirect_shadow_pages)
>> +			kvm_mmu_unprotect_page(vcpu->kvm, gfn);
>> +
>>  		return true;
>>  	}
>>
>> -	return false;
>> +again:
>> +	page_fault_count = ACCESS_ONCE(vcpu->kvm->arch.page_fault_count);
>> +
>> +	/*
>> +	 * The instruction emulation is caused by fault access on cr2.
>> +	 * After unprotect the target page, we call
>> +	 * vcpu->arch.mmu.page_fault to fix the mapping of cr2. If it
>> +	 * return 1, mmu can not fix the mapping, we should report the
>> +	 * error, otherwise it is good to return to guest and let it
>> +	 * re-execute the instruction again.
>> +	 *
>> +	 * page_fault_count is used to avoid the race on other vcpus,
>> +	 * since after we unprotect the target page, other cpu can enter
>> +	 * page fault path and let the page be write-protected again.
>> +	 */
>> +	kvm_mmu_unprotect_page(vcpu->kvm, gfn);
>> +	emulate = vcpu->arch.mmu.page_fault(vcpu, cr2, PFERR_WRITE_MASK, false);
>> +
>> +	/* The page fault path called above can increase the count. */
>> +	if (page_fault_count + 1 !=
>> +		  ACCESS_ONCE(vcpu->kvm->arch.page_fault_count))
>> +		goto again;
>> +
>> +	return !emulate;
>>  }
>>
>>  static bool retry_instruction(struct x86_emulate_ctxt *ctxt,\
> 
> Same comment as before: the only case where it should not attempt to 
> emulate is when there is a condition which makes it impossible to fix
> (the information is available to detect that condition).
> 
> The earlier suggestion
> 
> 	"How about recording the gfn number for shadow pages that have been
> shadowed in the current pagefault run?"
> 
> Was about that.

I think we can have a try. Is this change good to you, Marcelo?

[eric@localhost kvm]$ git diff
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Marcelo Tosatti Dec. 13, 2012, 11:02 p.m. UTC | #1
On Thu, Dec 13, 2012 at 03:29:21AM +0800, Xiao Guangrong wrote:
> On 12/12/2012 09:09 AM, Marcelo Tosatti wrote:
> > On Mon, Dec 10, 2012 at 05:14:47PM +0800, Xiao Guangrong wrote:
> >> The current reexecute_instruction can not well detect the failed instruction
> >> emulation. It allows guest to retry all the instructions except it accesses
> >> on error pfn
> >>
> >> For example, some cases are nested-write-protect - if the page we want to
> >> write is used as PDE but it chains to itself. Under this case, we should
> >> stop the emulation and report the case to userspace
> >>
> >> Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
> >> ---
> >>  arch/x86/include/asm/kvm_host.h |    2 +
> >>  arch/x86/kvm/paging_tmpl.h      |    2 +
> >>  arch/x86/kvm/x86.c              |   82 +++++++++++++++++++++++++++++----------
> >>  3 files changed, 65 insertions(+), 21 deletions(-)
> >>
> >> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >> index dc87b65..8d01c02 100644
> >> --- a/arch/x86/include/asm/kvm_host.h
> >> +++ b/arch/x86/include/asm/kvm_host.h
> >> @@ -575,6 +575,8 @@ struct kvm_arch {
> >>  	u64 hv_guest_os_id;
> >>  	u64 hv_hypercall;
> >>
> >> +	/* synchronizing reexecute_instruction and page fault path. */
> >> +	u64 page_fault_count;
> >>  	#ifdef CONFIG_KVM_MMU_AUDIT
> >>  	int audit_point;
> >>  	#endif
> >> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> >> index 32d77ff..85b8e0e 100644
> >> --- a/arch/x86/kvm/paging_tmpl.h
> >> +++ b/arch/x86/kvm/paging_tmpl.h
> >> @@ -614,6 +614,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
> >>  	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
> >>  		goto out_unlock;
> >>
> >> +	vcpu->kvm->arch.page_fault_count++;
> >> +
> >>  	kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
> >>  	kvm_mmu_free_some_pages(vcpu);
> >>  	if (!force_pt_level)
> >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >> index 3796f8c..5677869 100644
> >> --- a/arch/x86/kvm/x86.c
> >> +++ b/arch/x86/kvm/x86.c
> >> @@ -4756,29 +4756,27 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
> >>  static bool reexecute_instruction(struct kvm_vcpu *vcpu, unsigned long cr2)
> >>  {
> >>  	gpa_t gpa = cr2;
> >> +	gfn_t gfn;
> >>  	pfn_t pfn;
> >> -	unsigned int indirect_shadow_pages;
> >> -
> >> -	spin_lock(&vcpu->kvm->mmu_lock);
> >> -	indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
> >> -	spin_unlock(&vcpu->kvm->mmu_lock);
> >> -
> >> -	if (!indirect_shadow_pages)
> >> -		return false;
> >> +	u64 page_fault_count;
> >> +	int emulate;
> >>
> >>  	if (!vcpu->arch.mmu.direct_map) {
> >> -		gpa = kvm_mmu_gva_to_gpa_read(vcpu, cr2, NULL);
> >> +		/*
> >> +		 * Write permission should be allowed since only
> >> +		 * write access need to be emulated.
> >> +		 */
> >> +		gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
> >> +
> >> +		/*
> >> +		 * If the mapping is invalid in guest, let cpu retry
> >> +		 * it to generate fault.
> >> +		 */
> >>  		if (gpa == UNMAPPED_GVA)
> >> -			return true; /* let cpu generate fault */
> >> +			return true;
> >>  	}
> >>
> >> -	/*
> >> -	 * if emulation was due to access to shadowed page table
> >> -	 * and it failed try to unshadow page and re-enter the
> >> -	 * guest to let CPU execute the instruction.
> >> -	 */
> >> -	if (kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)))
> >> -		return true;
> >> +	gfn = gpa_to_gfn(gpa);
> >>
> >>  	/*
> >>  	 * Do not retry the unhandleable instruction if it faults on the
> >> @@ -4786,13 +4784,55 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, unsigned long cr2)
> >>  	 * retry instruction -> write #PF -> emulation fail -> retry
> >>  	 * instruction -> ...
> >>  	 */
> >> -	pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
> >> -	if (!is_error_noslot_pfn(pfn)) {
> >> -		kvm_release_pfn_clean(pfn);
> >> +	pfn = gfn_to_pfn(vcpu->kvm, gfn);
> >> +
> >> +	/*
> >> +	 * If the instruction failed on the error pfn, it can not be fixed,
> >> +	 * report the error to userspace.
> >> +	 */
> >> +	if (is_error_noslot_pfn(pfn))
> >> +		return false;
> >> +
> >> +	kvm_release_pfn_clean(pfn);
> >> +
> >> +	/* The instructions are well-emulated on direct mmu. */
> >> +	if (vcpu->arch.mmu.direct_map) {
> >> +		unsigned int indirect_shadow_pages;
> >> +
> >> +		spin_lock(&vcpu->kvm->mmu_lock);
> >> +		indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
> >> +		spin_unlock(&vcpu->kvm->mmu_lock);
> >> +
> >> +		if (indirect_shadow_pages)
> >> +			kvm_mmu_unprotect_page(vcpu->kvm, gfn);
> >> +
> >>  		return true;
> >>  	}
> >>
> >> -	return false;
> >> +again:
> >> +	page_fault_count = ACCESS_ONCE(vcpu->kvm->arch.page_fault_count);
> >> +
> >> +	/*
> >> +	 * The instruction emulation is caused by fault access on cr2.
> >> +	 * After unprotect the target page, we call
> >> +	 * vcpu->arch.mmu.page_fault to fix the mapping of cr2. If it
> >> +	 * return 1, mmu can not fix the mapping, we should report the
> >> +	 * error, otherwise it is good to return to guest and let it
> >> +	 * re-execute the instruction again.
> >> +	 *
> >> +	 * page_fault_count is used to avoid the race on other vcpus,
> >> +	 * since after we unprotect the target page, other cpu can enter
> >> +	 * page fault path and let the page be write-protected again.
> >> +	 */
> >> +	kvm_mmu_unprotect_page(vcpu->kvm, gfn);
> >> +	emulate = vcpu->arch.mmu.page_fault(vcpu, cr2, PFERR_WRITE_MASK, false);
> >> +
> >> +	/* The page fault path called above can increase the count. */
> >> +	if (page_fault_count + 1 !=
> >> +		  ACCESS_ONCE(vcpu->kvm->arch.page_fault_count))
> >> +		goto again;
> >> +
> >> +	return !emulate;
> >>  }
> >>
> >>  static bool retry_instruction(struct x86_emulate_ctxt *ctxt,\
> > 
> > Same comment as before: the only case where it should not attempt to 
> > emulate is when there is a condition which makes it impossible to fix
> > (the information is available to detect that condition).
> > 
> > The earlier suggestion
> > 
> > 	"How about recording the gfn number for shadow pages that have been
> > shadowed in the current pagefault run?"
> > 
> > Was about that.
> 
> I think we can have a try. Is this change good to you, Marcelo?
> 
> [eric@localhost kvm]$ git diff
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 01d7c2a..e3d0001 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -4359,24 +4359,34 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
>         return nr_mmu_pages;
>  }
> 
> -int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
> +void kvm_mmu_get_sp_hierarchy(struct kvm_vcpu *vcpu, u64 addr,
> +                             struct kvm_mmu_sp_hierarchy *hierarchy)
>  {
>         struct kvm_shadow_walk_iterator iterator;
>         u64 spte;
> -       int nr_sptes = 0;
> +
> +       hierarchy->max_level = hierarchy->nr_levels = 0;
> 
>         walk_shadow_page_lockless_begin(vcpu);
>         for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
> -               sptes[iterator.level-1] = spte;
> -               nr_sptes++;
> +               struct kvm_mmu_page *sp =  page_header(__pa(iterator.sptep));
> +
> +               if (hierarchy->indirect_only && sp->role.direct)
> +                       break;
> +
> +               if (!hierarchy->max_level)
> +                       hierarchy->max_level = iterator.level;
> +
> +               hierarchy->shadow_gfns[iterator.level-1] = sp->gfn;
> +               hierarchy->sptes[iterator.level-1] = spte;
> +               hierarchy->nr_levels++;
> +
>                 if (!is_shadow_present_pte(spte))
>                         break;
>         }
>         walk_shadow_page_lockless_end(vcpu);
> -
> -       return nr_sptes;
>  }

Record gfns while shadowing in the vcpu struct, in a struct, along with cr2.
Then validate 
That way its guaranteed its not some other vcpu.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Xiao Guangrong Dec. 14, 2012, 3:40 a.m. UTC | #2
On 12/14/2012 07:02 AM, Marcelo Tosatti wrote:

>>> Same comment as before: the only case where it should not attempt to 
>>> emulate is when there is a condition which makes it impossible to fix
>>> (the information is available to detect that condition).
>>>
>>> The earlier suggestion
>>>
>>> 	"How about recording the gfn number for shadow pages that have been
>>> shadowed in the current pagefault run?"
>>>
>>> Was about that.
>>
>> I think we can have a try. Is this change good to you, Marcelo?
>>
>> [eric@localhost kvm]$ git diff
>> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>> index 01d7c2a..e3d0001 100644
>> --- a/arch/x86/kvm/mmu.c
>> +++ b/arch/x86/kvm/mmu.c
>> @@ -4359,24 +4359,34 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
>>         return nr_mmu_pages;
>>  }
>>
>> -int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
>> +void kvm_mmu_get_sp_hierarchy(struct kvm_vcpu *vcpu, u64 addr,
>> +                             struct kvm_mmu_sp_hierarchy *hierarchy)
>>  {
>>         struct kvm_shadow_walk_iterator iterator;
>>         u64 spte;
>> -       int nr_sptes = 0;
>> +
>> +       hierarchy->max_level = hierarchy->nr_levels = 0;
>>
>>         walk_shadow_page_lockless_begin(vcpu);
>>         for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
>> -               sptes[iterator.level-1] = spte;
>> -               nr_sptes++;
>> +               struct kvm_mmu_page *sp =  page_header(__pa(iterator.sptep));
>> +
>> +               if (hierarchy->indirect_only && sp->role.direct)
>> +                       break;
>> +
>> +               if (!hierarchy->max_level)
>> +                       hierarchy->max_level = iterator.level;
>> +
>> +               hierarchy->shadow_gfns[iterator.level-1] = sp->gfn;
>> +               hierarchy->sptes[iterator.level-1] = spte;
>> +               hierarchy->nr_levels++;
>> +
>>                 if (!is_shadow_present_pte(spte))
>>                         break;
>>         }
>>         walk_shadow_page_lockless_end(vcpu);
>> -
>> -       return nr_sptes;
>>  }
> 
> Record gfns while shadowing in the vcpu struct, in a struct, along with cr2.
> Then validate 
> That way its guaranteed its not some other vcpu.

Okay, i will try this way. :)


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 01d7c2a..e3d0001 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4359,24 +4359,34 @@  unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
        return nr_mmu_pages;
 }

-int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
+void kvm_mmu_get_sp_hierarchy(struct kvm_vcpu *vcpu, u64 addr,
+                             struct kvm_mmu_sp_hierarchy *hierarchy)
 {
        struct kvm_shadow_walk_iterator iterator;
        u64 spte;
-       int nr_sptes = 0;
+
+       hierarchy->max_level = hierarchy->nr_levels = 0;

        walk_shadow_page_lockless_begin(vcpu);
        for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
-               sptes[iterator.level-1] = spte;
-               nr_sptes++;
+               struct kvm_mmu_page *sp =  page_header(__pa(iterator.sptep));
+
+               if (hierarchy->indirect_only && sp->role.direct)
+                       break;
+
+               if (!hierarchy->max_level)
+                       hierarchy->max_level = iterator.level;
+
+               hierarchy->shadow_gfns[iterator.level-1] = sp->gfn;
+               hierarchy->sptes[iterator.level-1] = spte;
+               hierarchy->nr_levels++;
+
                if (!is_shadow_present_pte(spte))
                        break;
        }
        walk_shadow_page_lockless_end(vcpu);
-
-       return nr_sptes;
 }
-EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy);
+EXPORT_SYMBOL_GPL(kvm_mmu_get_sp_hierarchy);

 void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 6987108..d7ba397 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -50,7 +50,17 @@ 
 #define PFERR_RSVD_MASK (1U << 3)
 #define PFERR_FETCH_MASK (1U << 4)

-int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
+struct kvm_mmu_sp_hierarchy {
+       int max_level;
+       int nr_levels;
+       bool indirect_only;
+
+       u64 sptes[PT64_ROOT_LEVEL];
+       gfn_t shadow_gfns[PT64_ROOT_LEVEL];
+};
+
+void kvm_mmu_get_sp_hierarchy(struct kvm_vcpu *vcpu, u64 addr,
+                             struct kvm_mmu_sp_hierarchy *hierarchy);
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
 int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
 int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d14bb12..9c60f8c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4984,8 +4984,8 @@  static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte,

 static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 {
-       u64 sptes[4];
-       int nr_sptes, i, ret;
+       struct kvm_mmu_sp_hierarchy hierarchy;
+       int i, ret;
        gpa_t gpa;

        gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
@@ -5001,10 +5001,10 @@  static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
        printk(KERN_ERR "EPT: Misconfiguration.\n");
        printk(KERN_ERR "EPT: GPA: 0x%llx\n", gpa);

-       nr_sptes = kvm_mmu_get_spte_hierarchy(vcpu, gpa, sptes);
-
-       for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i)
-               ept_misconfig_inspect_spte(vcpu, sptes[i-1], i);
+       kvm_mmu_get_sp_hierarchy(vcpu, gpa, &hierarchy);
+
+       for (i = hierarchy.max_level; hierarchy.nr_levels; hierarchy.nr_levels--, i--)
+                       ept_misconfig_inspect_spte(vcpu, hierarchy.sptes[i-1], i);

        vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
        vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1056106..cee6242 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4755,11 +4755,11 @@  static int handle_emulation_failure(struct kvm_vcpu *vcpu)

 static bool reexecute_instruction(struct kvm_vcpu *vcpu, unsigned long cr2)
 {
+       struct kvm_mmu_sp_hierarchy hierarchy;
        gpa_t gpa = cr2;
        gfn_t gfn;
        pfn_t pfn;
-       u64 page_fault_count;
-       int emulate;
+       int level;

        if (!vcpu->arch.mmu.direct_map) {
                /*
@@ -4809,30 +4809,14 @@  static bool reexecute_instruction(struct kvm_vcpu *vcpu, unsigned long cr2)
                return true;
        }

-again:
-       page_fault_count = ACCESS_ONCE(vcpu->kvm->arch.page_fault_count);
+       hierarchy.indirect_only = true;
+       kvm_mmu_get_sp_hierarchy(vcpu, cr2, &hierarchy);

-       /*
-        * The instruction emulation is caused by fault access on cr2.
-        * After unprotect the target page, we call
-        * vcpu->arch.mmu.page_fault to fix the mapping of cr2. If it
-        * return 1, mmu can not fix the mapping, we should report the
-        * error, otherwise it is good to return to guest and let it
-        * re-execute the instruction again.
-        *
-        * page_fault_count is used to avoid the race on other vcpus,
-        * since after we unprotect the target page, other cpu can enter
-        * page fault path and let the page be write-protected again.
-        */
-       kvm_mmu_unprotect_page(vcpu->kvm, gfn);
-       emulate = vcpu->arch.mmu.page_fault(vcpu, cr2, PFERR_WRITE_MASK, false);
-
-       /* The page fault path called above can increase the count. */
-       if (page_fault_count + 1 !=
-                 ACCESS_ONCE(vcpu->kvm->arch.page_fault_count))
-               goto again;
+       for (level = hierarchy.max_level; hierarchy.nr_levels > 0; level--, hierarchy.nr_levels--)
+               if (hierarchy.shadow_gfns[level - 1] == gfn)
+                       return false;

-       return !emulate;
+       return true;
 }