diff mbox

[4/4] KVM: nVMX: Do not generate #DF if #PF happens during exception delivery into L2

Message ID 1380102696-25267-5-git-send-email-gleb@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Gleb Natapov Sept. 25, 2013, 9:51 a.m. UTC
If a #PF happens during delivery of an exception into L2 and L1 also does
not have the page mapped in its shadow page table, then L0 needs to
generate a vmexit to L1 with the original event in IDT_VECTORING_INFO, but
the current code combines both exceptions and generates a #DF instead. Fix
that by providing an nVMX-specific function to handle page faults during
the page table walk that handles this case correctly.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/vmx.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

Comments

Paolo Bonzini Sept. 25, 2013, 11:24 a.m. UTC | #1
Il 25/09/2013 11:51, Gleb Natapov ha scritto:
> If #PF happens during delivery of an exception into L2 and L1 also do
> not have the page mapped in its shadow page table then L0 needs to
> generate vmexit to L2 with original event in IDT_VECTORING_INFO, but
> current code combines both exception and generates #DF instead. Fix that
> by providing nVMX specific function to handle page faults during page
> table walk that handles this case correctly.
> 
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>  arch/x86/kvm/vmx.c | 20 ++++++++++++++++++++
>  1 file changed, 20 insertions(+)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 5bfa09d..07c36fd 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -7520,6 +7520,20 @@ static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
>  	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
>  }
>  
> +static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
> +		struct x86_exception *fault)
> +{
> +	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> +
> +	WARN_ON(!is_guest_mode(vcpu));
> +
> +	/* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
> +	if (vmcs12->exception_bitmap & (1u << PF_VECTOR))
> +		nested_vmx_vmexit(vcpu);
> +	else
> +		kvm_inject_page_fault(vcpu, fault);
> +}
> +
>  /*
>   * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
>   * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
> @@ -7773,6 +7787,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	kvm_set_cr3(vcpu, vmcs12->guest_cr3);
>  	kvm_mmu_reset_context(vcpu);
>  
> +	if (!enable_ept)
> +		vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
> +
>  	/*
>  	 * L1 may access the L2's PDPTR, so save them to construct vmcs12
>  	 */
> @@ -8232,6 +8249,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
>  	kvm_set_cr3(vcpu, vmcs12->host_cr3);
>  	kvm_mmu_reset_context(vcpu);
>  
> +	if (!enable_ept)
> +		vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;

This is strictly speaking not needed, because kvm_mmu_reset_context
takes care of it.

But I wonder if it is cleaner to not touch the struct here, and instead
add a new member to kvm_x86_ops---used directly in init_kvm_softmmu like
kvm_x86_ops->set_cr3.  The new member can do something like

	if (is_guest_mode(vcpu)) {
		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
		if (vmcs12->exception_bitmap & (1u << PF_VECTOR)) {
			nested_vmx_vmexit(vcpu);
			return;
		}
	}

	kvm_inject_page_fault(vcpu, fault);

Marcelo, Jan, what do you think?

Alex (or Gleb :)), do you have any idea why SVM does not need this?

Paolo

>  	if (enable_vpid) {
>  		/*
>  		 * Trivially support vpid by letting L2s share their parent
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov Sept. 25, 2013, 11:51 a.m. UTC | #2
On Wed, Sep 25, 2013 at 01:24:49PM +0200, Paolo Bonzini wrote:
> Il 25/09/2013 11:51, Gleb Natapov ha scritto:
> > If #PF happens during delivery of an exception into L2 and L1 also do
> > not have the page mapped in its shadow page table then L0 needs to
> > generate vmexit to L2 with original event in IDT_VECTORING_INFO, but
> > current code combines both exception and generates #DF instead. Fix that
> > by providing nVMX specific function to handle page faults during page
> > table walk that handles this case correctly.
> > 
> > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > ---
> >  arch/x86/kvm/vmx.c | 20 ++++++++++++++++++++
> >  1 file changed, 20 insertions(+)
> > 
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index 5bfa09d..07c36fd 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -7520,6 +7520,20 @@ static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
> >  	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
> >  }
> >  
> > +static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
> > +		struct x86_exception *fault)
> > +{
> > +	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> > +
> > +	WARN_ON(!is_guest_mode(vcpu));
> > +
> > +	/* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
> > +	if (vmcs12->exception_bitmap & (1u << PF_VECTOR))
> > +		nested_vmx_vmexit(vcpu);
> > +	else
> > +		kvm_inject_page_fault(vcpu, fault);
> > +}
> > +
> >  /*
> >   * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
> >   * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
> > @@ -7773,6 +7787,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> >  	kvm_set_cr3(vcpu, vmcs12->guest_cr3);
> >  	kvm_mmu_reset_context(vcpu);
> >  
> > +	if (!enable_ept)
> > +		vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
> > +
> >  	/*
> >  	 * L1 may access the L2's PDPTR, so save them to construct vmcs12
> >  	 */
> > @@ -8232,6 +8249,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
> >  	kvm_set_cr3(vcpu, vmcs12->host_cr3);
> >  	kvm_mmu_reset_context(vcpu);
> >  
> > +	if (!enable_ept)
> > +		vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
> 
> This is strictly speaking not needed, because kvm_mmu_reset_context
> takes care of it.
> 
Yeah, but it is better to make it explicit; it does not hurt and makes it
clearer what is going on. Or at least add a comment above
kvm_mmu_reset_context() about this side effect.

> But I wonder if it is cleaner to not touch the struct here, and instead
> add a new member to kvm_x86_ops---used directly in init_kvm_softmmu like
> kvm_x86_ops->set_cr3.  The new member can do something like
> 
> 	if (is_guest_mode(vcpu)) {
> 		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> 		if (vmcs12->exception_bitmap & (1u << PF_VECTOR)) {
> 			nested_vmx_vmexit(vcpu);
> 			return;
> 		}
> 	}
> 
> 	kvm_inject_page_fault(vcpu, fault);
I do not quite understand what you mean here. inject_page_fault() is
called from deep within the page table walking code. How would the code
avoid calling the new member in some circumstances?

> 
> Marcelo, Jan, what do you think?
> 
> Alex (or Gleb :)), do you have any idea why SVM does not need this?
> 
It's probably needed there too. At least I fail to see why it would not
be. Without that patch the guest actually boots (most of the time),
but sometimes random processes crash with a double fault exception.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5bfa09d..07c36fd 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7520,6 +7520,20 @@  static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
 	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
 }
 
+static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
+		struct x86_exception *fault)
+{
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+	WARN_ON(!is_guest_mode(vcpu));
+
+	/* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
+	if (vmcs12->exception_bitmap & (1u << PF_VECTOR))
+		nested_vmx_vmexit(vcpu);
+	else
+		kvm_inject_page_fault(vcpu, fault);
+}
+
 /*
  * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
  * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
@@ -7773,6 +7787,9 @@  static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	kvm_set_cr3(vcpu, vmcs12->guest_cr3);
 	kvm_mmu_reset_context(vcpu);
 
+	if (!enable_ept)
+		vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
+
 	/*
 	 * L1 may access the L2's PDPTR, so save them to construct vmcs12
 	 */
@@ -8232,6 +8249,9 @@  static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 	kvm_set_cr3(vcpu, vmcs12->host_cr3);
 	kvm_mmu_reset_context(vcpu);
 
+	if (!enable_ept)
+		vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
+
 	if (enable_vpid) {
 		/*
 		 * Trivially support vpid by letting L2s share their parent