[v3,3/6] KVM: Move mmu reload out of line

Message ID 1341853545-3023-4-git-send-email-avi@redhat.com (mailing list archive)
State New, archived

Commit Message

Avi Kivity July 9, 2012, 5:05 p.m. UTC
Currently we check that the mmu root exists before every entry.  Use the
existing KVM_REQ_MMU_RELOAD mechanism instead, by making it actually reload
the mmu and by raising the request from the mmu initialization code.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c |  4 +++-
 arch/x86/kvm/svm.c |  1 +
 arch/x86/kvm/x86.c | 13 +++++++------
 3 files changed, 11 insertions(+), 7 deletions(-)
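
For context, the check this patch moves out of line lives in kvm_mmu_reload(),
which every guest entry used to call unconditionally.  A sketch of that helper,
roughly as it read in arch/x86/kvm/mmu.h at the time (reproduced here for
illustration, not part of the patch):

    static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
    {
    	/* Reloading is a no-op unless the shadow root has been freed. */
    	if (likely(vcpu->arch.mmu.root_hpa != INVALID_PAGE))
    		return 0;

    	return kvm_mmu_load(vcpu);
    }

After the patch, this check runs only when KVM_REQ_MMU_RELOAD is pending,
instead of on every entry.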

Comments

Xiao Guangrong July 10, 2012, 3:57 a.m. UTC | #1
On 07/10/2012 01:05 AM, Avi Kivity wrote:
> Currently we check that the mmu root exists before every entry.  Use the
> existing KVM_REQ_MMU_RELOAD mechanism instead, by making it actually reload
> the mmu and by raising the request from the mmu initialization code.
> 
> Signed-off-by: Avi Kivity <avi@redhat.com>
> ---
>  arch/x86/kvm/mmu.c |  4 +++-
>  arch/x86/kvm/svm.c |  1 +
>  arch/x86/kvm/x86.c | 13 +++++++------
>  3 files changed, 11 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 569cd66..136d757 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -3180,7 +3180,8 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
>  static void paging_new_cr3(struct kvm_vcpu *vcpu)
>  {
>  	pgprintk("%s: cr3 %lx\n", __func__, kvm_read_cr3(vcpu));
> -	mmu_free_roots(vcpu);
> +	kvm_mmu_unload(vcpu);
> +	kvm_mmu_load(vcpu);
>  }
> 
>  static unsigned long get_cr3(struct kvm_vcpu *vcpu)
> @@ -3469,6 +3470,7 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
> 
>  static int init_kvm_mmu(struct kvm_vcpu *vcpu)
>  {
> +	kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
>  	if (mmu_is_nested(vcpu))
>  		return init_kvm_nested_mmu(vcpu);
>  	else if (tdp_enabled)
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 7a41878..d77ad8c 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -2523,6 +2523,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
> 
>  	if (nested_vmcb->control.nested_ctl) {
>  		kvm_mmu_unload(&svm->vcpu);
> +		kvm_make_request(KVM_REQ_MMU_RELOAD, &svm->vcpu);
>  		svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
>  		nested_svm_init_mmu_context(&svm->vcpu);
>  	}
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 959e5a9..162231f 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -5226,8 +5226,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  		kvm_make_request(KVM_REQ_EVENT, vcpu);
> 
>  	if (vcpu->requests) {
> -		if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
> +		if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
>  			kvm_mmu_unload(vcpu);
> +			r = kvm_mmu_reload(vcpu);
> +			if (unlikely(r)) {
> +				kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
> +				goto out;
> +			}

Now that the mmu reload happens before event injection, can the bug below be
triggered again?

commit d8368af8b46b904def42a0f341d2f4f29001fa77
Author: Avi Kivity <avi@redhat.com>
Date:   Mon May 14 18:07:56 2012 +0300

    KVM: Fix mmu_reload() clash with nested vmx event injection

    Currently the inject_pending_event() call during guest entry happens after
    kvm_mmu_reload().  This is for historical reasons - we used to
    inject_pending_event() in atomic context, while kvm_mmu_reload() needs task
    context.

    A problem is that nested vmx can cause the mmu context to be reset, if event
    injection is intercepted and causes a #VMEXIT instead (the #VMEXIT resets
    CR0/CR3/CR4).  If this happens, we end up with invalid root_hpa, and since
    kvm_mmu_reload() has already run, no one will fix it and we end up entering
    the guest this way.

    Fix by reordering event injection to be before kvm_mmu_reload().  Use
    ->cancel_injection() to undo if kvm_mmu_reload() fails.

    https://bugzilla.kernel.org/show_bug.cgi?id=42980

    Reported-by: Luke-Jr <luke-jr+linuxbugs@utopios.org>
    Signed-off-by: Avi Kivity <avi@redhat.com>
    Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
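
To make the ordering hazard described in that commit concrete, here is a
condensed, illustrative timeline of the pre-d8368af8 entry path (function
names from that era's vcpu_enter_guest(); elisions and comments added here):

    r = kvm_mmu_reload(vcpu);	/* root_hpa is valid here... */
    if (unlikely(r))
    	goto out;
    ...
    inject_pending_event(vcpu);	/* nested VMX: the injection may be
    				 * intercepted and turned into an emulated
    				 * #VMEXIT, which resets CR0/CR3/CR4 and the
    				 * mmu context, invalidating root_hpa */
    ...
    /* the entry proceeds with an invalid root_hpa; nothing re-runs
     * kvm_mmu_reload() at this point */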

Avi Kivity July 10, 2012, 7:48 a.m. UTC | #2
On 07/10/2012 06:57 AM, Xiao Guangrong wrote:
> On 07/10/2012 01:05 AM, Avi Kivity wrote:
> > Currently we check that the mmu root exists before every entry.  Use the
> > existing KVM_REQ_MMU_RELOAD mechanism instead, by making it actually reload
> > the mmu and by raising the request from the mmu initialization code.
> > 
> > Signed-off-by: Avi Kivity <avi@redhat.com>
> > ---
> >  arch/x86/kvm/mmu.c |  4 +++-
> >  arch/x86/kvm/svm.c |  1 +
> >  arch/x86/kvm/x86.c | 13 +++++++------
> >  3 files changed, 11 insertions(+), 7 deletions(-)
> > 
> > diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> > index 569cd66..136d757 100644
> > --- a/arch/x86/kvm/mmu.c
> > +++ b/arch/x86/kvm/mmu.c
> > @@ -3180,7 +3180,8 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
> >  static void paging_new_cr3(struct kvm_vcpu *vcpu)
> >  {
> >  	pgprintk("%s: cr3 %lx\n", __func__, kvm_read_cr3(vcpu));
> > -	mmu_free_roots(vcpu);
> > +	kvm_mmu_unload(vcpu);
> > +	kvm_mmu_load(vcpu);
> >  }
> > 
> >  static unsigned long get_cr3(struct kvm_vcpu *vcpu)
> > @@ -3469,6 +3470,7 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
> > 
> >  static int init_kvm_mmu(struct kvm_vcpu *vcpu)
> >  {
> > +	kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
> >  	if (mmu_is_nested(vcpu))
> >  		return init_kvm_nested_mmu(vcpu);
> >  	else if (tdp_enabled)
> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > index 7a41878..d77ad8c 100644
> > --- a/arch/x86/kvm/svm.c
> > +++ b/arch/x86/kvm/svm.c
> > @@ -2523,6 +2523,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
> > 
> >  	if (nested_vmcb->control.nested_ctl) {
> >  		kvm_mmu_unload(&svm->vcpu);
> > +		kvm_make_request(KVM_REQ_MMU_RELOAD, &svm->vcpu);
> >  		svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
> >  		nested_svm_init_mmu_context(&svm->vcpu);
> >  	}
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 959e5a9..162231f 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -5226,8 +5226,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> >  		kvm_make_request(KVM_REQ_EVENT, vcpu);
> > 
> >  	if (vcpu->requests) {
> > -		if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
> > +		if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
> >  			kvm_mmu_unload(vcpu);
> > +			r = kvm_mmu_reload(vcpu);
> > +			if (unlikely(r)) {
> > +				kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
> > +				goto out;
> > +			}
>
> Now that the mmu reload happens before event injection, can the bug below be
> triggered again?
>
> commit d8368af8b46b904def42a0f341d2f4f29001fa77
> Author: Avi Kivity <avi@redhat.com>
> Date:   Mon May 14 18:07:56 2012 +0300
>
>     KVM: Fix mmu_reload() clash with nested vmx event injection
>
>     Currently the inject_pending_event() call during guest entry happens after
>     kvm_mmu_reload().  This is for historical reasons - we used to
>     inject_pending_event() in atomic context, while kvm_mmu_reload() needs task
>     context.
>
>     A problem is that nested vmx can cause the mmu context to be reset, if event
>     injection is intercepted and causes a #VMEXIT instead (the #VMEXIT resets
>     CR0/CR3/CR4).  If this happens, we end up with invalid root_hpa, and since
>     kvm_mmu_reload() has already run, no one will fix it and we end up entering
>     the guest this way.
>
>     Fix by reordering event injection to be before kvm_mmu_reload().  Use
>     ->cancel_injection() to undo if kvm_mmu_reload() fails.
>

I hadn't considered it, but I think the patch is safe.  If
init_kvm_mmu() is called as part of event injection, it will set
KVM_REQ_MMU_RELOAD and we will process the requests again before
entering the guest.
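
For reference, that safety argument rests on the final vcpu->requests re-check
just before entry.  A condensed sketch of the post-patch flow, simplified from
the 3.5-era vcpu_enter_guest() and __vcpu_run() (elisions and comments added
here):

    if (vcpu->requests) {
    	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
    		kvm_mmu_unload(vcpu);
    		r = kvm_mmu_reload(vcpu);
    		...
    	}
    	...
    }

    if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
    	inject_pending_event(vcpu);
    	/* an emulated #VMEXIT here may call init_kvm_mmu(), which
    	 * now re-raises KVM_REQ_MMU_RELOAD */
    	...
    }
    ...
    /* last check before entering the guest: any pending request aborts
     * the entry; __vcpu_run() then calls vcpu_enter_guest() again, and
     * the reload request is processed before the next entry attempt */
    if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
        || need_resched() || signal_pending(current)) {
    	...
    	r = 1;
    	goto cancel_injection;
    }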

Patch

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 569cd66..136d757 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3180,7 +3180,8 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
 static void paging_new_cr3(struct kvm_vcpu *vcpu)
 {
 	pgprintk("%s: cr3 %lx\n", __func__, kvm_read_cr3(vcpu));
-	mmu_free_roots(vcpu);
+	kvm_mmu_unload(vcpu);
+	kvm_mmu_load(vcpu);
 }
 
 static unsigned long get_cr3(struct kvm_vcpu *vcpu)
@@ -3469,6 +3470,7 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 
 static int init_kvm_mmu(struct kvm_vcpu *vcpu)
 {
+	kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
 	if (mmu_is_nested(vcpu))
 		return init_kvm_nested_mmu(vcpu);
 	else if (tdp_enabled)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7a41878..d77ad8c 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2523,6 +2523,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
 
 	if (nested_vmcb->control.nested_ctl) {
 		kvm_mmu_unload(&svm->vcpu);
+		kvm_make_request(KVM_REQ_MMU_RELOAD, &svm->vcpu);
 		svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
 		nested_svm_init_mmu_context(&svm->vcpu);
 	}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 959e5a9..162231f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5226,8 +5226,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
 
 	if (vcpu->requests) {
-		if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
+		if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
 			kvm_mmu_unload(vcpu);
+			r = kvm_mmu_reload(vcpu);
+			if (unlikely(r)) {
+				kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+				goto out;
+			}
+		}
 		if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
 			__kvm_migrate_timers(vcpu);
 		if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
@@ -5285,11 +5291,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		}
 	}
 
-	r = kvm_mmu_reload(vcpu);
-	if (unlikely(r)) {
-		goto cancel_injection;
-	}
-
 	preempt_disable();
 
 	kvm_x86_ops->prepare_guest_switch(vcpu);