diff mbox

[master/3.5.y] KVM: VMX: Fix ds/es corruption on i386 with preemption

Message ID 1343828883-1968-1-git-send-email-avi@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Avi Kivity Aug. 1, 2012, 1:48 p.m. UTC
Commit b2da15ac26a0c ("KVM: VMX: Optimize %ds, %es reload") broke i386
in the following scenario:

  vcpu_load
  ...
  vmx_save_host_state
  vmx_vcpu_run
  (ds.rpl, es.rpl cleared by hardware)

  interrupt
    push ds, es  # pushes bad ds, es
    schedule
      vmx_vcpu_put
        vmx_load_host_state
          reload ds, es (with __USER_DS)
    pop ds, es  # of other thread's stack
    iret
  # other thread runs
  interrupt
    push ds, es
    schedule  # back in vcpu thread
    pop ds, es  # now with rpl=0
    iret
  ...
  vcpu_put
  resume_userspace
  iret  # clears ds, es due to mismatched rpl

(instead of resume_userspace, we might return with SYSEXIT and then
take an exception; when the exception IRETs we end up with cleared
ds, es)

Fix by avoiding the optimization on i386 and reloading ds, es on the
lightweight exit path.

Reported-by: Chris Clayron <chris2553@googlemail.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

Comments

Chris Clayton Aug. 1, 2012, 2:36 p.m. UTC | #1
On 08/01/12 14:48, Avi Kivity wrote:
> Commit b2da15ac26a0c ("KVM: VMX: Optimize %ds, %es reload") broke i386
> in the following scenario:
>
>    vcpu_load
>    ...
>    vmx_save_host_state
>    vmx_vcpu_run
>    (ds.rpl, es.rpl cleared by hardware)
>
>    interrupt
>      push ds, es  # pushes bad ds, es
>      schedule
>        vmx_vcpu_put
>          vmx_load_host_state
>            reload ds, es (with __USER_DS)
>      pop ds, es  # of other thread's stack
>      iret
>    # other thread runs
>    interrupt
>      push ds, es
>      schedule  # back in vcpu thread
>      pop ds, es  # now with rpl=0
>      iret
>    ...
>    vcpu_put
>    resume_userspace
>    iret  # clears ds, es due to mismatched rpl
>
> (instead of resume_userspace, we might return with SYSEXIT and then
> take an exception; when the exception IRETs we end up with cleared
> ds, es)
>
> Fix by avoiding the optimization on i386 and reloading ds, es on the
> lightweight exit path.
>
> Reported-by: Chris Clayron <chris2553@googlemail.com>
> Signed-off-by: Avi Kivity <avi@redhat.com>

I've just had 15 successful runs of qemu-kvm-1.1.1 with kernel 3.5.0 
plus this patch, so:

Tested-by: Chris Clayton <chris2553@googlemail.com>

I assume this will be forwarded to stable once it has been applied to 
mainline.

> ---
>   arch/x86/kvm/vmx.c | 20 +++++++++++++-------
>   1 file changed, 13 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index c39b607..c00f03d 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -1488,13 +1488,6 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
>   		loadsegment(ds, vmx->host_state.ds_sel);
>   		loadsegment(es, vmx->host_state.es_sel);
>   	}
> -#else
> -	/*
> -	 * The sysexit path does not restore ds/es, so we must set them to
> -	 * a reasonable value ourselves.
> -	 */
> -	loadsegment(ds, __USER_DS);
> -	loadsegment(es, __USER_DS);
>   #endif
>   	reload_tss();
>   #ifdef CONFIG_X86_64
> @@ -6370,6 +6363,19 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>   #endif
>   	      );
>
> +#ifndef CONFIG_X86_64
> +	/*
> +	 * The sysexit path does not restore ds/es, so we must set them to
> +	 * a reasonable value ourselves.
> +	 *
> +	 * We can't defer this to vmx_load_host_state() since that function
> +	 * may be executed in interrupt context, which saves and restore segments
> +	 * around it, nullifying its effect.
> +	 */
> +	loadsegment(ds, __USER_DS);
> +	loadsegment(es, __USER_DS);
> +#endif
> +
>   	vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
>   				  | (1 << VCPU_EXREG_RFLAGS)
>   				  | (1 << VCPU_EXREG_CPL)
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Chris Clayton Aug. 7, 2012, 7:09 a.m. UTC | #2
HI,

I've just been looking through the patches Greg KH has stored for 
3.5-stable and this doesn't seem to be amongst them.

I can't see any way in which it contravenes the stable rules and, given 
there's a distinct possibility of a mix of i386, kvm-1.1.x and a 3.5 
kernel resulting in a crash, it seems to me to be a good candidate. The 
commit in mainline is aa67f6096c19bcdb1951ef88be3cf3d2118809dc.

Thanks.


-------- Original Message --------
Subject: Re: [PATCH master/3.5.y] KVM: VMX: Fix ds/es corruption on i386 
with preemption
Date: Wed, 01 Aug 2012 15:36:30 +0100
From: Chris Clayton <chris2553@googlemail.com>
To: Avi Kivity <avi@redhat.com>
CC: Marcelo Tosatti <mtosatti@redhat.com>, kvm@vger.kernel.org

On 08/01/12 14:48, Avi Kivity wrote:
> Commit b2da15ac26a0c ("KVM: VMX: Optimize %ds, %es reload") broke i386
> in the following scenario:
>
>    vcpu_load
>    ...
>    vmx_save_host_state
>    vmx_vcpu_run
>    (ds.rpl, es.rpl cleared by hardware)
>
>    interrupt
>      push ds, es  # pushes bad ds, es
>      schedule
>        vmx_vcpu_put
>          vmx_load_host_state
>            reload ds, es (with __USER_DS)
>      pop ds, es  # of other thread's stack
>      iret
>    # other thread runs
>    interrupt
>      push ds, es
>      schedule  # back in vcpu thread
>      pop ds, es  # now with rpl=0
>      iret
>    ...
>    vcpu_put
>    resume_userspace
>    iret  # clears ds, es due to mismatched rpl
>
> (instead of resume_userspace, we might return with SYSEXIT and then
> take an exception; when the exception IRETs we end up with cleared
> ds, es)
>
> Fix by avoiding the optimization on i386 and reloading ds, es on the
> lightweight exit path.
>
> Reported-by: Chris Clayron <chris2553@googlemail.com>
> Signed-off-by: Avi Kivity <avi@redhat.com>

I've just had 15 successful runs of qemu-kvm-1.1.1 with kernel 3.5.0
plus this patch, so:

Tested-by: Chris Clayton <chris2553@googlemail.com>

I assume this will be forwarded to stable once it has been applied to
mainline.

> ---
>   arch/x86/kvm/vmx.c | 20 +++++++++++++-------
>   1 file changed, 13 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index c39b607..c00f03d 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -1488,13 +1488,6 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
>   		loadsegment(ds, vmx->host_state.ds_sel);
>   		loadsegment(es, vmx->host_state.es_sel);
>   	}
> -#else
> -	/*
> -	 * The sysexit path does not restore ds/es, so we must set them to
> -	 * a reasonable value ourselves.
> -	 */
> -	loadsegment(ds, __USER_DS);
> -	loadsegment(es, __USER_DS);
>   #endif
>   	reload_tss();
>   #ifdef CONFIG_X86_64
> @@ -6370,6 +6363,19 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>   #endif
>   	      );
>
> +#ifndef CONFIG_X86_64
> +	/*
> +	 * The sysexit path does not restore ds/es, so we must set them to
> +	 * a reasonable value ourselves.
> +	 *
> +	 * We can't defer this to vmx_load_host_state() since that function
> +	 * may be executed in interrupt context, which saves and restore segments
> +	 * around it, nullifying its effect.
> +	 */
> +	loadsegment(ds, __USER_DS);
> +	loadsegment(es, __USER_DS);
> +#endif
> +
>   	vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
>   				  | (1 << VCPU_EXREG_RFLAGS)
>   				  | (1 << VCPU_EXREG_CPL)
>





--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c39b607..c00f03d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1488,13 +1488,6 @@  static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 		loadsegment(ds, vmx->host_state.ds_sel);
 		loadsegment(es, vmx->host_state.es_sel);
 	}
-#else
-	/*
-	 * The sysexit path does not restore ds/es, so we must set them to
-	 * a reasonable value ourselves.
-	 */
-	loadsegment(ds, __USER_DS);
-	loadsegment(es, __USER_DS);
 #endif
 	reload_tss();
 #ifdef CONFIG_X86_64
@@ -6370,6 +6363,19 @@  static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
 	      );
 
+#ifndef CONFIG_X86_64
+	/*
+	 * The sysexit path does not restore ds/es, so we must set them to
+	 * a reasonable value ourselves.
+	 *
+	 * We can't defer this to vmx_load_host_state() since that function
+	 * may be executed in interrupt context, which saves and restore segments
+	 * around it, nullifying its effect.
+	 */
+	loadsegment(ds, __USER_DS);
+	loadsegment(es, __USER_DS);
+#endif
+
 	vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
 				  | (1 << VCPU_EXREG_RFLAGS)
 				  | (1 << VCPU_EXREG_CPL)