KVM: nVMX: Fix handling of lmsw instruction

Message ID 1495279376-4340-1-git-send-email-jschoenh@amazon.de (mailing list archive)
State New, archived

Commit Message

Jan H. Schönherr May 20, 2017, 11:22 a.m. UTC
The decision whether or not to exit from L2 to L1 on an lmsw instruction is
based on bogus values: instead of using the information encoded within the
exit qualification, it uses the data also used for the mov-to-cr
instruction, which boils down to using whatever is in %eax at that point.

Use the correct values instead.

Without this fix, an L1 may not get notified when a 32-bit Linux L2
switches its secondary CPUs to protected mode; the L1 is only notified on
the next modification of CR0. This short time window poses a problem when
there is some other reason to exit to L1 in between. Then, L2 will be
resumed in real mode and chaos ensues.

Signed-off-by: Jan H. Schönherr <jschoenh@amazon.de>
---
 arch/x86/kvm/vmx.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)
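
For reference, the exit qualification for control-register accesses packs
several fields into one word (Intel SDM, "Exit Qualification for
Control-Register Accesses"): bits 3:0 give the CR number, bits 5:4 the
access type (0 = MOV to CR, 1 = MOV from CR, 2 = CLTS, 3 = LMSW), bits
11:8 the GPR operand (defined only for MOV accesses), and bits 31:16 the
LMSW source data; LMSW_SOURCE_DATA_SHIFT in vmx.c is that 16-bit offset.
The sketch below restates the layout in plain C to show why reading the
GPR field on an lmsw exit returns junk: the field is undefined there and
typically decodes as register 0, i.e. %eax. The macro names here are
illustrative, not the kernel's.

#include <stdio.h>

/* Illustrative decoding of the CR-access exit qualification. */
#define CR_NUM(q)           ((q) & 15)             /* bits 3:0 */
#define CR_ACCESS_TYPE(q)   (((q) >> 4) & 3)       /* bits 5:4 */
#define CR_MOV_GPR(q)       (((q) >> 8) & 15)      /* bits 11:8, MOV only */
#define LMSW_SRC_DATA(q)    (((q) >> 16) & 0xffff) /* bits 31:16, LMSW only */

int main(void)
{
	/* Hypothetical lmsw exit: CR0, access type 3, source data 0x1. */
	unsigned long q = (0x1UL << 16) | (3UL << 4) | 0;

	printf("cr=%lu type=%lu lmsw_val=0x%lx\n",
	       CR_NUM(q), CR_ACCESS_TYPE(q), LMSW_SRC_DATA(q));
	/* CR_MOV_GPR(q) reads 0 here -- %eax -- which is the bug the
	 * patch fixes: that field means nothing on an lmsw exit. */
	return 0;
}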

Comments

Wanpeng Li May 21, 2017, 11:18 p.m. UTC | #1
2017-05-20 19:22 GMT+08:00 Jan H. Schönherr <jschoenh@amazon.de>:
> The decision whether or not to exit from L2 to L1 on an lmsw instruction is
> based on bogus values: instead of using the information encoded within the
> exit qualification, it uses the data also used for the mov-to-cr
> instruction, which boils down to using whatever is in %eax at that point.

Good catch!

>
> Use the correct values instead.
>
> Without this fix, an L1 may not get notified when a 32-bit Linux L2
> switches its secondary CPUs to protected mode; the L1 is only notified on
> the next modification of CR0. This short time window poses a problem when
> there is some other reason to exit to L1 in between. Then, L2 will be
> resumed in real mode and chaos ensues.
>
> Signed-off-by: Jan H. Schönherr <jschoenh@amazon.de>

Reviewed-by: Wanpeng Li <wanpeng.li@hotmail.com>

> ---
>  arch/x86/kvm/vmx.c | 7 +++++--
>  1 file changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index c6f4ad4..116569a 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -7913,11 +7913,13 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
>  {
>         unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
>         int cr = exit_qualification & 15;
> -       int reg = (exit_qualification >> 8) & 15;
> -       unsigned long val = kvm_register_readl(vcpu, reg);
> +       int reg;
> +       unsigned long val;
>
>         switch ((exit_qualification >> 4) & 3) {
>         case 0: /* mov to cr */
> +               reg = (exit_qualification >> 8) & 15;
> +               val = kvm_register_readl(vcpu, reg);
>                 switch (cr) {
>                 case 0:
>                         if (vmcs12->cr0_guest_host_mask &
> @@ -7972,6 +7974,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
>                  * lmsw can change bits 1..3 of cr0, and only set bit 0 of
>                  * cr0. Other attempted changes are ignored, with no exit.
>                  */
> +               val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
>                 if (vmcs12->cr0_guest_host_mask & 0xe &
>                     (val ^ vmcs12->cr0_read_shadow))
>                         return true;
> --
> 2.3.1.dirty
>
Paolo Bonzini May 26, 2017, 4 p.m. UTC | #2
On 20/05/2017 13:22, Jan H. Schönherr wrote:
> The decision whether or not to exit from L2 to L1 on an lmsw instruction is
> based on bogus values: instead of using the information encoded within the
> exit qualification, it uses the data also used for the mov-to-cr
> instruction, which boils down to using whatever is in %eax at that point.
> 
> Use the correct values instead.
> 
> Without this fix, an L1 may not get notified when a 32-bit Linux L2
> switches its secondary CPUs to protected mode; the L1 is only notified on
> the next modification of CR0. This short time window poses a problem when
> there is some other reason to exit to L1 in between. Then, L2 will be
> resumed in real mode and chaos ensues.
> 
> Signed-off-by: Jan H. Schönherr <jschoenh@amazon.de>
> ---
>  arch/x86/kvm/vmx.c | 7 +++++--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index c6f4ad4..116569a 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -7913,11 +7913,13 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
>  {
>  	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
>  	int cr = exit_qualification & 15;
> -	int reg = (exit_qualification >> 8) & 15;
> -	unsigned long val = kvm_register_readl(vcpu, reg);
> +	int reg;
> +	unsigned long val;
>  
>  	switch ((exit_qualification >> 4) & 3) {
>  	case 0: /* mov to cr */
> +		reg = (exit_qualification >> 8) & 15;
> +		val = kvm_register_readl(vcpu, reg);
>  		switch (cr) {
>  		case 0:
>  			if (vmcs12->cr0_guest_host_mask &
> @@ -7972,6 +7974,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
>  		 * lmsw can change bits 1..3 of cr0, and only set bit 0 of
>  		 * cr0. Other attempted changes are ignored, with no exit.
>  		 */
> +		val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
>  		if (vmcs12->cr0_guest_host_mask & 0xe &
>  		    (val ^ vmcs12->cr0_read_shadow))
>  			return true;
> 

Queued, thanks.

Paolo

Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c6f4ad4..116569a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7913,11 +7913,13 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
 {
 	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 	int cr = exit_qualification & 15;
-	int reg = (exit_qualification >> 8) & 15;
-	unsigned long val = kvm_register_readl(vcpu, reg);
+	int reg;
+	unsigned long val;
 
 	switch ((exit_qualification >> 4) & 3) {
 	case 0: /* mov to cr */
+		reg = (exit_qualification >> 8) & 15;
+		val = kvm_register_readl(vcpu, reg);
 		switch (cr) {
 		case 0:
 			if (vmcs12->cr0_guest_host_mask &
@@ -7972,6 +7974,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
 		 * lmsw can change bits 1..3 of cr0, and only set bit 0 of
 		 * cr0. Other attempted changes are ignored, with no exit.
 		 */
+		val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
 		if (vmcs12->cr0_guest_host_mask & 0xe &
 		    (val ^ vmcs12->cr0_read_shadow))
 			return true;
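
To make the exit decision concrete: lmsw can only load CR0 bits 0..3
(PE/MP/EM/TS) and can only set, never clear, PE. The check visible in the
hunk covers bits 1..3, and the code just past the hunk handles the PE
0->1 case separately. Below is a minimal standalone restatement of that
logic with made-up values; the helper name is hypothetical and the bit-0
branch is paraphrased from the surrounding kernel code, not copied from
this patch.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical restatement of the nested lmsw intercept test. */
static bool lmsw_should_exit(unsigned long guest_host_mask,
			     unsigned long read_shadow,
			     unsigned long lmsw_val)
{
	lmsw_val &= 0x0f;	/* lmsw only touches CR0 bits 0..3 */

	/* bits 1..3 (MP/EM/TS): exit if an L1-owned bit would change */
	if (guest_host_mask & 0xe & (lmsw_val ^ read_shadow))
		return true;
	/* bit 0 (PE): exit if L1 owns it and lmsw would set it */
	if ((guest_host_mask & 0x1) && !(read_shadow & 0x1) &&
	    (lmsw_val & 0x1))
		return true;
	return false;
}

int main(void)
{
	/* L1 intercepts PE; a 32-bit L2 runs "lmsw $0x1" to enter
	 * protected mode. With val taken from the exit qualification,
	 * as the patch does, the exit fires: */
	printf("%d\n", lmsw_should_exit(0x1, 0x0, 0x1));   /* prints 1 */
	/* Before the fix, val came from %eax; if %eax happened to hold
	 * 0, the exit was silently missed -- the bug described above: */
	printf("%d\n", lmsw_should_exit(0x1, 0x0, 0x0));   /* prints 0 */
	return 0;
}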