diff mbox

[v3,2/3] x86/emulate: add support of emulating SSE2 instruction {, v}movd mm, r32/m32 and {, v}movq mm, r64

Message ID 20160801025231.7211-2-mdontu@bitdefender.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mihai Donțu Aug. 1, 2016, 2:52 a.m. UTC
Found that Windows driver was using a SSE2 instruction MOVD.

Signed-off-by: Zhi Wang <zhi.a.wang@intel.com>
Signed-off-by: Mihai Donțu <mdontu@bitdefender.com>
---
Picked from the XenServer 7 patch queue, as suggested by Andrew Cooper

Changed since v2:
 * handle the case where the destination is a GPR
---
 xen/arch/x86/x86_emulate/x86_emulate.c | 38 +++++++++++++++++++++++++++++++---
 1 file changed, 35 insertions(+), 3 deletions(-)

Comments

Jan Beulich Aug. 1, 2016, 1:38 p.m. UTC | #1
>>> On 01.08.16 at 04:52, <mdontu@bitdefender.com> wrote:
> Found that Windows driver was using a SSE2 instruction MOVD.

Nevertheless the title shouldn't say "SSE2", as you also add AVX ones.
Nor are you limiting things to MM register sources. Hence I think the
title needs changing.

> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -204,7 +204,7 @@ static uint8_t twobyte_table[256] = {
>      /* 0x60 - 0x6F */
>      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM,
>      /* 0x70 - 0x7F */
> -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM,
> +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, ImplicitOps|ModRM,

What about the moves in the other direction (opcode 6E)?

> @@ -4409,6 +4409,10 @@ x86_emulate(
>      case 0x6f: /* movq mm/m64,mm */
>                 /* {,v}movdq{a,u} xmm/m128,xmm */
>                 /* vmovdq{a,u} ymm/m256,ymm */
> +    case 0x7e: /* movd mm,r/m32 */
> +               /* movq mm,r/m64 */
> +               /* {,v}movd xmm,r/m32 */
> +               /* {,v}movq xmm,r/m64 */

Same problem as in the other patch - 7E prefixed by F3 has yet
another meaning ({,v}movq xmm,xmm/m64).

> @@ -4432,7 +4436,17 @@ x86_emulate(
>                  host_and_vcpu_must_have(sse2);
>                  buf[0] = 0x66; /* SSE */
>                  get_fpu(X86EMUL_FPU_xmm, &fic);
> -                ea.bytes = (b == 0xd6 ? 8 : 16);
> +                switch ( b )
> +                {
> +                case 0x7e:
> +                    ea.bytes = 4;
> +                    break;
> +                case 0xd6:
> +                    ea.bytes = 8;
> +                    break;
> +                default:
> +                    ea.bytes = 16;
> +                }

Please don't omit the final break statement (also below).

> @@ -4452,7 +4466,17 @@ x86_emulate(
>                      ((vex.pfx != vex_66) && (vex.pfx != vex_f3)));
>              host_and_vcpu_must_have(avx);
>              get_fpu(X86EMUL_FPU_ymm, &fic);
> -            ea.bytes = (b == 0xd6 ? 8 : (16 << vex.l));
> +            switch ( b )
> +            {
> +            case 0x7e:
> +                ea.bytes = 4;
> +                break;
> +            case 0xd6:
> +                ea.bytes = 8;
> +                break;
> +            default:
> +                ea.bytes = 16 << vex.l;
> +            }
>          }
>          if ( ea.type == OP_MEM )
>          {
> @@ -4468,6 +4492,14 @@ x86_emulate(
>              vex.b = 1;
>              buf[4] &= 0x38;
>          }
> +        else if ( b == 0x7e )
> +        {
> +            /* convert the GPR destination to (%rAX) */
> +            *((unsigned long *)&mmvalp) = (unsigned long)ea.reg;

Hmm, you write the original register value to the memory location
you want to use, but the instruction means to _write_ to the
register. Nor would this mechanism, even if used properly, clear the
high half of the GPR for a 32-bit destination.

Also - comment style (admittedly the pre-existing nearby one is
wrong too).

Jan
diff mbox

Patch

diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index 44de3b6..9f89ada 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -204,7 +204,7 @@  static uint8_t twobyte_table[256] = {
     /* 0x60 - 0x6F */
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM,
     /* 0x70 - 0x7F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, ImplicitOps|ModRM,
     /* 0x80 - 0x87 */
     ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
     ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
@@ -4409,6 +4409,10 @@  x86_emulate(
     case 0x6f: /* movq mm/m64,mm */
                /* {,v}movdq{a,u} xmm/m128,xmm */
                /* vmovdq{a,u} ymm/m256,ymm */
+    case 0x7e: /* movd mm,r/m32 */
+               /* movq mm,r/m64 */
+               /* {,v}movd xmm,r/m32 */
+               /* {,v}movq xmm,r/m64 */
     case 0x7f: /* movq mm,mm/m64 */
                /* {,v}movdq{a,u} xmm,xmm/m128 */
                /* vmovdq{a,u} ymm,ymm/m256 */
@@ -4432,7 +4436,17 @@  x86_emulate(
                 host_and_vcpu_must_have(sse2);
                 buf[0] = 0x66; /* SSE */
                 get_fpu(X86EMUL_FPU_xmm, &fic);
-                ea.bytes = (b == 0xd6 ? 8 : 16);
+                switch ( b )
+                {
+                case 0x7e:
+                    ea.bytes = 4;
+                    break;
+                case 0xd6:
+                    ea.bytes = 8;
+                    break;
+                default:
+                    ea.bytes = 16;
+                }
                 break;
             case vex_none:
                 if ( b != 0xe7 )
@@ -4452,7 +4466,17 @@  x86_emulate(
                     ((vex.pfx != vex_66) && (vex.pfx != vex_f3)));
             host_and_vcpu_must_have(avx);
             get_fpu(X86EMUL_FPU_ymm, &fic);
-            ea.bytes = (b == 0xd6 ? 8 : (16 << vex.l));
+            switch ( b )
+            {
+            case 0x7e:
+                ea.bytes = 4;
+                break;
+            case 0xd6:
+                ea.bytes = 8;
+                break;
+            default:
+                ea.bytes = 16 << vex.l;
+            }
         }
         if ( ea.type == OP_MEM )
         {
@@ -4468,6 +4492,14 @@  x86_emulate(
             vex.b = 1;
             buf[4] &= 0x38;
         }
+        else if ( b == 0x7e )
+        {
+            /* convert the GPR destination to (%rAX) */
+            *((unsigned long *)&mmvalp) = (unsigned long)ea.reg;
+            rex_prefix &= ~REX_B;
+            vex.b = 1;
+            buf[4] &= 0x38;
+        }
         if ( !rc )
         {
            copy_REX_VEX(buf, rex_prefix, vex);