[4/5] x86/emulate: add support for {,v}movq xmm,xmm/m64

Message ID: 57D1878F020000780010D270@prv-mh.provo.novell.com
State: New, archived

Commit Message

Jan Beulich Sept. 8, 2016, 1:45 p.m. UTC
From: Mihai Donțu <mdontu@bitdefender.com>

Signed-off-by: Mihai Donțu <mdontu@bitdefender.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: Re-base on decoding changes. Address my own review comments (where
    still applicable). #UD when vex.l is set. Various adjustments to
    the test tool change.
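
Note (illustration, not part of the patch): 66 0F D6 /r stores the low
64 bits of the source XMM register, so the memory form writes exactly
eight bytes; with a register destination the upper bytes are zeroed.
A minimal standalone sketch, assuming GCC-style inline asm on an
SSE2-capable x86-64 host:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        uint64_t buf[3];
        const uint64_t src = 0x1122334455667788ULL;

        memset(buf, 0xbd, sizeof(buf));          /* canary pattern */
        asm volatile ( "movq %1, %%xmm0\n\t"     /* load the low qword */
                       "movq %%xmm0, %0"         /* 66 0F D6: m64 store */
                       : "=m" (buf[1]) : "m" (src) : "xmm0" );

        printf("stored qword:  %016" PRIx64 "\n", buf[1]);
        printf("canaries kept: %s\n",
               buf[0] == 0xbdbdbdbdbdbdbdbdULL &&
               buf[2] == 0xbdbdbdbdbdbdbdbdULL ? "yes" : "no");
        return 0;
    }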

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -713,6 +713,54 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing movq %%xmm0,32(%%ecx)...");
+    if ( stack_exec && cpu_has_sse2 )
+    {
+        decl_insn(movq_to_mem2);
+
+        asm volatile ( "pcmpgtb %%xmm0, %%xmm0\n"
+                       put_insn(movq_to_mem2, "movq %%xmm0, 32(%0)")
+                       :: "c" (NULL) );
+
+        memset(res, 0xbd, 64);
+        set_insn(movq_to_mem2);
+        regs.ecx = (unsigned long)res;
+        regs.edx = 0;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(movq_to_mem2) ||
+             *((uint64_t *)res + 4) ||
+             memcmp(res, res + 10, 24) ||
+             memcmp(res, res + 6, 8) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing vmovq %%xmm1,32(%%edx)...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        decl_insn(vmovq_to_mem);
+
+        asm volatile ( "pcmpgtb %%xmm1, %%xmm1\n"
+                       put_insn(vmovq_to_mem, "vmovq %%xmm1, 32(%0)")
+                       :: "d" (NULL) );
+
+        memset(res, 0xdb, 64);
+        set_insn(vmovq_to_mem);
+        regs.ecx = 0;
+        regs.edx = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vmovq_to_mem) ||
+             *((uint64_t *)res + 4) ||
+             memcmp(res, res + 10, 24) ||
+             memcmp(res, res + 6, 8) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing movdqu %xmm2,(%ecx)...");
     if ( stack_exec && cpu_has_sse2 )
     {
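
For reference, the memcmp arithmetic in the two new tests above pins
down the expected memory layout. Assuming res is an unsigned int * as
elsewhere in this harness, res + 10 is byte offset 40 and res + 6 is
byte offset 24, so together the three checks assert that only bytes
32..39 changed and were cleared (pcmpgtb with identical operands
yields all-zero). A standalone restatement of the same arithmetic:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    int main(void)
    {
        unsigned int res[16];                /* 64 bytes, as in the test */

        memset(res, 0xbd, 64);               /* fill pattern */
        memset((char *)res + 32, 0, 8);      /* what the emulated movq does */

        assert(*((uint64_t *)res + 4) == 0); /* bytes 32..39 cleared */
        assert(!memcmp(res, res + 10, 24));  /* bytes 40..63 still filled */
        assert(!memcmp(res, res + 6, 8));    /* bytes 24..31 still filled */
        return 0;
    }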
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -269,7 +269,7 @@ static const opcode_desc_t twobyte_table
     ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
     ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
     /* 0xD0 - 0xDF */
-    ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM,
+    ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ImplicitOps|ModRM, ModRM,
     ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM,
     /* 0xE0 - 0xEF */
     ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ImplicitOps|ModRM,
@@ -4779,6 +4779,8 @@ x86_emulate(
     case X86EMUL_OPC_F3(0x0f, 0x7f):     /* movdqu xmm,xmm/m128 */
     case X86EMUL_OPC_VEX_F3(0x0f, 0x7f): /* vmovdqu xmm,xmm/m128 */
                                          /* vmovdqu ymm,ymm/m256 */
+    case X86EMUL_OPC_66(0x0f, 0xd6):     /* movq xmm,xmm/m64 */
+    case X86EMUL_OPC_VEX_66(0x0f, 0xd6): /* vmovq xmm,xmm/m64 */
     {
         uint8_t *buf = get_stub(stub);
         struct fpu_insn_ctxt fic = { .insn_bytes = 5 };
@@ -4796,7 +4798,8 @@ x86_emulate(
             case vex_66:
             case vex_f3:
                 host_and_vcpu_must_have(sse2);
-                buf[0] = 0x66; /* movdqa */
+                /* Converting movdqu to movdqa here: Our buffer is aligned. */
+                buf[0] = 0x66;
                 get_fpu(X86EMUL_FPU_xmm, &fic);
                 ea.bytes = 16;
                 break;
@@ -4819,6 +4822,11 @@ x86_emulate(
             get_fpu(X86EMUL_FPU_ymm, &fic);
             ea.bytes = 16 << vex.l;
         }
+        if ( b == 0xd6 )
+        {
+            generate_exception_if(vex.l, EXC_UD, -1);
+            ea.bytes = 8;
+        }
         if ( ea.type == OP_MEM )
         {
             generate_exception_if((vex.pfx == vex_66) &&
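
On the emulator side the new case labels join the existing movdq{a,u}
stub path: the instruction bytes are copied into a short local stub
that operates on the emulator's own 16-byte aligned buffer, which is
what makes the F3 (movdqu) to 66 (movdqa) prefix rewrite safe; for
0xd6 only eight bytes are moved (ea.bytes = 8), and VEX.L=1 is
reserved for vmovq, hence the #UD. A hypothetical standalone sketch of
the one-byte rewrite (not the emulator's actual stub code):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* movdqu %xmm0,(%rax); ret -- the shape of the 5-byte stub */
        uint8_t stub[5] = { 0xf3, 0x0f, 0x7f, 0x00, 0xc3 };

        /* movdqu (F3 0F 7F) and movdqa (66 0F 7F) differ only in the
         * mandatory prefix, so flipping one byte selects the aligned
         * form: */
        stub[0] = 0x66;

        printf("stub now encodes %02x 0f 7f /r (movdqa)\n", stub[0]);
        return 0;
    }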

Comments

Mihai Donțu Sept. 8, 2016, 1:56 p.m. UTC | #1
On Thursday 08 September 2016 07:45:19 Jan Beulich wrote:
> From: Mihai Donțu <mdontu@bitdefender.com>
> 
> Signed-off-by: Mihai Donțu <mdontu@bitdefender.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> v4: Re-base on decoding changes. Address my own review comments (where
>     still applicable). #UD when vex.l is set. Various adjustments to
>     the test tool change.

Thank you! They were in my queue for too long and I was struggling to
find a window of time to get them in shape.

Andrew Cooper Sept. 30, 2016, 10:43 a.m. UTC | #2
On 08/09/16 14:45, Jan Beulich wrote:
> From: Mihai Donțu <mdontu@bitdefender.com>
>
> Signed-off-by: Mihai Donțu <mdontu@bitdefender.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>