[v3,1/3] x86/emulate: add support for {,v}movq xmm,xmm/m64

Message ID 20160802021920.3772b070@bitdefender.com (mailing list archive)
State: New, archived

Commit Message

Mihai Donțu Aug. 1, 2016, 11:19 p.m. UTC
On Monday 01 August 2016 07:19:51 Jan Beulich wrote:
> >>> On 01.08.16 at 04:52, <mdontu@bitdefender.com> wrote:  
> > @@ -4412,6 +4412,7 @@ x86_emulate(
> >      case 0x7f: /* movq mm,mm/m64 */
> >                 /* {,v}movdq{a,u} xmm,xmm/m128 */
> >                 /* vmovdq{a,u} ymm,ymm/m256 */
> > +    case 0xd6: /* {,v}movq xmm,xmm/m64 */
> >      {
> >          uint8_t *buf = get_stub(stub);
> >          struct fpu_insn_ctxt fic = { .insn_bytes = 5 };
> > @@ -4429,9 +4430,9 @@ x86_emulate(
> >              case vex_66:
> >              case vex_f3:
> >                  host_and_vcpu_must_have(sse2);
> > -                buf[0] = 0x66; /* movdqa */
> > +                buf[0] = 0x66; /* SSE */  
> 
> The comment change here indicates a problem: So far it was indicating
> that despite the possible F3 prefix (movdqu) we encode a 66 one
> (movdqa). Opcode D6 prefixed with F3, however, is movq2dq, which
> you then either don't emulate correctly, or if it happens to be
> emulated correctly you should include in the comment accompanying
> the case label. And its AVX counterpart should then produce #UD.

I fiddled with this for a while and the attached patch (adjusted)
appears to be doing the right thing, i.e. movq2dq gets emulated
correctly too. copy_REX_VEX() does not work OK with movq2dq, but it
looked easy to single out this case.

All tests pass, including those for {,v}movq xmm,xmm/m64 and movq2dq. There does
not appear to be an AVX variant for the latter, or I'm not reading the
Intel SDM right (or binutils' as is lying to me).
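
For reference, the movq2dq semantics being exercised here (move the 64-bit
MMX register into the low qword of the XMM destination, zeroing the upper
qword) can be sanity-checked natively with a small standalone program. This
is an illustrative sketch only (x86-64 build assumed), not Xen's actual test
harness, which drives the instruction through x86_emulate():

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t src = 0x1122334455667788ULL;
        uint64_t dst[2] = { ~0ULL, ~0ULL };

        /* movq2dq (F3 0F D6): mm source, xmm destination; the upper
         * qword of the destination must end up zero. */
        asm volatile ( "movq    %1, %%mm1\n\t"
                       "movq2dq %%mm1, %%xmm2\n\t"
                       "movdqu  %%xmm2, %0\n\t"
                       "emms"
                       : "=m" (dst) : "m" (src) : "mm1", "xmm2" );

        printf("low=%016" PRIx64 " high=%016" PRIx64 "\n", dst[0], dst[1]);
        return !(dst[0] == src && dst[1] == 0);
    }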

Comments

Jan Beulich Aug. 2, 2016, 6:19 a.m. UTC | #1
>>> On 02.08.16 at 01:19, <mdontu@bitdefender.com> wrote:
> On Monday 01 August 2016 07:19:51 Jan Beulich wrote:
>> >>> On 01.08.16 at 04:52, <mdontu@bitdefender.com> wrote:  
>> > @@ -4412,6 +4412,7 @@ x86_emulate(
>> >      case 0x7f: /* movq mm,mm/m64 */
>> >                 /* {,v}movdq{a,u} xmm,xmm/m128 */
>> >                 /* vmovdq{a,u} ymm,ymm/m256 */
>> > +    case 0xd6: /* {,v}movq xmm,xmm/m64 */
>> >      {
>> >          uint8_t *buf = get_stub(stub);
>> >          struct fpu_insn_ctxt fic = { .insn_bytes = 5 };
>> > @@ -4429,9 +4430,9 @@ x86_emulate(
>> >              case vex_66:
>> >              case vex_f3:
>> >                  host_and_vcpu_must_have(sse2);
>> > -                buf[0] = 0x66; /* movdqa */
>> > +                buf[0] = 0x66; /* SSE */  
>> 
>> The comment change here indicates a problem: So far it was indicating
>> that despite the possible F3 prefix (movdqu) we encode a 66 one
>> (movdqa). Opcode D6 prefixed with F3, however, is movq2dq, which
>> you then either don't emulate correctly, or if it happens to be
>> emulated correctly you should include in the comment accompanying
>> the case label. And its AVX counterpart should then produce #UD.
> 
> I fiddled with this for a while and the attached patch (adjusted)
> appears to be doing the right thing, i.e. movq2dq gets emulated
> correctly too. copy_REX_VEX() does not work OK with movq2dq, but it
> looked easy to single out this case.

Except that you can't really avoid it (see below). Without you being
more explicit, I also can't tell what it is that doesn't work right in that
case.

> All tests pass, including those for {,v}movq xmm,xmm/m64 and movq2dq. There does
> not appear to be an AVX variant for the latter, or I'm not reading the
> Intel SDM right (or binutils' as is lying to me).

Well, that's what I said earlier on (still visible above), and what you
still fail to deal with.

> @@ -4469,7 +4471,11 @@ x86_emulate(
>          }
>          if ( !rc )
>          {
> -           copy_REX_VEX(buf, rex_prefix, vex);
> +           /* try to preserve the mandatory prefix for movq2dq */
> +           if ( !rex_prefix && vex.opcx == vex_none && vex.pfx == vex_f3 )
> +               buf[0] = 0xf3;
> +           else
> +               copy_REX_VEX(buf, rex_prefix, vex);

So what about a movq2dq having a REX prefix to encode XMM8..15
for one or both of its operands? The writing of the F3 prefix really
needs to go elsewhere - probably the best place is where the 66
one gets written unconditionally right now. And afaict then
copy_REX_VEX() will work fine here too.

Jan
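
For reference, the prefix-selected meanings of opcode 0F D6 that the
discussion turns on, per the Intel SDM, summarized as an illustrative
helper (not Xen's decoder):

    #include <stdint.h>

    /* Illustrative only: the meaning of opcode 0F D6 is selected
     * entirely by the mandatory prefix. */
    static const char *decode_0f_d6(uint8_t pfx, int vex_encoded)
    {
        if ( vex_encoded )
            /* Only VEX.66.0F D6 (vmovq) exists; VEX.F3/F2 forms are #UD. */
            return pfx == 0x66 ? "vmovq xmm,xmm/m64" : "#UD";
        switch ( pfx )
        {
        case 0x66: return "movq xmm,xmm/m64";  /* SSE2 store form */
        case 0xf3: return "movq2dq mm,xmm";    /* SSE2, register operands only */
        case 0xf2: return "movdq2q xmm,mm";    /* SSE2, register operands only */
        }
        return "#UD";                          /* no mandatory prefix */
    }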
Mihai Donțu Aug. 2, 2016, 8:13 a.m. UTC | #2
On Tuesday 02 August 2016 00:19:22 Jan Beulich wrote:
> > > > On 02.08.16 at 01:19, <mdontu@bitdefender.com> wrote:  
> > > > @@ -4412,6 +4412,7 @@ x86_emulate(
> > > >      case 0x7f: /* movq mm,mm/m64 */
> > > >                 /* {,v}movdq{a,u} xmm,xmm/m128 */
> > > >                 /* vmovdq{a,u} ymm,ymm/m256 */
> > > > +    case 0xd6: /* {,v}movq xmm,xmm/m64 */
> > > >      {
> > > >          uint8_t *buf = get_stub(stub);
> > > >          struct fpu_insn_ctxt fic = { .insn_bytes = 5 };
> > > > @@ -4429,9 +4430,9 @@ x86_emulate(
> > > >              case vex_66:
> > > >              case vex_f3:
> > > >                  host_and_vcpu_must_have(sse2);
> > > > -                buf[0] = 0x66; /* movdqa */
> > > > +                buf[0] = 0x66; /* SSE */    
> > > 
> > > The comment change here indicates a problem: So far it was indicating
> > > that despite the possible F3 prefix (movdqu) we encode a 66 one
> > > (movdqa). Opcode D6 prefixed with F3, however, is movq2dq, which
> > > you then either don't emulate correctly, or if it happens to be
> > > emulated correctly you should include in the comment accompanying
> > > the case label. And its AVX counterpart should then produce #UD.  
> > 
> > I fiddled with this for a while and the attached patch (adjusted)
> > appears to be doing the right thing, i.e. movq2dq gets emulated
> > correctly too. copy_REX_VEX() does not work OK with movq2dq, but it
> > looked easy to single out this case.  
> 
> Except that you can't really avoid it (see below). Without you being
> more explicit, I also can't tell what it is that doesn't work right in that
> case.

Sorry about that. In x86_emulate(), after buf[0] is set to 0x66 and
copy_REX_VEX() runs:

   f3 0f d6 d1     movq2dq %mm1,%xmm2

becomes:

   66 40 0f d6 d1  rex movq %xmm2,%xmm1

Now that I've slept on it, I can see it's not really a problem with
copy_REX_VEX().
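
The byte sequence falls out of how the stub is assembled for this case
block: two placeholder prefix bytes followed by the instruction proper, with
the mandatory prefix stamped over the first slot and copy_REX_VEX() writing
into the leading slots afterwards. A sketch of the mechanism (not a verbatim
quote of the code):

    /* Stub as initially assembled (fic.insn_bytes = 5, plus a ret): */
    buf[0] = 0x3e;    /* prefix slot 1, filled with a DS override */
    buf[1] = 0x3e;    /* prefix slot 2 */
    buf[2] = 0x0f;
    buf[3] = b;       /* 0xd6 */
    buf[4] = modrm;   /* 0xd1 */
    buf[5] = 0xc3;    /* ret, to return from the stub */

    buf[0] = 0x66;    /* mandatory prefix overwrites slot 1 */

    /*
     * copy_REX_VEX(): for a non-VEX instruction in 64-bit mode it
     * writes a REX byte (REX_PREFIX | rex_prefix, i.e. 0x40 here)
     * into slot 2, giving "66 40 0f d6 d1": the F3 that makes the
     * instruction movq2dq is gone.
     */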

> > All tests pass, including those for {,v}movq xmm,xmm/m64 and movq2dq. There does
> > not appear to be an AVX variant for the latter, or I'm not reading the
> > Intel SDM right (or binutils' as is lying to me).  
> 
> Well, that's what I said earlier on (still visible above), and what you
> still fail to deal with.
> 
> > @@ -4469,7 +4471,11 @@ x86_emulate(
> >          }
> >          if ( !rc )
> >          {
> > -           copy_REX_VEX(buf, rex_prefix, vex);
> > +           /* try to preserve the mandatory prefix for movq2dq */
> > +           if ( !rex_prefix && vex.opcx == vex_none && vex.pfx == vex_f3 )
> > +               buf[0] = 0xf3;
> > +           else
> > +               copy_REX_VEX(buf, rex_prefix, vex);  
> 
> So what about a movq2dq having a REX prefix to encode XMM8..15
> for one or both of its operands? The writing of the F3 prefix really
> needs to go elsewhere - probably the best place is where the 66
> one gets written unconditionally right now. And afaict then
> copy_REX_VEX() will work fine here too.
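
One way to realize Jan's suggestion is to select the mandatory prefix at the
point where the 66 byte is currently written unconditionally, and leave
copy_REX_VEX() alone. A sketch on top of the patch below (not necessarily
the form eventually committed):

             case vex_66:
             case vex_f3:
                 host_and_vcpu_must_have(sse2);
-                buf[0] = 0x66; /* movdqa */
+                /* Select the mandatory prefix; movq2dq must keep its F3. */
+                buf[0] = ((b == 0xd6 && vex.pfx == vex_f3) ? 0xf3 : 0x66);
                 get_fpu(X86EMUL_FPU_xmm, &fic);
                 ea.bytes = (b == 0xd6 ? 8 : 16);
                 break;

         if ( !rc )
         {
-           /* try to preserve the mandatory prefix for movq2dq */
-           if ( !rex_prefix && vex.opcx == vex_none && vex.pfx == vex_f3 )
-               buf[0] = 0xf3;
-           else
-               copy_REX_VEX(buf, rex_prefix, vex);
+           copy_REX_VEX(buf, rex_prefix, vex);

With the F3 in prefix slot 1, copy_REX_VEX() can still place a REX byte in
slot 2 (F3 followed by REX is a legal encoding), so XMM8..15 operands are
covered. The missing #UD for the VEX-encoded F3 form that Jan points out
would want, in the AVX branch, something like the following (assuming the
three-argument generate_exception_if() form used elsewhere in this file):

+            /* There is no AVX counterpart of movq2dq: VEX.F3 with D6 is #UD. */
+            generate_exception_if(b == 0xd6 && vex.pfx == vex_f3, EXC_UD, -1);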

Patch

diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index fe594ba..d6c199b 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -245,7 +245,7 @@ static uint8_t twobyte_table[256] = {
     ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
     ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
     /* 0xD0 - 0xDF */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     /* 0xE0 - 0xEF */
     0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0, 0,
     /* 0xF0 - 0xFF */
@@ -4412,6 +4412,8 @@ x86_emulate(
     case 0x7f: /* movq mm,mm/m64 */
                /* {,v}movdq{a,u} xmm,xmm/m128 */
                /* vmovdq{a,u} ymm,ymm/m256 */
+    case 0xd6: /* {,v}movq xmm,xmm/m64 */
+               /* movq2dq mm,xmm */
     {
         uint8_t *buf = get_stub(stub);
         struct fpu_insn_ctxt fic = { .insn_bytes = 5 };
@@ -4431,7 +4433,7 @@ x86_emulate(
                 host_and_vcpu_must_have(sse2);
                 buf[0] = 0x66; /* movdqa */
                 get_fpu(X86EMUL_FPU_xmm, &fic);
-                ea.bytes = 16;
+                ea.bytes = (b == 0xd6 ? 8 : 16);
                 break;
             case vex_none:
                 if ( b != 0xe7 )
@@ -4451,7 +4453,7 @@ x86_emulate(
                     ((vex.pfx != vex_66) && (vex.pfx != vex_f3)));
             host_and_vcpu_must_have(avx);
             get_fpu(X86EMUL_FPU_ymm, &fic);
-            ea.bytes = 16 << vex.l;
+            ea.bytes = (b == 0xd6 ? 8 : (16 << vex.l));
         }
         if ( ea.type == OP_MEM )
         {
@@ -4469,7 +4471,11 @@ x86_emulate(
         }
         if ( !rc )
         {
-           copy_REX_VEX(buf, rex_prefix, vex);
+           /* try to preserve the mandatory prefix for movq2dq */
+           if ( !rex_prefix && vex.opcx == vex_none && vex.pfx == vex_f3 )
+               buf[0] = 0xf3;
+           else
+               copy_REX_VEX(buf, rex_prefix, vex);
            asm volatile ( "call *%0" : : "r" (stub.func), "a" (mmvalp)
                                      : "memory" );
         }