Message ID | 20160801025231.7211-2-mdontu@bitdefender.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
>>> On 01.08.16 at 04:52, <mdontu@bitdefender.com> wrote: > Found that Windows driver was using a SSE2 instruction MOVD. Nevertheless the title shouldn't say "SSE2", as you also add AVX ones. Nor are you limiting things to MM register sources. Hence I think the title needs changing. > --- a/xen/arch/x86/x86_emulate/x86_emulate.c > +++ b/xen/arch/x86/x86_emulate/x86_emulate.c > @@ -204,7 +204,7 @@ static uint8_t twobyte_table[256] = { > /* 0x60 - 0x6F */ > 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, > /* 0x70 - 0x7F */ > - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, > + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, ImplicitOps|ModRM, What about the moves in the other direction (opcode 6E)? > @@ -4409,6 +4409,10 @@ x86_emulate( > case 0x6f: /* movq mm/m64,mm */ > /* {,v}movdq{a,u} xmm/m128,xmm */ > /* vmovdq{a,u} ymm/m256,ymm */ > + case 0x7e: /* movd mm,r/m32 */ > + /* movq mm,r/m64 */ > + /* {,v}movd xmm,r/m32 */ > + /* {,v}movq xmm,r/m64 */ Same problem as in the other patch - 7E prefixed by F3 has yet another meaning ({,v}movq xmm,xmm/m64). > @@ -4432,7 +4436,17 @@ x86_emulate( > host_and_vcpu_must_have(sse2); > buf[0] = 0x66; /* SSE */ > get_fpu(X86EMUL_FPU_xmm, &fic); > - ea.bytes = (b == 0xd6 ? 8 : 16); > + switch ( b ) > + { > + case 0x7e: > + ea.bytes = 4; > + break; > + case 0xd6: > + ea.bytes = 8; > + break; > + default: > + ea.bytes = 16; > + } Please don't omit the final break statement (also below). > @@ -4452,7 +4466,17 @@ x86_emulate( > ((vex.pfx != vex_66) && (vex.pfx != vex_f3))); > host_and_vcpu_must_have(avx); > get_fpu(X86EMUL_FPU_ymm, &fic); > - ea.bytes = (b == 0xd6 ? 8 : (16 << vex.l)); > + switch ( b ) > + { > + case 0x7e: > + ea.bytes = 4; > + break; > + case 0xd6: > + ea.bytes = 8; > + break; > + default: > + ea.bytes = 16 << vex.l; > + } > } > if ( ea.type == OP_MEM ) > { > @@ -4468,6 +4492,14 @@ x86_emulate( > vex.b = 1; > buf[4] &= 0x38; > } > + else if ( b == 0x7e ) > + { > + /* convert the GPR destination to (%rAX) */ > + *((unsigned long *)&mmvalp) = (unsigned long)ea.reg; Hmm, you write the original register value to the memory location you want to use, but the instruction means to _write_ to the register. Nor would this mechanism, even if used properly, clear the high half of the GPR for a 32-bit destination. Also - comment style (admittedly the pre-existing nearby one is wrong too). Jan
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c index 44de3b6..9f89ada 100644 --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -204,7 +204,7 @@ static uint8_t twobyte_table[256] = { /* 0x60 - 0x6F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, /* 0x70 - 0x7F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, ImplicitOps|ModRM, /* 0x80 - 0x87 */ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, @@ -4409,6 +4409,10 @@ x86_emulate( case 0x6f: /* movq mm/m64,mm */ /* {,v}movdq{a,u} xmm/m128,xmm */ /* vmovdq{a,u} ymm/m256,ymm */ + case 0x7e: /* movd mm,r/m32 */ + /* movq mm,r/m64 */ + /* {,v}movd xmm,r/m32 */ + /* {,v}movq xmm,r/m64 */ case 0x7f: /* movq mm,mm/m64 */ /* {,v}movdq{a,u} xmm,xmm/m128 */ /* vmovdq{a,u} ymm,ymm/m256 */ @@ -4432,7 +4436,17 @@ x86_emulate( host_and_vcpu_must_have(sse2); buf[0] = 0x66; /* SSE */ get_fpu(X86EMUL_FPU_xmm, &fic); - ea.bytes = (b == 0xd6 ? 8 : 16); + switch ( b ) + { + case 0x7e: + ea.bytes = 4; + break; + case 0xd6: + ea.bytes = 8; + break; + default: + ea.bytes = 16; + } break; case vex_none: if ( b != 0xe7 ) @@ -4452,7 +4466,17 @@ x86_emulate( ((vex.pfx != vex_66) && (vex.pfx != vex_f3))); host_and_vcpu_must_have(avx); get_fpu(X86EMUL_FPU_ymm, &fic); - ea.bytes = (b == 0xd6 ? 8 : (16 << vex.l)); + switch ( b ) + { + case 0x7e: + ea.bytes = 4; + break; + case 0xd6: + ea.bytes = 8; + break; + default: + ea.bytes = 16 << vex.l; + } } if ( ea.type == OP_MEM ) { @@ -4468,6 +4492,14 @@ x86_emulate( vex.b = 1; buf[4] &= 0x38; } + else if ( b == 0x7e ) + { + /* convert the GPR destination to (%rAX) */ + *((unsigned long *)&mmvalp) = (unsigned long)ea.reg; + rex_prefix &= ~REX_B; + vex.b = 1; + buf[4] &= 0x38; + } if ( !rc ) { copy_REX_VEX(buf, rex_prefix, vex);