diff mbox

[5/8] x86emul: support TBM insns

Message ID 58790131020000780012FF5B@prv-mh.provo.novell.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jan Beulich Jan. 13, 2017, 3:32 p.m. UTC
Signed-off-by: Jan Beulich <jbeulich@suse.com>
x86emul: support TBM insns

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -1244,6 +1244,234 @@ int main(int argc, char **argv)
         printf("okay\n");
     }
 
+    printf("%-40s", "Testing bextr $0x0a03,(%ecx),%ebx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(bextr_imm);
+#ifdef __x86_64__
+        decl_insn(bextr64_imm);
+#endif
+
+        asm volatile ( put_insn(bextr_imm, "bextr $0x0a03, (%0), %%ebx")
+                       :: "c" (NULL) );
+        set_insn(bextr_imm);
+
+        *res        = 0xfedcba98;
+        regs.ecx    = (unsigned long)res;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != ((*res >> 3) & 0x3ff) ||
+             *res != 0xfedcba98 ||
+             (regs.eflags & 0xf6b) != 0x202 || !check_eip(bextr_imm) )
+            goto fail;
+        printf("okay\n");
+#ifdef __x86_64__
+        printf("%-40s", "Testing bextr $0x211e,(%r10),%r11...");
+
+        asm volatile ( put_insn(bextr64_imm, "bextr $0x211e, (%r10), %r11") );
+        set_insn(bextr64_imm);
+
+        res[0]      = 0x76543210;
+        res[1]      = 0xfedcba98;
+        regs.r10    = (unsigned long)res;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) ||
+             regs.r11 != (((unsigned long)(res[1] << 1) << 1) |
+                          (res[0] >> 30)) ||
+             res[0] != 0x76543210 || res[1] != 0xfedcba98 ||
+             (regs.eflags & 0xf6b) != 0x202 || !check_eip(bextr64_imm) )
+            goto fail;
+        printf("okay\n");
+#endif
+    }
+    else
+        printf("skipped\n");
+
+    res[0]      = 0xfedcba98;
+    res[1]      = 0x01234567;
+    regs.edx    = (unsigned long)res;
+
+    printf("%-40s", "Testing blcfill 4(%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(blcfill);
+
+        asm volatile ( put_insn(blcfill, "blcfill 4(%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(blcfill);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[1] != 0x01234567 ||
+             regs.ecx != ((res[1] + 1) & res[1]) ||
+             (regs.eflags & 0xfeb) != 0x202 || !check_eip(blcfill) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing blci 4(%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(blci);
+
+        asm volatile ( put_insn(blci, "blci 4(%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(blci);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[1] != 0x01234567 ||
+             regs.ecx != (~(res[1] + 1) | res[1]) ||
+             (regs.eflags & 0xfeb) != 0x282 || !check_eip(blci) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing blcic 4(%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(blcic);
+
+        asm volatile ( put_insn(blcic, "blcic 4(%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(blcic);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[1] != 0x01234567 ||
+             regs.ecx != ((res[1] + 1) & ~res[1]) ||
+             (regs.eflags & 0xfeb) != 0x202 || !check_eip(blcic) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing blcmsk 4(%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(blcmsk);
+
+        asm volatile ( put_insn(blcmsk, "blcmsk 4(%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(blcmsk);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[1] != 0x01234567 ||
+             regs.ecx != ((res[1] + 1) ^ res[1]) ||
+             (regs.eflags & 0xfeb) != 0x202 || !check_eip(blcmsk) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing blcs 4(%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(blcs);
+
+        asm volatile ( put_insn(blcs, "blcs 4(%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(blcs);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[1] != 0x01234567 ||
+             regs.ecx != ((res[1] + 1) | res[1]) ||
+             (regs.eflags & 0xfeb) != 0x202 || !check_eip(blcs) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing blsfill (%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(blsfill);
+
+        asm volatile ( put_insn(blsfill, "blsfill (%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(blsfill);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[0] != 0xfedcba98 ||
+             regs.ecx != ((res[0] - 1) | res[0]) ||
+             (regs.eflags & 0xfeb) != 0x282 || !check_eip(blsfill) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing blsic (%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(blsic);
+
+        asm volatile ( put_insn(blsic, "blsic (%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(blsic);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[0] != 0xfedcba98 ||
+             regs.ecx != ((res[0] - 1) | ~res[0]) ||
+             (regs.eflags & 0xfeb) != 0x282 || !check_eip(blsic) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing t1mskc 4(%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(t1mskc);
+
+        asm volatile ( put_insn(t1mskc, "t1mskc 4(%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(t1mskc);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[1] != 0x01234567 ||
+             regs.ecx != ((res[1] + 1) | ~res[1]) ||
+             (regs.eflags & 0xfeb) != 0x282 || !check_eip(t1mskc) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing tzmsk (%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(tzmsk);
+
+        asm volatile ( put_insn(tzmsk, "tzmsk (%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(tzmsk);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[0] != 0xfedcba98 ||
+             regs.ecx != ((res[0] - 1) & ~res[0]) ||
+             (regs.eflags & 0xfeb) != 0x202 || !check_eip(tzmsk) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing movq %mm3,(%ecx)...");
     if ( stack_exec && cpu_has_mmx )
     {
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -125,6 +125,12 @@ static inline uint64_t xgetbv(uint32_t x
     (res.b & (1U << 8)) != 0; \
 })
 
+#define cpu_has_tbm ({ \
+    struct cpuid_leaf res; \
+    emul_test_cpuid(0x80000001, 0, &res, NULL); \
+    (res.c & (1U << 21)) != 0; \
+})
+
 int emul_test_cpuid(
     uint32_t leaf,
     uint32_t subleaf,
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1355,6 +1355,7 @@ static bool vcpu_has(
 #define vcpu_has_cr8_legacy()  vcpu_has(0x80000001, ECX,  4, ctxt, ops)
 #define vcpu_has_lzcnt()       vcpu_has(0x80000001, ECX,  5, ctxt, ops)
 #define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
+#define vcpu_has_tbm()         vcpu_has(0x80000001, ECX, 21, ctxt, ops)
 #define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
 #define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
 #define vcpu_has_bmi2()        vcpu_has(         7, EBX,  8, ctxt, ops)
@@ -6014,6 +6015,85 @@ x86_emulate(
             asm ( "rorl %b1,%k0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
         break;
 
+    case X86EMUL_OPC(0x8f09, 0x01): /* XOP Grp1 */
+        switch ( modrm_reg & 7 )
+        {
+        case 1: /* blcfill r/m,r */
+        case 2: /* blsfill r/m,r */
+        case 3: /* blcs r/m,r */
+        case 4: /* tzmsk r/m,r */
+        case 5: /* blcic r/m,r */
+        case 6: /* blsic r/m,r */
+        case 7: /* t1mskc r/m,r */
+            host_and_vcpu_must_have(tbm);
+            break;
+        default:
+            goto cannot_emulate;
+        }
+
+    xop_09_rm_rv:
+    {
+        uint8_t *buf = get_stub(stub);
+        typeof(vex) *pxop = container_of(buf + 1, typeof(vex), raw[0]);
+
+        generate_exception_if(vex.l, EXC_UD);
+
+        buf[0] = 0x8f;
+        *pxop = vex;
+        pxop->b = 1;
+        pxop->r = 1;
+        pxop->reg = ~0; /* rAX */
+        buf[3] = b;
+        buf[4] = (modrm & 0x38) | 0x01; /* r/m=(%rCX) */
+        buf[5] = 0xc3;
+
+        dst.reg = decode_register(~vex.reg & (mode_64bit() ? 0xf : 7),
+                                  &_regs, 0);
+        emulate_stub([dst] "=&a" (dst.val), "c" (&src.val));
+
+        put_stub(stub);
+        break;
+    }
+
+    case X86EMUL_OPC(0x8f09, 0x02): /* XOP Grp2 */
+        switch ( modrm_reg & 7 )
+        {
+        case 1: /* blcmsk r/m,r */
+        case 6: /* blci r/m,r */
+            host_and_vcpu_must_have(tbm);
+            goto xop_09_rm_rv;
+        }
+        goto cannot_emulate;
+
+    case X86EMUL_OPC(0x8f0a, 0x10): /* bextr imm,r/m,r */
+    {
+        uint8_t *buf = get_stub(stub);
+        typeof(vex) *pxop = container_of(buf + 1, typeof(vex), raw[0]);
+
+        host_and_vcpu_must_have(tbm);
+        generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
+
+        if ( ea.type == OP_REG )
+            src.val = *ea.reg;
+        else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val, op_bytes,
+                                   ctxt, ops)) != X86EMUL_OKAY )
+            goto done;
+
+        buf[0] = 0x8f;
+        *pxop = vex;
+        pxop->b = 1;
+        pxop->r = 1;
+        buf[3] = b;
+        buf[4] = 0x09; /* reg=rCX r/m=(%rCX) */
+        *(uint32_t *)(buf + 5) = imm1;
+        buf[9] = 0xc3;
+
+        emulate_stub([dst] "=&c" (dst.val), "[dst]" (&src.val));
+
+        put_stub(stub);
+        break;
+    }
+
     default:
         goto cannot_emulate;
     }
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -74,6 +74,7 @@
 #define cpu_has_eist		boot_cpu_has(X86_FEATURE_EIST)
 #define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
 #define cpu_has_cmp_legacy	boot_cpu_has(X86_FEATURE_CMP_LEGACY)
+#define cpu_has_tbm		boot_cpu_has(X86_FEATURE_TBM)
 
 enum _cache_type {
     CACHE_TYPE_NULL = 0,

Comments

Andrew Cooper Jan. 13, 2017, 6:48 p.m. UTC | #1
On 13/01/17 15:32, Jan Beulich wrote:
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -1355,6 +1355,7 @@ static bool vcpu_has(
>  #define vcpu_has_cr8_legacy()  vcpu_has(0x80000001, ECX,  4, ctxt, ops)
>  #define vcpu_has_lzcnt()       vcpu_has(0x80000001, ECX,  5, ctxt, ops)
>  #define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
> +#define vcpu_has_tbm()         vcpu_has(0x80000001, ECX, 21, ctxt, ops)
>  #define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
>  #define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
>  #define vcpu_has_bmi2()        vcpu_has(         7, EBX,  8, ctxt, ops)
> @@ -6014,6 +6015,85 @@ x86_emulate(
>              asm ( "rorl %b1,%k0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
>          break;
>  
> +    case X86EMUL_OPC(0x8f09, 0x01): /* XOP Grp1 */

Surely this calls for the introduction of X86EMUL_OPC_XOP_* to match
their VEX/EVEX counterparts?

~Andrew
Jan Beulich Jan. 16, 2017, 11:36 a.m. UTC | #2
>>> On 13.01.17 at 19:48, <andrew.cooper3@citrix.com> wrote:
> On 13/01/17 15:32, Jan Beulich wrote:
>> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
>> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
>> @@ -1355,6 +1355,7 @@ static bool vcpu_has(
>>  #define vcpu_has_cr8_legacy()  vcpu_has(0x80000001, ECX,  4, ctxt, ops)
>>  #define vcpu_has_lzcnt()       vcpu_has(0x80000001, ECX,  5, ctxt, ops)
>>  #define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
>> +#define vcpu_has_tbm()         vcpu_has(0x80000001, ECX, 21, ctxt, ops)
>>  #define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
>>  #define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
>>  #define vcpu_has_bmi2()        vcpu_has(         7, EBX,  8, ctxt, ops)
>> @@ -6014,6 +6015,85 @@ x86_emulate(
>>              asm ( "rorl %b1,%k0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
>>          break;
>>  
>> +    case X86EMUL_OPC(0x8f09, 0x01): /* XOP Grp1 */
> 
> Surely this calls for the introduction of X86EMUL_OPC_XOP_* to match
> their VEX/EVEX counterparts?

Do really you think

    case X86EMUL_OPC_XOP(09, 0x01): /* XOP Grp1 */

or

    case X86EMUL_OPC_XOP09(0x01): /* XOP Grp1 */

are any better? Iirc you had asked this same question already
when the opcode canonicalization patch was under review. The
situation hasn't changed: The nothing/VEX/EVEX distinction is
needed because the same base opcode may have (slightly or
significantly) different meaning depending on which of the three
(or four, if we also considered MVEX) encodings are being used.
There's no such duplicate meaning for XOP encodings.

Jan
Andrew Cooper Jan. 16, 2017, 2:52 p.m. UTC | #3
On 16/01/17 11:36, Jan Beulich wrote:
>>>> On 13.01.17 at 19:48, <andrew.cooper3@citrix.com> wrote:
>> On 13/01/17 15:32, Jan Beulich wrote:
>>> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
>>> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
>>> @@ -1355,6 +1355,7 @@ static bool vcpu_has(
>>>  #define vcpu_has_cr8_legacy()  vcpu_has(0x80000001, ECX,  4, ctxt, ops)
>>>  #define vcpu_has_lzcnt()       vcpu_has(0x80000001, ECX,  5, ctxt, ops)
>>>  #define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
>>> +#define vcpu_has_tbm()         vcpu_has(0x80000001, ECX, 21, ctxt, ops)
>>>  #define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
>>>  #define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
>>>  #define vcpu_has_bmi2()        vcpu_has(         7, EBX,  8, ctxt, ops)
>>> @@ -6014,6 +6015,85 @@ x86_emulate(
>>>              asm ( "rorl %b1,%k0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
>>>          break;
>>>  
>>> +    case X86EMUL_OPC(0x8f09, 0x01): /* XOP Grp1 */
>> Surely this calls for the introduction of X86EMUL_OPC_XOP_* to match
>> their VEX/EVEX counterparts?
> Do really you think
>
>     case X86EMUL_OPC_XOP(09, 0x01): /* XOP Grp1 */
>
> or
>
>     case X86EMUL_OPC_XOP09(0x01): /* XOP Grp1 */
>
> are any better?

Either would be better, as it avoids the 0x8f magic prefix.

> Iirc you had asked this same question already
> when the opcode canonicalization patch was under review. The
> situation hasn't changed: The nothing/VEX/EVEX distinction is
> needed because the same base opcode may have (slightly or
> significantly) different meaning depending on which of the three
> (or four, if we also considered MVEX) encodings are being used.

MVEX is the precursor to EVEX, and as far as I can tell, was only
implemented on the Knights-Corner co-processor, now superseded by
Knights-Landing processor which uses EVEX.

There are a number of other reasons why Xen doesn't currently boot on
Knights-Corner (whereas its functions fine on Kights-Landing), so unless
someone has a specific usecase in mind and is willing to spend the
effort, I don't think it is worth our effort at the moment.

> There's no such duplicate meaning for XOP encodings.

How have you come to this conclusion?  The XOP map spaces are separate
to the main encodings, so the same primary opcode byte does have
different meanings depending on whether it is XOP encoded or not.

~Andrew
Jan Beulich Jan. 16, 2017, 3:45 p.m. UTC | #4
>>> On 16.01.17 at 15:52, <andrew.cooper3@citrix.com> wrote:
> On 16/01/17 11:36, Jan Beulich wrote:
>>>>> On 13.01.17 at 19:48, <andrew.cooper3@citrix.com> wrote:
>>> On 13/01/17 15:32, Jan Beulich wrote:
>>>> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
>>>> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
>>>> @@ -1355,6 +1355,7 @@ static bool vcpu_has(
>>>>  #define vcpu_has_cr8_legacy()  vcpu_has(0x80000001, ECX,  4, ctxt, ops)
>>>>  #define vcpu_has_lzcnt()       vcpu_has(0x80000001, ECX,  5, ctxt, ops)
>>>>  #define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
>>>> +#define vcpu_has_tbm()         vcpu_has(0x80000001, ECX, 21, ctxt, ops)
>>>>  #define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
>>>>  #define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
>>>>  #define vcpu_has_bmi2()        vcpu_has(         7, EBX,  8, ctxt, ops)
>>>> @@ -6014,6 +6015,85 @@ x86_emulate(
>>>>              asm ( "rorl %b1,%k0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
>>>>          break;
>>>>  
>>>> +    case X86EMUL_OPC(0x8f09, 0x01): /* XOP Grp1 */
>>> Surely this calls for the introduction of X86EMUL_OPC_XOP_* to match
>>> their VEX/EVEX counterparts?
>> Do really you think
>>
>>     case X86EMUL_OPC_XOP(09, 0x01): /* XOP Grp1 */
>>
>> or
>>
>>     case X86EMUL_OPC_XOP09(0x01): /* XOP Grp1 */
>>
>> are any better?
> 
> Either would be better, as it avoids the 0x8f magic prefix.

Well, okay then.

>> Iirc you had asked this same question already
>> when the opcode canonicalization patch was under review. The
>> situation hasn't changed: The nothing/VEX/EVEX distinction is
>> needed because the same base opcode may have (slightly or
>> significantly) different meaning depending on which of the three
>> (or four, if we also considered MVEX) encodings are being used.
> 
> MVEX is the precursor to EVEX, and as far as I can tell, was only
> implemented on the Knights-Corner co-processor, now superseded by
> Knights-Landing processor which uses EVEX.
> 
> There are a number of other reasons why Xen doesn't currently boot on
> Knights-Corner (whereas its functions fine on Kights-Landing), so unless
> someone has a specific usecase in mind and is willing to spend the
> effort, I don't think it is worth our effort at the moment.

I fully agree.

>> There's no such duplicate meaning for XOP encodings.
> 
> How have you come to this conclusion?  The XOP map spaces are separate
> to the main encodings, so the same primary opcode byte does have
> different meanings depending on whether it is XOP encoded or not.

That's not the duplicate meaning I was referring to. What I've tried
to point at is that e.g. ADDPS and VADDPS share their encoding, and
are distinguished only by non-VEX, VEX, or EVEX. There's nothing
equivalent for XOP.

Jan
diff mbox

Patch

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -1244,6 +1244,234 @@  int main(int argc, char **argv)
         printf("okay\n");
     }
 
+    printf("%-40s", "Testing bextr $0x0a03,(%ecx),%ebx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(bextr_imm);
+#ifdef __x86_64__
+        decl_insn(bextr64_imm);
+#endif
+
+        asm volatile ( put_insn(bextr_imm, "bextr $0x0a03, (%0), %%ebx")
+                       :: "c" (NULL) );
+        set_insn(bextr_imm);
+
+        *res        = 0xfedcba98;
+        regs.ecx    = (unsigned long)res;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != ((*res >> 3) & 0x3ff) ||
+             *res != 0xfedcba98 ||
+             (regs.eflags & 0xf6b) != 0x202 || !check_eip(bextr_imm) )
+            goto fail;
+        printf("okay\n");
+#ifdef __x86_64__
+        printf("%-40s", "Testing bextr $0x211e,(%r10),%r11...");
+
+        asm volatile ( put_insn(bextr64_imm, "bextr $0x211e, (%r10), %r11") );
+        set_insn(bextr64_imm);
+
+        res[0]      = 0x76543210;
+        res[1]      = 0xfedcba98;
+        regs.r10    = (unsigned long)res;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) ||
+             regs.r11 != (((unsigned long)(res[1] << 1) << 1) |
+                          (res[0] >> 30)) ||
+             res[0] != 0x76543210 || res[1] != 0xfedcba98 ||
+             (regs.eflags & 0xf6b) != 0x202 || !check_eip(bextr64_imm) )
+            goto fail;
+        printf("okay\n");
+#endif
+    }
+    else
+        printf("skipped\n");
+
+    res[0]      = 0xfedcba98;
+    res[1]      = 0x01234567;
+    regs.edx    = (unsigned long)res;
+
+    printf("%-40s", "Testing blcfill 4(%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(blcfill);
+
+        asm volatile ( put_insn(blcfill, "blcfill 4(%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(blcfill);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[1] != 0x01234567 ||
+             regs.ecx != ((res[1] + 1) & res[1]) ||
+             (regs.eflags & 0xfeb) != 0x202 || !check_eip(blcfill) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing blci 4(%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(blci);
+
+        asm volatile ( put_insn(blci, "blci 4(%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(blci);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[1] != 0x01234567 ||
+             regs.ecx != (~(res[1] + 1) | res[1]) ||
+             (regs.eflags & 0xfeb) != 0x282 || !check_eip(blci) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing blcic 4(%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(blcic);
+
+        asm volatile ( put_insn(blcic, "blcic 4(%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(blcic);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[1] != 0x01234567 ||
+             regs.ecx != ((res[1] + 1) & ~res[1]) ||
+             (regs.eflags & 0xfeb) != 0x202 || !check_eip(blcic) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing blcmsk 4(%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(blcmsk);
+
+        asm volatile ( put_insn(blcmsk, "blcmsk 4(%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(blcmsk);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[1] != 0x01234567 ||
+             regs.ecx != ((res[1] + 1) ^ res[1]) ||
+             (regs.eflags & 0xfeb) != 0x202 || !check_eip(blcmsk) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing blcs 4(%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(blcs);
+
+        asm volatile ( put_insn(blcs, "blcs 4(%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(blcs);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[1] != 0x01234567 ||
+             regs.ecx != ((res[1] + 1) | res[1]) ||
+             (regs.eflags & 0xfeb) != 0x202 || !check_eip(blcs) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing blsfill (%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(blsfill);
+
+        asm volatile ( put_insn(blsfill, "blsfill (%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(blsfill);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[0] != 0xfedcba98 ||
+             regs.ecx != ((res[0] - 1) | res[0]) ||
+             (regs.eflags & 0xfeb) != 0x282 || !check_eip(blsfill) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing blsic (%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(blsic);
+
+        asm volatile ( put_insn(blsic, "blsic (%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(blsic);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[0] != 0xfedcba98 ||
+             regs.ecx != ((res[0] - 1) | ~res[0]) ||
+             (regs.eflags & 0xfeb) != 0x282 || !check_eip(blsic) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing t1mskc 4(%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(t1mskc);
+
+        asm volatile ( put_insn(t1mskc, "t1mskc 4(%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(t1mskc);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[1] != 0x01234567 ||
+             regs.ecx != ((res[1] + 1) | ~res[1]) ||
+             (regs.eflags & 0xfeb) != 0x282 || !check_eip(t1mskc) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing tzmsk (%edx),%ecx...");
+    if ( stack_exec && cpu_has_tbm )
+    {
+        decl_insn(tzmsk);
+
+        asm volatile ( put_insn(tzmsk, "tzmsk (%0), %%ecx")
+                       :: "d" (NULL) );
+        set_insn(tzmsk);
+
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || res[0] != 0xfedcba98 ||
+             regs.ecx != ((res[0] - 1) & ~res[0]) ||
+             (regs.eflags & 0xfeb) != 0x202 || !check_eip(tzmsk) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing movq %mm3,(%ecx)...");
     if ( stack_exec && cpu_has_mmx )
     {
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -125,6 +125,12 @@  static inline uint64_t xgetbv(uint32_t x
     (res.b & (1U << 8)) != 0; \
 })
 
+#define cpu_has_tbm ({ \
+    struct cpuid_leaf res; \
+    emul_test_cpuid(0x80000001, 0, &res, NULL); \
+    (res.c & (1U << 21)) != 0; \
+})
+
 int emul_test_cpuid(
     uint32_t leaf,
     uint32_t subleaf,
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1355,6 +1355,7 @@  static bool vcpu_has(
 #define vcpu_has_cr8_legacy()  vcpu_has(0x80000001, ECX,  4, ctxt, ops)
 #define vcpu_has_lzcnt()       vcpu_has(0x80000001, ECX,  5, ctxt, ops)
 #define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
+#define vcpu_has_tbm()         vcpu_has(0x80000001, ECX, 21, ctxt, ops)
 #define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
 #define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
 #define vcpu_has_bmi2()        vcpu_has(         7, EBX,  8, ctxt, ops)
@@ -6014,6 +6015,85 @@  x86_emulate(
             asm ( "rorl %b1,%k0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
         break;
 
+    case X86EMUL_OPC(0x8f09, 0x01): /* XOP Grp1 */
+        switch ( modrm_reg & 7 )
+        {
+        case 1: /* blcfill r/m,r */
+        case 2: /* blsfill r/m,r */
+        case 3: /* blcs r/m,r */
+        case 4: /* tzmsk r/m,r */
+        case 5: /* blcic r/m,r */
+        case 6: /* blsic r/m,r */
+        case 7: /* t1mskc r/m,r */
+            host_and_vcpu_must_have(tbm);
+            break;
+        default:
+            goto cannot_emulate;
+        }
+
+    xop_09_rm_rv:
+    {
+        uint8_t *buf = get_stub(stub);
+        typeof(vex) *pxop = container_of(buf + 1, typeof(vex), raw[0]);
+
+        generate_exception_if(vex.l, EXC_UD);
+
+        buf[0] = 0x8f;
+        *pxop = vex;
+        pxop->b = 1;
+        pxop->r = 1;
+        pxop->reg = ~0; /* rAX */
+        buf[3] = b;
+        buf[4] = (modrm & 0x38) | 0x01; /* r/m=(%rCX) */
+        buf[5] = 0xc3;
+
+        dst.reg = decode_register(~vex.reg & (mode_64bit() ? 0xf : 7),
+                                  &_regs, 0);
+        emulate_stub([dst] "=&a" (dst.val), "c" (&src.val));
+
+        put_stub(stub);
+        break;
+    }
+
+    case X86EMUL_OPC(0x8f09, 0x02): /* XOP Grp2 */
+        switch ( modrm_reg & 7 )
+        {
+        case 1: /* blcmsk r/m,r */
+        case 6: /* blci r/m,r */
+            host_and_vcpu_must_have(tbm);
+            goto xop_09_rm_rv;
+        }
+        goto cannot_emulate;
+
+    case X86EMUL_OPC(0x8f0a, 0x10): /* bextr imm,r/m,r */
+    {
+        uint8_t *buf = get_stub(stub);
+        typeof(vex) *pxop = container_of(buf + 1, typeof(vex), raw[0]);
+
+        host_and_vcpu_must_have(tbm);
+        generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
+
+        if ( ea.type == OP_REG )
+            src.val = *ea.reg;
+        else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val, op_bytes,
+                                   ctxt, ops)) != X86EMUL_OKAY )
+            goto done;
+
+        buf[0] = 0x8f;
+        *pxop = vex;
+        pxop->b = 1;
+        pxop->r = 1;
+        buf[3] = b;
+        buf[4] = 0x09; /* reg=rCX r/m=(%rCX) */
+        *(uint32_t *)(buf + 5) = imm1;
+        buf[9] = 0xc3;
+
+        emulate_stub([dst] "=&c" (dst.val), "[dst]" (&src.val));
+
+        put_stub(stub);
+        break;
+    }
+
     default:
         goto cannot_emulate;
     }
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -74,6 +74,7 @@ 
 #define cpu_has_eist		boot_cpu_has(X86_FEATURE_EIST)
 #define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
 #define cpu_has_cmp_legacy	boot_cpu_has(X86_FEATURE_CMP_LEGACY)
+#define cpu_has_tbm		boot_cpu_has(X86_FEATURE_TBM)
 
 enum _cache_type {
     CACHE_TYPE_NULL = 0,