diff mbox

[2/6] x86emul: reduce CMPXCHG{8, 16}B footprint and casting

Message ID 58500D9402000078001289E5@prv-mh.provo.novell.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jan Beulich Dec. 13, 2016, 2:02 p.m. UTC
Re-use an existing stack variable (reducing stack footprint, which also
results in smaller code due to some stack accesses no longer needing a
32-bit displacement), at once using a union instead of casts. Also
switch to rex_prefix based conditionals instead of op_bytes ones.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
x86emul: reduce CMPXCHG{8,16}B footprint and casting

Re-use an existing stack variable (reducing stack footprint, which also
results in smaller code due to some stack accesses no longer needing a
32-bit displacement), at once using a union instead of casts. Also
switch to rex_prefix based conditionals instead of op_bytes ones.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -31,6 +31,11 @@
 #define likely(x)   __builtin_expect(!!(x), true)
 #define unlikely(x) __builtin_expect(!!(x), false)
 
+#define container_of(ptr, type, member) ({             \
+    typeof(((type *)0)->member) *mptr__ = (ptr);       \
+    (type *)((char *)mptr__ - offsetof(type, member)); \
+})
+
 #define is_canonical_address(x) (((int64_t)(x) >> 47) == ((int64_t)(x) >> 63))
 
 #define MMAP_SZ 16384
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -5282,11 +5282,14 @@ x86_emulate(
         break;
 
     case X86EMUL_OPC(0x0f, 0xc7): /* Grp9 (cmpxchg8b/cmpxchg16b) */ {
-        unsigned long old[2], exp[2], new[2];
+        union {
+            uint32_t u32[2];
+            uint64_t u64[2];
+        } *old, *aux;
 
         generate_exception_if((modrm_reg & 7) != 1, EXC_UD);
         generate_exception_if(ea.type != OP_MEM, EXC_UD);
-        if ( op_bytes == 8 )
+        if ( rex_prefix & REX_W )
         {
             host_and_vcpu_must_have(cx16);
             op_bytes = 16;
@@ -5295,35 +5298,52 @@ x86_emulate(
         else
             op_bytes = 8;
 
+        old = container_of(&mmvalp->ymm[0], typeof(*old), u64[0]);
+        aux = container_of(&mmvalp->ymm[2], typeof(*old), u64[0]);
+
         /* Get actual old value. */
         if ( (rc = ops->read(ea.mem.seg, ea.mem.off, old, op_bytes,
-                             ctxt)) != 0 )
+                             ctxt)) != X86EMUL_OKAY )
             goto done;
 
-        /* Get expected and proposed values. */
-        if ( op_bytes == 8 )
+        /* Get expected value. */
+        if ( !(rex_prefix & REX_W) )
         {
-            ((uint32_t *)exp)[0] = _regs.eax; ((uint32_t *)exp)[1] = _regs.edx;
-            ((uint32_t *)new)[0] = _regs.ebx; ((uint32_t *)new)[1] = _regs.ecx;
+            aux->u32[0] = _regs.eax;
+            aux->u32[1] = _regs.edx;
         }
         else
         {
-            exp[0] = _regs.eax; exp[1] = _regs.edx;
-            new[0] = _regs.ebx; new[1] = _regs.ecx;
+            aux->u64[0] = _regs.eax;
+            aux->u64[1] = _regs.edx;
         }
 
-        if ( memcmp(old, exp, op_bytes) )
+        if ( memcmp(old, aux, op_bytes) )
         {
             /* Expected != actual: store actual to rDX:rAX and clear ZF. */
-            _regs.eax = (op_bytes == 8) ? ((uint32_t *)old)[0] : old[0];
-            _regs.edx = (op_bytes == 8) ? ((uint32_t *)old)[1] : old[1];
+            _regs.eax = !(rex_prefix & REX_W) ? old->u32[0] : old->u64[0];
+            _regs.edx = !(rex_prefix & REX_W) ? old->u32[1] : old->u64[1];
             _regs.eflags &= ~EFLG_ZF;
         }
         else
         {
-            /* Expected == actual: attempt atomic cmpxchg and set ZF. */
-            if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old,
-                                    new, op_bytes, ctxt)) != 0 )
+            /*
+             * Expected == actual: Get proposed value, attempt atomic cmpxchg
+             * and set ZF.
+             */
+            if ( !(rex_prefix & REX_W) )
+            {
+                aux->u32[0] = _regs.ebx;
+                aux->u32[1] = _regs.ecx;
+            }
+            else
+            {
+                aux->u64[0] = _regs.ebx;
+                aux->u64[1] = _regs.ecx;
+            }
+
+            if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old, aux,
+                                    op_bytes, ctxt)) != X86EMUL_OKAY )
                 goto done;
             _regs.eflags |= EFLG_ZF;
         }

Comments

Andrew Cooper Dec. 13, 2016, 2:26 p.m. UTC | #1
On 13/12/16 14:02, Jan Beulich wrote:
> Re-use an existing stack variable (reducing stack footprint, which also
> results in smaller code due to some stack accesses no longer needing a
> 32-bit displacement), at once using a union instead of casts. Also
> switch to rex_prefix based conditionals instead of op_bytes ones.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/tools/tests/x86_emulator/x86_emulate.h
> +++ b/tools/tests/x86_emulator/x86_emulate.h
> @@ -31,6 +31,11 @@
>  #define likely(x)   __builtin_expect(!!(x), true)
>  #define unlikely(x) __builtin_expect(!!(x), false)
>  
> +#define container_of(ptr, type, member) ({             \
> +    typeof(((type *)0)->member) *mptr__ = (ptr);       \
> +    (type *)((char *)mptr__ - offsetof(type, member)); \
> +})
> +

Please could we use __builtin_containerof()?  I believe It is available
on all of the compilers we support.

This particular construct causes UBSAN to have a fit.

>  #define is_canonical_address(x) (((int64_t)(x) >> 47) == ((int64_t)(x) >> 63))
>  
>  #define MMAP_SZ 16384
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -5282,11 +5282,14 @@ x86_emulate(
>          break;
>  
>      case X86EMUL_OPC(0x0f, 0xc7): /* Grp9 (cmpxchg8b/cmpxchg16b) */ {
> -        unsigned long old[2], exp[2], new[2];
> +        union {
> +            uint32_t u32[2];
> +            uint64_t u64[2];
> +        } *old, *aux;
>  
>          generate_exception_if((modrm_reg & 7) != 1, EXC_UD);
>          generate_exception_if(ea.type != OP_MEM, EXC_UD);
> -        if ( op_bytes == 8 )
> +        if ( rex_prefix & REX_W )
>          {
>              host_and_vcpu_must_have(cx16);
>              op_bytes = 16;
> @@ -5295,35 +5298,52 @@ x86_emulate(
>          else
>              op_bytes = 8;
>  
> +        old = container_of(&mmvalp->ymm[0], typeof(*old), u64[0]);
> +        aux = container_of(&mmvalp->ymm[2], typeof(*old), u64[0]);

This should be typeof(*aux), although it makes no difference at the moment.

> +
>          /* Get actual old value. */
>          if ( (rc = ops->read(ea.mem.seg, ea.mem.off, old, op_bytes,
> -                             ctxt)) != 0 )
> +                             ctxt)) != X86EMUL_OKAY )
>              goto done;
>  
> -        /* Get expected and proposed values. */
> -        if ( op_bytes == 8 )
> +        /* Get expected value. */
> +        if ( !(rex_prefix & REX_W) )
>          {
> -            ((uint32_t *)exp)[0] = _regs.eax; ((uint32_t *)exp)[1] = _regs.edx;
> -            ((uint32_t *)new)[0] = _regs.ebx; ((uint32_t *)new)[1] = _regs.ecx;
> +            aux->u32[0] = _regs.eax;
> +            aux->u32[1] = _regs.edx;
>          }
>          else
>          {
> -            exp[0] = _regs.eax; exp[1] = _regs.edx;
> -            new[0] = _regs.ebx; new[1] = _regs.ecx;
> +            aux->u64[0] = _regs.eax;
> +            aux->u64[1] = _regs.edx;
>          }
>  
> -        if ( memcmp(old, exp, op_bytes) )
> +        if ( memcmp(old, aux, op_bytes) )
>          {
>              /* Expected != actual: store actual to rDX:rAX and clear ZF. */
> -            _regs.eax = (op_bytes == 8) ? ((uint32_t *)old)[0] : old[0];
> -            _regs.edx = (op_bytes == 8) ? ((uint32_t *)old)[1] : old[1];
> +            _regs.eax = !(rex_prefix & REX_W) ? old->u32[0] : old->u64[0];
> +            _regs.edx = !(rex_prefix & REX_W) ? old->u32[1] : old->u64[1];
>              _regs.eflags &= ~EFLG_ZF;

This clearing of ZF should be unconditional.  I think this is a latent
bug, but if the cmpxchg() fails, we would exit having failed the xchg,
but leaving ZF stale.

~Andrew

>          }
>          else
>          {
> -            /* Expected == actual: attempt atomic cmpxchg and set ZF. */
> -            if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old,
> -                                    new, op_bytes, ctxt)) != 0 )
> +            /*
> +             * Expected == actual: Get proposed value, attempt atomic cmpxchg
> +             * and set ZF.
> +             */
> +            if ( !(rex_prefix & REX_W) )
> +            {
> +                aux->u32[0] = _regs.ebx;
> +                aux->u32[1] = _regs.ecx;
> +            }
> +            else
> +            {
> +                aux->u64[0] = _regs.ebx;
> +                aux->u64[1] = _regs.ecx;
> +            }
> +
> +            if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old, aux,
> +                                    op_bytes, ctxt)) != X86EMUL_OKAY )
>                  goto done;
>              _regs.eflags |= EFLG_ZF;
>          }
>
>
>
Jan Beulich Dec. 13, 2016, 3:08 p.m. UTC | #2
>>> On 13.12.16 at 15:26, <andrew.cooper3@citrix.com> wrote:
> On 13/12/16 14:02, Jan Beulich wrote:
>> --- a/tools/tests/x86_emulator/x86_emulate.h
>> +++ b/tools/tests/x86_emulator/x86_emulate.h
>> @@ -31,6 +31,11 @@
>>  #define likely(x)   __builtin_expect(!!(x), true)
>>  #define unlikely(x) __builtin_expect(!!(x), false)
>>  
>> +#define container_of(ptr, type, member) ({             \
>> +    typeof(((type *)0)->member) *mptr__ = (ptr);       \
>> +    (type *)((char *)mptr__ - offsetof(type, member)); \
>> +})
>> +
> 
> Please could we use __builtin_containerof()?  I believe It is available
> on all of the compilers we support.

Where have you found reference to this? I can't find it in the doc
(nor anything similar, consulting the index), and grep-ing the gcc
sources doesn't reveal anything either.

>> @@ -5295,35 +5298,52 @@ x86_emulate(
>>          else
>>              op_bytes = 8;
>>  
>> +        old = container_of(&mmvalp->ymm[0], typeof(*old), u64[0]);
>> +        aux = container_of(&mmvalp->ymm[2], typeof(*old), u64[0]);
> 
> This should be typeof(*aux), although it makes no difference at the moment.

Oh, right - copy-and-paste mistake. Fixed.

>> -        if ( memcmp(old, exp, op_bytes) )
>> +        if ( memcmp(old, aux, op_bytes) )
>>          {
>>              /* Expected != actual: store actual to rDX:rAX and clear ZF. */
>> -            _regs.eax = (op_bytes == 8) ? ((uint32_t *)old)[0] : old[0];
>> -            _regs.edx = (op_bytes == 8) ? ((uint32_t *)old)[1] : old[1];
>> +            _regs.eax = !(rex_prefix & REX_W) ? old->u32[0] : old->u64[0];
>> +            _regs.edx = !(rex_prefix & REX_W) ? old->u32[1] : old->u64[1];
>>              _regs.eflags &= ~EFLG_ZF;
> 
> This clearing of ZF should be unconditional.  I think this is a latent
> bug, but if the cmpxchg() fails, we would exit having failed the xchg,
> but leaving ZF stale.

First of all - this request of yours is unrelated to this patch: The
conditions under which ZF gets altered don't get changed here
at all. And then this comment or yours is similar to one you gave
elsewhere not all that long ago; my answer is the same here -
we don't commit _regs.eflags to ctxt->regs if anything fails. And
namely when ops->cmpxchg() returns RETRY, then we indeed
want to retry with the flag unaltered.

Jan
diff mbox

Patch

--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -31,6 +31,11 @@ 
 #define likely(x)   __builtin_expect(!!(x), true)
 #define unlikely(x) __builtin_expect(!!(x), false)
 
+#define container_of(ptr, type, member) ({             \
+    typeof(((type *)0)->member) *mptr__ = (ptr);       \
+    (type *)((char *)mptr__ - offsetof(type, member)); \
+})
+
 #define is_canonical_address(x) (((int64_t)(x) >> 47) == ((int64_t)(x) >> 63))
 
 #define MMAP_SZ 16384
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -5282,11 +5282,14 @@  x86_emulate(
         break;
 
     case X86EMUL_OPC(0x0f, 0xc7): /* Grp9 (cmpxchg8b/cmpxchg16b) */ {
-        unsigned long old[2], exp[2], new[2];
+        union {
+            uint32_t u32[2];
+            uint64_t u64[2];
+        } *old, *aux;
 
         generate_exception_if((modrm_reg & 7) != 1, EXC_UD);
         generate_exception_if(ea.type != OP_MEM, EXC_UD);
-        if ( op_bytes == 8 )
+        if ( rex_prefix & REX_W )
         {
             host_and_vcpu_must_have(cx16);
             op_bytes = 16;
@@ -5295,35 +5298,52 @@  x86_emulate(
         else
             op_bytes = 8;
 
+        old = container_of(&mmvalp->ymm[0], typeof(*old), u64[0]);
+        aux = container_of(&mmvalp->ymm[2], typeof(*old), u64[0]);
+
         /* Get actual old value. */
         if ( (rc = ops->read(ea.mem.seg, ea.mem.off, old, op_bytes,
-                             ctxt)) != 0 )
+                             ctxt)) != X86EMUL_OKAY )
             goto done;
 
-        /* Get expected and proposed values. */
-        if ( op_bytes == 8 )
+        /* Get expected value. */
+        if ( !(rex_prefix & REX_W) )
         {
-            ((uint32_t *)exp)[0] = _regs.eax; ((uint32_t *)exp)[1] = _regs.edx;
-            ((uint32_t *)new)[0] = _regs.ebx; ((uint32_t *)new)[1] = _regs.ecx;
+            aux->u32[0] = _regs.eax;
+            aux->u32[1] = _regs.edx;
         }
         else
         {
-            exp[0] = _regs.eax; exp[1] = _regs.edx;
-            new[0] = _regs.ebx; new[1] = _regs.ecx;
+            aux->u64[0] = _regs.eax;
+            aux->u64[1] = _regs.edx;
         }
 
-        if ( memcmp(old, exp, op_bytes) )
+        if ( memcmp(old, aux, op_bytes) )
         {
             /* Expected != actual: store actual to rDX:rAX and clear ZF. */
-            _regs.eax = (op_bytes == 8) ? ((uint32_t *)old)[0] : old[0];
-            _regs.edx = (op_bytes == 8) ? ((uint32_t *)old)[1] : old[1];
+            _regs.eax = !(rex_prefix & REX_W) ? old->u32[0] : old->u64[0];
+            _regs.edx = !(rex_prefix & REX_W) ? old->u32[1] : old->u64[1];
             _regs.eflags &= ~EFLG_ZF;
         }
         else
         {
-            /* Expected == actual: attempt atomic cmpxchg and set ZF. */
-            if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old,
-                                    new, op_bytes, ctxt)) != 0 )
+            /*
+             * Expected == actual: Get proposed value, attempt atomic cmpxchg
+             * and set ZF.
+             */
+            if ( !(rex_prefix & REX_W) )
+            {
+                aux->u32[0] = _regs.ebx;
+                aux->u32[1] = _regs.ecx;
+            }
+            else
+            {
+                aux->u64[0] = _regs.ebx;
+                aux->u64[1] = _regs.ecx;
+            }
+
+            if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old, aux,
+                                    op_bytes, ctxt)) != X86EMUL_OKAY )
                 goto done;
             _regs.eflags |= EFLG_ZF;
         }