
[4/6] x86emul: reduce FPU handling code size

Message ID 5846D5800200007800125C23@prv-mh.provo.novell.com (mailing list archive)
State New, archived

Commit Message

Jan Beulich Dec. 6, 2016, 2:13 p.m. UTC
Pulling out the {get,put}_fpu() invocations from individual emulation
paths leads to a couple of kB of code size reduction in my builds. Note
that this is fine exception-wise:
- #UD and #NM have implementation-defined order relative to one
  another,
- data read #GP/#SS/#PF are now properly delivered after #NM/#UD.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
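
For readers less familiar with the emulator's macro layer, the following is a minimal, compilable sketch of the before/after pattern the patch applies: the acquire/release pair moves out of every macro expansion and into the enclosing opcode case, so the macros reduce to bare asm statements. The struct, the get_fpu()/put_fpu() stubs, the X86EMUL_FPU_fpu constant and the emulate_0xd8_case() wrapper below are simplified stand-ins for illustration only; the real definitions are the ones visible in the diff below.

```c
/* Stand-in declarations for illustration only -- not the Xen originals. */
struct fpu_insn_ctxt { unsigned char insn_bytes; };
#define X86EMUL_FPU_fpu 0
static void get_fpu(int type, struct fpu_insn_ctxt *fic) { (void)type; (void)fic; }
static void put_fpu(struct fpu_insn_ctxt *fic) { (void)fic; }

/* Before: every expansion carried its own acquire/release pair. */
#define emulate_fpu_insn_before(op)               \
do {                                              \
    struct fpu_insn_ctxt fic;                     \
    get_fpu(X86EMUL_FPU_fpu, &fic);               \
    asm volatile ( op ::: "memory" );             \
    put_fpu(&fic);                                \
} while (0)

/* After: the macro emits only the instruction; the caller owns the FPU. */
#define emulate_fpu_insn_after(op)                \
    asm volatile ( op ::: "memory" )

/* Sketch of one opcode case block after the change: a single
 * function-scope context, acquired once before the modrm dispatch
 * and released once after it, however many macro expansions follow. */
static void emulate_0xd8_case(void)
{
    struct fpu_insn_ctxt fic;

    get_fpu(X86EMUL_FPU_fpu, &fic);
    emulate_fpu_insn_after("fadd %%st(1),%%st");
    emulate_fpu_insn_after("fmul %%st(1),%%st");
    put_fpu(&fic);
}
```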

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -874,59 +874,44 @@ do {
 } while (0)
 
 #define emulate_fpu_insn(_op)                           \
-do{ struct fpu_insn_ctxt fic;                           \
-    get_fpu(X86EMUL_FPU_fpu, &fic);                     \
     asm volatile (                                      \
         "movb $2f-1f,%0 \n"                             \
         "1: " _op "     \n"                             \
         "2:             \n"                             \
-        : "=m" (fic.insn_bytes) : : "memory" );         \
-    put_fpu(&fic);                                      \
-} while (0)
+        : "=m" (fic.insn_bytes) : : "memory" )
 
 #define emulate_fpu_insn_memdst(_op, _arg)              \
-do{ struct fpu_insn_ctxt fic;                           \
-    get_fpu(X86EMUL_FPU_fpu, &fic);                     \
     asm volatile (                                      \
         "movb $2f-1f,%0 \n"                             \
         "1: " _op " %1  \n"                             \
         "2:             \n"                             \
         : "=m" (fic.insn_bytes), "=m" (_arg)            \
-        : : "memory" );                                 \
-    put_fpu(&fic);                                      \
-} while (0)
+        : : "memory" )
 
 #define emulate_fpu_insn_memsrc(_op, _arg)              \
-do{ struct fpu_insn_ctxt fic;                           \
-    get_fpu(X86EMUL_FPU_fpu, &fic);                     \
     asm volatile (                                      \
         "movb $2f-1f,%0 \n"                             \
         "1: " _op " %1  \n"                             \
         "2:             \n"                             \
         : "=m" (fic.insn_bytes)                         \
-        : "m" (_arg) : "memory" );                      \
-    put_fpu(&fic);                                      \
-} while (0)
+        : "m" (_arg) : "memory" )
 
 #define emulate_fpu_insn_stub(_bytes...)                                \
 do {                                                                    \
     uint8_t *buf = get_stub(stub);                                      \
     unsigned int _nr = sizeof((uint8_t[]){ _bytes });                   \
-    struct fpu_insn_ctxt fic = { .insn_bytes = _nr };                   \
+    fic.insn_bytes = _nr;                                               \
     memcpy(buf, ((uint8_t[]){ _bytes, 0xc3 }), _nr + 1);                \
-    get_fpu(X86EMUL_FPU_fpu, &fic);                                     \
     stub.func();                                                        \
-    put_fpu(&fic);                                                      \
     put_stub(stub);                                                     \
 } while (0)
 
 #define emulate_fpu_insn_stub_eflags(bytes...)                          \
 do {                                                                    \
     unsigned int nr_ = sizeof((uint8_t[]){ bytes });                    \
-    struct fpu_insn_ctxt fic_ = { .insn_bytes = nr_ };                  \
     unsigned long tmp_;                                                 \
+    fic.insn_bytes = nr_;                                               \
     memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1);      \
-    get_fpu(X86EMUL_FPU_fpu, &fic_);                                    \
     asm volatile ( _PRE_EFLAGS("[eflags]", "[mask]", "[tmp]")           \
                    "call *%[func];"                                     \
                    _POST_EFLAGS("[eflags]", "[mask]", "[tmp]")          \
@@ -934,7 +919,6 @@ do {
                      [tmp] "=&r" (tmp_)                                 \
                    : [func] "rm" (stub.func),                           \
                      [mask] "i" (EFLG_ZF|EFLG_PF|EFLG_CF) );            \
-    put_fpu(&fic_);                                                     \
     put_stub(stub);                                                     \
 } while (0)
 
@@ -2579,6 +2563,7 @@ x86_emulate(
     struct operand src = { .reg = PTR_POISON };
     struct operand dst = { .reg = PTR_POISON };
     enum x86_swint_type swint_type;
+    struct fpu_insn_ctxt fic;
     struct x86_emulate_stub stub = {};
     DECLARE_ALIGNED(mmval_t, mmval);
 
@@ -3272,15 +3257,12 @@ x86_emulate(
         break;
 
     case 0x9b:  /* wait/fwait */
-    {
-        struct fpu_insn_ctxt fic = { .insn_bytes = 1 };
-
+        fic.insn_bytes = 1;
         host_and_vcpu_must_have(fpu);
         get_fpu(X86EMUL_FPU_wait, &fic);
         asm volatile ( "fwait" ::: "memory" );
         put_fpu(&fic);
         break;
-    }
 
     case 0x9c: /* pushf */
         generate_exception_if((_regs.eflags & EFLG_VM) &&
@@ -3670,6 +3652,7 @@ x86_emulate(
 
     case 0xd8: /* FPU 0xd8 */
         host_and_vcpu_must_have(fpu);
+        get_fpu(X86EMUL_FPU_fpu, &fic);
         switch ( modrm )
         {
         case 0xc0 ... 0xc7: /* fadd %stN,%st */
@@ -3715,10 +3698,12 @@ x86_emulate(
                 break;
             }
         }
+        put_fpu(&fic);
         break;
 
     case 0xd9: /* FPU 0xd9 */
         host_and_vcpu_must_have(fpu);
+        get_fpu(X86EMUL_FPU_fpu, &fic);
         switch ( modrm )
         {
         case 0xfb: /* fsincos */
@@ -3795,10 +3780,12 @@ x86_emulate(
                 generate_exception_if(true, EXC_UD);
             }
         }
+        put_fpu(&fic);
         break;
 
     case 0xda: /* FPU 0xda */
         host_and_vcpu_must_have(fpu);
+        get_fpu(X86EMUL_FPU_fpu, &fic);
         switch ( modrm )
         {
         case 0xc0 ... 0xc7: /* fcmovb %stN */
@@ -3844,10 +3831,12 @@ x86_emulate(
                 break;
             }
         }
+        put_fpu(&fic);
         break;
 
     case 0xdb: /* FPU 0xdb */
         host_and_vcpu_must_have(fpu);
+        get_fpu(X86EMUL_FPU_fpu, &fic);
         switch ( modrm )
         {
         case 0xc0 ... 0xc7: /* fcmovnb %stN */
@@ -3907,10 +3896,12 @@ x86_emulate(
                 generate_exception_if(true, EXC_UD);
             }
         }
+        put_fpu(&fic);
         break;
 
     case 0xdc: /* FPU 0xdc */
         host_and_vcpu_must_have(fpu);
+        get_fpu(X86EMUL_FPU_fpu, &fic);
         switch ( modrm )
         {
         case 0xc0 ... 0xc7: /* fadd %st,%stN */
@@ -3956,10 +3947,12 @@ x86_emulate(
                 break;
             }
         }
+        put_fpu(&fic);
         break;
 
     case 0xdd: /* FPU 0xdd */
         host_and_vcpu_must_have(fpu);
+        get_fpu(X86EMUL_FPU_fpu, &fic);
         switch ( modrm )
         {
         case 0xc0 ... 0xc7: /* ffree %stN */
@@ -4006,10 +3999,12 @@ x86_emulate(
                 generate_exception_if(true, EXC_UD);
             }
         }
+        put_fpu(&fic);
         break;
 
     case 0xde: /* FPU 0xde */
         host_and_vcpu_must_have(fpu);
+        get_fpu(X86EMUL_FPU_fpu, &fic);
         switch ( modrm )
         {
         case 0xc0 ... 0xc7: /* faddp %stN */
@@ -4052,10 +4047,12 @@ x86_emulate(
                 break;
             }
         }
+        put_fpu(&fic);
         break;
 
     case 0xdf: /* FPU 0xdf */
         host_and_vcpu_must_have(fpu);
+        get_fpu(X86EMUL_FPU_fpu, &fic);
         switch ( modrm )
         {
         case 0xe0:
@@ -4125,6 +4122,7 @@ x86_emulate(
                 break;
             }
         }
+        put_fpu(&fic);
         break;
 
     case 0xe0 ... 0xe2: /* loop{,z,nz} */ {
@@ -4945,8 +4943,8 @@ x86_emulate(
     case X86EMUL_OPC_VEX_F2(0x0f, 0x11): /* vmovsd xmm,xmm/m64 */
     {
         uint8_t *buf = get_stub(stub);
-        struct fpu_insn_ctxt fic = { .insn_bytes = 5 };
 
+        fic.insn_bytes = 5;
         buf[0] = 0x3e;
         buf[1] = 0x3e;
         buf[2] = 0x0f;
@@ -5215,8 +5213,8 @@ x86_emulate(
     case X86EMUL_OPC_VEX_66(0x0f, 0xd6): /* vmovq xmm,xmm/m64 */
     {
         uint8_t *buf = get_stub(stub);
-        struct fpu_insn_ctxt fic = { .insn_bytes = 5 };
 
+        fic.insn_bytes = 5;
         buf[0] = 0x3e;
         buf[1] = 0x3e;
         buf[2] = 0x0f;

Comments

Andrew Cooper Dec. 7, 2016, 3:06 p.m. UTC | #1
On 06/12/16 14:13, Jan Beulich wrote:
> @@ -3670,6 +3652,7 @@ x86_emulate(
>  
>      case 0xd8: /* FPU 0xd8 */
>          host_and_vcpu_must_have(fpu);
> +        get_fpu(X86EMUL_FPU_fpu, &fic);
>          switch ( modrm )
>          {
>          case 0xc0 ... 0xc7: /* fadd %stN,%st */
> @@ -3715,10 +3698,12 @@ x86_emulate(
>                  break;
>              }
>          }
> +        put_fpu(&fic);
>          break;

This repositioning does mean that we might skip the put_fpu() call,
although we still retain the catch-all _put_fpu().

I think this is still safe as we only skip this put_fpu() in cases where
we didn't emulate an instruction, and there isn't the risk of missing a
pending FPU exception.

~Andrew
Jan Beulich Dec. 7, 2016, 3:16 p.m. UTC | #2
>>> On 07.12.16 at 16:06, <andrew.cooper3@citrix.com> wrote:
> On 06/12/16 14:13, Jan Beulich wrote:
>> @@ -3670,6 +3652,7 @@ x86_emulate(
>>  
>>      case 0xd8: /* FPU 0xd8 */
>>          host_and_vcpu_must_have(fpu);
>> +        get_fpu(X86EMUL_FPU_fpu, &fic);
>>          switch ( modrm )
>>          {
>>          case 0xc0 ... 0xc7: /* fadd %stN,%st */
>> @@ -3715,10 +3698,12 @@ x86_emulate(
>>                  break;
>>              }
>>          }
>> +        put_fpu(&fic);
>>          break;
> 
> This repositioning does mean that we might skip the put_fpu() call,
> although we still retain the catch-all _put_fpu().
> 
> I think this is still safe as we only skip this put_fpu() in cases where
> we didn't emulate an instruction, and there isn't the risk of missing a
> pending FPU exception.

Correct - the catch-all one is sufficient for all cases.

Jan
Andrew Cooper Dec. 7, 2016, 3:21 p.m. UTC | #3
On 07/12/16 15:16, Jan Beulich wrote:
>>>> On 07.12.16 at 16:06, <andrew.cooper3@citrix.com> wrote:
>> On 06/12/16 14:13, Jan Beulich wrote:
>>> @@ -3670,6 +3652,7 @@ x86_emulate(
>>>  
>>>      case 0xd8: /* FPU 0xd8 */
>>>          host_and_vcpu_must_have(fpu);
>>> +        get_fpu(X86EMUL_FPU_fpu, &fic);
>>>          switch ( modrm )
>>>          {
>>>          case 0xc0 ... 0xc7: /* fadd %stN,%st */
>>> @@ -3715,10 +3698,12 @@ x86_emulate(
>>>                  break;
>>>              }
>>>          }
>>> +        put_fpu(&fic);
>>>          break;
>> This repositioning does mean that we might skip the put_fpu() call,
>> although we still retain the catch-all _put_fpu().
>>
>> I think this is still safe as we only skip this put_fpu() in cases where
>> we didn't emulate an instruction, and there isn't the risk of missing a
>> pending FPU exception.
> Correct - the catch-all one is sufficient for all cases.

Ok.  Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
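
To make the point settled in the thread concrete, here is a hedged sketch of the control flow being discussed, repeating the same kind of stand-in declarations as the earlier sketch for self-containment. x86_emulate_sketch(), the rc handling and the _put_fpu() stub are simplified illustrations modelled on the thread's description, not the real x86_emulate() code: an exception raised inside the switch jumps past the explicit put_fpu(), but the catch-all on the common exit path still releases the FPU, and since no instruction was executed there is no pending FPU exception to lose.

```c
/* Stand-in declarations for illustration only -- not the Xen originals. */
struct fpu_insn_ctxt { unsigned char insn_bytes; };
#define X86EMUL_FPU_fpu 0
static void get_fpu(int type, struct fpu_insn_ctxt *fic) { (void)type; (void)fic; }
static void put_fpu(struct fpu_insn_ctxt *fic) { (void)fic; }
static void _put_fpu(void) { }            /* stand-in for the catch-all */

static int x86_emulate_sketch(unsigned int modrm)
{
    struct fpu_insn_ctxt fic;
    int rc = 0;

    get_fpu(X86EMUL_FPU_fpu, &fic);       /* acquired before the modrm switch */

    switch ( modrm )
    {
    case 0xc0:                            /* instruction actually emulated */
        asm volatile ( "fadd %%st(1),%%st" ::: "memory" );
        break;
    default:                              /* generate_exception_if()-style path: */
        rc = -1;
        goto done;                        /* skips the explicit put_fpu() below */
    }

    put_fpu(&fic);                        /* normal path: explicit release */

 done:
    _put_fpu();                           /* catch-all on the exit path keeps */
    return rc;                            /* the FPU from being leaked        */
}
```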