diff mbox

xen/x86: Improve hypercall page writing

Message ID 1482166510-8977-1-git-send-email-andrew.cooper3@citrix.com (mailing list archive)
State New, archived
Headers show

Commit Message

Andrew Cooper Dec. 19, 2016, 4:55 p.m. UTC
Use memcpy() rather than individual writes with explicit casts.  The
__builtin_memcpy() wrapper does a better job at combining adjacent writes into
a larger word size.

This results in better generated assembly.  No functional change.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Jun Nakajima <jun.nakajima@intel.com>
CC: Kevin Tian <kevin.tian@intel.com>
CC: Boris Ostrovsky <boris.ostrovsky@oracle.com>
CC: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
 xen/arch/x86/hvm/svm/svm.c         | 10 +++++-----
 xen/arch/x86/hvm/vmx/vmx.c         | 10 +++++-----
 xen/arch/x86/x86_64/compat/traps.c | 15 +++++++++------
 xen/arch/x86/x86_64/traps.c        | 31 ++++++++++++++++++-------------
 4 files changed, 37 insertions(+), 29 deletions(-)

Comments

Boris Ostrovsky Dec. 19, 2016, 7:14 p.m. UTC | #1
On 12/19/2016 11:55 AM, Andrew Cooper wrote:
> Use memcpy() rather than individual writes with explicit casts.  The
> __builtin_memcpy() wrapper does a better job at combining adjacent writes into
> a larger word size.
>
> This results in better generated assembly.  No functional change.
>
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Tian, Kevin Dec. 20, 2016, 5:11 a.m. UTC | #2
> From: Andrew Cooper [mailto:andrew.cooper3@citrix.com]
> Sent: Tuesday, December 20, 2016 12:55 AM
> 
> Use memcpy() rather than individual writes with explicit casts.  The
> __builtin_memcpy() wrapper does a better job at combining adjacent writes into
> a larger word size.
> 
> This results in better generated assembly.  No functional change.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Acked-by: Kevin Tian <kevin.tian@intel.com>
Jan Beulich Dec. 20, 2016, 7:48 a.m. UTC | #3
>>> On 19.12.16 at 17:55, <andrew.cooper3@citrix.com> wrote:
> Use memcpy() rather than individual writes with explicit casts.  The
> __builtin_memcpy() wrapper does a better job at combining adjacent writes into
> a larger word size.
> 
> This results in better generated assembly.  No functional change.

I don't think generated code matters much for these functions, so
I'd don't view open coding constants like ...

> --- a/xen/arch/x86/x86_64/compat/traps.c
> +++ b/xen/arch/x86/x86_64/compat/traps.c
> @@ -388,8 +388,10 @@ static void hypercall_page_initialise_ring1_kernel(void *hypercall_page)
>          p = (char *)(hypercall_page + (i * 32));
>          *(u8  *)(p+ 0) = 0xb8;    /* mov  $<i>,%eax */
>          *(u32 *)(p+ 1) = i;
> -        *(u16 *)(p+ 5) = (HYPERCALL_VECTOR << 8) | 0xcd; /* int  $xx */
> -        *(u8  *)(p+ 7) = 0xc3;    /* ret */
> +        memcpy(p + 5,
> +               "\xcd\x82" /* int $HYPERCALL_VECTOR */

... here or ...

> @@ -398,10 +400,11 @@ static void hypercall_page_initialise_ring1_kernel(void *hypercall_page)
>       * calling it.
>       */
>      p = (char *)(hypercall_page + (__HYPERVISOR_iret * 32));
> -    *(u8  *)(p+ 0) = 0x50;    /* push %eax */
> -    *(u8  *)(p+ 1) = 0xb8;    /* mov  $__HYPERVISOR_iret,%eax */
> -    *(u32 *)(p+ 2) = __HYPERVISOR_iret;
> -    *(u16 *)(p+ 6) = (HYPERCALL_VECTOR << 8) | 0xcd; /* int  $xx */
> +    memcpy(p,
> +           "\x50"                 /* push %eax */
> +           "\xb8\x17\x00\x00\x00" /* mov  $__HYPERVISOR_iret, %eax */
> +           "\xcd\x82",            /* int  $HYPERCALL_VECTOR */

here as a good idea. If you used a static const uint8_t[] instead of
a string literal (which even includes a pointless nul terminator), all of
this could be avoided afaict.

Jan
Andrew Cooper Dec. 20, 2016, 12:21 p.m. UTC | #4
On 20/12/2016 07:48, Jan Beulich wrote:
>
>> @@ -398,10 +400,11 @@ static void hypercall_page_initialise_ring1_kernel(void *hypercall_page)
>>       * calling it.
>>       */
>>      p = (char *)(hypercall_page + (__HYPERVISOR_iret * 32));
>> -    *(u8  *)(p+ 0) = 0x50;    /* push %eax */
>> -    *(u8  *)(p+ 1) = 0xb8;    /* mov  $__HYPERVISOR_iret,%eax */
>> -    *(u32 *)(p+ 2) = __HYPERVISOR_iret;
>> -    *(u16 *)(p+ 6) = (HYPERCALL_VECTOR << 8) | 0xcd; /* int  $xx */
>> +    memcpy(p,
>> +           "\x50"                 /* push %eax */
>> +           "\xb8\x17\x00\x00\x00" /* mov  $__HYPERVISOR_iret, %eax */
>> +           "\xcd\x82",            /* int  $HYPERCALL_VECTOR */
> here as a good idea.

Well, they are fixed in the ABI.  It is not as if we could ever change them.

>  If you used a static const uint8_t[] instead of
> a string literal (which even includes a pointless nul terminator), all of
> this could be avoided afaict.

I can see how that would work for the `int $0x82` case, but how do you
propose fitting 4 bytes of __HYPERVISOR_iret into an initialiser for a
uint8_t array?

~Andrew
Jan Beulich Dec. 20, 2016, 12:57 p.m. UTC | #5
>>> On 20.12.16 at 13:21, <andrew.cooper3@citrix.com> wrote:
> On 20/12/2016 07:48, Jan Beulich wrote:
>>
>>> @@ -398,10 +400,11 @@ static void hypercall_page_initialise_ring1_kernel(void 
> *hypercall_page)
>>>       * calling it.
>>>       */
>>>      p = (char *)(hypercall_page + (__HYPERVISOR_iret * 32));
>>> -    *(u8  *)(p+ 0) = 0x50;    /* push %eax */
>>> -    *(u8  *)(p+ 1) = 0xb8;    /* mov  $__HYPERVISOR_iret,%eax */
>>> -    *(u32 *)(p+ 2) = __HYPERVISOR_iret;
>>> -    *(u16 *)(p+ 6) = (HYPERCALL_VECTOR << 8) | 0xcd; /* int  $xx */
>>> +    memcpy(p,
>>> +           "\x50"                 /* push %eax */
>>> +           "\xb8\x17\x00\x00\x00" /* mov  $__HYPERVISOR_iret, %eax */
>>> +           "\xcd\x82",            /* int  $HYPERCALL_VECTOR */
>> here as a good idea.
> 
> Well, they are fixed in the ABI.  It is not as if we could ever change them.
> 
>>  If you used a static const uint8_t[] instead of
>> a string literal (which even includes a pointless nul terminator), all of
>> this could be avoided afaict.
> 
> I can see how that would work for the `int $0x82` case, but how do you
> propose fitting 4 bytes of __HYPERVISOR_iret into an initialiser for a
> uint8_t array?

One byte __HYPERVISOR_iret, the other three zeros. perhaps
accompanied by a BUILD_BUG_ON(). Another alternative would
be a static const struct.

Jan
diff mbox

Patch

diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 811ea4e..962667a 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -888,12 +888,12 @@  static void svm_init_hypercall_page(struct domain *d, void *hypercall_page)
             continue;
 
         p = (char *)(hypercall_page + (i * 32));
-        *(u8  *)(p + 0) = 0xb8; /* mov imm32, %eax */
+        *(u8  *)(p + 0) = 0xb8; /* mov $<i>, %eax */
         *(u32 *)(p + 1) = i;
-        *(u8  *)(p + 5) = 0x0f; /* vmmcall */
-        *(u8  *)(p + 6) = 0x01;
-        *(u8  *)(p + 7) = 0xd9;
-        *(u8  *)(p + 8) = 0xc3; /* ret */
+        memcpy(p + 5,
+               "\x0f\x01\xd9"   /* vmmcall */
+               "\xc3",          /* ret */
+               4);
     }
 
     /* Don't support HYPERVISOR_iret at the moment */
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index d50d49e..c0e62a2 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1295,12 +1295,12 @@  static void vmx_init_hypercall_page(struct domain *d, void *hypercall_page)
             continue;
 
         p = (char *)(hypercall_page + (i * 32));
-        *(u8  *)(p + 0) = 0xb8; /* mov imm32, %eax */
+        *(u8  *)(p + 0) = 0xb8; /* mov $<i>, %eax */
         *(u32 *)(p + 1) = i;
-        *(u8  *)(p + 5) = 0x0f; /* vmcall */
-        *(u8  *)(p + 6) = 0x01;
-        *(u8  *)(p + 7) = 0xc1;
-        *(u8  *)(p + 8) = 0xc3; /* ret */
+        memcpy(p + 5,
+               "\x0f\x01\xc1"   /* vmmcall */
+               "\xc3",          /* ret */
+               4);
     }
 
     /* Don't support HYPERVISOR_iret at the moment */
diff --git a/xen/arch/x86/x86_64/compat/traps.c b/xen/arch/x86/x86_64/compat/traps.c
index 95c5cb3..8b03291 100644
--- a/xen/arch/x86/x86_64/compat/traps.c
+++ b/xen/arch/x86/x86_64/compat/traps.c
@@ -388,8 +388,10 @@  static void hypercall_page_initialise_ring1_kernel(void *hypercall_page)
         p = (char *)(hypercall_page + (i * 32));
         *(u8  *)(p+ 0) = 0xb8;    /* mov  $<i>,%eax */
         *(u32 *)(p+ 1) = i;
-        *(u16 *)(p+ 5) = (HYPERCALL_VECTOR << 8) | 0xcd; /* int  $xx */
-        *(u8  *)(p+ 7) = 0xc3;    /* ret */
+        memcpy(p + 5,
+               "\xcd\x82" /* int $HYPERCALL_VECTOR */
+               "\xc3",    /* ret */
+               3);
     }
 
     /*
@@ -398,10 +400,11 @@  static void hypercall_page_initialise_ring1_kernel(void *hypercall_page)
      * calling it.
      */
     p = (char *)(hypercall_page + (__HYPERVISOR_iret * 32));
-    *(u8  *)(p+ 0) = 0x50;    /* push %eax */
-    *(u8  *)(p+ 1) = 0xb8;    /* mov  $__HYPERVISOR_iret,%eax */
-    *(u32 *)(p+ 2) = __HYPERVISOR_iret;
-    *(u16 *)(p+ 6) = (HYPERCALL_VECTOR << 8) | 0xcd; /* int  $xx */
+    memcpy(p,
+           "\x50"                 /* push %eax */
+           "\xb8\x17\x00\x00\x00" /* mov  $__HYPERVISOR_iret, %eax */
+           "\xcd\x82",            /* int  $HYPERCALL_VECTOR */
+           8);
 }
 
 /*
diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c
index fc8cde6..7a586cd 100644
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -591,14 +591,18 @@  static void hypercall_page_initialise_ring3_kernel(void *hypercall_page)
             continue;
 
         p = (char *)(hypercall_page + (i * 32));
-        *(u8  *)(p+ 0) = 0x51;    /* push %rcx */
-        *(u16 *)(p+ 1) = 0x5341;  /* push %r11 */
-        *(u8  *)(p+ 3) = 0xb8;    /* mov  $<i>,%eax */
+        memcpy(p,
+               "\x51"      /* push %rcx */
+               "\x41\x53"  /* push %r11 */
+               "\xb8",     /* mov  $<i>,%eax */
+               4);
         *(u32 *)(p+ 4) = i;
-        *(u16 *)(p+ 8) = 0x050f;  /* syscall */
-        *(u16 *)(p+10) = 0x5b41;  /* pop  %r11 */
-        *(u8  *)(p+12) = 0x59;    /* pop  %rcx */
-        *(u8  *)(p+13) = 0xc3;    /* ret */
+        memcpy(p + 8,
+               "\x0f\x05"  /* syscall */
+               "\x41\x5b"  /* pop  %r11 */
+               "\x59"      /* pop  %rcx */
+               "\xc3",     /* ret */
+               6);
     }
 
     /*
@@ -607,12 +611,13 @@  static void hypercall_page_initialise_ring3_kernel(void *hypercall_page)
      * calling it.
      */
     p = (char *)(hypercall_page + (__HYPERVISOR_iret * 32));
-    *(u8  *)(p+ 0) = 0x51;    /* push %rcx */
-    *(u16 *)(p+ 1) = 0x5341;  /* push %r11 */
-    *(u8  *)(p+ 3) = 0x50;    /* push %rax */
-    *(u8  *)(p+ 4) = 0xb8;    /* mov  $__HYPERVISOR_iret,%eax */
-    *(u32 *)(p+ 5) = __HYPERVISOR_iret;
-    *(u16 *)(p+ 9) = 0x050f;  /* syscall */
+    memcpy(p,
+           "\x51"                 /* push %rcx */
+           "\x41\x53"             /* push %r11 */
+           "\x50"                 /* push %rax */
+           "\xb8\x17\x00\x00\x00" /* mov  $__HYPERVISOR_iret,%eax */
+           "\x0f\x05",            /* syscall */
+           11);
 }
 
 #include "compat/traps.c"