Message ID | 1482166510-8977-1-git-send-email-andrew.cooper3@citrix.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 12/19/2016 11:55 AM, Andrew Cooper wrote: > Use memcpy() rather than individual writes with explicit casts. The > __builtin_memcpy() wrapper does a better job at combining adjacent writes into > a larger word size. > > This results in better generated assembly. No functional change. > > Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> From: Andrew Cooper [mailto:andrew.cooper3@citrix.com] > Sent: Tuesday, December 20, 2016 12:55 AM > > Use memcpy() rather than individual writes with explicit casts. The > __builtin_memcpy() wrapper does a better job at combining adjacent writes into > a larger word size. > > This results in better generated assembly. No functional change. > > Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> Acked-by: Kevin Tian <kevin.tian@intel.com>
>>> On 19.12.16 at 17:55, <andrew.cooper3@citrix.com> wrote: > Use memcpy() rather than individual writes with explicit casts. The > __builtin_memcpy() wrapper does a better job at combining adjacent writes into > a larger word size. > > This results in better generated assembly. No functional change. I don't think generated code matters much for these functions, so I'd don't view open coding constants like ... > --- a/xen/arch/x86/x86_64/compat/traps.c > +++ b/xen/arch/x86/x86_64/compat/traps.c > @@ -388,8 +388,10 @@ static void hypercall_page_initialise_ring1_kernel(void *hypercall_page) > p = (char *)(hypercall_page + (i * 32)); > *(u8 *)(p+ 0) = 0xb8; /* mov $<i>,%eax */ > *(u32 *)(p+ 1) = i; > - *(u16 *)(p+ 5) = (HYPERCALL_VECTOR << 8) | 0xcd; /* int $xx */ > - *(u8 *)(p+ 7) = 0xc3; /* ret */ > + memcpy(p + 5, > + "\xcd\x82" /* int $HYPERCALL_VECTOR */ ... here or ... > @@ -398,10 +400,11 @@ static void hypercall_page_initialise_ring1_kernel(void *hypercall_page) > * calling it. > */ > p = (char *)(hypercall_page + (__HYPERVISOR_iret * 32)); > - *(u8 *)(p+ 0) = 0x50; /* push %eax */ > - *(u8 *)(p+ 1) = 0xb8; /* mov $__HYPERVISOR_iret,%eax */ > - *(u32 *)(p+ 2) = __HYPERVISOR_iret; > - *(u16 *)(p+ 6) = (HYPERCALL_VECTOR << 8) | 0xcd; /* int $xx */ > + memcpy(p, > + "\x50" /* push %eax */ > + "\xb8\x17\x00\x00\x00" /* mov $__HYPERVISOR_iret, %eax */ > + "\xcd\x82", /* int $HYPERCALL_VECTOR */ here as a good idea. If you used a static const uint8_t[] instead of a string literal (which even includes a pointless nul terminator), all of this could be avoided afaict. Jan
On 20/12/2016 07:48, Jan Beulich wrote: > >> @@ -398,10 +400,11 @@ static void hypercall_page_initialise_ring1_kernel(void *hypercall_page) >> * calling it. >> */ >> p = (char *)(hypercall_page + (__HYPERVISOR_iret * 32)); >> - *(u8 *)(p+ 0) = 0x50; /* push %eax */ >> - *(u8 *)(p+ 1) = 0xb8; /* mov $__HYPERVISOR_iret,%eax */ >> - *(u32 *)(p+ 2) = __HYPERVISOR_iret; >> - *(u16 *)(p+ 6) = (HYPERCALL_VECTOR << 8) | 0xcd; /* int $xx */ >> + memcpy(p, >> + "\x50" /* push %eax */ >> + "\xb8\x17\x00\x00\x00" /* mov $__HYPERVISOR_iret, %eax */ >> + "\xcd\x82", /* int $HYPERCALL_VECTOR */ > here as a good idea. Well, they are fixed in the ABI. It is not as if we could ever change them. > If you used a static const uint8_t[] instead of > a string literal (which even includes a pointless nul terminator), all of > this could be avoided afaict. I can see how that would work for the `int $0x82` case, but how do you propose fitting 4 bytes of __HYPERVISOR_iret into an initialiser for a uint8_t array? ~Andrew
>>> On 20.12.16 at 13:21, <andrew.cooper3@citrix.com> wrote: > On 20/12/2016 07:48, Jan Beulich wrote: >> >>> @@ -398,10 +400,11 @@ static void hypercall_page_initialise_ring1_kernel(void > *hypercall_page) >>> * calling it. >>> */ >>> p = (char *)(hypercall_page + (__HYPERVISOR_iret * 32)); >>> - *(u8 *)(p+ 0) = 0x50; /* push %eax */ >>> - *(u8 *)(p+ 1) = 0xb8; /* mov $__HYPERVISOR_iret,%eax */ >>> - *(u32 *)(p+ 2) = __HYPERVISOR_iret; >>> - *(u16 *)(p+ 6) = (HYPERCALL_VECTOR << 8) | 0xcd; /* int $xx */ >>> + memcpy(p, >>> + "\x50" /* push %eax */ >>> + "\xb8\x17\x00\x00\x00" /* mov $__HYPERVISOR_iret, %eax */ >>> + "\xcd\x82", /* int $HYPERCALL_VECTOR */ >> here as a good idea. > > Well, they are fixed in the ABI. It is not as if we could ever change them. > >> If you used a static const uint8_t[] instead of >> a string literal (which even includes a pointless nul terminator), all of >> this could be avoided afaict. > > I can see how that would work for the `int $0x82` case, but how do you > propose fitting 4 bytes of __HYPERVISOR_iret into an initialiser for a > uint8_t array? One byte __HYPERVISOR_iret, the other three zeros. perhaps accompanied by a BUILD_BUG_ON(). Another alternative would be a static const struct. Jan
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c index 811ea4e..962667a 100644 --- a/xen/arch/x86/hvm/svm/svm.c +++ b/xen/arch/x86/hvm/svm/svm.c @@ -888,12 +888,12 @@ static void svm_init_hypercall_page(struct domain *d, void *hypercall_page) continue; p = (char *)(hypercall_page + (i * 32)); - *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */ + *(u8 *)(p + 0) = 0xb8; /* mov $<i>, %eax */ *(u32 *)(p + 1) = i; - *(u8 *)(p + 5) = 0x0f; /* vmmcall */ - *(u8 *)(p + 6) = 0x01; - *(u8 *)(p + 7) = 0xd9; - *(u8 *)(p + 8) = 0xc3; /* ret */ + memcpy(p + 5, + "\x0f\x01\xd9" /* vmmcall */ + "\xc3", /* ret */ + 4); } /* Don't support HYPERVISOR_iret at the moment */ diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c index d50d49e..c0e62a2 100644 --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -1295,12 +1295,12 @@ static void vmx_init_hypercall_page(struct domain *d, void *hypercall_page) continue; p = (char *)(hypercall_page + (i * 32)); - *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */ + *(u8 *)(p + 0) = 0xb8; /* mov $<i>, %eax */ *(u32 *)(p + 1) = i; - *(u8 *)(p + 5) = 0x0f; /* vmcall */ - *(u8 *)(p + 6) = 0x01; - *(u8 *)(p + 7) = 0xc1; - *(u8 *)(p + 8) = 0xc3; /* ret */ + memcpy(p + 5, + "\x0f\x01\xc1" /* vmmcall */ + "\xc3", /* ret */ + 4); } /* Don't support HYPERVISOR_iret at the moment */ diff --git a/xen/arch/x86/x86_64/compat/traps.c b/xen/arch/x86/x86_64/compat/traps.c index 95c5cb3..8b03291 100644 --- a/xen/arch/x86/x86_64/compat/traps.c +++ b/xen/arch/x86/x86_64/compat/traps.c @@ -388,8 +388,10 @@ static void hypercall_page_initialise_ring1_kernel(void *hypercall_page) p = (char *)(hypercall_page + (i * 32)); *(u8 *)(p+ 0) = 0xb8; /* mov $<i>,%eax */ *(u32 *)(p+ 1) = i; - *(u16 *)(p+ 5) = (HYPERCALL_VECTOR << 8) | 0xcd; /* int $xx */ - *(u8 *)(p+ 7) = 0xc3; /* ret */ + memcpy(p + 5, + "\xcd\x82" /* int $HYPERCALL_VECTOR */ + "\xc3", /* ret */ + 3); } /* @@ -398,10 +400,11 @@ static void hypercall_page_initialise_ring1_kernel(void *hypercall_page) * calling it. */ p = (char *)(hypercall_page + (__HYPERVISOR_iret * 32)); - *(u8 *)(p+ 0) = 0x50; /* push %eax */ - *(u8 *)(p+ 1) = 0xb8; /* mov $__HYPERVISOR_iret,%eax */ - *(u32 *)(p+ 2) = __HYPERVISOR_iret; - *(u16 *)(p+ 6) = (HYPERCALL_VECTOR << 8) | 0xcd; /* int $xx */ + memcpy(p, + "\x50" /* push %eax */ + "\xb8\x17\x00\x00\x00" /* mov $__HYPERVISOR_iret, %eax */ + "\xcd\x82", /* int $HYPERCALL_VECTOR */ + 8); } /* diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c index fc8cde6..7a586cd 100644 --- a/xen/arch/x86/x86_64/traps.c +++ b/xen/arch/x86/x86_64/traps.c @@ -591,14 +591,18 @@ static void hypercall_page_initialise_ring3_kernel(void *hypercall_page) continue; p = (char *)(hypercall_page + (i * 32)); - *(u8 *)(p+ 0) = 0x51; /* push %rcx */ - *(u16 *)(p+ 1) = 0x5341; /* push %r11 */ - *(u8 *)(p+ 3) = 0xb8; /* mov $<i>,%eax */ + memcpy(p, + "\x51" /* push %rcx */ + "\x41\x53" /* push %r11 */ + "\xb8", /* mov $<i>,%eax */ + 4); *(u32 *)(p+ 4) = i; - *(u16 *)(p+ 8) = 0x050f; /* syscall */ - *(u16 *)(p+10) = 0x5b41; /* pop %r11 */ - *(u8 *)(p+12) = 0x59; /* pop %rcx */ - *(u8 *)(p+13) = 0xc3; /* ret */ + memcpy(p + 8, + "\x0f\x05" /* syscall */ + "\x41\x5b" /* pop %r11 */ + "\x59" /* pop %rcx */ + "\xc3", /* ret */ + 6); } /* @@ -607,12 +611,13 @@ static void hypercall_page_initialise_ring3_kernel(void *hypercall_page) * calling it. */ p = (char *)(hypercall_page + (__HYPERVISOR_iret * 32)); - *(u8 *)(p+ 0) = 0x51; /* push %rcx */ - *(u16 *)(p+ 1) = 0x5341; /* push %r11 */ - *(u8 *)(p+ 3) = 0x50; /* push %rax */ - *(u8 *)(p+ 4) = 0xb8; /* mov $__HYPERVISOR_iret,%eax */ - *(u32 *)(p+ 5) = __HYPERVISOR_iret; - *(u16 *)(p+ 9) = 0x050f; /* syscall */ + memcpy(p, + "\x51" /* push %rcx */ + "\x41\x53" /* push %r11 */ + "\x50" /* push %rax */ + "\xb8\x17\x00\x00\x00" /* mov $__HYPERVISOR_iret,%eax */ + "\x0f\x05", /* syscall */ + 11); } #include "compat/traps.c"
Use memcpy() rather than individual writes with explicit casts. The __builtin_memcpy() wrapper does a better job at combining adjacent writes into a larger word size. This results in better generated assembly. No functional change. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> --- CC: Jan Beulich <JBeulich@suse.com> CC: Jun Nakajima <jun.nakajima@intel.com> CC: Kevin Tian <kevin.tian@intel.com> CC: Boris Ostrovsky <boris.ostrovsky@oracle.com> CC: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> --- xen/arch/x86/hvm/svm/svm.c | 10 +++++----- xen/arch/x86/hvm/vmx/vmx.c | 10 +++++----- xen/arch/x86/x86_64/compat/traps.c | 15 +++++++++------ xen/arch/x86/x86_64/traps.c | 31 ++++++++++++++++++------------- 4 files changed, 37 insertions(+), 29 deletions(-)