diff mbox series

x86: drop REX64_PREFIX

Message ID 16b45b39-aadd-4a53-bcb9-214ded193db9@suse.com (mailing list archive)
State New
Headers show
Series x86: drop REX64_PREFIX | expand

Commit Message

Jan Beulich July 17, 2024, 12:40 p.m. UTC
While we didn't copy the full Linux commentary, Linux commit
7180d4fb8308 ("x86_64: Fix 64bit FXSAVE encoding") is quite explicit
about gas 2.16 supporting FXSAVEQ / FXRSTORQ. As that's presently our
minimal required version, we can drop the workaround that was needed for
yet for older gas.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

Comments

Andrew Cooper July 17, 2024, 1:05 p.m. UTC | #1
On 17/07/2024 1:40 pm, Jan Beulich wrote:
> While we didn't copy the full Linux commentary, Linux commit
> 7180d4fb8308 ("x86_64: Fix 64bit FXSAVE encoding") is quite explicit
> about gas 2.16 supporting FXSAVEQ / FXRSTORQ. As that's presently our
> minimal required version, we can drop the workaround that was needed for
> yet for older gas.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

It's especially nice to get rid of the __sun__ block, although having
looked through this, ...

>
> --- a/xen/arch/x86/i387.c
> +++ b/xen/arch/x86/i387.c
> @@ -64,13 +64,12 @@ static inline void fpu_fxrstor(struct vc
>      {
>      default:
>          asm volatile (
> -            /* See below for why the operands/constraints are this way. */
> -            "1: " REX64_PREFIX "fxrstor (%2)\n"
> +            "1: fxrstorq %0\n"
>              ".section .fixup,\"ax\"   \n"
>              "2: push %%"__OP"ax       \n"
>              "   push %%"__OP"cx       \n"
>              "   push %%"__OP"di       \n"
> -            "   mov  %2,%%"__OP"di    \n"
> +            "   lea  %0,%%"__OP"di    \n"
>              "   mov  %1,%%ecx         \n"
>              "   xor  %%eax,%%eax      \n"
>              "   rep ; stosl           \n"
> @@ -81,7 +80,7 @@ static inline void fpu_fxrstor(struct vc
>              ".previous                \n"
>              _ASM_EXTABLE(1b, 2b)

... the internals of the fixup section leave a lot to be desired.

My build happens to have emitted lea (%rdi), %rdi for this.

A better option than opencoding memset() would be to simply return
-EIO/-EFAULT like we do from *msr_safe(), writing the error path in C,
and getting the system optimised memset() rather than using a form which
is definitely sub-optimal on all 64bit processors.

~Andrew
Jan Beulich July 17, 2024, 1:16 p.m. UTC | #2
On 17.07.2024 15:05, Andrew Cooper wrote:
> On 17/07/2024 1:40 pm, Jan Beulich wrote:
>> While we didn't copy the full Linux commentary, Linux commit
>> 7180d4fb8308 ("x86_64: Fix 64bit FXSAVE encoding") is quite explicit
>> about gas 2.16 supporting FXSAVEQ / FXRSTORQ. As that's presently our
>> minimal required version, we can drop the workaround that was needed for
>> yet for older gas.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

Thanks.

> It's especially nice to get rid of the __sun__ block, although having
> looked through this, ...
> 
>> --- a/xen/arch/x86/i387.c
>> +++ b/xen/arch/x86/i387.c
>> @@ -64,13 +64,12 @@ static inline void fpu_fxrstor(struct vc
>>      {
>>      default:
>>          asm volatile (
>> -            /* See below for why the operands/constraints are this way. */
>> -            "1: " REX64_PREFIX "fxrstor (%2)\n"
>> +            "1: fxrstorq %0\n"
>>              ".section .fixup,\"ax\"   \n"
>>              "2: push %%"__OP"ax       \n"
>>              "   push %%"__OP"cx       \n"
>>              "   push %%"__OP"di       \n"
>> -            "   mov  %2,%%"__OP"di    \n"
>> +            "   lea  %0,%%"__OP"di    \n"
>>              "   mov  %1,%%ecx         \n"
>>              "   xor  %%eax,%%eax      \n"
>>              "   rep ; stosl           \n"
>> @@ -81,7 +80,7 @@ static inline void fpu_fxrstor(struct vc
>>              ".previous                \n"
>>              _ASM_EXTABLE(1b, 2b)
> 
> ... the internals of the fixup section leave a lot to be desired.
> 
> My build happens to have emitted lea (%rdi), %rdi for this.

Yeah, that was supposed to be happening somewhere. I saw %rax and %rdx
once each, and using LEA there is still kind of a waste.

> A better option than opencoding memset() would be to simply return
> -EIO/-EFAULT like we do from *msr_safe(), writing the error path in C,
> and getting the system optimised memset() rather than using a form which
> is definitely sub-optimal on all 64bit processors.

I think the reason for having done this in assembly is to be able to
wire back to the faulting instruction. On top of what you say we could
do we'd then further need to put the whole thing in a loop, or add a
3rd FXSTOR. Which isn't to say that the overall result then isn't going
to be neater. What I'm not concerned of with this fallback code is
performance, though. That fixup path better wouldn't be taken anyway.

Jan
diff mbox series

Patch

--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -64,13 +64,12 @@  static inline void fpu_fxrstor(struct vc
     {
     default:
         asm volatile (
-            /* See below for why the operands/constraints are this way. */
-            "1: " REX64_PREFIX "fxrstor (%2)\n"
+            "1: fxrstorq %0\n"
             ".section .fixup,\"ax\"   \n"
             "2: push %%"__OP"ax       \n"
             "   push %%"__OP"cx       \n"
             "   push %%"__OP"di       \n"
-            "   mov  %2,%%"__OP"di    \n"
+            "   lea  %0,%%"__OP"di    \n"
             "   mov  %1,%%ecx         \n"
             "   xor  %%eax,%%eax      \n"
             "   rep ; stosl           \n"
@@ -81,7 +80,7 @@  static inline void fpu_fxrstor(struct vc
             ".previous                \n"
             _ASM_EXTABLE(1b, 2b)
             :
-            : "m" (*fpu_ctxt), "i" (sizeof(*fpu_ctxt) / 4), "R" (fpu_ctxt) );
+            : "m" (*fpu_ctxt), "i" (sizeof(*fpu_ctxt) / 4) );
         break;
     case 4: case 2:
         asm volatile (
@@ -157,13 +156,7 @@  static inline void fpu_fxsave(struct vcp
 
     if ( fip_width != 4 )
     {
-        /*
-         * The only way to force fxsaveq on a wide range of gas versions.
-         * On older versions the rex64 prefix works only if we force an
-         * addressing mode that doesn't require extended registers.
-         */
-        asm volatile ( REX64_PREFIX "fxsave (%1)"
-                       : "=m" (*fpu_ctxt) : "R" (fpu_ctxt) );
+        asm volatile ( "fxsaveq %0" : "=m" (*fpu_ctxt) );
 
         /*
          * Some CPUs don't save/restore FDP/FIP/FOP unless an exception is
--- a/xen/arch/x86/include/asm/asm_defns.h
+++ b/xen/arch/x86/include/asm/asm_defns.h
@@ -331,14 +331,6 @@  static always_inline void stac(void)
 #define safe_swapgs                             \
         "mfence; swapgs;"
 
-#ifdef __sun__
-#define REX64_PREFIX "rex64\\"
-#elif defined(__clang__)
-#define REX64_PREFIX ".byte 0x48; "
-#else
-#define REX64_PREFIX "rex64/"
-#endif
-
 #define ELFNOTE(name, type, desc)           \
     .pushsection .note.name, "a", @note   ; \
     .p2align 2                            ; \