diff mbox

[v2,2/2] x86/xstate: also use alternative asm on xsave side

Message ID 56B2032902000078000CE03C@prv-mh.provo.novell.com
State New, archived
Headers show

Commit Message

Jan Beulich Feb. 3, 2016, 12:39 p.m. UTC
From: Shuai Ruan <shuai.ruan@linux.intel.com>

This patch uses alternative asm on the xsave side.
As xsaves uses a modified optimization like xsaveopt, xsaves
may not write the FPU portion of the save image either.
So xsaves also needs some extra tweaks.

Signed-off-by: Shuai Ruan <shuai.ruan@linux.intel.com>

Fix XSAVES opcode. Extend the other respective XSAVEOPT conditional to
cover XSAVES as well. Re-wrap comment being adjusted.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
x86/xstate: also use alternative asm on xsave side

From: Shuai Ruan <shuai.ruan@linux.intel.com>

This patch uses alternative asm on the xsave side.
As xsaves uses a modified optimization like xsaveopt, xsaves
may not write the FPU portion of the save image either.
So xsaves also needs some extra tweaks.

Signed-off-by: Shuai Ruan <shuai.ruan@linux.intel.com>

Fix XSAVES opcode. Extend the other respective XSAVEOPT conditional to
cover XSAVES as well. Re-wrap comment being adjusted.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -250,27 +250,29 @@ void xsave(struct vcpu *v, uint64_t mask
     uint32_t hmask = mask >> 32;
     uint32_t lmask = mask;
     int word_size = mask & XSTATE_FP ? (cpu_has_fpu_sel ? 8 : 0) : -1;
+#define XSAVE(pfx) \
+        alternative_io_3(".byte " pfx "0x0f,0xae,0x27\n", \
+                         ".byte " pfx "0x0f,0xae,0x37\n", \
+                         X86_FEATURE_XSAVEOPT, \
+                         ".byte " pfx "0x0f,0xc7,0x27\n", \
+                         X86_FEATURE_XSAVEC, \
+                         ".byte " pfx "0x0f,0xc7,0x2f\n", \
+                         X86_FEATURE_XSAVES, \
+                         "=m" (*ptr), \
+                         "a" (lmask), "d" (hmask), "D" (ptr))
 
     if ( word_size <= 0 || !is_pv_32bit_vcpu(v) )
     {
         typeof(ptr->fpu_sse.fip.sel) fcs = ptr->fpu_sse.fip.sel;
         typeof(ptr->fpu_sse.fdp.sel) fds = ptr->fpu_sse.fdp.sel;
 
-        if ( cpu_has_xsaves )
-            asm volatile ( ".byte 0x48,0x0f,0xc7,0x2f"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
-        else if ( cpu_has_xsavec )
-            asm volatile ( ".byte 0x48,0x0f,0xc7,0x27"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
-        else if ( cpu_has_xsaveopt )
+        if ( cpu_has_xsaveopt || cpu_has_xsaves )
         {
             /*
-             * xsaveopt may not write the FPU portion even when the respective
-             * mask bit is set. For the check further down to work we hence
-             * need to put the save image back into the state that it was in
-             * right after the previous xsaveopt.
+             * XSAVEOPT/XSAVES may not write the FPU portion even when the
+             * respective mask bit is set. For the check further down to work
+             * we hence need to put the save image back into the state that
+             * it was in right after the previous XSAVEOPT.
              */
             if ( word_size > 0 &&
                  (ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] == 4 ||
@@ -279,14 +281,9 @@ void xsave(struct vcpu *v, uint64_t mask
                 ptr->fpu_sse.fip.sel = 0;
                 ptr->fpu_sse.fdp.sel = 0;
             }
-            asm volatile ( ".byte 0x48,0x0f,0xae,0x37"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
         }
-        else
-            asm volatile ( ".byte 0x48,0x0f,0xae,0x27"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
+
+        XSAVE("0x48,");
 
         if ( !(mask & ptr->xsave_hdr.xstate_bv & XSTATE_FP) ||
              /*
@@ -296,7 +293,7 @@ void xsave(struct vcpu *v, uint64_t mask
              (!(ptr->fpu_sse.fsw & 0x0080) &&
               boot_cpu_data.x86_vendor == X86_VENDOR_AMD) )
         {
-            if ( cpu_has_xsaveopt && word_size > 0 )
+            if ( (cpu_has_xsaveopt || cpu_has_xsaves) && word_size > 0 )
             {
                 ptr->fpu_sse.fip.sel = fcs;
                 ptr->fpu_sse.fdp.sel = fds;
@@ -317,24 +314,10 @@ void xsave(struct vcpu *v, uint64_t mask
     }
     else
     {
-        if ( cpu_has_xsaves )
-            asm volatile ( ".byte 0x0f,0xc7,0x2f"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
-        else if ( cpu_has_xsavec )
-            asm volatile ( ".byte 0x0f,0xc7,0x27"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
-        else if ( cpu_has_xsaveopt )
-            asm volatile ( ".byte 0x0f,0xae,0x37"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
-        else
-            asm volatile ( ".byte 0x0f,0xae,0x27"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
+        XSAVE("");
         word_size = 4;
     }
+#undef XSAVE
     if ( word_size >= 0 )
         ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] = word_size;
 }

Comments

Andrew Cooper Feb. 3, 2016, 1:26 p.m. UTC | #1
On 03/02/16 12:39, Jan Beulich wrote:
> From: Shuai Ruan <shuai.ruan@linux.intel.com>
>
> This patch use alternavtive asm on the xsave side.
> As xsaves use modified optimization like xsaveopt, xsaves
> may not writing the FPU portion of the save image too.
> So xsaves also need some extra tweaks.
>
> Signed-off-by: Shuai Ruan <shuai.ruan@linux.intel.com>
>
> Fix XSAVES opcode. Extend the other respective XSAVEOPT conditional to
> cover XSAVES as well. Re-wrap comment being adjusted.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/xstate.c
> +++ b/xen/arch/x86/xstate.c
> @@ -250,27 +250,29 @@ void xsave(struct vcpu *v, uint64_t mask
>      uint32_t hmask = mask >> 32;
>      uint32_t lmask = mask;
>      int word_size = mask & XSTATE_FP ? (cpu_has_fpu_sel ? 8 : 0) : -1;
> +#define XSAVE(pfx) \
> +        alternative_io_3(".byte " pfx "0x0f,0xae,0x27\n", \
> +                         ".byte " pfx "0x0f,0xae,0x37\n", \
> +                         X86_FEATURE_XSAVEOPT, \
> +                         ".byte " pfx "0x0f,0xc7,0x27\n", \
> +                         X86_FEATURE_XSAVEC, \
> +                         ".byte " pfx "0x0f,0xc7,0x2f\n", \
> +                         X86_FEATURE_XSAVES, \

Given that the options are a little out of order and using raw bytes,
would you mind annotating the lines with the operations. e.g.

+        alternative_io_3(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */ \
+                         ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
+                         X86_FEATURE_XSAVEOPT, \
+                         ".byte " pfx "0x0f,0xc7,0x27\n", /* xsavec */ \
+                         X86_FEATURE_XSAVEC, \
+                         ".byte " pfx "0x0f,0xc7,0x2f\n", /* xsaves */ \
+                         X86_FEATURE_XSAVES, \

IMO, this is somewhat clearer to read.

Otherwise,

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Jan Beulich Feb. 3, 2016, 1:34 p.m. UTC | #2
>>> On 03.02.16 at 14:26, <andrew.cooper3@citrix.com> wrote:
> On 03/02/16 12:39, Jan Beulich wrote:
>> --- a/xen/arch/x86/xstate.c
>> +++ b/xen/arch/x86/xstate.c
>> @@ -250,27 +250,29 @@ void xsave(struct vcpu *v, uint64_t mask
>>      uint32_t hmask = mask >> 32;
>>      uint32_t lmask = mask;
>>      int word_size = mask & XSTATE_FP ? (cpu_has_fpu_sel ? 8 : 0) : -1;
>> +#define XSAVE(pfx) \
>> +        alternative_io_3(".byte " pfx "0x0f,0xae,0x27\n", \
>> +                         ".byte " pfx "0x0f,0xae,0x37\n", \
>> +                         X86_FEATURE_XSAVEOPT, \
>> +                         ".byte " pfx "0x0f,0xc7,0x27\n", \
>> +                         X86_FEATURE_XSAVEC, \
>> +                         ".byte " pfx "0x0f,0xc7,0x2f\n", \
>> +                         X86_FEATURE_XSAVES, \
> 
> Given that the options are a little out of order and using raw bytes,
> would you mind annotating the lines with the operations. e.g.
> 
> +        alternative_io_3(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */ \
> +                         ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
> +                         X86_FEATURE_XSAVEOPT, \
> +                         ".byte " pfx "0x0f,0xc7,0x27\n", /* xsavec */ \
> +                         X86_FEATURE_XSAVEC, \
> +                         ".byte " pfx "0x0f,0xc7,0x2f\n", /* xsaves */ \
> +                         X86_FEATURE_XSAVES, \
> 
> IMO, this is somewhat clearer to read.

Okay, since I had been considering this too, I've just done so.

> Otherwise,
> 
> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

Thanks, Jan
Andrew Cooper Feb. 3, 2016, 1:50 p.m. UTC | #3
On 03/02/16 13:34, Jan Beulich wrote:
>>>> On 03.02.16 at 14:26, <andrew.cooper3@citrix.com> wrote:
>> On 03/02/16 12:39, Jan Beulich wrote:
>>> --- a/xen/arch/x86/xstate.c
>>> +++ b/xen/arch/x86/xstate.c
>>> @@ -250,27 +250,29 @@ void xsave(struct vcpu *v, uint64_t mask
>>>      uint32_t hmask = mask >> 32;
>>>      uint32_t lmask = mask;
>>>      int word_size = mask & XSTATE_FP ? (cpu_has_fpu_sel ? 8 : 0) : -1;
>>> +#define XSAVE(pfx) \
>>> +        alternative_io_3(".byte " pfx "0x0f,0xae,0x27\n", \
>>> +                         ".byte " pfx "0x0f,0xae,0x37\n", \
>>> +                         X86_FEATURE_XSAVEOPT, \
>>> +                         ".byte " pfx "0x0f,0xc7,0x27\n", \
>>> +                         X86_FEATURE_XSAVEC, \
>>> +                         ".byte " pfx "0x0f,0xc7,0x2f\n", \
>>> +                         X86_FEATURE_XSAVES, \
>> Given that the options are a little out of order and using raw bytes,
>> would you mind annotating the lines with the operations. e.g.
>>
>> +        alternative_io_3(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */ \
>> +                         ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
>> +                         X86_FEATURE_XSAVEOPT, \
>> +                         ".byte " pfx "0x0f,0xc7,0x27\n", /* xsavec */ \
>> +                         X86_FEATURE_XSAVEC, \
>> +                         ".byte " pfx "0x0f,0xc7,0x2f\n", /* xsaves */ \
>> +                         X86_FEATURE_XSAVES, \
>>
>> IMO, this is somewhat clearer to read.
> Okay, since I had been considering this too, I've just done so.
>
>> Otherwise,
>>
>> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Thanks, Jan

On further thoughts, it would be nice to annotate the XRSTOR() side as well.

~Andrew
Shuai Ruan Feb. 4, 2016, 5:27 a.m. UTC | #4
On Wed, Feb 03, 2016 at 05:39:53AM -0700, Jan Beulich wrote:
> From: Shuai Ruan <shuai.ruan@linux.intel.com>
> 
> This patch use alternavtive asm on the xsave side.
> As xsaves use modified optimization like xsaveopt, xsaves
> may not writing the FPU portion of the save image too.
> So xsaves also need some extra tweaks.
> 
> Signed-off-by: Shuai Ruan <shuai.ruan@linux.intel.com>
> 
> Fix XSAVES opcode. Extend the other respective XSAVEOPT conditional to
> cover XSAVES as well. Re-wrap comment being adjusted.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Thanks for pointing out the bug and fixing it.
I tested the v2 patch; it works well.
diff mbox

Patch

--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -250,27 +250,29 @@  void xsave(struct vcpu *v, uint64_t mask
     uint32_t hmask = mask >> 32;
     uint32_t lmask = mask;
     int word_size = mask & XSTATE_FP ? (cpu_has_fpu_sel ? 8 : 0) : -1;
+#define XSAVE(pfx) \
+        alternative_io_3(".byte " pfx "0x0f,0xae,0x27\n", \
+                         ".byte " pfx "0x0f,0xae,0x37\n", \
+                         X86_FEATURE_XSAVEOPT, \
+                         ".byte " pfx "0x0f,0xc7,0x27\n", \
+                         X86_FEATURE_XSAVEC, \
+                         ".byte " pfx "0x0f,0xc7,0x2f\n", \
+                         X86_FEATURE_XSAVES, \
+                         "=m" (*ptr), \
+                         "a" (lmask), "d" (hmask), "D" (ptr))
 
     if ( word_size <= 0 || !is_pv_32bit_vcpu(v) )
     {
         typeof(ptr->fpu_sse.fip.sel) fcs = ptr->fpu_sse.fip.sel;
         typeof(ptr->fpu_sse.fdp.sel) fds = ptr->fpu_sse.fdp.sel;
 
-        if ( cpu_has_xsaves )
-            asm volatile ( ".byte 0x48,0x0f,0xc7,0x2f"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
-        else if ( cpu_has_xsavec )
-            asm volatile ( ".byte 0x48,0x0f,0xc7,0x27"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
-        else if ( cpu_has_xsaveopt )
+        if ( cpu_has_xsaveopt || cpu_has_xsaves )
         {
             /*
-             * xsaveopt may not write the FPU portion even when the respective
-             * mask bit is set. For the check further down to work we hence
-             * need to put the save image back into the state that it was in
-             * right after the previous xsaveopt.
+             * XSAVEOPT/XSAVES may not write the FPU portion even when the
+             * respective mask bit is set. For the check further down to work
+             * we hence need to put the save image back into the state that
+             * it was in right after the previous XSAVEOPT.
              */
             if ( word_size > 0 &&
                  (ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] == 4 ||
@@ -279,14 +281,9 @@  void xsave(struct vcpu *v, uint64_t mask
                 ptr->fpu_sse.fip.sel = 0;
                 ptr->fpu_sse.fdp.sel = 0;
             }
-            asm volatile ( ".byte 0x48,0x0f,0xae,0x37"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
         }
-        else
-            asm volatile ( ".byte 0x48,0x0f,0xae,0x27"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
+
+        XSAVE("0x48,");
 
         if ( !(mask & ptr->xsave_hdr.xstate_bv & XSTATE_FP) ||
              /*
@@ -296,7 +293,7 @@  void xsave(struct vcpu *v, uint64_t mask
              (!(ptr->fpu_sse.fsw & 0x0080) &&
               boot_cpu_data.x86_vendor == X86_VENDOR_AMD) )
         {
-            if ( cpu_has_xsaveopt && word_size > 0 )
+            if ( (cpu_has_xsaveopt || cpu_has_xsaves) && word_size > 0 )
             {
                 ptr->fpu_sse.fip.sel = fcs;
                 ptr->fpu_sse.fdp.sel = fds;
@@ -317,24 +314,10 @@  void xsave(struct vcpu *v, uint64_t mask
     }
     else
     {
-        if ( cpu_has_xsaves )
-            asm volatile ( ".byte 0x0f,0xc7,0x2f"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
-        else if ( cpu_has_xsavec )
-            asm volatile ( ".byte 0x0f,0xc7,0x27"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
-        else if ( cpu_has_xsaveopt )
-            asm volatile ( ".byte 0x0f,0xae,0x37"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
-        else
-            asm volatile ( ".byte 0x0f,0xae,0x27"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
+        XSAVE("");
         word_size = 4;
     }
+#undef XSAVE
     if ( word_size >= 0 )
         ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] = word_size;
 }