diff mbox

x86/cpuid: AVX-512 Feature Detection

Message ID 1467093106-3444-1-git-send-email-luwei.kang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Luwei Kang June 28, 2016, 5:51 a.m. UTC
AVX-512 is an extention of AVX2. Its spec can be found at:
https://software.intel.com/sites/default/files/managed/b4/3a/319433-024.pdf
This patch detects AVX-512 features by CPUID.

Signed-off-by: Luwei Kang <luwei.kang@intel.com>
---
 xen/arch/x86/hvm/hvm.c                      |  8 ++++++++
 xen/arch/x86/traps.c                        | 17 ++++++++++++++++-
 xen/include/public/arch-x86/cpufeatureset.h |  9 +++++++++
 3 files changed, 33 insertions(+), 1 deletion(-)

Comments

Jan Beulich June 28, 2016, 7:49 a.m. UTC | #1
>>> On 28.06.16 at 07:51, <luwei.kang@intel.com> wrote:
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -3474,6 +3474,14 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
>                                    xstate_sizes[_XSTATE_BNDCSR]);
>              }
>  
> +            if ( _ebx & cpufeat_mask(X86_FEATURE_AVX512F) )
> +            {
> +                xfeature_mask |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM;
> +                xstate_size = max(xstate_size,
> +                                  xstate_offsets[_XSTATE_HI_ZMM] +
> +                                  xstate_sizes[_XSTATE_HI_ZMM]);

I think this would better be three such statements, one per bit.
Otherwise the goal of not putting in assumptions on the relative
ordering of bits and save area ranges gets undermined.

> @@ -1136,9 +1136,16 @@ void pv_cpuid(struct cpu_user_regs *regs)
>      case XSTATE_CPUID:
>  
>          if ( !is_control_domain(currd) && !is_hardware_domain(currd) )
> +        {
>              domain_cpuid(currd, 1, 0, &tmp, &tmp, &_ecx, &tmp);
> +            domain_cpuid(currd, 0x07, 0, &tmp, &_ebx, &tmp, &tmp);

The neighboring line tells you that this should be 7 instead of 0x07.

> --- a/xen/include/public/arch-x86/cpufeatureset.h
> +++ b/xen/include/public/arch-x86/cpufeatureset.h
> @@ -206,15 +206,24 @@ XEN_CPUFEATURE(PQM,           5*32+12) /*   Platform QoS Monitoring */
>  XEN_CPUFEATURE(NO_FPU_SEL,    5*32+13) /*!  FPU CS/DS stored as zero */
>  XEN_CPUFEATURE(MPX,           5*32+14) /*S  Memory Protection Extensions */
>  XEN_CPUFEATURE(PQE,           5*32+15) /*   Platform QoS Enforcement */
> +XEN_CPUFEATURE(AVX512F,       5*32+16) /*A  AVX-512 Foundation Instructions */
> +XEN_CPUFEATURE(AVX512DQ,      5*32+17) /*A  AVX-512 Doubleword & Quadword Instrs */
>  XEN_CPUFEATURE(RDSEED,        5*32+18) /*A  RDSEED instruction */
>  XEN_CPUFEATURE(ADX,           5*32+19) /*A  ADCX, ADOX instructions */
>  XEN_CPUFEATURE(SMAP,          5*32+20) /*S  Supervisor Mode Access Prevention */
> +XEN_CPUFEATURE(AVX512IFMA,    5*32+21) /*A  AVX-512 Integer Fused Multiply Add */
>  XEN_CPUFEATURE(CLFLUSHOPT,    5*32+23) /*A  CLFLUSHOPT instruction */
>  XEN_CPUFEATURE(CLWB,          5*32+24) /*A  CLWB instruction */
> +XEN_CPUFEATURE(AVX512PF,      5*32+26) /*A  AVX-512 Prefetch Instructions */
> +XEN_CPUFEATURE(AVX512ER,      5*32+27) /*A  AVX-512 Exponent & Reciprocal Instrs */
> +XEN_CPUFEATURE(AVX512CD,      5*32+28) /*A  AVX-512 Conflict Detection Instrs */
>  XEN_CPUFEATURE(SHA,           5*32+29) /*A  SHA1 & SHA256 instructions */
> +XEN_CPUFEATURE(AVX512BW,      5*32+30) /*A  AVX-512 Byte and Word Instructions */
> +XEN_CPUFEATURE(AVX512VL,      5*32+31) /*A  AVX-512 Vector Length Extensions */
>  
>  /* Intel-defined CPU features, CPUID level 0x00000007:0.ecx, word 6 */
>  XEN_CPUFEATURE(PREFETCHWT1,   6*32+ 0) /*A  PREFETCHWT1 instruction */
> +XEN_CPUFEATURE(AVX512VBMI,    6*32+ 1) /*A  AVX-512 Vector Byte Manipulation Instrs */
>  XEN_CPUFEATURE(PKU,           6*32+ 3) /*H  Protection Keys for Userspace */
>  XEN_CPUFEATURE(OSPKE,         6*32+ 4) /*!  OS Protection Keys Enable */

This lacks an adjustment to the dependencies between features in
xen/tools/gen-cpuid.py.

Jan
Luwei Kang June 28, 2016, 8:10 a.m. UTC | #2
Thanks for your advice, I will  make a change right now.


-----Original Message-----
From: Jan Beulich [mailto:JBeulich@suse.com] 
Sent: Tuesday, June 28, 2016 3:49 PM
To: Kang, Luwei <luwei.kang@intel.com>
Cc: andrew.cooper3@citrix.com; Peng, Chao P <chao.p.peng@intel.com>; Wang, Yong Y <yong.y.wang@intel.com>; xen-devel@lists.xen.org
Subject: Re: [PATCH] x86/cpuid: AVX-512 Feature Detection

>>> On 28.06.16 at 07:51, <luwei.kang@intel.com> wrote:
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -3474,6 +3474,14 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
>                                    xstate_sizes[_XSTATE_BNDCSR]);
>              }
>  
> +            if ( _ebx & cpufeat_mask(X86_FEATURE_AVX512F) )
> +            {
> +                xfeature_mask |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM;
> +                xstate_size = max(xstate_size,
> +                                  xstate_offsets[_XSTATE_HI_ZMM] +
> +                                  xstate_sizes[_XSTATE_HI_ZMM]);

I think this would better be three such statements, one per bit.
Otherwise the goal of not putting in assumptions on the relative ordering of bits and save area ranges gets undermined.

> @@ -1136,9 +1136,16 @@ void pv_cpuid(struct cpu_user_regs *regs)
>      case XSTATE_CPUID:
>  
>          if ( !is_control_domain(currd) && !is_hardware_domain(currd) 
> )
> +        {
>              domain_cpuid(currd, 1, 0, &tmp, &tmp, &_ecx, &tmp);
> +            domain_cpuid(currd, 0x07, 0, &tmp, &_ebx, &tmp, &tmp);

The neighboring line tells you that this should be 7 instead of 0x07.

> --- a/xen/include/public/arch-x86/cpufeatureset.h
> +++ b/xen/include/public/arch-x86/cpufeatureset.h
> @@ -206,15 +206,24 @@ XEN_CPUFEATURE(PQM,           5*32+12) /*   Platform QoS Monitoring */
>  XEN_CPUFEATURE(NO_FPU_SEL,    5*32+13) /*!  FPU CS/DS stored as zero */
>  XEN_CPUFEATURE(MPX,           5*32+14) /*S  Memory Protection Extensions */
>  XEN_CPUFEATURE(PQE,           5*32+15) /*   Platform QoS Enforcement */
> +XEN_CPUFEATURE(AVX512F,       5*32+16) /*A  AVX-512 Foundation Instructions */
> +XEN_CPUFEATURE(AVX512DQ,      5*32+17) /*A  AVX-512 Doubleword & Quadword Instrs */
>  XEN_CPUFEATURE(RDSEED,        5*32+18) /*A  RDSEED instruction */
>  XEN_CPUFEATURE(ADX,           5*32+19) /*A  ADCX, ADOX instructions */
>  XEN_CPUFEATURE(SMAP,          5*32+20) /*S  Supervisor Mode Access Prevention */
> +XEN_CPUFEATURE(AVX512IFMA,    5*32+21) /*A  AVX-512 Integer Fused Multiply Add */
>  XEN_CPUFEATURE(CLFLUSHOPT,    5*32+23) /*A  CLFLUSHOPT instruction */
>  XEN_CPUFEATURE(CLWB,          5*32+24) /*A  CLWB instruction */
> +XEN_CPUFEATURE(AVX512PF,      5*32+26) /*A  AVX-512 Prefetch Instructions */
> +XEN_CPUFEATURE(AVX512ER,      5*32+27) /*A  AVX-512 Exponent & Reciprocal Instrs */
> +XEN_CPUFEATURE(AVX512CD,      5*32+28) /*A  AVX-512 Conflict Detection Instrs */
>  XEN_CPUFEATURE(SHA,           5*32+29) /*A  SHA1 & SHA256 instructions */
> +XEN_CPUFEATURE(AVX512BW,      5*32+30) /*A  AVX-512 Byte and Word Instructions */
> +XEN_CPUFEATURE(AVX512VL,      5*32+31) /*A  AVX-512 Vector Length Extensions */
>  
>  /* Intel-defined CPU features, CPUID level 0x00000007:0.ecx, word 6 */
>  XEN_CPUFEATURE(PREFETCHWT1,   6*32+ 0) /*A  PREFETCHWT1 instruction */
> +XEN_CPUFEATURE(AVX512VBMI,    6*32+ 1) /*A  AVX-512 Vector Byte Manipulation Instrs */
>  XEN_CPUFEATURE(PKU,           6*32+ 3) /*H  Protection Keys for Userspace */
>  XEN_CPUFEATURE(OSPKE,         6*32+ 4) /*!  OS Protection Keys Enable */

This lacks an adjustment to the dependencies between features in xen/tools/gen-cpuid.py.

Jan
Andrew Cooper June 28, 2016, 8:46 a.m. UTC | #3
On 28/06/16 06:51, Luwei Kang wrote:
> @@ -1136,9 +1136,16 @@ void pv_cpuid(struct cpu_user_regs *regs)
>      case XSTATE_CPUID:
>  
>          if ( !is_control_domain(currd) && !is_hardware_domain(currd) )
> +        {
>              domain_cpuid(currd, 1, 0, &tmp, &tmp, &_ecx, &tmp);
> +            domain_cpuid(currd, 0x07, 0, &tmp, &_ebx, &tmp, &tmp);
> +        }
>          else
> +        {
>              _ecx = cpuid_ecx(1);
> +            cpuid_count(0x07, 0, &tmp, &_ebx, &tmp, &tmp);
> +        }
> +

In addition to Jan's comments, having _ecx from one leaf and _ebx from a
different leaf collected at the same time is liable to cause confusion.

Please split the cpuid call for leaf 7 out from here, and put it in the
next hunk, just like the way the hvm_cpuid() side works.

~Andrew

>          _ecx &= pv_featureset[FEATURESET_1c];
>  
>          if ( !(_ecx & cpufeat_mask(X86_FEATURE_XSAVE)) || subleaf >= 63 )
> @@ -1157,6 +1164,14 @@ void pv_cpuid(struct cpu_user_regs *regs)
>                                 xstate_sizes[_XSTATE_YMM]);
>              }
>  
> +            if ( _ebx & cpufeat_mask(X86_FEATURE_AVX512F) )
> +            {
> +                xfeature_mask |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM;
> +                xstate_size = max(xstate_size,
> +                                  xstate_offsets[_XSTATE_HI_ZMM] +
> +                                  xstate_sizes[_XSTATE_HI_ZMM]);
> +            }
> +
>              a = (uint32_t)xfeature_mask;
>              d = (uint32_t)(xfeature_mask >> 32);
>              c = xstate_size;
Luwei Kang June 28, 2016, 8:51 a.m. UTC | #4
OK, no problem.


-----Original Message-----
From: Andrew Cooper [mailto:andrew.cooper3@citrix.com] 
Sent: Tuesday, June 28, 2016 4:47 PM
To: Kang, Luwei <luwei.kang@intel.com>; xen-devel@lists.xen.org
Cc: jbeulich@suse.com; Wang, Yong Y <yong.y.wang@intel.com>; Peng, Chao P <chao.p.peng@intel.com>
Subject: Re: [PATCH] x86/cpuid: AVX-512 Feature Detection

On 28/06/16 06:51, Luwei Kang wrote:
> @@ -1136,9 +1136,16 @@ void pv_cpuid(struct cpu_user_regs *regs)
>      case XSTATE_CPUID:
>  
>          if ( !is_control_domain(currd) && !is_hardware_domain(currd) 
> )
> +        {
>              domain_cpuid(currd, 1, 0, &tmp, &tmp, &_ecx, &tmp);
> +            domain_cpuid(currd, 0x07, 0, &tmp, &_ebx, &tmp, &tmp);
> +        }
>          else
> +        {
>              _ecx = cpuid_ecx(1);
> +            cpuid_count(0x07, 0, &tmp, &_ebx, &tmp, &tmp);
> +        }
> +

In addition to Jan's comments, having _ecx from one leaf and _ebx from a different leaf collected at the same time is liable to cause confusion.

Please split the cpuid call for leaf 7 out from here, and put it in the next hunk, just like the way the hvm_cpuid() side works.

~Andrew

>          _ecx &= pv_featureset[FEATURESET_1c];
>  
>          if ( !(_ecx & cpufeat_mask(X86_FEATURE_XSAVE)) || subleaf >= 
> 63 ) @@ -1157,6 +1164,14 @@ void pv_cpuid(struct cpu_user_regs *regs)
>                                 xstate_sizes[_XSTATE_YMM]);
>              }
>  
> +            if ( _ebx & cpufeat_mask(X86_FEATURE_AVX512F) )
> +            {
> +                xfeature_mask |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM;
> +                xstate_size = max(xstate_size,
> +                                  xstate_offsets[_XSTATE_HI_ZMM] +
> +                                  xstate_sizes[_XSTATE_HI_ZMM]);
> +            }
> +
>              a = (uint32_t)xfeature_mask;
>              d = (uint32_t)(xfeature_mask >> 32);
>              c = xstate_size;
diff mbox

Patch

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index c89ab6e..693afd5 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3474,6 +3474,14 @@  void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
                                   xstate_sizes[_XSTATE_BNDCSR]);
             }
 
+            if ( _ebx & cpufeat_mask(X86_FEATURE_AVX512F) )
+            {
+                xfeature_mask |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM;
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_HI_ZMM] +
+                                  xstate_sizes[_XSTATE_HI_ZMM]);
+            }
+
             if ( _ecx & cpufeat_mask(X86_FEATURE_PKU) )
             {
                 xfeature_mask |= XSTATE_PKRU;
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 767d0b0..1c75e93 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -975,7 +975,7 @@  void pv_cpuid(struct cpu_user_regs *regs)
 
     switch ( leaf )
     {
-        uint32_t tmp, _ecx;
+        uint32_t tmp, _ecx, _ebx;
 
     case 0x00000001:
         c &= pv_featureset[FEATURESET_1c];
@@ -1136,9 +1136,16 @@  void pv_cpuid(struct cpu_user_regs *regs)
     case XSTATE_CPUID:
 
         if ( !is_control_domain(currd) && !is_hardware_domain(currd) )
+        {
             domain_cpuid(currd, 1, 0, &tmp, &tmp, &_ecx, &tmp);
+            domain_cpuid(currd, 0x07, 0, &tmp, &_ebx, &tmp, &tmp);
+        }
         else
+        {
             _ecx = cpuid_ecx(1);
+            cpuid_count(0x07, 0, &tmp, &_ebx, &tmp, &tmp);
+        }
+
         _ecx &= pv_featureset[FEATURESET_1c];
 
         if ( !(_ecx & cpufeat_mask(X86_FEATURE_XSAVE)) || subleaf >= 63 )
@@ -1157,6 +1164,14 @@  void pv_cpuid(struct cpu_user_regs *regs)
                                xstate_sizes[_XSTATE_YMM]);
             }
 
+            if ( _ebx & cpufeat_mask(X86_FEATURE_AVX512F) )
+            {
+                xfeature_mask |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM;
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_HI_ZMM] +
+                                  xstate_sizes[_XSTATE_HI_ZMM]);
+            }
+
             a = (uint32_t)xfeature_mask;
             d = (uint32_t)(xfeature_mask >> 32);
             c = xstate_size;
diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
index 39acf8c..9320c9e 100644
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -206,15 +206,24 @@  XEN_CPUFEATURE(PQM,           5*32+12) /*   Platform QoS Monitoring */
 XEN_CPUFEATURE(NO_FPU_SEL,    5*32+13) /*!  FPU CS/DS stored as zero */
 XEN_CPUFEATURE(MPX,           5*32+14) /*S  Memory Protection Extensions */
 XEN_CPUFEATURE(PQE,           5*32+15) /*   Platform QoS Enforcement */
+XEN_CPUFEATURE(AVX512F,       5*32+16) /*A  AVX-512 Foundation Instructions */
+XEN_CPUFEATURE(AVX512DQ,      5*32+17) /*A  AVX-512 Doubleword & Quadword Instrs */
 XEN_CPUFEATURE(RDSEED,        5*32+18) /*A  RDSEED instruction */
 XEN_CPUFEATURE(ADX,           5*32+19) /*A  ADCX, ADOX instructions */
 XEN_CPUFEATURE(SMAP,          5*32+20) /*S  Supervisor Mode Access Prevention */
+XEN_CPUFEATURE(AVX512IFMA,    5*32+21) /*A  AVX-512 Integer Fused Multiply Add */
 XEN_CPUFEATURE(CLFLUSHOPT,    5*32+23) /*A  CLFLUSHOPT instruction */
 XEN_CPUFEATURE(CLWB,          5*32+24) /*A  CLWB instruction */
+XEN_CPUFEATURE(AVX512PF,      5*32+26) /*A  AVX-512 Prefetch Instructions */
+XEN_CPUFEATURE(AVX512ER,      5*32+27) /*A  AVX-512 Exponent & Reciprocal Instrs */
+XEN_CPUFEATURE(AVX512CD,      5*32+28) /*A  AVX-512 Conflict Detection Instrs */
 XEN_CPUFEATURE(SHA,           5*32+29) /*A  SHA1 & SHA256 instructions */
+XEN_CPUFEATURE(AVX512BW,      5*32+30) /*A  AVX-512 Byte and Word Instructions */
+XEN_CPUFEATURE(AVX512VL,      5*32+31) /*A  AVX-512 Vector Length Extensions */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0.ecx, word 6 */
 XEN_CPUFEATURE(PREFETCHWT1,   6*32+ 0) /*A  PREFETCHWT1 instruction */
+XEN_CPUFEATURE(AVX512VBMI,    6*32+ 1) /*A  AVX-512 Vector Byte Manipulation Instrs */
 XEN_CPUFEATURE(PKU,           6*32+ 3) /*H  Protection Keys for Userspace */
 XEN_CPUFEATURE(OSPKE,         6*32+ 4) /*!  OS Protection Keys Enable */