diff mbox series

x86/cpuid: Don't use volatile asm statements

Message ID 1558365981-3175-1-git-send-email-andrew.cooper3@citrix.com (mailing list archive)
State New, archived
Headers show
Series x86/cpuid: Don't use volatile asm statements | expand

Commit Message

Andrew Cooper May 20, 2019, 3:26 p.m. UTC
Common use of the CPUID instruction operates without side effects.  Let the
compiler better optimise code by dropping the volatile qualifier.

The only place where order matters is for Intel microcode loading, where
executing a CPUID instruction is used for its side effect of updating
MSR_IA32_UCODE_REV.

The existing logic is buggy because GCC has been seen to reorder independent
asm volatile statements.  Opencode the two cases, with a compiler barrier to
enforce the correct ordering of operations.

While here, fix the comment, which isn't correct.  The SDM doesn't state that
a read of leaf 1 is required - just that a CPUID instruction is required.
Using leaf 0 results in better code generation, following the write of 0 to
MSR_IA32_UCODE_REV.

Suggested-by: Jan Beulich <JBeulich@suse.com>
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Wei Liu <wei.liu2@citrix.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/arch/x86/microcode_intel.c  | 13 ++++++++-----
 xen/include/asm-x86/processor.h | 12 ++++++------
 2 files changed, 14 insertions(+), 11 deletions(-)

Comments

Jan Beulich May 20, 2019, 3:59 p.m. UTC | #1
>>> On 20.05.19 at 17:26, <andrew.cooper3@citrix.com> wrote:
> Common use of the CPUID instruction operates without side effects.  Let the
> compiler better optimise code by dropping the volatile qualifier.
> 
> The only place where order matters is for Intel microcode loading, where
> executing a CPUID instruction is used for its side effect of updating
> MSR_IA32_UCODE_REV.
> 
> The existing logic is buggy because GCC has been seen to reorder independent
> asm volatile statements.  Opencode the two cases, with a compiler barrier to
> enforce the correct ordering of operations.

I'm afraid I don't see how a "compiler barrier" helps here. Recall
that we only call it this way, in reality it's a memory clobber, and
that is all it means to the compiler. As a result, I don't think it'll
help order against the earlier WRMSR. If you want to enforce
order reliably, I think your only options are a single asm() doing
everything or the second asm() consuming an output (perhaps
a fake one) of the first one. I think that's also what in essence
the gcc doc says.

> While here, fix the comment, which isn't correct.  The SDM doesn't state that
> a read of leaf 1 is required - just that a CPUID instruction is required.
> Using leaf 0 results in better code generation, following the write of 0 to
> MSR_IA32_UCODE_REV.

Well, the question is - are we sure this is working as expected
nevertheless, on all models? The example in the SDM certainly
uses leaf 1.

> Suggested-by: Jan Beulich <JBeulich@suse.com>

Did I? Must have been quite some time back, as I don't really recall.

Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/microcode_intel.c b/xen/arch/x86/microcode_intel.c
index 22fdeca..088cfa2 100644
--- a/xen/arch/x86/microcode_intel.c
+++ b/xen/arch/x86/microcode_intel.c
@@ -99,6 +99,7 @@  static DEFINE_SPINLOCK(microcode_update_lock);
 static int collect_cpu_info(unsigned int cpu_num, struct cpu_signature *csig)
 {
     struct cpuinfo_x86 *c = &cpu_data[cpu_num];
+    unsigned long tmp;
     uint64_t msr_content;
 
     BUG_ON(cpu_num != smp_processor_id());
@@ -122,8 +123,9 @@  static int collect_cpu_info(unsigned int cpu_num, struct cpu_signature *csig)
     }
 
     wrmsrl(MSR_IA32_UCODE_REV, 0x0ULL);
-    /* As documented in the SDM: Do a CPUID 1 here */
-    cpuid_eax(1);
+    /* As documented in the SDM: Do a CPUID here. */
+    asm volatile ( "cpuid" : "=a" (tmp) : "a" (0)
+                   : "bx", "cx", "dx", "memory" );
 
     /* get the current revision from MSR 0x8B */
     rdmsrl(MSR_IA32_UCODE_REV, msr_content);
@@ -286,7 +288,7 @@  static int get_matching_microcode(const void *mc, unsigned int cpu)
 
 static int apply_microcode(unsigned int cpu)
 {
-    unsigned long flags;
+    unsigned long flags, tmp;
     uint64_t msr_content;
     unsigned int val[2];
     unsigned int cpu_num = raw_smp_processor_id();
@@ -305,8 +307,9 @@  static int apply_microcode(unsigned int cpu)
     wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)uci->mc.mc_intel->bits);
     wrmsrl(MSR_IA32_UCODE_REV, 0x0ULL);
 
-    /* As documented in the SDM: Do a CPUID 1 here */
-    cpuid_eax(1);
+    /* As documented in the SDM: Do a CPUID here. */
+    asm volatile ( "cpuid" : "=a" (tmp) : "a" (0)
+                   : "bx", "cx", "dx", "memory" );
 
     /* get the current revision from MSR 0x8B */
     rdmsrl(MSR_IA32_UCODE_REV, msr_content);
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index cef3ffb..3c76e7e 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -185,7 +185,7 @@  unsigned int apicid_to_socket(unsigned int);
  * resulting in stale register contents being returned.
  */
 #define cpuid(_op,_eax,_ebx,_ecx,_edx)          \
-    asm volatile ( "cpuid"                      \
+    asm ( "cpuid"                               \
           : "=a" (*(int *)(_eax)),              \
             "=b" (*(int *)(_ebx)),              \
             "=c" (*(int *)(_ecx)),              \
@@ -201,7 +201,7 @@  static inline void cpuid_count(
     unsigned int *ecx,
     unsigned int *edx)
 {
-    asm volatile ( "cpuid"
+    asm ( "cpuid"
           : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
           : "0" (op), "c" (count) );
 }
@@ -213,7 +213,7 @@  static always_inline unsigned int cpuid_eax(unsigned int op)
 {
     unsigned int eax;
 
-    asm volatile ( "cpuid"
+    asm ( "cpuid"
           : "=a" (eax)
           : "0" (op)
           : "bx", "cx", "dx" );
@@ -224,7 +224,7 @@  static always_inline unsigned int cpuid_ebx(unsigned int op)
 {
     unsigned int eax, ebx;
 
-    asm volatile ( "cpuid"
+    asm ( "cpuid"
           : "=a" (eax), "=b" (ebx)
           : "0" (op)
           : "cx", "dx" );
@@ -235,7 +235,7 @@  static always_inline unsigned int cpuid_ecx(unsigned int op)
 {
     unsigned int eax, ecx;
 
-    asm volatile ( "cpuid"
+    asm ( "cpuid"
           : "=a" (eax), "=c" (ecx)
           : "0" (op)
           : "bx", "dx" );
@@ -246,7 +246,7 @@  static always_inline unsigned int cpuid_edx(unsigned int op)
 {
     unsigned int eax, edx;
 
-    asm volatile ( "cpuid"
+    asm ( "cpuid"
           : "=a" (eax), "=d" (edx)
           : "0" (op)
           : "bx", "cx" );