@@ -114,6 +114,7 @@ void wbinvd_on_cpu(int cpu);
int wbinvd_on_all_cpus(void);
void smp_kick_mwait_play_dead(void);
+void mwait_play_dead_with_hint(unsigned int hint);
void native_smp_send_reschedule(int cpu);
void native_send_call_func_ipi(const struct cpumask *mask);
@@ -151,7 +152,6 @@ static inline struct cpumask *cpu_l2c_shared_mask(int cpu)
{
return per_cpu(cpu_l2c_shared_map, cpu);
}
-
#else /* !CONFIG_SMP */
#define wbinvd_on_cpu(cpu) wbinvd()
static inline int wbinvd_on_all_cpus(void)
@@ -164,6 +164,8 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu)
{
return (struct cpumask *)cpumask_of(0);
}
+
+static inline void mwait_play_dead_with_hint(unsigned int eax_hint) { }
#endif /* CONFIG_SMP */
#ifdef CONFIG_DEBUG_NMI_SELFTEST
@@ -1272,13 +1272,57 @@ void play_dead_common(void)
local_irq_disable();
}
+void __noreturn mwait_play_dead_with_hint(unsigned int eax_hint)
+{
+ struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead);
+
+ /* Set up state for the kexec() hack below */
+ md->status = CPUDEAD_MWAIT_WAIT;
+ md->control = CPUDEAD_MWAIT_WAIT;
+
+ wbinvd();
+
+ while (1) {
+ /*
+ * The CLFLUSH is a workaround for erratum AAI65 for
+ * the Xeon 7400 series. It's not clear it is actually
+ * needed, but it should be harmless in either case.
+ * The WBINVD is insufficient due to the spurious-wakeup
+ * case where we return around the loop.
+ */
+ mb();
+ clflush(md);
+ mb();
+ __monitor(md, 0, 0);
+ mb();
+ __mwait(eax_hint, 0);
+
+ if (READ_ONCE(md->control) == CPUDEAD_MWAIT_KEXEC_HLT) {
+ /*
+ * Kexec is about to happen. Don't go back into mwait() as
+ * the kexec kernel might overwrite text and data including
+ * page tables and stack. So mwait() would resume when the
+ * monitor cache line is written to and then the CPU goes
+ * south due to overwritten text, page tables and stack.
+ *
+ * Note: This does _NOT_ protect against a stray MCE, NMI,
+ * SMI. They will resume execution at the instruction
+ * following the HLT instruction and run into the problem
+ * which this is trying to prevent.
+ */
+ WRITE_ONCE(md->status, CPUDEAD_MWAIT_KEXEC_HLT);
+ while(1)
+ native_halt();
+ }
+ }
+}
+
/*
* We need to flush the caches before going to sleep, lest we have
* dirty data in our caches when we come back up.
*/
static inline void mwait_play_dead(void)
{
- struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead);
unsigned int eax, ebx, ecx, edx;
unsigned int highest_cstate = 0;
unsigned int highest_subcstate = 0;
@@ -1316,45 +1360,7 @@ static inline void mwait_play_dead(void)
(highest_subcstate - 1);
}
- /* Set up state for the kexec() hack below */
- md->status = CPUDEAD_MWAIT_WAIT;
- md->control = CPUDEAD_MWAIT_WAIT;
-
- wbinvd();
-
- while (1) {
- /*
- * The CLFLUSH is a workaround for erratum AAI65 for
- * the Xeon 7400 series. It's not clear it is actually
- * needed, but it should be harmless in either case.
- * The WBINVD is insufficient due to the spurious-wakeup
- * case where we return around the loop.
- */
- mb();
- clflush(md);
- mb();
- __monitor(md, 0, 0);
- mb();
- __mwait(eax, 0);
-
- if (READ_ONCE(md->control) == CPUDEAD_MWAIT_KEXEC_HLT) {
- /*
- * Kexec is about to happen. Don't go back into mwait() as
- * the kexec kernel might overwrite text and data including
- * page tables and stack. So mwait() would resume when the
- * monitor cache line is written to and then the CPU goes
- * south due to overwritten text, page tables and stack.
- *
- * Note: This does _NOT_ protect against a stray MCE, NMI,
- * SMI. They will resume execution at the instruction
- * following the HLT instruction and run into the problem
- * which this is trying to prevent.
- */
- WRITE_ONCE(md->status, CPUDEAD_MWAIT_KEXEC_HLT);
- while(1)
- native_halt();
- }
- }
+ mwait_play_dead_with_hint(eax);
}
/*
The MWAIT instruction needs different hints on different CPUs to reach specific idle states. The current hint calculation in mwait_play_dead() code works in practice on current Intel hardware, but is not documented and fails on a recent Intel's Sierra Forest and possibly some future ones. Those newer CPUs' power efficiency suffers when the CPU is put offline. Allow cpuidle code to provide mwait_play_dead with a known hint for efficient play_dead code. Signed-off-by: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com> --- arch/x86/include/asm/smp.h | 4 +- arch/x86/kernel/smpboot.c | 86 ++++++++++++++++++++------------------ 2 files changed, 49 insertions(+), 41 deletions(-)