From 20a54d54ea06f050650ab8923b7d9ee1d21b5317 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Wed, 11 Dec 2013 16:31:04 -0800
Subject: [PATCH] x86, mwait: Use a dedicated percpu area for mwait doorbell
We have used the cache line that includes thread_info.flags as the
mwait doorbell. However, this is liable to be dirty in the cache,
which may trigger hardware errata, plus it might cause false wakeups.
Instead, use a dedicated wakeup doorbell area.
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
arch/x86/include/asm/cpufeature.h | 1 -
arch/x86/include/asm/mwait.h | 46 ++++++++++++++++++++++++++++++++++++++
arch/x86/include/asm/processor.h | 23 -------------------
arch/x86/kernel/acpi/cstate.c | 6 +----
arch/x86/kernel/cpu/common.c | 17 ++++++++++++++
arch/x86/kernel/cpu/intel.c | 3 ---
arch/x86/kernel/setup_percpu.c | 3 +++
arch/x86/kernel/smpboot.c | 19 +---------------
drivers/acpi/acpi_pad.c | 3 +--
drivers/idle/intel_idle.c | 4 +---
drivers/thermal/intel_powerclamp.c | 2 +-
11 files changed, 71 insertions(+), 56 deletions(-)
@@ -96,7 +96,6 @@
#define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */
#define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */
#define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */
-#define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */
#define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */
#define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */
#define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */
@@ -13,4 +13,50 @@
#define MWAIT_ECX_INTERRUPT_BREAK 0x1
+#ifndef __ASSEMBLY__
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+
+static inline void __monitor(const void *eax, unsigned long ecx,
+ unsigned long edx)
+{
+ /* "monitor %eax, %ecx, %edx;" */
+ asm volatile(".byte 0x0f, 0x01, 0xc8;"
+ :: "a" (eax), "c" (ecx), "d"(edx));
+}
+
+static inline void __mwait(unsigned long eax, unsigned long ecx)
+{
+ /* "mwait %eax, %ecx;" */
+ asm volatile(".byte 0x0f, 0x01, 0xc9;"
+ :: "a" (eax), "c" (ecx));
+}
+
+static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
+{
+ trace_hardirqs_on();
+ /* "mwait %eax, %ecx;" */
+ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
+ :: "a" (eax), "c" (ecx));
+}
+
+extern char __percpu *mwait_doorbell;
+
+void __init setup_mwait_doorbell(void);
+
+static inline void x86_monitor_doorbell(unsigned long ecx, unsigned long edx)
+{
+ __monitor(__this_cpu_ptr(mwait_doorbell), ecx, edx);
+ mb();
+}
+
+static inline void x86_mwait_doorbell_bing_bong(int cpu)
+{
+ ACCESS_ONCE(*per_cpu_ptr(mwait_doorbell, cpu)) = 0;
+}
+
+#endif /* !__ASSEMBLY__ */
+
#endif /* _ASM_X86_MWAIT_H */
@@ -723,29 +723,6 @@ static inline void sync_core(void)
#endif
}
-static inline void __monitor(const void *eax, unsigned long ecx,
- unsigned long edx)
-{
- /* "monitor %eax, %ecx, %edx;" */
- asm volatile(".byte 0x0f, 0x01, 0xc8;"
- :: "a" (eax), "c" (ecx), "d"(edx));
-}
-
-static inline void __mwait(unsigned long eax, unsigned long ecx)
-{
- /* "mwait %eax, %ecx;" */
- asm volatile(".byte 0x0f, 0x01, 0xc9;"
- :: "a" (eax), "c" (ecx));
-}
-
-static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
-{
- trace_hardirqs_on();
- /* "mwait %eax, %ecx;" */
- asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
- :: "a" (eax), "c" (ecx));
-}
-
extern void select_idle_routine(const struct cpuinfo_x86 *c);
extern void init_amd_e400_c1e_mask(void);
@@ -163,11 +163,7 @@ EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
if (!need_resched()) {
- if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
- clflush((void *)¤t_thread_info()->flags);
-
- __monitor((void *)¤t_thread_info()->flags, 0, 0);
- smp_mb();
+ x86_monitor_doorbell(0, 0);
if (!need_resched())
__mwait(ax, cx);
}
@@ -24,6 +24,7 @@
#include <linux/cpumask.h>
#include <asm/pgtable.h>
#include <linux/atomic.h>
+#include <asm/mwait.h>
#include <asm/proto.h>
#include <asm/setup.h>
#include <asm/apic.h>
@@ -63,6 +64,22 @@ void __init setup_cpu_local_masks(void)
alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
}
+/* allocate percpu area for mwait doorbell */
+char __percpu *mwait_doorbell;
+
+void __init setup_mwait_doorbell(void)
+{
+ if (boot_cpu_has(X86_FEATURE_MWAIT)) {
+ mwait_doorbell = __alloc_percpu(boot_cpu_data.clflush_size,
+ boot_cpu_data.clflush_size);
+
+ if (!mwait_doorbell) {
+ /* This should never happen... */
+ panic("Unable to allocate mwait doorbell!\n");
+ }
+ }
+}
+
static void default_init(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_64
@@ -387,9 +387,6 @@ static void init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_PEBS);
}
- if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush)
- set_cpu_cap(c, X86_FEATURE_CLFLUSH_MONITOR);
-
#ifdef CONFIG_X86_64
if (c->x86 == 15)
c->x86_cache_alignment = c->x86_clflush_size * 2;
@@ -284,4 +284,7 @@ void __init setup_per_cpu_areas(void)
/* Setup cpu initialized, callin, callout masks */
setup_cpu_local_masks();
+
+ /* Setup MWAIT doorbell */
+ setup_mwait_doorbell();
}
@@ -1368,7 +1368,6 @@ static inline void mwait_play_dead(void)
unsigned int eax, ebx, ecx, edx;
unsigned int highest_cstate = 0;
unsigned int highest_subcstate = 0;
- void *mwait_ptr;
int i;
if (!this_cpu_has(X86_FEATURE_MWAIT))
@@ -1400,26 +1399,10 @@ static inline void mwait_play_dead(void)
(highest_subcstate - 1);
}
- /*
- * This should be a memory location in a cache line which is
- * unlikely to be touched by other processors. The actual
- * content is immaterial as it is not actually modified in any way.
- */
- mwait_ptr = ¤t_thread_info()->flags;
-
wbinvd();
while (1) {
- /*
- * The CLFLUSH is a workaround for erratum AAI65 for
- * the Xeon 7400 series. It's not clear it is actually
- * needed, but it should be harmless in either case.
- * The WBINVD is insufficient due to the spurious-wakeup
- * case where we return around the loop.
- */
- clflush(mwait_ptr);
- __monitor(mwait_ptr, 0, 0);
- mb();
+ x86_monitor_doorbell(0, 0);
__mwait(eax, 0);
/*
* If NMI wants to wake up CPU0, start CPU0.
@@ -193,8 +193,7 @@ static int power_saving_thread(void *data)
CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
stop_critical_timings();
- __monitor((void *)¤t_thread_info()->flags, 0, 0);
- smp_mb();
+ x86_monitor_doorbell(0, 0);
if (!need_resched())
__mwait(power_saving_mwait_eax, 1);
@@ -376,9 +376,7 @@ static int intel_idle(struct cpuidle_device *dev,
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
if (!current_set_polling_and_test()) {
-
- __monitor((void *)¤t_thread_info()->flags, 0, 0);
- smp_mb();
+ x86_monitor_doorbell(0, 0);
if (!need_resched())
__mwait(eax, ecx);
}
@@ -438,7 +438,7 @@ static int clamp_thread(void *arg)
*/
local_touch_nmi();
stop_critical_timings();
- __monitor((void *)¤t_thread_info()->flags, 0, 0);
+ x86_monitor_doorbell(0, 0);
cpu_relax(); /* allow HT sibling to run */
__mwait(eax, ecx);
start_critical_timings();
--
1.8.3.1