diff mbox series

[v4] mce: prevent concurrent polling of MCE events

Message ID 20230717152317.GA94963@cathedrallabs.org (mailing list archive)
State New, archived
Headers show
Series [v4] mce: prevent concurrent polling of MCE events | expand

Commit Message

Aristeu Rozanski July 17, 2023, 3:23 p.m. UTC
On Intel microarchitectures that support CMCI but have it disabled (BIOS,
kernel option or CMCI storm code) the kernel will resort to polling for MCEs.
In these microarchitectures the IMC registers are shared by all CPUs in the
same package and despite the fact that the polling is set up in the kernel to
prevent all CPUs to poll at the same time, it's still possible they'll overlap
and report the same MCE multiple times.

This patch fixes this by introducing synchronization during polling only for
the affected microarchitectures.

v4: get rid of unneeded variable on intel_cmci_poll_unlock() and simplify unserialize_mc_bank_access()
v3: add {,un}serialize_mc_bank_access() as intermediate functions as requested by Tony Luck

Signed-off-by: Aristeu Rozanski <aris@ruivo.org>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Aristeu Rozanski <aris@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: linux-edac@vger.kernel.org

---
 arch/x86/kernel/cpu/mce/core.c     |   18 +++++++++++++++
 arch/x86/kernel/cpu/mce/intel.c    |   44 +++++++++++++++++++++++++++++++------
 arch/x86/kernel/cpu/mce/internal.h |    4 +++
 3 files changed, 59 insertions(+), 7 deletions(-)

Comments

Borislav Petkov July 19, 2023, 9:26 a.m. UTC | #1
On Mon, Jul 17, 2023 at 11:23:17AM -0400, Aristeu Rozanski wrote:
> On Intel microarchitectures that support CMCI but have it disabled (BIOS,
> kernel option or CMCI storm code) the kernel will resort to polling for MCEs.
> In these microarchitectures the IMC registers are shared by all CPUs in the
> same package and despite the fact that the polling is set up in the kernel to
> prevent all CPUs to poll at the same time, it's still possible they'll overlap
> and report the same MCE multiple times.
> 
> This patch fixes this by introducing synchronization during polling only for
> the affected microarchitectures.
> 
> v4: get rid of unneeded variable on intel_cmci_poll_unlock() and simplify unserialize_mc_bank_access()
> v3: add {,un}serialize_mc_bank_access() as intermediate functions as requested by Tony Luck
> 
> Signed-off-by: Aristeu Rozanski <aris@ruivo.org>
> Reviewed-by: Tony Luck <tony.luck@intel.com>
> Cc: Tony Luck <tony.luck@intel.com>
> Cc: Aristeu Rozanski <aris@redhat.com>
> Cc: Borislav Petkov <bp@alien8.de>
> Cc: linux-edac@vger.kernel.org
> 
> ---
>  arch/x86/kernel/cpu/mce/core.c     |   18 +++++++++++++++
>  arch/x86/kernel/cpu/mce/intel.c    |   44 +++++++++++++++++++++++++++++++------
>  arch/x86/kernel/cpu/mce/internal.h |    4 +++
>  3 files changed, 59 insertions(+), 7 deletions(-)

Does this work?

---

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 89e2aab5d34d..b8ad5a5b4026 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1608,6 +1608,13 @@ static void __start_timer(struct timer_list *t, unsigned long interval)
 	local_irq_restore(flags);
 }
 
+static void mc_poll_banks_default(void)
+{
+	machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
+}
+
+void (*mc_poll_banks)(void) = mc_poll_banks_default;
+
 static void mce_timer_fn(struct timer_list *t)
 {
 	struct timer_list *cpu_t = this_cpu_ptr(&mce_timer);
@@ -1618,7 +1625,7 @@ static void mce_timer_fn(struct timer_list *t)
 	iv = __this_cpu_read(mce_next_interval);
 
 	if (mce_available(this_cpu_ptr(&cpu_info))) {
-		machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
+		mc_poll_banks();
 
 		if (mce_intel_cmci_poll()) {
 			iv = mce_adjust_timer(iv);
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index 95275a5e57e0..f5323551c1a9 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -56,6 +56,13 @@ static DEFINE_PER_CPU(int, cmci_backoff_cnt);
  */
 static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
 
+/*
+ * On systems that do support CMCI but it's disabled, polling for MCEs can
+ * cause the same event to be reported multiple times because IA32_MCi_STATUS
+ * is shared by the same package.
+ */
+static DEFINE_SPINLOCK(cmci_poll_lock);
+
 #define CMCI_THRESHOLD		1
 #define CMCI_POLL_INTERVAL	(30 * HZ)
 #define CMCI_STORM_INTERVAL	(HZ)
@@ -426,12 +433,22 @@ void cmci_disable_bank(int bank)
 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
 }
 
+/* Bank polling function when CMCI is disabled. */
+static void cmci_mc_poll_banks(void)
+{
+	spin_lock(&cmci_poll_lock);
+	machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
+	spin_unlock(&cmci_poll_lock);
+}
+
 void intel_init_cmci(void)
 {
 	int banks;
 
-	if (!cmci_supported(&banks))
+	if (!cmci_supported(&banks)) {
+		mc_poll_banks = cmci_mc_poll_banks;
 		return;
+	}
 
 	mce_threshold_vector = intel_threshold_interrupt;
 	cmci_discover(banks);
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index d2412ce2d312..ed4a71c0f093 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -274,4 +274,5 @@ static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg)
 	return 0;
 }
 
+extern void (*mc_poll_banks)(void);
 #endif /* __X86_MCE_INTERNAL_H__ */
Tony Luck July 19, 2023, 5:32 p.m. UTC | #2
> Does this work?

Yes.

Tested-by: Tony Luck <tony.luck@intel.com>

-Tony
Aristeu Rozanski July 19, 2023, 6:07 p.m. UTC | #3
On Wed, Jul 19, 2023 at 11:26:19AM +0200, Borislav Petkov wrote:
> Does this work?

Yes. Much better. Thanks.
diff mbox series

Patch

--- linus-2.6.orig/arch/x86/kernel/cpu/mce/core.c	2023-06-26 12:37:12.972386600 -0400
+++ linus-2.6/arch/x86/kernel/cpu/mce/core.c	2023-06-27 15:09:11.812208744 -0400
@@ -1578,6 +1578,22 @@  return 0;
 }
 #endif
 
+static bool serialize_mc_bank_access(void)
+{
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+		return false;
+
+	return intel_cmci_poll_lock();
+}
+
+static void unserialize_mc_bank_access(bool locked)
+{
+	if (!locked)
+		return;
+
+	intel_cmci_poll_unlock();
+}
+
 /*
  * Periodic polling timer for "silent" machine check errors.  If the
  * poller finds an MCE, poll 2x faster.  When the poller finds no more
@@ -1618,7 +1634,9 @@  static void mce_timer_fn(struct timer_li
 	iv = __this_cpu_read(mce_next_interval);
 
 	if (mce_available(this_cpu_ptr(&cpu_info))) {
+		bool locked = serialize_mc_bank_access();
 		machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
+		unserialize_mc_bank_access(locked);
 
 		if (mce_intel_cmci_poll()) {
 			iv = mce_adjust_timer(iv);
--- linus-2.6.orig/arch/x86/kernel/cpu/mce/intel.c	2023-06-26 12:37:12.972386600 -0400
+++ linus-2.6/arch/x86/kernel/cpu/mce/intel.c	2023-06-27 15:09:31.672032470 -0400
@@ -73,13 +73,8 @@  enum {
 
 static atomic_t cmci_storm_on_cpus;
 
-static int cmci_supported(int *banks)
+static bool cmci_supported_hw(void)
 {
-	u64 cap;
-
-	if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
-		return 0;
-
 	/*
 	 * Vendor check is not strictly needed, but the initial
 	 * initialization is vendor keyed and this
@@ -87,10 +82,24 @@  return 0;
 	 */
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
 	    boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
-		return 0;
+		return false;
 
 	if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
+		return false;
+
+	return true;
+}
+
+static int cmci_supported(int *banks)
+{
+	u64 cap;
+
+	if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
+		return 0;
+
+	if (!cmci_supported_hw())
 		return 0;
+
 	rdmsrl(MSR_IA32_MCG_CAP, cap);
 	*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
 	return !!(cap & MCG_CMCI_P);
@@ -519,3 +528,24 @@  ((m->status & 0xa0000000ffffffff) == 0x8
 
 	return false;
 }
+
+/*
+ * On systems that do support CMCI but it's disabled, polling for MCEs can
+ * cause the same event to be reported multiple times because IA32_MCi_STATUS
+ * is shared by the same package.
+ */
+static DEFINE_SPINLOCK(cmci_poll_lock);
+bool intel_cmci_poll_lock(void)
+{
+	if (!cmci_supported_hw())
+		return false;
+
+	spin_lock(&cmci_poll_lock);
+
+	return true;
+}
+
+void intel_cmci_poll_unlock(void)
+{
+	spin_unlock(&cmci_poll_lock);
+}
--- linus-2.6.orig/arch/x86/kernel/cpu/mce/internal.h	2023-06-26 12:37:12.972386600 -0400
+++ linus-2.6/arch/x86/kernel/cpu/mce/internal.h	2023-06-27 15:09:53.180841560 -0400
@@ -49,6 +49,8 @@  void intel_init_cmci(void);
 void intel_init_lmce(void);
 void intel_clear_lmce(void);
 bool intel_filter_mce(struct mce *m);
+bool intel_cmci_poll_lock(void);
+void intel_cmci_poll_unlock(void);
 #else
 # define cmci_intel_adjust_timer mce_adjust_timer_default
 static inline bool mce_intel_cmci_poll(void) { return false; }
@@ -58,6 +60,8 @@  static inline void intel_init_cmci(void)
 static inline void intel_init_lmce(void) { }
 static inline void intel_clear_lmce(void) { }
 static inline bool intel_filter_mce(struct mce *m) { return false; }
+static inline bool intel_cmci_poll_lock(void) { return false; }
+static inline void intel_cmci_poll_unlock(void) { }
 #endif
 
 void mce_timer_kick(unsigned long interval);