Message ID | 20230717152317.GA94963@cathedrallabs.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v4] mce: prevent concurrent polling of MCE events | expand |
On Mon, Jul 17, 2023 at 11:23:17AM -0400, Aristeu Rozanski wrote: > On Intel microarchitectures that support CMCI but have it disabled (BIOS, > kernel option or CMCI storm code) the kernel will resort to polling for MCEs. > In these microarchitectures the IMC registers are shared by all CPUs in the > same package and despite the fact that the polling is set up in the kernel to > prevent all CPUs to poll at the same time, it's still possible they'll overlap > and report the same MCE multiple times. > > This patch fixes this by introducing synchronization during polling only for > the affected microarchitectures. > > v4: get rid of unneeded variable on intel_cmci_poll_unlock() and simplify unserialize_mc_bank_access() > v3: add {,un}serialize_mc_bank_access() as intermediate functions as requested by Tony Luck > > Signed-off-by: Aristeu Rozanski <aris@ruivo.org> > Reviewed-by: Tony Luck <tony.luck@intel.com> > Cc: Tony Luck <tony.luck@intel.com> > Cc: Aristeu Rozanski <aris@redhat.com> > Cc: Borislav Petkov <bp@alien8.de> > Cc: linux-edac@vger.kernel.org > > --- > arch/x86/kernel/cpu/mce/core.c | 18 +++++++++++++++ > arch/x86/kernel/cpu/mce/intel.c | 44 +++++++++++++++++++++++++++++++------ > arch/x86/kernel/cpu/mce/internal.h | 4 +++ > 3 files changed, 59 insertions(+), 7 deletions(-) Does this work? --- diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 89e2aab5d34d..b8ad5a5b4026 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1608,6 +1608,13 @@ static void __start_timer(struct timer_list *t, unsigned long interval) local_irq_restore(flags); } +static void mc_poll_banks_default(void) +{ + machine_check_poll(0, this_cpu_ptr(&mce_poll_banks)); +} + +void (*mc_poll_banks)(void) = mc_poll_banks_default; + static void mce_timer_fn(struct timer_list *t) { struct timer_list *cpu_t = this_cpu_ptr(&mce_timer); @@ -1618,7 +1625,7 @@ static void mce_timer_fn(struct timer_list *t) iv = __this_cpu_read(mce_next_interval); if (mce_available(this_cpu_ptr(&cpu_info))) { - machine_check_poll(0, this_cpu_ptr(&mce_poll_banks)); + mc_poll_banks(); if (mce_intel_cmci_poll()) { iv = mce_adjust_timer(iv); diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index 95275a5e57e0..f5323551c1a9 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -56,6 +56,13 @@ static DEFINE_PER_CPU(int, cmci_backoff_cnt); */ static DEFINE_RAW_SPINLOCK(cmci_discover_lock); +/* + * On systems that do support CMCI but it's disabled, polling for MCEs can + * cause the same event to be reported multiple times because IA32_MCi_STATUS + * is shared by the same package. + */ +static DEFINE_SPINLOCK(cmci_poll_lock); + #define CMCI_THRESHOLD 1 #define CMCI_POLL_INTERVAL (30 * HZ) #define CMCI_STORM_INTERVAL (HZ) @@ -426,12 +433,22 @@ void cmci_disable_bank(int bank) raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); } +/* Bank polling function when CMCI is disabled. */ +static void cmci_mc_poll_banks(void) +{ + spin_lock(&cmci_poll_lock); + machine_check_poll(0, this_cpu_ptr(&mce_poll_banks)); + spin_unlock(&cmci_poll_lock); +} + void intel_init_cmci(void) { int banks; - if (!cmci_supported(&banks)) + if (!cmci_supported(&banks)) { + mc_poll_banks = cmci_mc_poll_banks; return; + } mce_threshold_vector = intel_threshold_interrupt; cmci_discover(banks); diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index d2412ce2d312..ed4a71c0f093 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -274,4 +274,5 @@ static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg) return 0; } +extern void (*mc_poll_banks)(void); #endif /* __X86_MCE_INTERNAL_H__ */
> Does this work? Yes. Tested-by: Tony Luck <tony.luck@intel.com> -Tony
On Wed, Jul 19, 2023 at 11:26:19AM +0200, Borislav Petkov wrote:
> Does this work?
Yes. Much better. Thanks.
--- linus-2.6.orig/arch/x86/kernel/cpu/mce/core.c 2023-06-26 12:37:12.972386600 -0400 +++ linus-2.6/arch/x86/kernel/cpu/mce/core.c 2023-06-27 15:09:11.812208744 -0400 @@ -1578,6 +1578,22 @@ return 0; } #endif +static bool serialize_mc_bank_access(void) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return false; + + return intel_cmci_poll_lock(); +} + +static void unserialize_mc_bank_access(bool locked) +{ + if (!locked) + return; + + intel_cmci_poll_unlock(); +} + /* * Periodic polling timer for "silent" machine check errors. If the * poller finds an MCE, poll 2x faster. When the poller finds no more @@ -1618,7 +1634,9 @@ static void mce_timer_fn(struct timer_li iv = __this_cpu_read(mce_next_interval); if (mce_available(this_cpu_ptr(&cpu_info))) { + bool locked = serialize_mc_bank_access(); machine_check_poll(0, this_cpu_ptr(&mce_poll_banks)); + unserialize_mc_bank_access(locked); if (mce_intel_cmci_poll()) { iv = mce_adjust_timer(iv); --- linus-2.6.orig/arch/x86/kernel/cpu/mce/intel.c 2023-06-26 12:37:12.972386600 -0400 +++ linus-2.6/arch/x86/kernel/cpu/mce/intel.c 2023-06-27 15:09:31.672032470 -0400 @@ -73,13 +73,8 @@ enum { static atomic_t cmci_storm_on_cpus; -static int cmci_supported(int *banks) +static bool cmci_supported_hw(void) { - u64 cap; - - if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce) - return 0; - /* * Vendor check is not strictly needed, but the initial * initialization is vendor keyed and this @@ -87,10 +82,24 @@ return 0; */ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL && boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN) - return 0; + return false; if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6) + return false; + + return true; +} + +static int cmci_supported(int *banks) +{ + u64 cap; + + if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce) + return 0; + + if (!cmci_supported_hw()) return 0; + rdmsrl(MSR_IA32_MCG_CAP, cap); *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); return !!(cap & MCG_CMCI_P); @@ -519,3 +528,24 @@ ((m->status & 0xa0000000ffffffff) == 0x8 return false; } + +/* + * On systems that do support CMCI but it's disabled, polling for MCEs can + * cause the same event to be reported multiple times because IA32_MCi_STATUS + * is shared by the same package. + */ +static DEFINE_SPINLOCK(cmci_poll_lock); +bool intel_cmci_poll_lock(void) +{ + if (!cmci_supported_hw()) + return false; + + spin_lock(&cmci_poll_lock); + + return true; +} + +void intel_cmci_poll_unlock(void) +{ + spin_unlock(&cmci_poll_lock); +} --- linus-2.6.orig/arch/x86/kernel/cpu/mce/internal.h 2023-06-26 12:37:12.972386600 -0400 +++ linus-2.6/arch/x86/kernel/cpu/mce/internal.h 2023-06-27 15:09:53.180841560 -0400 @@ -49,6 +49,8 @@ void intel_init_cmci(void); void intel_init_lmce(void); void intel_clear_lmce(void); bool intel_filter_mce(struct mce *m); +bool intel_cmci_poll_lock(void); +void intel_cmci_poll_unlock(void); #else # define cmci_intel_adjust_timer mce_adjust_timer_default static inline bool mce_intel_cmci_poll(void) { return false; } @@ -58,6 +60,8 @@ static inline void intel_init_cmci(void) static inline void intel_init_lmce(void) { } static inline void intel_clear_lmce(void) { } static inline bool intel_filter_mce(struct mce *m) { return false; } +static inline bool intel_cmci_poll_lock(void) { return false; } +static inline void intel_cmci_poll_unlock(void) { } #endif void mce_timer_kick(unsigned long interval);