Message ID | 20240909104349.3349-2-TonyWWang-oc@zhaoxin.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | x86/mce: Add Zhaoxin MCE support | expand |
On Mon, Sep 09, 2024 at 06:43:47PM +0800, Tony W Wang-oc wrote: > @@ -1970,6 +1974,18 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) > } > } > > + if (c->x86_vendor == X86_VENDOR_CENTAUR) { > + /* > + * All newer Centaur CPUs support MCE broadcasting. Enable > + * synchronization with a one second timeout. > + */ > + if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) || > + c->x86 > 6) { > + if (cfg->monarch_timeout < 0) > + cfg->monarch_timeout = USEC_PER_SEC; > + } > + } So if centaur == zhaoxin, why aren't you moving this hunk to mce_zhaoxin_feature_init() instead?
On Mon, Sep 09, 2024 at 06:43:47PM +0800, Tony W Wang-oc wrote: > From: Lyle Li <LyleLi@zhaoxin.com> > > Zhaoxin consists of two vendors, X86_VENDOR_ZHAOXIN and > X86_VENDOR_CENTAUR, so add the centaur vendor to support > Zhaoxin MCA in mce/core.c and mce/intel.c. > > Signed-off-by: Lyle Li <LyleLi@zhaoxin.com> > Reviewed-by: Tony W Wang-oc <TonyWWang-oc@zhaoxin.com> If you're sending someone else's patch, then you need to add your SOB too. Please educate yourself on the process: https://kernel.org/doc/html/latest/process/submitting-patches.html
On 9/13/24 07:27, Borislav Petkov wrote: >> + if (c->x86_vendor == X86_VENDOR_CENTAUR) { >> + /* >> + * All newer Centaur CPUs support MCE broadcasting. Enable >> + * synchronization with a one second timeout. >> + */ >> + if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) || >> + c->x86 > 6) { >> + if (cfg->monarch_timeout < 0) >> + cfg->monarch_timeout = USEC_PER_SEC; >> + } >> + } > So if centaur == zhaoxin, why aren't you moving this hunk to > mce_zhaoxin_feature_init() instead? The centaur and zhaoxin logic is also _really_ close here: > if (c->x86 > 6 || (c->x86_model == 0x19 || c->x86_model == 0x1f)) { > if (cfg->monarch_timeout < 0) > cfg->monarch_timeout = USEC_PER_SEC; > } vs > if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) || > c->x86 > 6) { > if (cfg->monarch_timeout < 0) > cfg->monarch_timeout = USEC_PER_SEC; > } I'd just randomly guess that the zhaoxin version is buggy because it doesn't do a c->x86 check before the "(c->x86_model == 0x19 || c->x86_model == 0x1f)". So instead of copying and pasting the same block over and over, can we consolidate it a bit? foo() { /* Older CPUs do not do MCE broadcast: */ if (c->x86 < 6) return; /* All newer ones do: */ if (c->x86 > 6) goto mce_broadcast; /* Family 6 is mixed: */ if (c->x86_vendor == X86_VENDOR_CENTAUR) { if (c->x86_model == 0xf && c->x86_stepping >= 0xe) goto mce_broadcast; } else if (c->x86_vendor == X86_VENDOR_ZHAOXIN) { if (c->x86_model == 0x19 || c->x86_model == 0x1f)) goto mce_broadcast; } return; mce_broadcast: if (cfg->monarch_timeout < 0) cfg->monarch_timeout = USEC_PER_SEC; } Heck, the Intel code can even go in there I think. Wouldn't that tell the story a bit better?
On 2024/9/13 22:27, Borislav Petkov wrote: > > > [这封邮件来自外部发件人 谨防风险] > > On Mon, Sep 09, 2024 at 06:43:47PM +0800, Tony W Wang-oc wrote: >> @@ -1970,6 +1974,18 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) >> } >> } >> >> + if (c->x86_vendor == X86_VENDOR_CENTAUR) { >> + /* >> + * All newer Centaur CPUs support MCE broadcasting. Enable >> + * synchronization with a one second timeout. >> + */ >> + if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) || >> + c->x86 > 6) { >> + if (cfg->monarch_timeout < 0) >> + cfg->monarch_timeout = USEC_PER_SEC; >> + } >> + } > > So if centaur == zhaoxin, why aren't you moving this hunk to > mce_zhaoxin_feature_init() instead? > > Ok, will adjust the patch in the next version. Sincerely! TonyWWang-oc
On 2024/9/13 23:47, Dave Hansen wrote: > > > [这封邮件来自外部发件人 谨防风险] > > On 9/13/24 07:27, Borislav Petkov wrote: >>> + if (c->x86_vendor == X86_VENDOR_CENTAUR) { >>> + /* >>> + * All newer Centaur CPUs support MCE broadcasting. Enable >>> + * synchronization with a one second timeout. >>> + */ >>> + if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) || >>> + c->x86 > 6) { >>> + if (cfg->monarch_timeout < 0) >>> + cfg->monarch_timeout = USEC_PER_SEC; >>> + } >>> + } >> So if centaur == zhaoxin, why aren't you moving this hunk to >> mce_zhaoxin_feature_init() instead? > > The centaur and zhaoxin logic is also _really_ close here: > >> if (c->x86 > 6 || (c->x86_model == 0x19 || c->x86_model == 0x1f)) { >> if (cfg->monarch_timeout < 0) >> cfg->monarch_timeout = USEC_PER_SEC; >> } > > vs > >> if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) || >> c->x86 > 6) { >> if (cfg->monarch_timeout < 0) >> cfg->monarch_timeout = USEC_PER_SEC; >> } > > I'd just randomly guess that the zhaoxin version is buggy because it > doesn't do a c->x86 check before the "(c->x86_model == 0x19 || > c->x86_model == 0x1f)". > Yes, the check for c->x86 == 6 is omitted in the zhaoxin version. > So instead of copying and pasting the same block over and over, can we > consolidate it a bit? > > foo() > { > /* Older CPUs do not do MCE broadcast: */ > if (c->x86 < 6) > return; > /* All newer ones do: */ > if (c->x86 > 6) > goto mce_broadcast; > > /* Family 6 is mixed: */ > if (c->x86_vendor == X86_VENDOR_CENTAUR) { > if (c->x86_model == 0xf && > c->x86_stepping >= 0xe) > goto mce_broadcast; > } else if (c->x86_vendor == X86_VENDOR_ZHAOXIN) { > if (c->x86_model == 0x19 || > c->x86_model == 0x1f)) > goto mce_broadcast; > } > > return; > > mce_broadcast: > if (cfg->monarch_timeout < 0) > cfg->monarch_timeout = USEC_PER_SEC; > } > Thank you! That makes more sense, and will adopt it into zhaoxin.c Sincerely! TonyWWang-oc
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index ad0623b65..b7b98c33a 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -496,6 +496,7 @@ bool mce_usable_address(struct mce *m) case X86_VENDOR_INTEL: case X86_VENDOR_ZHAOXIN: + case X86_VENDOR_CENTAUR: return intel_mce_usable_address(m); default: @@ -513,6 +514,7 @@ bool mce_is_memory_error(struct mce *m) case X86_VENDOR_INTEL: case X86_VENDOR_ZHAOXIN: + case X86_VENDOR_CENTAUR: /* * Intel SDM Volume 3B - 15.9.2 Compound Error Codes * @@ -1247,7 +1249,8 @@ static noinstr bool mce_check_crashing_cpu(void) mcgstatus = __rdmsr(MSR_IA32_MCG_STATUS); - if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) { + if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN || + boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) { if (mcgstatus & MCG_STATUS_LMCES) return false; } @@ -1521,7 +1524,8 @@ noinstr void do_machine_check(struct pt_regs *regs) * on Intel, Zhaoxin only. */ if (m.cpuvendor == X86_VENDOR_INTEL || - m.cpuvendor == X86_VENDOR_ZHAOXIN) + m.cpuvendor == X86_VENDOR_ZHAOXIN || + m.cpuvendor == X86_VENDOR_CENTAUR) lmce = m.mcgstatus & MCG_STATUS_LMCES; /* @@ -1970,6 +1974,18 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) } } + if (c->x86_vendor == X86_VENDOR_CENTAUR) { + /* + * All newer Centaur CPUs support MCE broadcasting. Enable + * synchronization with a one second timeout. + */ + if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) || + c->x86 > 6) { + if (cfg->monarch_timeout < 0) + cfg->monarch_timeout = USEC_PER_SEC; + } + } + if (cfg->monarch_timeout < 0) cfg->monarch_timeout = 0; if (cfg->bootlog != 0) @@ -2012,21 +2028,6 @@ static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c) } } -static void mce_centaur_feature_init(struct cpuinfo_x86 *c) -{ - struct mca_config *cfg = &mca_cfg; - - /* - * All newer Centaur CPUs support MCE broadcasting. Enable - * synchronization with a one second timeout. - */ - if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) || - c->x86 > 6) { - if (cfg->monarch_timeout < 0) - cfg->monarch_timeout = USEC_PER_SEC; - } -} - static void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c) { struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); @@ -2072,9 +2073,6 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) break; case X86_VENDOR_CENTAUR: - mce_centaur_feature_init(c); - break; - case X86_VENDOR_ZHAOXIN: mce_zhaoxin_feature_init(c); break; @@ -2092,6 +2090,7 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c) break; case X86_VENDOR_ZHAOXIN: + case X86_VENDOR_CENTAUR: mce_zhaoxin_feature_clear(c); break; @@ -2401,7 +2400,8 @@ static void vendor_disable_error_reporting(void) if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON || boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) + boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN || + boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) return; mce_disable_error_reporting(); diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index f6103e6bf..b7e67f4f7 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -88,7 +88,8 @@ static int cmci_supported(int *banks) * makes sure none of the backdoors are entered otherwise. */ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL && - boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN) + boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN && + boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR) return 0; if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)