
[v2,07/16] x86/mce: Define BSP-only init

Message ID: 20250213-wip-mca-updates-v2-7-3636547fe05f@amd.com (mailing list archive)
State: New
Series: AMD MCA interrupts rework

Commit Message

Yazen Ghannam Feb. 13, 2025, 4:45 p.m. UTC
Currently, MCA initialization is executed identically on each CPU as it is
brought online. However, a number of MCA initialization tasks only need to
be done once.

Define a function to collect all 'global' init tasks, and call it from the
BSP only. Start with the CPU feature flags.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---

Notes:
    Link:
    https://lore.kernel.org/r/Y6yQpWtlnFmL04h6@zn.tnic
    
    v1->v2:
    * New in v2.

 arch/x86/include/asm/mce.h     |  2 ++
 arch/x86/kernel/cpu/common.c   |  1 +
 arch/x86/kernel/cpu/mce/amd.c  |  3 ---
 arch/x86/kernel/cpu/mce/core.c | 29 ++++++++++++++++++++++-------
 4 files changed, 25 insertions(+), 10 deletions(-)
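
The shape of the resulting flow, as a minimal standalone C sketch (the
function names come from the patch; the bring-up loop and the mocked feature
read are simplifications, not the kernel's hotplug or CPUID machinery):
cpu_mca_init() runs once during early boot-CPU identification, while
mcheck_cpu_init() still runs on every CPU as it comes online.

/*
 * Minimal standalone sketch of the split, not kernel code: the loop
 * below stands in for CPU bring-up, and the feature read is mocked.
 */
#include <stdbool.h>
#include <stdio.h>

static struct { bool overflow_recov, succor, smca; } mce_flags;

/* Global, one-time setup: called only on the boot CPU (BSP). */
static void cpu_mca_init(void)
{
	/* mce_flags is system-wide state; reading features once suffices. */
	mce_flags.succor = true;	/* stand-in for cpu_feature_enabled() */
	printf("BSP: global MCA init\n");
}

/* Per-CPU setup: still called for every CPU as it comes online. */
static void mcheck_cpu_init(int cpu)
{
	printf("CPU%d: per-CPU MCA init\n", cpu);
}

int main(void)
{
	cpu_mca_init();			/* once, from early BSP identification */
	for (int cpu = 0; cpu < 4; cpu++)
		mcheck_cpu_init(cpu);	/* per-CPU, as before */
	return 0;
}

Compiled with any C99 compiler, this prints one BSP line followed by one
line per CPU.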

Comments

Zhuo, Qiuxu Feb. 18, 2025, 3:16 a.m. UTC | #1
> From: Yazen Ghannam <yazen.ghannam@amd.com>
> [...]
> diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
> index 302a310d0630..a4ef4ff1a7ff 100644
> --- a/arch/x86/kernel/cpu/mce/amd.c
> +++ b/arch/x86/kernel/cpu/mce/amd.c
> @@ -656,9 +656,6 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
>  	u32 low = 0, high = 0, address = 0;
>  	int offset = -1;
> 
> -	mce_flags.overflow_recov = cpu_feature_enabled(X86_FEATURE_OVERFLOW_RECOV);
> -	mce_flags.succor	 = cpu_feature_enabled(X86_FEATURE_SUCCOR);
> -	mce_flags.smca		 = cpu_feature_enabled(X86_FEATURE_SMCA);
>  	mce_flags.amd_threshold	 = 1;
> 
> [...]
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> [...]
> +/* Called only on the boot CPU. */
> +void cpu_mca_init(struct cpuinfo_x86 *c)
> +{
> +	u64 cap;
> +
> +	if (!mce_available(c))
> +		return;
> +
> +	mce_flags.overflow_recov = cpu_feature_enabled(X86_FEATURE_OVERFLOW_RECOV);
> +	mce_flags.succor	 = cpu_feature_enabled(X86_FEATURE_SUCCOR);
> +	mce_flags.smca		 = cpu_feature_enabled(X86_FEATURE_SMCA);

1. Before this patch set, the above code was executed only if the following
   condition was true. Do we still need this check?

   if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) {
         The above code.
   }

2. Can "mce_flags.amd_threshold = 1;" also be moved here?

-Qiuxu
Yazen Ghannam Feb. 19, 2025, 3:57 p.m. UTC | #2
On Tue, Feb 18, 2025 at 03:16:49AM +0000, Zhuo, Qiuxu wrote:
> > [...]
> > +/* Called only on the boot CPU. */
> > +void cpu_mca_init(struct cpuinfo_x86 *c)
> > +{
> > +	u64 cap;
> > +
> > +	if (!mce_available(c))
> > +		return;
> > +
> > +	mce_flags.overflow_recov = cpu_feature_enabled(X86_FEATURE_OVERFLOW_RECOV);
> > +	mce_flags.succor	 = cpu_feature_enabled(X86_FEATURE_SUCCOR);
> > +	mce_flags.smca		 = cpu_feature_enabled(X86_FEATURE_SMCA);
> 
> 1. Before this patch set, the above code was executed only if the following
>    condition was true. Do we still need this check?
> 
>    if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) {
>          The above code.
>    }

I don't think so. Feature checks should be independent of vendor, so the
vendor checks are redundant.

> 2. Can "mce_flags.amd_threshold = 1;" also be moved here?

No, because this flag is not based on a CPU feature. More cleanup can be
done here, but that will come later.

Thanks,
Yazen
Zhuo, Qiuxu Feb. 20, 2025, 1:37 a.m. UTC | #3
> From: Yazen Ghannam <yazen.ghannam@amd.com>
> [...]
> > 1. Before this patch set, the above code was executed only if the following
> >    condition was true. Do we still need this check?
> >
> >    if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) {
> >          The above code.
> >    }
> 
> I don't think so. Feature checks should be independent of vendor, so the
> vendor checks are redundant.

Without the vendor check, Intel CPUs will also run this code (which they
should *NOT*) and may mistakenly set those global AMD-specific flags, which
could result in Intel CPUs performing AMD-specific error handling during
machine check handling.

-Qiuxu
Yazen Ghannam Feb. 20, 2025, 2:36 p.m. UTC | #4
On Thu, Feb 20, 2025 at 01:37:37AM +0000, Zhuo, Qiuxu wrote:
> > [...]
> > I don't think so. Feature checks should be independent of vendor, so the
> > vendor checks are redundant.
> 
> Without the vendor check, Intel CPUs will also run this code (which they
> should *NOT*) and may mistakenly set those global AMD-specific flags, which
> could result in Intel CPUs performing AMD-specific error handling during
> machine check handling.

It's okay if Intel CPUs run this code, because they don't support these
features. The feature flags are generally derived from CPUID bits, and
x86 vendors do try to make sure they are unique and not
overloaded/redefined between vendors.

The same is true in reverse. It's okay if AMD CPUs run code to check for
Intel-specific features. The feature checks will be false, and
feature-specific code will not be used.

Thanks,
Yazen
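
As a side note on the exchange above, a minimal standalone C sketch of the
argument (not kernel code): a flag derived from a CPUID feature bit is false
on any CPU that does not enumerate the feature, so an explicit vendor check
adds nothing. The CPUID leaf and bit position used below (0x80000007, EBX
bit 1 for SUCCOR) are assumptions for illustration.

/*
 * Standalone sketch, not kernel code: a feature flag derived from a
 * CPUID bit gates behavior by itself, regardless of vendor.
 */
#include <stdbool.h>
#include <stdio.h>

struct cpu {
	const char *vendor;
	unsigned int cpuid_80000007_ebx;	/* mocked raw CPUID output */
};

/* Stand-in for cpu_feature_enabled(X86_FEATURE_SUCCOR). */
static bool feature_succor(const struct cpu *c)
{
	return c->cpuid_80000007_ebx & (1u << 1);	/* bit position illustrative */
}

int main(void)
{
	struct cpu amd   = { "AuthenticAMD", 1u << 1 };	/* enumerates SUCCOR */
	struct cpu intel = { "GenuineIntel", 0 };	/* does not */

	/* No vendor check needed: the flag is false where unsupported. */
	printf("%s: succor=%d\n", amd.vendor,   feature_succor(&amd));
	printf("%s: succor=%d\n", intel.vendor, feature_succor(&intel));
	return 0;
}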

Patch

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 36ff81c1b3b1..c98387364d6c 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -241,12 +241,14 @@ struct cper_ia_proc_ctx;
 
 #ifdef CONFIG_X86_MCE
 int mcheck_init(void);
+void cpu_mca_init(struct cpuinfo_x86 *c);
 void mcheck_cpu_init(struct cpuinfo_x86 *c);
 void mcheck_cpu_clear(struct cpuinfo_x86 *c);
 int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
 			       u64 lapic_id);
 #else
 static inline int mcheck_init(void) { return 0; }
+static inline void cpu_mca_init(struct cpuinfo_x86 *c) {}
 static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
 static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {}
 static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 76598a93a8fa..b14e2d98b45d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1649,6 +1649,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 		setup_clear_cpu_cap(X86_FEATURE_LA57);
 
 	detect_nopl();
+	cpu_mca_init(c);
 }
 
 void __init init_cpu_devs(void)
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 302a310d0630..a4ef4ff1a7ff 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -656,9 +656,6 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 	u32 low = 0, high = 0, address = 0;
 	int offset = -1;
 
-	mce_flags.overflow_recov = cpu_feature_enabled(X86_FEATURE_OVERFLOW_RECOV);
-	mce_flags.succor	 = cpu_feature_enabled(X86_FEATURE_SUCCOR);
-	mce_flags.smca		 = cpu_feature_enabled(X86_FEATURE_SMCA);
 	mce_flags.amd_threshold	 = 1;
 
 	for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 7fbf1c8291b8..f13d3f7ca56e 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1832,13 +1832,6 @@ static void __mcheck_cpu_cap_init(void)
 	this_cpu_write(mce_num_banks, b);
 
 	__mcheck_cpu_mce_banks_init();
-
-	/* Use accurate RIP reporting if available. */
-	if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
-		mca_cfg.rip_msr = MSR_IA32_MCG_EIP;
-
-	if (cap & MCG_SER_P)
-		mca_cfg.ser = 1;
 }
 
 static void __mcheck_cpu_init_generic(void)
@@ -2238,6 +2231,28 @@ DEFINE_IDTENTRY_RAW(exc_machine_check)
 }
 #endif
 
+/* Called only on the boot CPU. */
+void cpu_mca_init(struct cpuinfo_x86 *c)
+{
+	u64 cap;
+
+	if (!mce_available(c))
+		return;
+
+	mce_flags.overflow_recov = cpu_feature_enabled(X86_FEATURE_OVERFLOW_RECOV);
+	mce_flags.succor	 = cpu_feature_enabled(X86_FEATURE_SUCCOR);
+	mce_flags.smca		 = cpu_feature_enabled(X86_FEATURE_SMCA);
+
+	rdmsrl(MSR_IA32_MCG_CAP, cap);
+
+	/* Use accurate RIP reporting if available. */
+	if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
+		mca_cfg.rip_msr = MSR_IA32_MCG_EIP;
+
+	if (cap & MCG_SER_P)
+		mca_cfg.ser = 1;
+}
+
 /*
  * Called for each booted CPU to set up machine checks.
  * Must be called with preempt off: