diff mbox series

[v4,2/4] x86/mce, EDAC/mce_amd: Add support for new MCA_SYND{1,2} registers

Message ID 20240815211635.1336721-3-avadhut.naik@amd.com (mailing list archive)
State New
Headers show
Series MCE wrapper and support for new SMCA syndrome MSRs | expand

Commit Message

Avadhut Naik Aug. 15, 2024, 9:16 p.m. UTC
Starting with Zen4, AMD's Scalable MCA systems incorporate two new
registers: MCA_SYND1 and MCA_SYND2.

These registers will include supplemental error information in addition
to the existing MCA_SYND register. The data within these registers is
considered valid if MCA_STATUS[SyndV] is set.

Userspace error decoding tools like the rasdaemon gather related hardware
error information through the tracepoints. As such, these two registers
should be exported through the mce_record tracepoint so that tools like
rasdaemon can parse them and output the supplemental error information
like FRU Text contained in them.

[Yazen: Drop Yazen's Co-developed-by tag and moved SoB tag.]

Signed-off-by: Avadhut Naik <avadhut.naik@amd.com>
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Avadhut Naik <avadhut.naik@amd.com>
---
Changes in v2:
[1] https://lore.kernel.org/linux-edac/20240521125434.1555845-1-yazen.ghannam@amd.com/
[2] https://lore.kernel.org/linux-edac/20240523155641.2805411-1-yazen.ghannam@amd.com/

1. Drop dependencies on sets [1] and [2] above and rebase on top of
tip/master.

Changes in v3:
1. Move wrapper changes required in mce_read_aux() and mce_no_way_out()
from this patch to the first patch.
2. Add comments to explain the new wrapper's purpose.
3. Modify commit message per feedback received.
4. Fix SoB chain to properly reflect the patch path.

Changes in v4:
1. Rebase on top of tip/master to avoid merge conflicts.
---
 arch/x86/include/asm/mce.h      | 22 ++++++++++++++++++++++
 arch/x86/include/uapi/asm/mce.h |  3 ++-
 arch/x86/kernel/cpu/mce/amd.c   |  5 ++++-
 arch/x86/kernel/cpu/mce/core.c  |  9 ++++++++-
 drivers/edac/mce_amd.c          | 10 +++++++---
 include/trace/events/mce.h      |  9 +++++++--
 6 files changed, 50 insertions(+), 8 deletions(-)

Comments

Steven Rostedt Aug. 21, 2024, 5:40 p.m. UTC | #1
On Thu, 15 Aug 2024 16:16:33 -0500
Avadhut Naik <avadhut.naik@amd.com> wrote:

> diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
> index 65aba1afcd07..1e7d5696b3ba 100644
> --- a/include/trace/events/mce.h
> +++ b/include/trace/events/mce.h
> @@ -43,6 +43,8 @@ TRACE_EVENT(mce_record,
>  		__field(	u8,		bank		)
>  		__field(	u8,		cpuvendor	)
>  		__field(	u32,		microcode	)
> +		__field(	u8,		len	)

You don't need to save the length. It's already saved in the
__dynamic_array meta data.

> +		__dynamic_array(u8, v_data, sizeof(err->vendor))
>  	),
>  
>  	TP_fast_assign(
> @@ -65,9 +67,11 @@ TRACE_EVENT(mce_record,
>  		__entry->bank		= err->m.bank;
>  		__entry->cpuvendor	= err->m.cpuvendor;
>  		__entry->microcode	= err->m.microcode;
> +		__entry->len		= sizeof(err->vendor);
> +		memcpy(__get_dynamic_array(v_data), &err->vendor, sizeof(err->vendor));
>  	),
>  
> -	TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR: %016Lx, MISC: %016Lx, SYND: %016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PPIN: %llx, vendor: %u, CPUID: %x, time: %llu, socket: %u, APIC: %x, microcode: %x",
> +	TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016llx, IPID: %016llx, ADDR: %016llx, MISC: %016llx, SYND: %016llx, RIP: %02x:<%016llx>, TSC: %llx, PPIN: %llx, vendor: %u, CPUID: %x, time: %llu, socket: %u, APIC: %x, microcode: %x, vendor data: %s",
>  		__entry->cpu,
>  		__entry->mcgcap, __entry->mcgstatus,
>  		__entry->bank, __entry->status,
> @@ -83,7 +87,8 @@ TRACE_EVENT(mce_record,
>  		__entry->walltime,
>  		__entry->socketid,
>  		__entry->apicid,
> -		__entry->microcode)
> +		__entry->microcode,
> +		__print_array(__get_dynamic_array(v_data), __entry->len / 8, 8))

You can replace the __entry->len with:

		__print_array(__get_dynamic_array(v_data), __get_dynamic_array_len(v_data) / 8, 8))

Hmm, I should add a helper function that would do the same with just:

		__print_dynamic_array(vdata, 8);

Where it will have __print_dynamic_array(array-name, el-size)

-- Steve

>  );
>  
>  #endif /* _TRACE_MCE_H */
Naik, Avadhut Aug. 21, 2024, 6:55 p.m. UTC | #2
On 8/21/24 12:40, Steven Rostedt wrote:
> On Thu, 15 Aug 2024 16:16:33 -0500
> Avadhut Naik <avadhut.naik@amd.com> wrote:
> 
>> diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
>> index 65aba1afcd07..1e7d5696b3ba 100644
>> --- a/include/trace/events/mce.h
>> +++ b/include/trace/events/mce.h
>> @@ -43,6 +43,8 @@ TRACE_EVENT(mce_record,
>>  		__field(	u8,		bank		)
>>  		__field(	u8,		cpuvendor	)
>>  		__field(	u32,		microcode	)
>> +		__field(	u8,		len	)
> 
> You don't need to save the length. It's already saved in the
> __dynamic_array meta data.
> 
That helps! Will remove the len field.

>> +		__dynamic_array(u8, v_data, sizeof(err->vendor))
>>  	),
>>  
>>  	TP_fast_assign(
>> @@ -65,9 +67,11 @@ TRACE_EVENT(mce_record,
>>  		__entry->bank		= err->m.bank;
>>  		__entry->cpuvendor	= err->m.cpuvendor;
>>  		__entry->microcode	= err->m.microcode;
>> +		__entry->len		= sizeof(err->vendor);
>> +		memcpy(__get_dynamic_array(v_data), &err->vendor, sizeof(err->vendor));
>>  	),
>>  
>> -	TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR: %016Lx, MISC: %016Lx, SYND: %016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PPIN: %llx, vendor: %u, CPUID: %x, time: %llu, socket: %u, APIC: %x, microcode: %x",
>> +	TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016llx, IPID: %016llx, ADDR: %016llx, MISC: %016llx, SYND: %016llx, RIP: %02x:<%016llx>, TSC: %llx, PPIN: %llx, vendor: %u, CPUID: %x, time: %llu, socket: %u, APIC: %x, microcode: %x, vendor data: %s",
>>  		__entry->cpu,
>>  		__entry->mcgcap, __entry->mcgstatus,
>>  		__entry->bank, __entry->status,
>> @@ -83,7 +87,8 @@ TRACE_EVENT(mce_record,
>>  		__entry->walltime,
>>  		__entry->socketid,
>>  		__entry->apicid,
>> -		__entry->microcode)
>> +		__entry->microcode,
>> +		__print_array(__get_dynamic_array(v_data), __entry->len / 8, 8))
> 
> You can replace the __entry->len with:
> 
> 		__print_array(__get_dynamic_array(v_data), __get_dynamic_array_len(v_data) / 8, 8))
> 
> Hmm, I should add a helper function that would do the same with just:
> 
> 		__print_dynamic_array(vdata, 8);
> 
> Where it will have __print_dynamic_array(array-name, el-size)
> 
> -- Steve
> 
Thanks for the patch! Will incorporate it this set.

I did, however, have another question. This was actually raised in an earlier
version of this set by Boris and Tony. Not very sure about the answer though.

Is there a way through which we can make the tracepoints smarter i.e. have
conditional checks in them?

To provide some more context, as of now, the v_data dynamic array above will
contain vendor-specific data only on AMD systems. On Intel systems, IIUC,
its contents will be 0x0.

Consequently, the tracepoint output on Intel systems will have some thing like
"0x0,0x0,0x0" at the end.

Is there some way through which we can work around this? Can we perform a
vendor check inside the tracepoint so that the above dynamic array is logged
only on AMD systems and not on Intel systems?
diff mbox series

Patch

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 3c86b838b541..a977c10875a0 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -122,6 +122,9 @@ 
 #define MSR_AMD64_SMCA_MC0_DESTAT	0xc0002008
 #define MSR_AMD64_SMCA_MC0_DEADDR	0xc0002009
 #define MSR_AMD64_SMCA_MC0_MISC1	0xc000200a
+/* Registers MISC2 to MISC4 are at offsets B to D. */
+#define MSR_AMD64_SMCA_MC0_SYND1	0xc000200e
+#define MSR_AMD64_SMCA_MC0_SYND2	0xc000200f
 #define MSR_AMD64_SMCA_MCx_CTL(x)	(MSR_AMD64_SMCA_MC0_CTL + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_STATUS(x)	(MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_ADDR(x)	(MSR_AMD64_SMCA_MC0_ADDR + 0x10*(x))
@@ -132,6 +135,8 @@ 
 #define MSR_AMD64_SMCA_MCx_DESTAT(x)	(MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_DEADDR(x)	(MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_MISCy(x, y)	((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
+#define MSR_AMD64_SMCA_MCx_SYND1(x)	(MSR_AMD64_SMCA_MC0_SYND1 + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_SYND2(x)	(MSR_AMD64_SMCA_MC0_SYND2 + 0x10*(x))
 
 #define XEC(x, mask)			(((x) >> 16) & mask)
 
@@ -190,9 +195,26 @@  enum mce_notifier_prios {
 /**
  * struct mce_hw_err - Hardware Error Record.
  * @m:		Machine Check record.
+ * @vendor:	Vendor-specific error information.
+ *
+ * Vendor-specific fields should not be added to struct mce.
+ * Instead, vendors should export their vendor-specific data
+ * through their structure in the vendor union below.
+ *
+ * AMD's vendor data is parsed by error decoding tools for
+ * supplemental error information. Thus, current offsets of
+ * existing fields must be maintained.
+ * Only add new fields at the end of AMD's vendor structure.
  */
 struct mce_hw_err {
 	struct mce m;
+
+	union vendor_info {
+		struct {
+			u64 synd1;		/* MCA_SYND1 MSR */
+			u64 synd2;		/* MCA_SYND2 MSR */
+		} amd;
+	} vendor;
 };
 
 struct notifier_block;
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index db9adc081c5a..cb6b48a7c22b 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -8,7 +8,8 @@ 
 /*
  * Fields are zero when not available. Also, this struct is shared with
  * userspace mcelog and thus must keep existing fields at current offsets.
- * Only add new fields to the end of the structure
+ * Only add new, shared fields to the end of the structure.
+ * Do not add vendor-specific fields.
  */
 struct mce {
 	__u64 status;		/* Bank's MCi_STATUS MSR */
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 5b4d266500b2..6ca80fff1fea 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -797,8 +797,11 @@  static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
 	if (mce_flags.smca) {
 		rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m->ipid);
 
-		if (m->status & MCI_STATUS_SYNDV)
+		if (m->status & MCI_STATUS_SYNDV) {
 			rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m->synd);
+			rdmsrl(MSR_AMD64_SMCA_MCx_SYND1(bank), err.vendor.amd.synd1);
+			rdmsrl(MSR_AMD64_SMCA_MCx_SYND2(bank), err.vendor.amd.synd2);
+		}
 	}
 
 	mce_log(&err);
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index d9d1af7227ce..280d538dc13b 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -202,6 +202,10 @@  static void __print_mce(struct mce_hw_err *err)
 	if (mce_flags.smca) {
 		if (m->synd)
 			pr_cont("SYND %llx ", m->synd);
+		if (err->vendor.amd.synd1)
+			pr_cont("SYND1 %llx ", err->vendor.amd.synd1);
+		if (err->vendor.amd.synd2)
+			pr_cont("SYND2 %llx ", err->vendor.amd.synd2);
 		if (m->ipid)
 			pr_cont("IPID %llx ", m->ipid);
 	}
@@ -679,8 +683,11 @@  static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
 	if (mce_flags.smca) {
 		m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i));
 
-		if (m->status & MCI_STATUS_SYNDV)
+		if (m->status & MCI_STATUS_SYNDV) {
 			m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
+			err->vendor.amd.synd1 = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND1(i));
+			err->vendor.amd.synd2 = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND2(i));
+		}
 	}
 }
 
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index c5fae99de781..aea68999c849 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -792,7 +792,8 @@  static const char *decode_error_status(struct mce *m)
 static int
 amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
 {
-	struct mce *m = &((struct mce_hw_err *)data)->m;
+	struct mce_hw_err *err = (struct mce_hw_err *)data;
+	struct mce *m = &err->m;
 	unsigned int fam = x86_family(m->cpuid);
 	int ecc;
 
@@ -850,8 +851,11 @@  amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
 	if (boot_cpu_has(X86_FEATURE_SMCA)) {
 		pr_emerg(HW_ERR "IPID: 0x%016llx", m->ipid);
 
-		if (m->status & MCI_STATUS_SYNDV)
-			pr_cont(", Syndrome: 0x%016llx", m->synd);
+		if (m->status & MCI_STATUS_SYNDV) {
+			pr_cont(", Syndrome: 0x%016llx\n", m->synd);
+			pr_emerg(HW_ERR "Syndrome1: 0x%016llx, Syndrome2: 0x%016llx",
+				 err->vendor.amd.synd1, err->vendor.amd.synd2);
+		}
 
 		pr_cont("\n");
 
diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
index 65aba1afcd07..1e7d5696b3ba 100644
--- a/include/trace/events/mce.h
+++ b/include/trace/events/mce.h
@@ -43,6 +43,8 @@  TRACE_EVENT(mce_record,
 		__field(	u8,		bank		)
 		__field(	u8,		cpuvendor	)
 		__field(	u32,		microcode	)
+		__field(	u8,		len	)
+		__dynamic_array(u8, v_data, sizeof(err->vendor))
 	),
 
 	TP_fast_assign(
@@ -65,9 +67,11 @@  TRACE_EVENT(mce_record,
 		__entry->bank		= err->m.bank;
 		__entry->cpuvendor	= err->m.cpuvendor;
 		__entry->microcode	= err->m.microcode;
+		__entry->len		= sizeof(err->vendor);
+		memcpy(__get_dynamic_array(v_data), &err->vendor, sizeof(err->vendor));
 	),
 
-	TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR: %016Lx, MISC: %016Lx, SYND: %016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PPIN: %llx, vendor: %u, CPUID: %x, time: %llu, socket: %u, APIC: %x, microcode: %x",
+	TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016llx, IPID: %016llx, ADDR: %016llx, MISC: %016llx, SYND: %016llx, RIP: %02x:<%016llx>, TSC: %llx, PPIN: %llx, vendor: %u, CPUID: %x, time: %llu, socket: %u, APIC: %x, microcode: %x, vendor data: %s",
 		__entry->cpu,
 		__entry->mcgcap, __entry->mcgstatus,
 		__entry->bank, __entry->status,
@@ -83,7 +87,8 @@  TRACE_EVENT(mce_record,
 		__entry->walltime,
 		__entry->socketid,
 		__entry->apicid,
-		__entry->microcode)
+		__entry->microcode,
+		__print_array(__get_dynamic_array(v_data), __entry->len / 8, 8))
 );
 
 #endif /* _TRACE_MCE_H */