Message ID | 20240815211635.1336721-3-avadhut.naik@amd.com (mailing list archive) |
---|---|
State | Handled Elsewhere, archived |
Headers | show |
Series | MCE wrapper and support for new SMCA syndrome MSRs | expand |
On Thu, 15 Aug 2024 16:16:33 -0500 Avadhut Naik <avadhut.naik@amd.com> wrote: > diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h > index 65aba1afcd07..1e7d5696b3ba 100644 > --- a/include/trace/events/mce.h > +++ b/include/trace/events/mce.h > @@ -43,6 +43,8 @@ TRACE_EVENT(mce_record, > __field( u8, bank ) > __field( u8, cpuvendor ) > __field( u32, microcode ) > + __field( u8, len ) You don't need to save the length. It's already saved in the __dynamic_array meta data. > + __dynamic_array(u8, v_data, sizeof(err->vendor)) > ), > > TP_fast_assign( > @@ -65,9 +67,11 @@ TRACE_EVENT(mce_record, > __entry->bank = err->m.bank; > __entry->cpuvendor = err->m.cpuvendor; > __entry->microcode = err->m.microcode; > + __entry->len = sizeof(err->vendor); > + memcpy(__get_dynamic_array(v_data), &err->vendor, sizeof(err->vendor)); > ), > > - TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR: %016Lx, MISC: %016Lx, SYND: %016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PPIN: %llx, vendor: %u, CPUID: %x, time: %llu, socket: %u, APIC: %x, microcode: %x", > + TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016llx, IPID: %016llx, ADDR: %016llx, MISC: %016llx, SYND: %016llx, RIP: %02x:<%016llx>, TSC: %llx, PPIN: %llx, vendor: %u, CPUID: %x, time: %llu, socket: %u, APIC: %x, microcode: %x, vendor data: %s", > __entry->cpu, > __entry->mcgcap, __entry->mcgstatus, > __entry->bank, __entry->status, > @@ -83,7 +87,8 @@ TRACE_EVENT(mce_record, > __entry->walltime, > __entry->socketid, > __entry->apicid, > - __entry->microcode) > + __entry->microcode, > + __print_array(__get_dynamic_array(v_data), __entry->len / 8, 8)) You can replace the __entry->len with: __print_array(__get_dynamic_array(v_data), __get_dynamic_array_len(v_data) / 8, 8)) Hmm, I should add a helper function that would do the same with just: __print_dynamic_array(vdata, 8); Where it will have __print_dynamic_array(array-name, el-size) -- Steve > ); > > #endif /* _TRACE_MCE_H */
On 8/21/24 12:40, Steven Rostedt wrote: > On Thu, 15 Aug 2024 16:16:33 -0500 > Avadhut Naik <avadhut.naik@amd.com> wrote: > >> diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h >> index 65aba1afcd07..1e7d5696b3ba 100644 >> --- a/include/trace/events/mce.h >> +++ b/include/trace/events/mce.h >> @@ -43,6 +43,8 @@ TRACE_EVENT(mce_record, >> __field( u8, bank ) >> __field( u8, cpuvendor ) >> __field( u32, microcode ) >> + __field( u8, len ) > > You don't need to save the length. It's already saved in the > __dynamic_array meta data. > That helps! Will remove the len field. >> + __dynamic_array(u8, v_data, sizeof(err->vendor)) >> ), >> >> TP_fast_assign( >> @@ -65,9 +67,11 @@ TRACE_EVENT(mce_record, >> __entry->bank = err->m.bank; >> __entry->cpuvendor = err->m.cpuvendor; >> __entry->microcode = err->m.microcode; >> + __entry->len = sizeof(err->vendor); >> + memcpy(__get_dynamic_array(v_data), &err->vendor, sizeof(err->vendor)); >> ), >> >> - TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR: %016Lx, MISC: %016Lx, SYND: %016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PPIN: %llx, vendor: %u, CPUID: %x, time: %llu, socket: %u, APIC: %x, microcode: %x", >> + TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016llx, IPID: %016llx, ADDR: %016llx, MISC: %016llx, SYND: %016llx, RIP: %02x:<%016llx>, TSC: %llx, PPIN: %llx, vendor: %u, CPUID: %x, time: %llu, socket: %u, APIC: %x, microcode: %x, vendor data: %s", >> __entry->cpu, >> __entry->mcgcap, __entry->mcgstatus, >> __entry->bank, __entry->status, >> @@ -83,7 +87,8 @@ TRACE_EVENT(mce_record, >> __entry->walltime, >> __entry->socketid, >> __entry->apicid, >> - __entry->microcode) >> + __entry->microcode, >> + __print_array(__get_dynamic_array(v_data), __entry->len / 8, 8)) > > You can replace the __entry->len with: > > __print_array(__get_dynamic_array(v_data), __get_dynamic_array_len(v_data) / 8, 8)) > > Hmm, I should add a helper function that would do the same with just: > > __print_dynamic_array(vdata, 8); > > Where it will have __print_dynamic_array(array-name, el-size) > > -- Steve > Thanks for the patch! Will incorporate it this set. I did, however, have another question. This was actually raised in an earlier version of this set by Boris and Tony. Not very sure about the answer though. Is there a way through which we can make the tracepoints smarter i.e. have conditional checks in them? To provide some more context, as of now, the v_data dynamic array above will contain vendor-specific data only on AMD systems. On Intel systems, IIUC, its contents will be 0x0. Consequently, the tracepoint output on Intel systems will have some thing like "0x0,0x0,0x0" at the end. Is there some way through which we can work around this? Can we perform a vendor check inside the tracepoint so that the above dynamic array is logged only on AMD systems and not on Intel systems?
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 3c86b838b541..a977c10875a0 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -122,6 +122,9 @@ #define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008 #define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009 #define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a +/* Registers MISC2 to MISC4 are at offsets B to D. */ +#define MSR_AMD64_SMCA_MC0_SYND1 0xc000200e +#define MSR_AMD64_SMCA_MC0_SYND2 0xc000200f #define MSR_AMD64_SMCA_MCx_CTL(x) (MSR_AMD64_SMCA_MC0_CTL + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_STATUS(x) (MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_ADDR(x) (MSR_AMD64_SMCA_MC0_ADDR + 0x10*(x)) @@ -132,6 +135,8 @@ #define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x))) +#define MSR_AMD64_SMCA_MCx_SYND1(x) (MSR_AMD64_SMCA_MC0_SYND1 + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_SYND2(x) (MSR_AMD64_SMCA_MC0_SYND2 + 0x10*(x)) #define XEC(x, mask) (((x) >> 16) & mask) @@ -190,9 +195,26 @@ enum mce_notifier_prios { /** * struct mce_hw_err - Hardware Error Record. * @m: Machine Check record. + * @vendor: Vendor-specific error information. + * + * Vendor-specific fields should not be added to struct mce. + * Instead, vendors should export their vendor-specific data + * through their structure in the vendor union below. + * + * AMD's vendor data is parsed by error decoding tools for + * supplemental error information. Thus, current offsets of + * existing fields must be maintained. + * Only add new fields at the end of AMD's vendor structure. */ struct mce_hw_err { struct mce m; + + union vendor_info { + struct { + u64 synd1; /* MCA_SYND1 MSR */ + u64 synd2; /* MCA_SYND2 MSR */ + } amd; + } vendor; }; struct notifier_block; diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index db9adc081c5a..cb6b48a7c22b 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -8,7 +8,8 @@ /* * Fields are zero when not available. Also, this struct is shared with * userspace mcelog and thus must keep existing fields at current offsets. - * Only add new fields to the end of the structure + * Only add new, shared fields to the end of the structure. + * Do not add vendor-specific fields. */ struct mce { __u64 status; /* Bank's MCi_STATUS MSR */ diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 5b4d266500b2..6ca80fff1fea 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -797,8 +797,11 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc) if (mce_flags.smca) { rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m->ipid); - if (m->status & MCI_STATUS_SYNDV) + if (m->status & MCI_STATUS_SYNDV) { rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m->synd); + rdmsrl(MSR_AMD64_SMCA_MCx_SYND1(bank), err.vendor.amd.synd1); + rdmsrl(MSR_AMD64_SMCA_MCx_SYND2(bank), err.vendor.amd.synd2); + } } mce_log(&err); diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index d9d1af7227ce..280d538dc13b 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -202,6 +202,10 @@ static void __print_mce(struct mce_hw_err *err) if (mce_flags.smca) { if (m->synd) pr_cont("SYND %llx ", m->synd); + if (err->vendor.amd.synd1) + pr_cont("SYND1 %llx ", err->vendor.amd.synd1); + if (err->vendor.amd.synd2) + pr_cont("SYND2 %llx ", err->vendor.amd.synd2); if (m->ipid) pr_cont("IPID %llx ", m->ipid); } @@ -679,8 +683,11 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i) if (mce_flags.smca) { m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i)); - if (m->status & MCI_STATUS_SYNDV) + if (m->status & MCI_STATUS_SYNDV) { m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i)); + err->vendor.amd.synd1 = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND1(i)); + err->vendor.amd.synd2 = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND2(i)); + } } } diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index c5fae99de781..aea68999c849 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -792,7 +792,8 @@ static const char *decode_error_status(struct mce *m) static int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) { - struct mce *m = &((struct mce_hw_err *)data)->m; + struct mce_hw_err *err = (struct mce_hw_err *)data; + struct mce *m = &err->m; unsigned int fam = x86_family(m->cpuid); int ecc; @@ -850,8 +851,11 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) if (boot_cpu_has(X86_FEATURE_SMCA)) { pr_emerg(HW_ERR "IPID: 0x%016llx", m->ipid); - if (m->status & MCI_STATUS_SYNDV) - pr_cont(", Syndrome: 0x%016llx", m->synd); + if (m->status & MCI_STATUS_SYNDV) { + pr_cont(", Syndrome: 0x%016llx\n", m->synd); + pr_emerg(HW_ERR "Syndrome1: 0x%016llx, Syndrome2: 0x%016llx", + err->vendor.amd.synd1, err->vendor.amd.synd2); + } pr_cont("\n"); diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h index 65aba1afcd07..1e7d5696b3ba 100644 --- a/include/trace/events/mce.h +++ b/include/trace/events/mce.h @@ -43,6 +43,8 @@ TRACE_EVENT(mce_record, __field( u8, bank ) __field( u8, cpuvendor ) __field( u32, microcode ) + __field( u8, len ) + __dynamic_array(u8, v_data, sizeof(err->vendor)) ), TP_fast_assign( @@ -65,9 +67,11 @@ TRACE_EVENT(mce_record, __entry->bank = err->m.bank; __entry->cpuvendor = err->m.cpuvendor; __entry->microcode = err->m.microcode; + __entry->len = sizeof(err->vendor); + memcpy(__get_dynamic_array(v_data), &err->vendor, sizeof(err->vendor)); ), - TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR: %016Lx, MISC: %016Lx, SYND: %016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PPIN: %llx, vendor: %u, CPUID: %x, time: %llu, socket: %u, APIC: %x, microcode: %x", + TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016llx, IPID: %016llx, ADDR: %016llx, MISC: %016llx, SYND: %016llx, RIP: %02x:<%016llx>, TSC: %llx, PPIN: %llx, vendor: %u, CPUID: %x, time: %llu, socket: %u, APIC: %x, microcode: %x, vendor data: %s", __entry->cpu, __entry->mcgcap, __entry->mcgstatus, __entry->bank, __entry->status, @@ -83,7 +87,8 @@ TRACE_EVENT(mce_record, __entry->walltime, __entry->socketid, __entry->apicid, - __entry->microcode) + __entry->microcode, + __print_array(__get_dynamic_array(v_data), __entry->len / 8, 8)) ); #endif /* _TRACE_MCE_H */