Message ID | 20250114120427.149260-5-Smita.KoralahalliChannabasappa@amd.com |
---|---|
State | New |
Headers | show |
Series | acpi/ghes, cper, cxl: Process CXL CPER Protocol errors | expand |
On 1/14/25 5:04 AM, Smita Koralahalli wrote: > Add support in GHES to detect and process CXL CPER Protocol errors, as > defined in UEFI v2.10, section N.2.13. > > Define struct cxl_cper_prot_err_work_data to cache CXL protocol error > information, including RAS capabilities and severity, for further > handling. > > These cached CXL CPER records will later be processed by workqueues > within the CXL subsystem. > > Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com> > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> > --- > drivers/acpi/apei/ghes.c | 54 ++++++++++++++++++++++++++++++++++++++++ > include/cxl/event.h | 6 +++++ > 2 files changed, 60 insertions(+) > > diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c > index 07789f0b59bc..4ab3c8ae1360 100644 > --- a/drivers/acpi/apei/ghes.c > +++ b/drivers/acpi/apei/ghes.c > @@ -676,6 +676,56 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata, > schedule_work(&entry->work); > } > > +static void cxl_cper_post_prot_err(struct cxl_cper_sec_prot_err *prot_err, > + int severity) > +{ > +#ifdef CONFIG_ACPI_APEI_PCIEAER > + struct cxl_cper_prot_err_work_data wd; > + u8 *dvsec_start, *cap_start; > + > + if (!(prot_err->valid_bits & PROT_ERR_VALID_AGENT_ADDRESS)) { > + pr_err_ratelimited("CXL CPER invalid agent type\n"); > + return; > + } > + > + if (!(prot_err->valid_bits & PROT_ERR_VALID_ERROR_LOG)) { > + pr_err_ratelimited("CXL CPER invalid protocol error log\n"); > + return; > + } > + > + if (prot_err->err_len != sizeof(struct cxl_ras_capability_regs)) { > + pr_err_ratelimited("CXL CPER invalid RAS Cap size (%u)\n", > + prot_err->err_len); > + return; > + } > + > + if (!(prot_err->valid_bits & PROT_ERR_VALID_SERIAL_NUMBER)) > + pr_warn(FW_WARN "CXL CPER no device serial number\n"); > + > + switch (prot_err->agent_type) { > + case RCD: > + case DEVICE: > + case LD: > + case FMLD: > + case RP: > + case DSP: > + case USP: > + memcpy(&wd.prot_err, prot_err, sizeof(wd.prot_err)); > + > + dvsec_start = (u8 *)(prot_err + 1); > + cap_start = dvsec_start + prot_err->dvsec_len; > + > + memcpy(&wd.ras_cap, cap_start, sizeof(wd.ras_cap)); > + wd.severity = cper_severity_to_aer(severity); > + break; > + default: > + pr_err_ratelimited("CXL CPER invalid agent type: %d\n", > + prot_err->agent_type); > + return; > + } > +#endif > +} > + > /* Room for 8 entries for each of the 4 event log queues */ > #define CXL_CPER_FIFO_DEPTH 32 > DEFINE_KFIFO(cxl_cper_fifo, struct cxl_cper_work_data, CXL_CPER_FIFO_DEPTH); > @@ -779,6 +829,10 @@ static bool ghes_do_proc(struct ghes *ghes, > } > else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { > queued = ghes_handle_arm_hw_error(gdata, sev, sync); > + } else if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR)) { > + struct cxl_cper_sec_prot_err *prot_err = acpi_hest_get_payload(gdata); > + > + cxl_cper_post_prot_err(prot_err, gdata->error_severity); > } else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) { > struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata); > > diff --git a/include/cxl/event.h b/include/cxl/event.h > index 66d85fc87701..ee1c3dec62fa 100644 > --- a/include/cxl/event.h > +++ b/include/cxl/event.h > @@ -232,6 +232,12 @@ struct cxl_ras_capability_regs { > u32 header_log[16]; > }; > > +struct cxl_cper_prot_err_work_data { > + struct cxl_cper_sec_prot_err prot_err; > + struct cxl_ras_capability_regs ras_cap; > + int severity; > +}; > + > #ifdef CONFIG_ACPI_APEI_GHES > int cxl_cper_register_work(struct work_struct *work); > int cxl_cper_unregister_work(struct work_struct *work);
Smita Koralahalli wrote: > Add support in GHES to detect and process CXL CPER Protocol errors, as > defined in UEFI v2.10, section N.2.13. > > Define struct cxl_cper_prot_err_work_data to cache CXL protocol error > information, including RAS capabilities and severity, for further > handling. > > These cached CXL CPER records will later be processed by workqueues > within the CXL subsystem. > > Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com> > Reviewe,-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Reviewed-by: Ira Weiny <ira.weiny@intel.com> [snip]
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 07789f0b59bc..4ab3c8ae1360 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -676,6 +676,56 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata, schedule_work(&entry->work); } +static void cxl_cper_post_prot_err(struct cxl_cper_sec_prot_err *prot_err, + int severity) +{ +#ifdef CONFIG_ACPI_APEI_PCIEAER + struct cxl_cper_prot_err_work_data wd; + u8 *dvsec_start, *cap_start; + + if (!(prot_err->valid_bits & PROT_ERR_VALID_AGENT_ADDRESS)) { + pr_err_ratelimited("CXL CPER invalid agent type\n"); + return; + } + + if (!(prot_err->valid_bits & PROT_ERR_VALID_ERROR_LOG)) { + pr_err_ratelimited("CXL CPER invalid protocol error log\n"); + return; + } + + if (prot_err->err_len != sizeof(struct cxl_ras_capability_regs)) { + pr_err_ratelimited("CXL CPER invalid RAS Cap size (%u)\n", + prot_err->err_len); + return; + } + + if (!(prot_err->valid_bits & PROT_ERR_VALID_SERIAL_NUMBER)) + pr_warn(FW_WARN "CXL CPER no device serial number\n"); + + switch (prot_err->agent_type) { + case RCD: + case DEVICE: + case LD: + case FMLD: + case RP: + case DSP: + case USP: + memcpy(&wd.prot_err, prot_err, sizeof(wd.prot_err)); + + dvsec_start = (u8 *)(prot_err + 1); + cap_start = dvsec_start + prot_err->dvsec_len; + + memcpy(&wd.ras_cap, cap_start, sizeof(wd.ras_cap)); + wd.severity = cper_severity_to_aer(severity); + break; + default: + pr_err_ratelimited("CXL CPER invalid agent type: %d\n", + prot_err->agent_type); + return; + } +#endif +} + /* Room for 8 entries for each of the 4 event log queues */ #define CXL_CPER_FIFO_DEPTH 32 DEFINE_KFIFO(cxl_cper_fifo, struct cxl_cper_work_data, CXL_CPER_FIFO_DEPTH); @@ -779,6 +829,10 @@ static bool ghes_do_proc(struct ghes *ghes, } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { queued = ghes_handle_arm_hw_error(gdata, sev, sync); + } else if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR)) { + struct cxl_cper_sec_prot_err *prot_err = acpi_hest_get_payload(gdata); + + cxl_cper_post_prot_err(prot_err, gdata->error_severity); } else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) { struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata); diff --git a/include/cxl/event.h b/include/cxl/event.h index 66d85fc87701..ee1c3dec62fa 100644 --- a/include/cxl/event.h +++ b/include/cxl/event.h @@ -232,6 +232,12 @@ struct cxl_ras_capability_regs { u32 header_log[16]; }; +struct cxl_cper_prot_err_work_data { + struct cxl_cper_sec_prot_err prot_err; + struct cxl_ras_capability_regs ras_cap; + int severity; +}; + #ifdef CONFIG_ACPI_APEI_GHES int cxl_cper_register_work(struct work_struct *work); int cxl_cper_unregister_work(struct work_struct *work);