Message ID | 20240329063614.362763-7-ruansy.fnst@fujitsu.com |
---|---|
State | New, archived |
Headers | show |
Series | cxl: add poison event handler | expand |
On Fri, Mar 29, 2024 at 02:36:14PM +0800, Shiyang Ruan wrote: > Currently driver only traces cxl events, poison injection (for both vmem > and pmem type) on cxl memdev is silent. OS needs to be notified then it > could handle poison range in time. Per CXL spec, the device error event > could be signaled through FW-First and OS-First methods. > > So, add poison event handler in OS-First method: > - qemu: > - CXL device report POISON event to OS by MSI by sending GMER after > injecting a poison record > - CXL driver > a. parse the POISON event from GMER; <-- this patch > b. retrieve POISON list from memdev; > c. translate poisoned DPA to HPA; > d. enqueue poisoned PFN to memory_failure's work queue; > > Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com> > --- > > the reply to Jonathan's comment in last version: > > I'm not 100% convinced this is necessary poison causing. Also > > the text tells us we should see 'an appropriate event'. > > DRAM one seems likely to be chosen by some vendors. > I think it's right to use DRAM Event Record for volatile-memdev, but > should poison on a persistent-memdev also use DRAM Event Record too? > Though its 'Physical Address' field has the 'Volatile' bit too, which is > the same as General Media Event Record. I am a bit confused about this. Similar thought as shared in cover letter - Can the driver trigger new poison list read on any events of interest, and implement a policy to report mem failures on all poison list reads that hit mapped addresses? I guess if the answer to that question is NO - we only report memory failures on GMER/poison, can we find more synergy and not repeat so much work. 
--Alison > > --- > drivers/cxl/core/mbox.c | 100 ++++++++++++++++++++++++++++++++++------ > drivers/cxl/cxlmem.h | 8 ++-- > 2 files changed, 91 insertions(+), 17 deletions(-) > > diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c > index 19b46fb06ed6..97ef45d808b8 100644 > --- a/drivers/cxl/core/mbox.c > +++ b/drivers/cxl/core/mbox.c > @@ -837,25 +837,99 @@ int cxl_enumerate_cmds(struct cxl_memdev_state *mds) > } > EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL); > > -void cxl_event_trace_record(const struct cxl_memdev *cxlmd, > - enum cxl_event_log_type type, > - enum cxl_event_type event_type, > - const uuid_t *uuid, union cxl_event *evt) > +struct cxl_event_poison_context { > + u64 dpa; > + u64 length; > +}; > + > +static int __cxl_report_poison(struct device *dev, void *arg) > +{ > + struct cxl_event_poison_context *ctx = arg; > + struct cxl_endpoint_decoder *cxled; > + struct cxl_memdev *cxlmd; > + > + cxled = to_cxl_endpoint_decoder(dev); > + if (!cxled || !cxled->dpa_res || !resource_size(cxled->dpa_res)) > + return 0; > + > + if (cxled->mode == CXL_DECODER_MIXED) { > + dev_dbg(dev, "poison list read unsupported in mixed mode\n"); > + return 0; > + } > + > + if (ctx->dpa > cxled->dpa_res->end || ctx->dpa < cxled->dpa_res->start) > + return 0; > + > + cxlmd = cxled_to_memdev(cxled); > + cxl_mem_get_poison(cxlmd, ctx->dpa, ctx->length, cxled->cxld.region, > + true); > + > + return 1; > +} > + > +static void cxl_event_handle_poison(struct cxl_memdev *cxlmd, > + struct cxl_event_gen_media *rec) > +{ > + struct cxl_port *port = cxlmd->endpoint; > + u64 phys_addr = le64_to_cpu(rec->phys_addr); > + struct cxl_event_poison_context ctx = { > + .dpa = phys_addr & CXL_DPA_MASK, > + }; > + > + /* No regions mapped to this memdev, that is to say no HPA is mapped */ > + if (!port || !is_cxl_endpoint(port) || > + cxl_num_decoders_committed(port) == 0) > + return; > + > + /* > + * Host Inject Poison may have a range of DPA, but the GMER only has > + * "Physical 
Address" field, no such one indicates length. So it's > + * better to call cxl_mem_get_poison() to find this poison record. > + */ > + ctx.length = phys_addr & CXL_DPA_VOLATILE ? > + resource_size(&cxlmd->cxlds->ram_res) : > + resource_size(&cxlmd->cxlds->pmem_res) - ctx.dpa; > + > + device_for_each_child(&port->dev, &ctx, __cxl_report_poison); > +} > + > +static void cxl_event_handle_general_media(struct cxl_memdev *cxlmd, > + enum cxl_event_log_type type, > + struct cxl_event_gen_media *rec) > +{ > + if (type == CXL_EVENT_TYPE_FAIL) { > + switch (rec->transaction_type) { > + case CXL_EVENT_TRANSACTION_READ: > + case CXL_EVENT_TRANSACTION_WRITE: > + case CXL_EVENT_TRANSACTION_INJECT_POISON: > + cxl_event_handle_poison(cxlmd, rec); > + break; > + default: > + break; > + } > + } > +} > + > +void cxl_event_handle_record(struct cxl_memdev *cxlmd, > + enum cxl_event_log_type type, > + enum cxl_event_type event_type, > + const uuid_t *uuid, union cxl_event *evt) > { > - if (event_type == CXL_CPER_EVENT_GEN_MEDIA) > + if (event_type == CXL_CPER_EVENT_GEN_MEDIA) { > trace_cxl_general_media(cxlmd, type, &evt->gen_media); > - else if (event_type == CXL_CPER_EVENT_DRAM) > + cxl_event_handle_general_media(cxlmd, type, &evt->gen_media); > + } else if (event_type == CXL_CPER_EVENT_DRAM) > trace_cxl_dram(cxlmd, type, &evt->dram); > else if (event_type == CXL_CPER_EVENT_MEM_MODULE) > trace_cxl_memory_module(cxlmd, type, &evt->mem_module); > else > trace_cxl_generic_event(cxlmd, type, uuid, &evt->generic); > } > -EXPORT_SYMBOL_NS_GPL(cxl_event_trace_record, CXL); > +EXPORT_SYMBOL_NS_GPL(cxl_event_handle_record, CXL); > > -static void __cxl_event_trace_record(const struct cxl_memdev *cxlmd, > - enum cxl_event_log_type type, > - struct cxl_event_record_raw *record) > +static void __cxl_event_handle_record(struct cxl_memdev *cxlmd, > + enum cxl_event_log_type type, > + struct cxl_event_record_raw *record) > { > enum cxl_event_type ev_type = CXL_CPER_EVENT_GENERIC; > const uuid_t 
*uuid = &record->id; > @@ -867,7 +941,7 @@ static void __cxl_event_trace_record(const struct cxl_memdev *cxlmd, > else if (uuid_equal(uuid, &CXL_EVENT_MEM_MODULE_UUID)) > ev_type = CXL_CPER_EVENT_MEM_MODULE; > > - cxl_event_trace_record(cxlmd, type, ev_type, uuid, &record->event); > + cxl_event_handle_record(cxlmd, type, ev_type, uuid, &record->event); > } > > static int cxl_clear_event_record(struct cxl_memdev_state *mds, > @@ -978,8 +1052,8 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds, > break; > > for (i = 0; i < nr_rec; i++) > - __cxl_event_trace_record(cxlmd, type, > - &payload->records[i]); > + __cxl_event_handle_record(cxlmd, type, > + &payload->records[i]); > > if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW) > trace_cxl_overflow(cxlmd, type, payload); > diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h > index 1f03130b9d6a..dfd7bdd0d66a 100644 > --- a/drivers/cxl/cxlmem.h > +++ b/drivers/cxl/cxlmem.h > @@ -822,10 +822,10 @@ void set_exclusive_cxl_commands(struct cxl_memdev_state *mds, > void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds, > unsigned long *cmds); > void cxl_mem_get_event_records(struct cxl_memdev_state *mds, u32 status); > -void cxl_event_trace_record(const struct cxl_memdev *cxlmd, > - enum cxl_event_log_type type, > - enum cxl_event_type event_type, > - const uuid_t *uuid, union cxl_event *evt); > +void cxl_event_handle_record(struct cxl_memdev *cxlmd, > + enum cxl_event_log_type type, > + enum cxl_event_type event_type, > + const uuid_t *uuid, union cxl_event *evt); > int cxl_set_timestamp(struct cxl_memdev_state *mds); > int cxl_poison_state_init(struct cxl_memdev_state *mds); > void cxl_mem_report_poison(struct cxl_memdev *cxlmd, > -- > 2.34.1 > >
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 19b46fb06ed6..97ef45d808b8 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -837,25 +837,99 @@ int cxl_enumerate_cmds(struct cxl_memdev_state *mds) } EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL); -void cxl_event_trace_record(const struct cxl_memdev *cxlmd, - enum cxl_event_log_type type, - enum cxl_event_type event_type, - const uuid_t *uuid, union cxl_event *evt) +struct cxl_event_poison_context { + u64 dpa; + u64 length; +}; + +static int __cxl_report_poison(struct device *dev, void *arg) +{ + struct cxl_event_poison_context *ctx = arg; + struct cxl_endpoint_decoder *cxled; + struct cxl_memdev *cxlmd; + + cxled = to_cxl_endpoint_decoder(dev); + if (!cxled || !cxled->dpa_res || !resource_size(cxled->dpa_res)) + return 0; + + if (cxled->mode == CXL_DECODER_MIXED) { + dev_dbg(dev, "poison list read unsupported in mixed mode\n"); + return 0; + } + + if (ctx->dpa > cxled->dpa_res->end || ctx->dpa < cxled->dpa_res->start) + return 0; + + cxlmd = cxled_to_memdev(cxled); + cxl_mem_get_poison(cxlmd, ctx->dpa, ctx->length, cxled->cxld.region, + true); + + return 1; +} + +static void cxl_event_handle_poison(struct cxl_memdev *cxlmd, + struct cxl_event_gen_media *rec) +{ + struct cxl_port *port = cxlmd->endpoint; + u64 phys_addr = le64_to_cpu(rec->phys_addr); + struct cxl_event_poison_context ctx = { + .dpa = phys_addr & CXL_DPA_MASK, + }; + + /* No regions mapped to this memdev, that is to say no HPA is mapped */ + if (!port || !is_cxl_endpoint(port) || + cxl_num_decoders_committed(port) == 0) + return; + + /* + * Host Inject Poison may have a range of DPA, but the GMER only has + * "Physical Address" field, no such one indicates length. So it's + * better to call cxl_mem_get_poison() to find this poison record. + */ + ctx.length = phys_addr & CXL_DPA_VOLATILE ? 
+ resource_size(&cxlmd->cxlds->ram_res) : + resource_size(&cxlmd->cxlds->pmem_res) - ctx.dpa; + + device_for_each_child(&port->dev, &ctx, __cxl_report_poison); +} + +static void cxl_event_handle_general_media(struct cxl_memdev *cxlmd, + enum cxl_event_log_type type, + struct cxl_event_gen_media *rec) +{ + if (type == CXL_EVENT_TYPE_FAIL) { + switch (rec->transaction_type) { + case CXL_EVENT_TRANSACTION_READ: + case CXL_EVENT_TRANSACTION_WRITE: + case CXL_EVENT_TRANSACTION_INJECT_POISON: + cxl_event_handle_poison(cxlmd, rec); + break; + default: + break; + } + } +} + +void cxl_event_handle_record(struct cxl_memdev *cxlmd, + enum cxl_event_log_type type, + enum cxl_event_type event_type, + const uuid_t *uuid, union cxl_event *evt) { - if (event_type == CXL_CPER_EVENT_GEN_MEDIA) + if (event_type == CXL_CPER_EVENT_GEN_MEDIA) { trace_cxl_general_media(cxlmd, type, &evt->gen_media); - else if (event_type == CXL_CPER_EVENT_DRAM) + cxl_event_handle_general_media(cxlmd, type, &evt->gen_media); + } else if (event_type == CXL_CPER_EVENT_DRAM) trace_cxl_dram(cxlmd, type, &evt->dram); else if (event_type == CXL_CPER_EVENT_MEM_MODULE) trace_cxl_memory_module(cxlmd, type, &evt->mem_module); else trace_cxl_generic_event(cxlmd, type, uuid, &evt->generic); } -EXPORT_SYMBOL_NS_GPL(cxl_event_trace_record, CXL); +EXPORT_SYMBOL_NS_GPL(cxl_event_handle_record, CXL); -static void __cxl_event_trace_record(const struct cxl_memdev *cxlmd, - enum cxl_event_log_type type, - struct cxl_event_record_raw *record) +static void __cxl_event_handle_record(struct cxl_memdev *cxlmd, + enum cxl_event_log_type type, + struct cxl_event_record_raw *record) { enum cxl_event_type ev_type = CXL_CPER_EVENT_GENERIC; const uuid_t *uuid = &record->id; @@ -867,7 +941,7 @@ static void __cxl_event_trace_record(const struct cxl_memdev *cxlmd, else if (uuid_equal(uuid, &CXL_EVENT_MEM_MODULE_UUID)) ev_type = CXL_CPER_EVENT_MEM_MODULE; - cxl_event_trace_record(cxlmd, type, ev_type, uuid, &record->event); + 
cxl_event_handle_record(cxlmd, type, ev_type, uuid, &record->event); } static int cxl_clear_event_record(struct cxl_memdev_state *mds, @@ -978,8 +1052,8 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds, break; for (i = 0; i < nr_rec; i++) - __cxl_event_trace_record(cxlmd, type, - &payload->records[i]); + __cxl_event_handle_record(cxlmd, type, + &payload->records[i]); if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW) trace_cxl_overflow(cxlmd, type, payload); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 1f03130b9d6a..dfd7bdd0d66a 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -822,10 +822,10 @@ void set_exclusive_cxl_commands(struct cxl_memdev_state *mds, void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds, unsigned long *cmds); void cxl_mem_get_event_records(struct cxl_memdev_state *mds, u32 status); -void cxl_event_trace_record(const struct cxl_memdev *cxlmd, - enum cxl_event_log_type type, - enum cxl_event_type event_type, - const uuid_t *uuid, union cxl_event *evt); +void cxl_event_handle_record(struct cxl_memdev *cxlmd, + enum cxl_event_log_type type, + enum cxl_event_type event_type, + const uuid_t *uuid, union cxl_event *evt); int cxl_set_timestamp(struct cxl_memdev_state *mds); int cxl_poison_state_init(struct cxl_memdev_state *mds); void cxl_mem_report_poison(struct cxl_memdev *cxlmd,