
[RFC,v2,6/6] cxl/core: add poison injection event handler

Message ID: 20240329063614.362763-7-ruansy.fnst@fujitsu.com
State: New, archived
Series: cxl: add poison event handler

Commit Message

Shiyang Ruan March 29, 2024, 6:36 a.m. UTC
Currently the driver only traces CXL events; poison injection (for both
the volatile and persistent types) on a CXL memdev is silent.  The OS
needs to be notified so that it can handle the poisoned range in time.
Per the CXL spec, device error events can be signaled through either the
FW-First or the OS-First method.

So, add a poison event handler for the OS-First method:
  - qemu:
    - the CXL device reports the POISON event to the OS via MSI by
      sending a GMER after a poison record is injected
  - CXL driver:
    a. parse the POISON event from the GMER;        <-- this patch
    b. retrieve the POISON list from the memdev;
    c. translate the poisoned DPA to an HPA;
    d. enqueue the poisoned PFN to memory_failure's work queue
       (see the sketch below);
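
For illustration, a minimal sketch (not part of this patch) of what step
d. boils down to once step c. has produced an HPA.  memory_failure_queue()
is the existing interface from mm/memory-failure.c; everything else here
is simplified and assumes the region code already did the DPA->HPA
translation:

  #include <linux/types.h>
  #include <linux/mm.h>
  #include <linux/pfn.h>

  /* Sketch only: hpa is assumed to come from the DPA->HPA translation */
  static void cxl_queue_poison_pfn_sketch(u64 hpa)
  {
  	unsigned long pfn = PHYS_PFN(hpa);

  	if (!pfn_valid(pfn))
  		return;

  	/* flags == 0: defer severity handling to memory_failure() itself */
  	memory_failure_queue(pfn, 0);
  }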

Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
---

The reply to Jonathan's comment on the last version:
> I'm not 100% convinced this is necessary poison causing.  Also
> the text tells us we should see 'an appropriate event'.
> DRAM one seems likely to be chosen by some vendors.
I think it's right to use the DRAM Event Record for a volatile memdev,
but should poison on a persistent memdev use the DRAM Event Record too?
Its 'Physical Address' field also has the 'Volatile' bit, the same as
the General Media Event Record's, so I am a bit confused about this.
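
For reference, the 'Physical Address' field layout appears to be the same
in both record formats: the low bits are flags and the rest is the DPA.
The masks below mirror the existing definitions in drivers/cxl/core/trace.h
(which this patch relies on); the two helpers are only illustrative:

  #include <linux/bits.h>
  #include <linux/types.h>

  #define CXL_DPA_FLAGS_MASK	0x3F
  #define CXL_DPA_MASK		(~CXL_DPA_FLAGS_MASK)
  #define CXL_DPA_VOLATILE	BIT(0)	/* set: volatile, clear: persistent */

  /* Illustrative helpers, not proposed API */
  static inline bool cxl_evt_addr_is_volatile(u64 phys_addr)
  {
  	return phys_addr & CXL_DPA_VOLATILE;
  }

  static inline u64 cxl_evt_addr_to_dpa(u64 phys_addr)
  {
  	return phys_addr & CXL_DPA_MASK;	/* strip the flag bits */
  }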

---
 drivers/cxl/core/mbox.c | 100 ++++++++++++++++++++++++++++++++++------
 drivers/cxl/cxlmem.h    |   8 ++--
 2 files changed, 91 insertions(+), 17 deletions(-)

Comments

Alison Schofield March 29, 2024, 6:27 p.m. UTC | #1
On Fri, Mar 29, 2024 at 02:36:14PM +0800, Shiyang Ruan wrote:
> Currently the driver only traces CXL events; poison injection (for both
> the volatile and persistent types) on a CXL memdev is silent.  The OS
> needs to be notified so that it can handle the poisoned range in time.
> Per the CXL spec, device error events can be signaled through either the
> FW-First or the OS-First method.
> 
> So, add a poison event handler for the OS-First method:
>   - qemu:
>     - the CXL device reports the POISON event to the OS via MSI by
>       sending a GMER after a poison record is injected
>   - CXL driver:
>     a. parse the POISON event from the GMER;        <-- this patch
>     b. retrieve the POISON list from the memdev;
>     c. translate the poisoned DPA to an HPA;
>     d. enqueue the poisoned PFN to memory_failure's work queue;
> 
> Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
> ---
> 
> the reply to Jonathan's comment in last version:
> > I'm not 100% convinced this is necessary poison causing.  Also
> > the text tells us we should see 'an appropriate event'.
> > DRAM one seems likely to be chosen by some vendors.
> I think it's right to use the DRAM Event Record for a volatile memdev,
> but should poison on a persistent memdev use the DRAM Event Record too?
> Its 'Physical Address' field also has the 'Volatile' bit, the same as
> the General Media Event Record's, so I am a bit confused about this.


Similar thought to the one shared in the cover letter -
Can the driver trigger a new poison list read on any event of interest,
and implement a policy to report memory failures for all poison list
reads that hit mapped addresses?

I guess if the answer to that question is NO - i.e. we only report
memory failures on GMER/poison - can we find more synergy and not
repeat so much work?
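
Roughly the shape I have in mind (purely a sketch; the *_sketch() helpers
do not exist and only stand in for lookups the region code would provide):

  #include <linux/types.h>

  struct cxl_memdev;
  struct cxl_region;

  /* hypothetical: which committed region (if any) maps this DPA? */
  struct cxl_region *cxl_dpa_to_region_sketch(struct cxl_memdev *cxlmd, u64 dpa);
  /* hypothetical: translate DPA to HPA and queue memory_failure() */
  void cxl_report_poison_range_sketch(struct cxl_region *cxlr, u64 dpa, u64 len);

  /* Called for each record returned by a Get Poison List read */
  static void cxl_poison_policy_report(struct cxl_memdev *cxlmd, u64 dpa, u64 len)
  {
  	struct cxl_region *cxlr = cxl_dpa_to_region_sketch(cxlmd, dpa);

  	if (!cxlr)
  		return;		/* unmapped DPA: nothing for the OS to do */

  	cxl_report_poison_range_sketch(cxlr, dpa, len);
  }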

--Alison


Patch

diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 19b46fb06ed6..97ef45d808b8 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -837,25 +837,99 @@  int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
 }
 EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
 
-void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
-			    enum cxl_event_log_type type,
-			    enum cxl_event_type event_type,
-			    const uuid_t *uuid, union cxl_event *evt)
+struct cxl_event_poison_context {
+	u64 dpa;
+	u64 length;
+};
+
+static int __cxl_report_poison(struct device *dev, void *arg)
+{
+	struct cxl_event_poison_context *ctx = arg;
+	struct cxl_endpoint_decoder *cxled;
+	struct cxl_memdev *cxlmd;
+
+	cxled = to_cxl_endpoint_decoder(dev);
+	if (!cxled || !cxled->dpa_res || !resource_size(cxled->dpa_res))
+		return 0;
+
+	if (cxled->mode == CXL_DECODER_MIXED) {
+		dev_dbg(dev, "poison list read unsupported in mixed mode\n");
+		return 0;
+	}
+
+	if (ctx->dpa > cxled->dpa_res->end || ctx->dpa < cxled->dpa_res->start)
+		return 0;
+
+	cxlmd = cxled_to_memdev(cxled);
+	cxl_mem_get_poison(cxlmd, ctx->dpa, ctx->length, cxled->cxld.region,
+			   true);
+
+	return 1;
+}
+
+static void cxl_event_handle_poison(struct cxl_memdev *cxlmd,
+				    struct cxl_event_gen_media *rec)
+{
+	struct cxl_port *port = cxlmd->endpoint;
+	u64 phys_addr = le64_to_cpu(rec->phys_addr);
+	struct cxl_event_poison_context ctx = {
+		.dpa = phys_addr & CXL_DPA_MASK,
+	};
+
+	/* No regions are mapped to this memdev, i.e. no HPA is mapped */
+	if (!port || !is_cxl_endpoint(port) ||
+	    cxl_num_decoders_committed(port) == 0)
+		return;
+
+	/*
+	 * Host Inject Poison may cover a range of DPA, but the GMER only
+	 * carries a "Physical Address" field and nothing that indicates the
+	 * length.  So call cxl_mem_get_poison() to look up the poison record.
+	 */
+	ctx.length = phys_addr & CXL_DPA_VOLATILE ?
+			resource_size(&cxlmd->cxlds->ram_res) :
+			resource_size(&cxlmd->cxlds->pmem_res) - ctx.dpa;
+
+	device_for_each_child(&port->dev, &ctx, __cxl_report_poison);
+}
+
+static void cxl_event_handle_general_media(struct cxl_memdev *cxlmd,
+					   enum cxl_event_log_type type,
+					   struct cxl_event_gen_media *rec)
+{
+	if (type == CXL_EVENT_TYPE_FAIL) {
+		switch (rec->transaction_type) {
+		case CXL_EVENT_TRANSACTION_READ:
+		case CXL_EVENT_TRANSACTION_WRITE:
+		case CXL_EVENT_TRANSACTION_INJECT_POISON:
+			cxl_event_handle_poison(cxlmd, rec);
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+void cxl_event_handle_record(struct cxl_memdev *cxlmd,
+			     enum cxl_event_log_type type,
+			     enum cxl_event_type event_type,
+			     const uuid_t *uuid, union cxl_event *evt)
 {
-	if (event_type == CXL_CPER_EVENT_GEN_MEDIA)
+	if (event_type == CXL_CPER_EVENT_GEN_MEDIA) {
 		trace_cxl_general_media(cxlmd, type, &evt->gen_media);
-	else if (event_type == CXL_CPER_EVENT_DRAM)
+		cxl_event_handle_general_media(cxlmd, type, &evt->gen_media);
+	} else if (event_type == CXL_CPER_EVENT_DRAM)
 		trace_cxl_dram(cxlmd, type, &evt->dram);
 	else if (event_type == CXL_CPER_EVENT_MEM_MODULE)
 		trace_cxl_memory_module(cxlmd, type, &evt->mem_module);
 	else
 		trace_cxl_generic_event(cxlmd, type, uuid, &evt->generic);
 }
-EXPORT_SYMBOL_NS_GPL(cxl_event_trace_record, CXL);
+EXPORT_SYMBOL_NS_GPL(cxl_event_handle_record, CXL);
 
-static void __cxl_event_trace_record(const struct cxl_memdev *cxlmd,
-				     enum cxl_event_log_type type,
-				     struct cxl_event_record_raw *record)
+static void __cxl_event_handle_record(struct cxl_memdev *cxlmd,
+				      enum cxl_event_log_type type,
+				      struct cxl_event_record_raw *record)
 {
 	enum cxl_event_type ev_type = CXL_CPER_EVENT_GENERIC;
 	const uuid_t *uuid = &record->id;
@@ -867,7 +941,7 @@  static void __cxl_event_trace_record(const struct cxl_memdev *cxlmd,
 	else if (uuid_equal(uuid, &CXL_EVENT_MEM_MODULE_UUID))
 		ev_type = CXL_CPER_EVENT_MEM_MODULE;
 
-	cxl_event_trace_record(cxlmd, type, ev_type, uuid, &record->event);
+	cxl_event_handle_record(cxlmd, type, ev_type, uuid, &record->event);
 }
 
 static int cxl_clear_event_record(struct cxl_memdev_state *mds,
@@ -978,8 +1052,8 @@  static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
 			break;
 
 		for (i = 0; i < nr_rec; i++)
-			__cxl_event_trace_record(cxlmd, type,
-						 &payload->records[i]);
+			__cxl_event_handle_record(cxlmd, type,
+						  &payload->records[i]);
 
 		if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW)
 			trace_cxl_overflow(cxlmd, type, payload);
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 1f03130b9d6a..dfd7bdd0d66a 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -822,10 +822,10 @@  void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
 void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds,
 				  unsigned long *cmds);
 void cxl_mem_get_event_records(struct cxl_memdev_state *mds, u32 status);
-void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
-			    enum cxl_event_log_type type,
-			    enum cxl_event_type event_type,
-			    const uuid_t *uuid, union cxl_event *evt);
+void cxl_event_handle_record(struct cxl_memdev *cxlmd,
+			     enum cxl_event_log_type type,
+			     enum cxl_event_type event_type,
+			     const uuid_t *uuid, union cxl_event *evt);
 int cxl_set_timestamp(struct cxl_memdev_state *mds);
 int cxl_poison_state_init(struct cxl_memdev_state *mds);
 void cxl_mem_report_poison(struct cxl_memdev *cxlmd,