diff mbox series

[v13,3/9] cxl/mbox: Initialize the poison state

Message ID 9078d180769be28a5087288b38cdfc827cae58bf.1681838291.git.alison.schofield@intel.com
State Accepted
Commit d0abf5787adc0341a04667d3b4a23b4d0999af30
Headers show
Series CXL Poison List Retrieval & Tracing | expand

Commit Message

Alison Schofield April 18, 2023, 5:39 p.m. UTC
From: Alison Schofield <alison.schofield@intel.com>

Driver reads of the poison list are synchronized to ensure that a
reader does not get an incomplete list because their request
overlapped (was interrupted or preceded by) another read request
of the same DPA range. (CXL Spec 3.0 Section 8.2.9.8.4.1). The
driver maintains state information to achieve this goal.

To initialize the state, first recognize the poison commands in
the CEL (Command Effects Log). If the device supports Get Poison
List, allocate a single buffer for the poison list and protect it
with a lock.

Signed-off-by: Alison Schofield <alison.schofield@intel.com>
---
 drivers/cxl/core/mbox.c | 81 ++++++++++++++++++++++++++++++++++++++++-
 drivers/cxl/cxlmem.h    | 34 +++++++++++++++++
 drivers/cxl/pci.c       |  4 ++
 3 files changed, 117 insertions(+), 2 deletions(-)

Comments

Dan Williams April 22, 2023, 3:22 a.m. UTC | #1
alison.schofield@ wrote:
> From: Alison Schofield <alison.schofield@intel.com>
> 
> Driver reads of the poison list are synchronized to ensure that a
> reader does not get an incomplete list because their request
> overlapped (was interrupted or preceded by) another read request
> of the same DPA range. (CXL Spec 3.0 Section 8.2.9.8.4.1). The
> driver maintains state information to achieve this goal.
> 
> To initialize the state, first recognize the poison commands in
> the CEL (Command Effects Log). If the device supports Get Poison
> List, allocate a single buffer for the poison list and protect it
> with a lock.
> 
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> ---
>  drivers/cxl/core/mbox.c | 81 ++++++++++++++++++++++++++++++++++++++++-
>  drivers/cxl/cxlmem.h    | 34 +++++++++++++++++
>  drivers/cxl/pci.c       |  4 ++
>  3 files changed, 117 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index fd1026970d3a..17737386283a 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -5,6 +5,7 @@
>  #include <linux/debugfs.h>
>  #include <linux/ktime.h>
>  #include <linux/mutex.h>
> +#include <cxlpci.h>
>  #include <cxlmem.h>
>  #include <cxl.h>
>  
> @@ -120,6 +121,43 @@ static bool cxl_is_security_command(u16 opcode)
>  	return false;
>  }
>  
> +static bool cxl_is_poison_command(u16 opcode)
> +{
> +#define CXL_MBOX_OP_POISON_CMDS 0x43
> +
> +	if ((opcode >> 8) == CXL_MBOX_OP_POISON_CMDS)
> +		return true;
> +
> +	return false;
> +}
> +
> +static void cxl_set_poison_cmd_enabled(struct cxl_poison_state *poison,
> +				       u16 opcode)
> +{
> +	switch (opcode) {
> +	case CXL_MBOX_OP_GET_POISON:
> +		set_bit(CXL_POISON_ENABLED_LIST, poison->enabled_cmds);
> +		break;
> +	case CXL_MBOX_OP_INJECT_POISON:
> +		set_bit(CXL_POISON_ENABLED_INJECT, poison->enabled_cmds);
> +		break;
> +	case CXL_MBOX_OP_CLEAR_POISON:
> +		set_bit(CXL_POISON_ENABLED_CLEAR, poison->enabled_cmds);
> +		break;
> +	case CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS:
> +		set_bit(CXL_POISON_ENABLED_SCAN_CAPS, poison->enabled_cmds);
> +		break;
> +	case CXL_MBOX_OP_SCAN_MEDIA:
> +		set_bit(CXL_POISON_ENABLED_SCAN_MEDIA, poison->enabled_cmds);
> +		break;
> +	case CXL_MBOX_OP_GET_SCAN_MEDIA:
> +		set_bit(CXL_POISON_ENABLED_SCAN_RESULTS, poison->enabled_cmds);
> +		break;
> +	default:
> +		break;
> +	}
> +}
> +
>  static struct cxl_mem_command *cxl_mem_find_command(u16 opcode)
>  {
>  	struct cxl_mem_command *c;
> @@ -635,13 +673,18 @@ static void cxl_walk_cel(struct cxl_dev_state *cxlds, size_t size, u8 *cel)
>  		u16 opcode = le16_to_cpu(cel_entry[i].opcode);
>  		struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
>  
> -		if (!cmd) {
> +		if (!cmd && !cxl_is_poison_command(opcode)) {
>  			dev_dbg(cxlds->dev,
>  				"Opcode 0x%04x unsupported by driver\n", opcode);
>  			continue;
>  		}
>  
> -		set_bit(cmd->info.id, cxlds->enabled_cmds);
> +		if (cmd)
> +			set_bit(cmd->info.id, cxlds->enabled_cmds);
> +
> +		if (cxl_is_poison_command(opcode))
> +			cxl_set_poison_cmd_enabled(&cxlds->poison, opcode);
> +
>  		dev_dbg(cxlds->dev, "Opcode 0x%04x enabled\n", opcode);

Ah, nice I like how you suppressed the unsupported by driver message
which could have lead to confusion. I think we should duplicate this
approach for the DCD command set.

Looks good.
Jonathan Cameron April 23, 2023, 3:28 p.m. UTC | #2
On Tue, 18 Apr 2023 10:39:03 -0700
alison.schofield@intel.com wrote:

> From: Alison Schofield <alison.schofield@intel.com>
> 
> Driver reads of the poison list are synchronized to ensure that a
> reader does not get an incomplete list because their request
> overlapped (was interrupted or preceded by) another read request
> of the same DPA range. (CXL Spec 3.0 Section 8.2.9.8.4.1). The
> driver maintains state information to achieve this goal.
> 
> To initialize the state, first recognize the poison commands in
> the CEL (Command Effects Log). If the device supports Get Poison
> List, allocate a single buffer for the poison list and protect it
> with a lock.
> 
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Nice. One passing comment inline.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

> ---
>  drivers/cxl/core/mbox.c | 81 ++++++++++++++++++++++++++++++++++++++++-
>  drivers/cxl/cxlmem.h    | 34 +++++++++++++++++
>  drivers/cxl/pci.c       |  4 ++
>  3 files changed, 117 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index fd1026970d3a..17737386283a 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -5,6 +5,7 @@
>  #include <linux/debugfs.h>
>  #include <linux/ktime.h>
>  #include <linux/mutex.h>
> +#include <cxlpci.h>
>  #include <cxlmem.h>
>  #include <cxl.h>
>  
> @@ -120,6 +121,43 @@ static bool cxl_is_security_command(u16 opcode)
>  	return false;
>  }
>  
> +static bool cxl_is_poison_command(u16 opcode)
> +{
> +#define CXL_MBOX_OP_POISON_CMDS 0x43
> +
> +	if ((opcode >> 8) == CXL_MBOX_OP_POISON_CMDS)

Perhaps at somepoint we should add macros to split the opcodes up?
Doesn't need to be in this series, but if we get a lot of opcode >> 8 it
might be nice (there are two that I'm seeing so far).

> +		return true;
> +
> +	return false;
> +}
> +
> +static void cxl_set_poison_cmd_enabled(struct cxl_poison_state *poison,
> +				       u16 opcode)
> +{
> +	switch (opcode) {
> +	case CXL_MBOX_OP_GET_POISON:
> +		set_bit(CXL_POISON_ENABLED_LIST, poison->enabled_cmds);
> +		break;
> +	case CXL_MBOX_OP_INJECT_POISON:
> +		set_bit(CXL_POISON_ENABLED_INJECT, poison->enabled_cmds);
> +		break;
> +	case CXL_MBOX_OP_CLEAR_POISON:
> +		set_bit(CXL_POISON_ENABLED_CLEAR, poison->enabled_cmds);
> +		break;
> +	case CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS:
> +		set_bit(CXL_POISON_ENABLED_SCAN_CAPS, poison->enabled_cmds);
> +		break;
> +	case CXL_MBOX_OP_SCAN_MEDIA:
> +		set_bit(CXL_POISON_ENABLED_SCAN_MEDIA, poison->enabled_cmds);
> +		break;
> +	case CXL_MBOX_OP_GET_SCAN_MEDIA:
> +		set_bit(CXL_POISON_ENABLED_SCAN_RESULTS, poison->enabled_cmds);
> +		break;
> +	default:
> +		break;
> +	}
> +}
> +
>  static struct cxl_mem_command *cxl_mem_find_command(u16 opcode)
>  {
>  	struct cxl_mem_command *c;
> @@ -635,13 +673,18 @@ static void cxl_walk_cel(struct cxl_dev_state *cxlds, size_t size, u8 *cel)
>  		u16 opcode = le16_to_cpu(cel_entry[i].opcode);
>  		struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
>  
> -		if (!cmd) {
> +		if (!cmd && !cxl_is_poison_command(opcode)) {
>  			dev_dbg(cxlds->dev,
>  				"Opcode 0x%04x unsupported by driver\n", opcode);
>  			continue;
>  		}
>  
> -		set_bit(cmd->info.id, cxlds->enabled_cmds);
> +		if (cmd)
> +			set_bit(cmd->info.id, cxlds->enabled_cmds);
> +
> +		if (cxl_is_poison_command(opcode))
> +			cxl_set_poison_cmd_enabled(&cxlds->poison, opcode);
> +
>  		dev_dbg(cxlds->dev, "Opcode 0x%04x enabled\n", opcode);
>  	}
>  }
> @@ -1108,6 +1151,40 @@ int cxl_set_timestamp(struct cxl_dev_state *cxlds)
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_set_timestamp, CXL);
>  
> +static void free_poison_buf(void *buf)
> +{
> +	kvfree(buf);
> +}
> +
> +/* Get Poison List output buffer is protected by cxlds->poison.lock */
> +static int cxl_poison_alloc_buf(struct cxl_dev_state *cxlds)
> +{
> +	cxlds->poison.list_out = kvmalloc(cxlds->payload_size, GFP_KERNEL);
> +	if (!cxlds->poison.list_out)
> +		return -ENOMEM;
> +
> +	return devm_add_action_or_reset(cxlds->dev, free_poison_buf,
> +					cxlds->poison.list_out);
> +}
> +
> +int cxl_poison_state_init(struct cxl_dev_state *cxlds)
> +{
> +	int rc;
> +
> +	if (!test_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds))
> +		return 0;
> +
> +	rc = cxl_poison_alloc_buf(cxlds);
> +	if (rc) {
> +		clear_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds);
> +		return rc;
> +	}
> +
> +	mutex_init(&cxlds->poison.lock);
> +	return 0;
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_poison_state_init, CXL);
> +
>  struct cxl_dev_state *cxl_dev_state_create(struct device *dev)
>  {
>  	struct cxl_dev_state *cxlds;
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index ccbafc05a636..16e0241d72a9 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -215,6 +215,37 @@ struct cxl_event_state {
>  	struct mutex log_lock;
>  };
>  
> +/* Device enabled poison commands */
> +enum poison_cmd_enabled_bits {
> +	CXL_POISON_ENABLED_LIST,
> +	CXL_POISON_ENABLED_INJECT,
> +	CXL_POISON_ENABLED_CLEAR,
> +	CXL_POISON_ENABLED_SCAN_CAPS,
> +	CXL_POISON_ENABLED_SCAN_MEDIA,
> +	CXL_POISON_ENABLED_SCAN_RESULTS,
> +	CXL_POISON_ENABLED_MAX
> +};
> +
> +/**
> + * struct cxl_poison_state - Driver poison state info
> + *
> + * @max_errors: Maximum media error records held in device cache
> + * @enabled_cmds: All poison commands enabled in the CEL
> + * @list_out: The poison list payload returned by device
> + * @lock: Protect reads of the poison list
> + *
> + * Reads of the poison list are synchronized to ensure that a reader
> + * does not get an incomplete list because their request overlapped
> + * (was interrupted or preceded by) another read request of the same
> + * DPA range. CXL Spec 3.0 Section 8.2.9.8.4.1
> + */
> +struct cxl_poison_state {
> +	u32 max_errors;
> +	DECLARE_BITMAP(enabled_cmds, CXL_POISON_ENABLED_MAX);
> +	struct cxl_mbox_poison_out *list_out;
> +	struct mutex lock;  /* Protect reads of poison list */
> +};
> +
>  /**
>   * struct cxl_dev_state - The driver device state
>   *
> @@ -251,6 +282,7 @@ struct cxl_event_state {
>   * @serial: PCIe Device Serial Number
>   * @doe_mbs: PCI DOE mailbox array
>   * @event: event log driver state
> + * @poison: poison driver state info
>   * @mbox_send: @dev specific transport for transmitting mailbox commands
>   *
>   * See section 8.2.9.5.2 Capacity Configuration and Label Storage for
> @@ -290,6 +322,7 @@ struct cxl_dev_state {
>  	struct xarray doe_mbs;
>  
>  	struct cxl_event_state event;
> +	struct cxl_poison_state poison;
>  
>  	int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd);
>  };
> @@ -608,6 +641,7 @@ void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds
>  void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
>  void cxl_mem_get_event_records(struct cxl_dev_state *cxlds, u32 status);
>  int cxl_set_timestamp(struct cxl_dev_state *cxlds);
> +int cxl_poison_state_init(struct cxl_dev_state *cxlds);
>  
>  #ifdef CONFIG_CXL_SUSPEND
>  void cxl_mem_active_inc(void);
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index 60b23624d167..827ea0895778 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -769,6 +769,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  	if (rc)
>  		return rc;
>  
> +	rc = cxl_poison_state_init(cxlds);
> +	if (rc)
> +		return rc;
> +
>  	rc = cxl_dev_state_identify(cxlds);
>  	if (rc)
>  		return rc;
diff mbox series

Patch

diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index fd1026970d3a..17737386283a 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -5,6 +5,7 @@ 
 #include <linux/debugfs.h>
 #include <linux/ktime.h>
 #include <linux/mutex.h>
+#include <cxlpci.h>
 #include <cxlmem.h>
 #include <cxl.h>
 
@@ -120,6 +121,43 @@  static bool cxl_is_security_command(u16 opcode)
 	return false;
 }
 
+static bool cxl_is_poison_command(u16 opcode)
+{
+#define CXL_MBOX_OP_POISON_CMDS 0x43
+
+	if ((opcode >> 8) == CXL_MBOX_OP_POISON_CMDS)
+		return true;
+
+	return false;
+}
+
+static void cxl_set_poison_cmd_enabled(struct cxl_poison_state *poison,
+				       u16 opcode)
+{
+	switch (opcode) {
+	case CXL_MBOX_OP_GET_POISON:
+		set_bit(CXL_POISON_ENABLED_LIST, poison->enabled_cmds);
+		break;
+	case CXL_MBOX_OP_INJECT_POISON:
+		set_bit(CXL_POISON_ENABLED_INJECT, poison->enabled_cmds);
+		break;
+	case CXL_MBOX_OP_CLEAR_POISON:
+		set_bit(CXL_POISON_ENABLED_CLEAR, poison->enabled_cmds);
+		break;
+	case CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS:
+		set_bit(CXL_POISON_ENABLED_SCAN_CAPS, poison->enabled_cmds);
+		break;
+	case CXL_MBOX_OP_SCAN_MEDIA:
+		set_bit(CXL_POISON_ENABLED_SCAN_MEDIA, poison->enabled_cmds);
+		break;
+	case CXL_MBOX_OP_GET_SCAN_MEDIA:
+		set_bit(CXL_POISON_ENABLED_SCAN_RESULTS, poison->enabled_cmds);
+		break;
+	default:
+		break;
+	}
+}
+
 static struct cxl_mem_command *cxl_mem_find_command(u16 opcode)
 {
 	struct cxl_mem_command *c;
@@ -635,13 +673,18 @@  static void cxl_walk_cel(struct cxl_dev_state *cxlds, size_t size, u8 *cel)
 		u16 opcode = le16_to_cpu(cel_entry[i].opcode);
 		struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
 
-		if (!cmd) {
+		if (!cmd && !cxl_is_poison_command(opcode)) {
 			dev_dbg(cxlds->dev,
 				"Opcode 0x%04x unsupported by driver\n", opcode);
 			continue;
 		}
 
-		set_bit(cmd->info.id, cxlds->enabled_cmds);
+		if (cmd)
+			set_bit(cmd->info.id, cxlds->enabled_cmds);
+
+		if (cxl_is_poison_command(opcode))
+			cxl_set_poison_cmd_enabled(&cxlds->poison, opcode);
+
 		dev_dbg(cxlds->dev, "Opcode 0x%04x enabled\n", opcode);
 	}
 }
@@ -1108,6 +1151,40 @@  int cxl_set_timestamp(struct cxl_dev_state *cxlds)
 }
 EXPORT_SYMBOL_NS_GPL(cxl_set_timestamp, CXL);
 
+static void free_poison_buf(void *buf)
+{
+	kvfree(buf);
+}
+
+/* Get Poison List output buffer is protected by cxlds->poison.lock */
+static int cxl_poison_alloc_buf(struct cxl_dev_state *cxlds)
+{
+	cxlds->poison.list_out = kvmalloc(cxlds->payload_size, GFP_KERNEL);
+	if (!cxlds->poison.list_out)
+		return -ENOMEM;
+
+	return devm_add_action_or_reset(cxlds->dev, free_poison_buf,
+					cxlds->poison.list_out);
+}
+
+int cxl_poison_state_init(struct cxl_dev_state *cxlds)
+{
+	int rc;
+
+	if (!test_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds))
+		return 0;
+
+	rc = cxl_poison_alloc_buf(cxlds);
+	if (rc) {
+		clear_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds);
+		return rc;
+	}
+
+	mutex_init(&cxlds->poison.lock);
+	return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_poison_state_init, CXL);
+
 struct cxl_dev_state *cxl_dev_state_create(struct device *dev)
 {
 	struct cxl_dev_state *cxlds;
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index ccbafc05a636..16e0241d72a9 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -215,6 +215,37 @@  struct cxl_event_state {
 	struct mutex log_lock;
 };
 
+/* Device enabled poison commands */
+enum poison_cmd_enabled_bits {
+	CXL_POISON_ENABLED_LIST,
+	CXL_POISON_ENABLED_INJECT,
+	CXL_POISON_ENABLED_CLEAR,
+	CXL_POISON_ENABLED_SCAN_CAPS,
+	CXL_POISON_ENABLED_SCAN_MEDIA,
+	CXL_POISON_ENABLED_SCAN_RESULTS,
+	CXL_POISON_ENABLED_MAX
+};
+
+/**
+ * struct cxl_poison_state - Driver poison state info
+ *
+ * @max_errors: Maximum media error records held in device cache
+ * @enabled_cmds: All poison commands enabled in the CEL
+ * @list_out: The poison list payload returned by device
+ * @lock: Protect reads of the poison list
+ *
+ * Reads of the poison list are synchronized to ensure that a reader
+ * does not get an incomplete list because their request overlapped
+ * (was interrupted or preceded by) another read request of the same
+ * DPA range. CXL Spec 3.0 Section 8.2.9.8.4.1
+ */
+struct cxl_poison_state {
+	u32 max_errors;
+	DECLARE_BITMAP(enabled_cmds, CXL_POISON_ENABLED_MAX);
+	struct cxl_mbox_poison_out *list_out;
+	struct mutex lock;  /* Protect reads of poison list */
+};
+
 /**
  * struct cxl_dev_state - The driver device state
  *
@@ -251,6 +282,7 @@  struct cxl_event_state {
  * @serial: PCIe Device Serial Number
  * @doe_mbs: PCI DOE mailbox array
  * @event: event log driver state
+ * @poison: poison driver state info
  * @mbox_send: @dev specific transport for transmitting mailbox commands
  *
  * See section 8.2.9.5.2 Capacity Configuration and Label Storage for
@@ -290,6 +322,7 @@  struct cxl_dev_state {
 	struct xarray doe_mbs;
 
 	struct cxl_event_state event;
+	struct cxl_poison_state poison;
 
 	int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd);
 };
@@ -608,6 +641,7 @@  void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds
 void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
 void cxl_mem_get_event_records(struct cxl_dev_state *cxlds, u32 status);
 int cxl_set_timestamp(struct cxl_dev_state *cxlds);
+int cxl_poison_state_init(struct cxl_dev_state *cxlds);
 
 #ifdef CONFIG_CXL_SUSPEND
 void cxl_mem_active_inc(void);
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 60b23624d167..827ea0895778 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -769,6 +769,10 @@  static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (rc)
 		return rc;
 
+	rc = cxl_poison_state_init(cxlds);
+	if (rc)
+		return rc;
+
 	rc = cxl_dev_state_identify(cxlds);
 	if (rc)
 		return rc;