Message ID | 46c7c7339224744fce424b196da3e5566effec17.1668115235.git.alison.schofield@intel.com |
---|---|
State | Superseded |
Headers | show |
Series | CXL Poison List Retrieval & Tracing | expand |
On Thu, 10 Nov 2022 19:12:40 -0800 alison.schofield@intel.com wrote: > From: Alison Schofield <alison.schofield@intel.com> > > CXL devices maintain a list of locations that are poisoned or result > in poison if the addresses are accessed by the host. > > Per the spec (CXL 3.0 8.2.9.8.4.1), the device returns this Poison > list as a set of Media Error Records that include the source of the > error, the starting device physical address and length. The length is > the number of adjacent DPAs in the record and is in units of 64 bytes. > > Retrieve the list and log each Media Error Record as a trace event of > type 'cxl_poison'. > > When the poison list is requested by region, include the region name > and uuid in the trace event. > > Signed-off-by: Alison Schofield <alison.schofield@intel.com> Hi Alison, I've forgotten most of previous discussions around versions of this series so I may well repeat things that were covered earlier! A few things inline. Thanks, Jonathan > --- > drivers/cxl/core/mbox.c | 81 +++++++++++++++++++++++++++++++++++++++++ > drivers/cxl/cxlmem.h | 37 +++++++++++++++++++ > 2 files changed, 118 insertions(+) > > diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c > index 0c90f13870a4..88f034e97812 100644 > --- a/drivers/cxl/core/mbox.c > +++ b/drivers/cxl/core/mbox.c > @@ -9,6 +9,9 @@ > > #include "core.h" > > +#define CREATE_TRACE_POINTS > +#include <trace/events/cxl.h> > + > static bool cxl_raw_allow_all; > > /** > @@ -752,6 +755,7 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds) > { > /* See CXL 2.0 Table 175 Identify Memory Device Output Payload */ > struct cxl_mbox_identify id; > + __le32 val = 0; > int rc; > > rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_IDENTIFY, NULL, 0, &id, > @@ -771,6 +775,9 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds) > cxlds->lsa_size = le32_to_cpu(id.lsa_size); > memcpy(cxlds->firmware_version, id.fw_revision, sizeof(id.fw_revision)); > > + memcpy(&val, id.poison_list_max_mer, 3); This is ugly. I've lost track of last discussion about get_unaligned_le24() and using it on elements of a packed structure. At very least can we do a memcpy to a u8[3] array and then use get_unaligned_le24() on that if we can't use it directly on the structure element? > + cxlds->poison_max = min_t(u32, le32_to_cpu(val), CXL_POISON_LIST_MAX); > + > return 0; > } > EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, CXL); > @@ -835,6 +842,79 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds) > } > EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, CXL); > > +int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, > + struct cxl_region *cxlr) > +{ > + struct cxl_dev_state *cxlds = cxlmd->cxlds; > + const char *memdev_name = dev_name(&cxlmd->dev); Could just do this where it's used rather than here. > + const char *pcidev_name = dev_name(cxlds->dev); Same with this. > + struct cxl_mbox_poison_payload_out *po; > + struct cxl_mbox_poison_payload_in pi; > + int nr_records = 0; > + int rc; > + > + po = kvmalloc(cxlds->payload_size, GFP_KERNEL); > + if (!po) > + return -ENOMEM; > + > + pi.offset = cpu_to_le64(offset); > + pi.length = cpu_to_le64(len); > + > + rc = mutex_lock_interruptible(&cxlds->poison_list_mutex); > + if (rc) > + goto out; > + > + do { > + rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_POISON, &pi, > + sizeof(pi), po, cxlds->payload_size); > + if (rc) > + break; > + > + if (trace_cxl_poison_enabled()) > + cxl_trace_poison(po, cxlr, memdev_name, pcidev_name); > + > + /* Protect against an uncleared _FLAG_MORE */ > + nr_records = nr_records + le16_to_cpu(po->count); > + if (nr_records >= cxlds->poison_max) { > + dev_dbg(&cxlmd->dev, "Max Error Records reached: %d\n", > + nr_records); > + break; > + } > + } while (po->flags & CXL_POISON_FLAG_MORE); > + > + mutex_unlock(&cxlds->poison_list_mutex); > +out: > + kvfree(po); > + return rc; > +} > +EXPORT_SYMBOL_NS_GPL(cxl_mem_get_poison, CXL); > + > struct cxl_dev_state *cxl_dev_state_create(struct device *dev) > { > struct cxl_dev_state *cxlds; > @@ -846,6 +926,7 @@ struct cxl_dev_state *cxl_dev_state_create(struct device *dev) > } > > mutex_init(&cxlds->mbox_mutex); > + mutex_init(&cxlds->poison_list_mutex); > cxlds->dev = dev; > > return cxlds; > diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h > index 669868cc1553..49d891347e39 100644 > --- a/drivers/cxl/cxlmem.h > +++ b/drivers/cxl/cxlmem.h > @@ -192,6 +192,8 @@ struct cxl_endpoint_dvsec_info { > * (CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register) > * @lsa_size: Size of Label Storage Area > * (CXL 2.0 8.2.9.5.1.1 Identify Memory Device) > + * @poison_max: maximum media error records held in device cache For consistency of capitalization: Maximum > + * @poison_list_mutex: Mutex to synchronize poison list retrieval > * @mbox_mutex: Mutex to synchronize mailbox access. > * @firmware_version: Firmware version for the memory device. > * @enabled_cmds: Hardware commands found enabled in CEL. > @@ -224,6 +226,8 @@ struct cxl_dev_state { > ...
On Wed, Nov 16, 2022 at 12:41:18PM +0000, Jonathan Cameron wrote: > On Thu, 10 Nov 2022 19:12:40 -0800 > alison.schofield@intel.com wrote: > > > From: Alison Schofield <alison.schofield@intel.com> > > > > CXL devices maintain a list of locations that are poisoned or result > > in poison if the addresses are accessed by the host. > > > > Per the spec (CXL 3.0 8.2.9.8.4.1), the device returns this Poison > > list as a set of Media Error Records that include the source of the > > error, the starting device physical address and length. The length is > > the number of adjacent DPAs in the record and is in units of 64 bytes. > > > > Retrieve the list and log each Media Error Record as a trace event of > > type 'cxl_poison'. > > > > When the poison list is requested by region, include the region name > > and uuid in the trace event. > > > > Signed-off-by: Alison Schofield <alison.schofield@intel.com> > Hi Alison, > > I've forgotten most of previous discussions around versions of this series > so I may well repeat things that were covered earlier! > > A few things inline. > > Thanks, > > Jonathan Thanks Jonathan. Got 'em all. > > > > --- snip > > > > > > + memcpy(&val, id.poison_list_max_mer, 3); > > This is ugly. I've lost track of last discussion about get_unaligned_le24() > and using it on elements of a packed structure. At very least can we > do a memcpy to a u8[3] array and then use get_unaligned_le24() on that if > we can't use it directly on the structure element? > Done, like this - val = get_unaligned_le24(id.poison_list_max_mer); > snip > > + > > +int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, > > + struct cxl_region *cxlr) > > +{ > > + struct cxl_dev_state *cxlds = cxlmd->cxlds; > > + const char *memdev_name = dev_name(&cxlmd->dev); > Could just do this where it's used rather than here. > > > + const char *pcidev_name = dev_name(cxlds->dev); > Same with this. > Done. > snip > ... > >
alison.schofield@ wrote: > From: Alison Schofield <alison.schofield@intel.com> > > CXL devices maintain a list of locations that are poisoned or result > in poison if the addresses are accessed by the host. > > Per the spec (CXL 3.0 8.2.9.8.4.1), the device returns this Poison > list as a set of Media Error Records that include the source of the > error, the starting device physical address and length. The length is > the number of adjacent DPAs in the record and is in units of 64 bytes. > > Retrieve the list and log each Media Error Record as a trace event of > type 'cxl_poison'. > > When the poison list is requested by region, include the region name > and uuid in the trace event. > > Signed-off-by: Alison Schofield <alison.schofield@intel.com> > --- > drivers/cxl/core/mbox.c | 81 +++++++++++++++++++++++++++++++++++++++++ > drivers/cxl/cxlmem.h | 37 +++++++++++++++++++ > 2 files changed, 118 insertions(+) > > diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c > index 0c90f13870a4..88f034e97812 100644 > --- a/drivers/cxl/core/mbox.c > +++ b/drivers/cxl/core/mbox.c > @@ -9,6 +9,9 @@ > > #include "core.h" > > +#define CREATE_TRACE_POINTS > +#include <trace/events/cxl.h> > + > static bool cxl_raw_allow_all; > > /** > @@ -752,6 +755,7 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds) > { > /* See CXL 2.0 Table 175 Identify Memory Device Output Payload */ > struct cxl_mbox_identify id; > + __le32 val = 0; > int rc; > > rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_IDENTIFY, NULL, 0, &id, > @@ -771,6 +775,9 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds) > cxlds->lsa_size = le32_to_cpu(id.lsa_size); > memcpy(cxlds->firmware_version, id.fw_revision, sizeof(id.fw_revision)); > > + memcpy(&val, id.poison_list_max_mer, 3); I see Jonathan already commented on the need for get_unaligned_le24() here, seconded. > + cxlds->poison_max = min_t(u32, le32_to_cpu(val), CXL_POISON_LIST_MAX); > + > return 0; > } > EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, CXL); > @@ -835,6 +842,79 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds) > } > EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, CXL); > > +static void cxl_trace_poison(struct cxl_mbox_poison_payload_out *po, > + struct cxl_region *cxlr, > + const char *memdev_name, > + const char *pcidev_name) Type-safety please. Pass a 'struct cxl_memdev *' and 'struct pci_dev *'. Might need to be 'struct device *' instead of 'struct pci_dev *' depending on if this needs to be called from cxl_test, but anything is better than a non-specific string. > +{ > + const char *region_name = cxlr ? dev_name(&cxlr->dev) : NULL; ...and push this conversion into the trace point. > + struct cxl_region_params *p = cxlr ? &cxlr->params : NULL; > + uuid_t *uuid = p ? &p->uuid : NULL; > + u64 addr, dpa, overflow_t = 0; > + u8 source; > + u32 len; > + > + if (po->flags & CXL_POISON_FLAG_OVERFLOW) > + overflow_t = le64_to_cpu(po->overflow_timestamp); > + > + for (int i = 0; i < le16_to_cpu(po->count); i++) { > + len = le32_to_cpu(po->record[i].length) * CXL_POISON_LEN_MULT; > + addr = le64_to_cpu(po->record[i].address); > + source = addr & CXL_POISON_SOURCE_MASK; > + dpa = addr & CXL_POISON_START_MASK; > + > + trace_cxl_poison(memdev_name, pcidev_name, region_name, uuid, > + dpa, len, source, po->flags, overflow_t); > + } > +} > + > +int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, > + struct cxl_region *cxlr) > +{ > + struct cxl_dev_state *cxlds = cxlmd->cxlds; > + const char *memdev_name = dev_name(&cxlmd->dev); > + const char *pcidev_name = dev_name(cxlds->dev); > + struct cxl_mbox_poison_payload_out *po; > + struct cxl_mbox_poison_payload_in pi; > + int nr_records = 0; > + int rc; > + > + po = kvmalloc(cxlds->payload_size, GFP_KERNEL); > + if (!po) > + return -ENOMEM; > + > + pi.offset = cpu_to_le64(offset); > + pi.length = cpu_to_le64(len); > + > + rc = mutex_lock_interruptible(&cxlds->poison_list_mutex); So I do not know what this mutex is protecting if there is an allocation per cxl_mem_get_poison() invocation. Although I suspect that's somewhat wasteful. Just allocate one buffer at the beginning of time and then use the lock to protect that buffer. Although, I wonder if this and Event handling should share locks and one preallocated buffer? Otherwise I do think it is important for Events and Poison handling to be able to make forward progress without needing to allocate up to a megabyte of memory at runtime. The other payload_size allocations are for one-off things that run at the beginning of time, but Poison and Events run repeatedly. > + if (rc) > + goto out; > + > + do { > + rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_POISON, &pi, > + sizeof(pi), po, cxlds->payload_size); > + if (rc) > + break; > + > + if (trace_cxl_poison_enabled()) > + cxl_trace_poison(po, cxlr, memdev_name, pcidev_name); > + > + /* Protect against an uncleared _FLAG_MORE */ > + nr_records = nr_records + le16_to_cpu(po->count); > + if (nr_records >= cxlds->poison_max) { > + dev_dbg(&cxlmd->dev, "Max Error Records reached: %d\n", > + nr_records); > + break; > + } > + } while (po->flags & CXL_POISON_FLAG_MORE); > + > + mutex_unlock(&cxlds->poison_list_mutex); > +out: > + kvfree(po); > + return rc; > +} > +EXPORT_SYMBOL_NS_GPL(cxl_mem_get_poison, CXL); > + > struct cxl_dev_state *cxl_dev_state_create(struct device *dev) > { > struct cxl_dev_state *cxlds; > @@ -846,6 +926,7 @@ struct cxl_dev_state *cxl_dev_state_create(struct device *dev) > } > > mutex_init(&cxlds->mbox_mutex); > + mutex_init(&cxlds->poison_list_mutex); > cxlds->dev = dev; > > return cxlds; > diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h > index 669868cc1553..49d891347e39 100644 > --- a/drivers/cxl/cxlmem.h > +++ b/drivers/cxl/cxlmem.h > @@ -192,6 +192,8 @@ struct cxl_endpoint_dvsec_info { > * (CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register) > * @lsa_size: Size of Label Storage Area > * (CXL 2.0 8.2.9.5.1.1 Identify Memory Device) > + * @poison_max: maximum media error records held in device cache > + * @poison_list_mutex: Mutex to synchronize poison list retrieval > * @mbox_mutex: Mutex to synchronize mailbox access. > * @firmware_version: Firmware version for the memory device. > * @enabled_cmds: Hardware commands found enabled in CEL. > @@ -224,6 +226,8 @@ struct cxl_dev_state { > > size_t payload_size; > size_t lsa_size; > + u32 poison_max; > + struct mutex poison_list_mutex; /* Protect reads of poison list */ > struct mutex mbox_mutex; /* Protects device mailbox and firmware */ > char firmware_version[0x10]; > DECLARE_BITMAP(enabled_cmds, CXL_MEM_COMMAND_ID_MAX); > @@ -349,6 +353,37 @@ struct cxl_mbox_set_partition_info { > > /* Get Poison List CXL 3.0 Spec 8.2.9.8.4.1 */ > > +struct cxl_mbox_poison_payload_in { > + __le64 offset; > + __le64 length; > +} __packed; > + > +struct cxl_mbox_poison_payload_out { > + u8 flags; > + u8 rsvd1; > + __le64 overflow_timestamp; > + __le16 count; > + u8 rsvd2[0x14]; Let's use decimal values for size. > + struct cxl_poison_record { > + __le64 address; > + __le32 length; > + __le32 rsvd; > + } __packed record[]; > +} __packed; > + > +/* > + * Get Poison List address field encodes the starting > + * address of poison, and the source of the poison. > + */ > +#define CXL_POISON_START_MASK GENMASK_ULL(63, 6) > +#define CXL_POISON_SOURCE_MASK GENMASK(2, 0) > + > +/* Get Poison List record length is in units of 64 bytes */ > +#define CXL_POISON_LEN_MULT 64 > + > +/* Kernel defined maximum for a list of poison errors */ > +#define CXL_POISON_LIST_MAX 1024 > + > /* Get Poison List: Payload out flags */ > #define CXL_POISON_FLAG_MORE BIT(0) > #define CXL_POISON_FLAG_OVERFLOW BIT(1) > @@ -395,6 +430,8 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds); > struct cxl_dev_state *cxl_dev_state_create(struct device *dev); > void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); > void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); > +int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, > + struct cxl_region *cxlr); > #ifdef CONFIG_CXL_SUSPEND > void cxl_mem_active_inc(void); > void cxl_mem_active_dec(void); > -- > 2.37.3 >
On Tue, Dec 06, 2022 at 06:41:34PM -0800, Dan Williams wrote: > alison.schofield@ wrote: > > From: Alison Schofield <alison.schofield@intel.com> > > > > CXL devices maintain a list of locations that are poisoned or result > > in poison if the addresses are accessed by the host. > > > > Per the spec (CXL 3.0 8.2.9.8.4.1), the device returns this Poison > > list as a set of Media Error Records that include the source of the > > error, the starting device physical address and length. The length is > > the number of adjacent DPAs in the record and is in units of 64 bytes. > > > > Retrieve the list and log each Media Error Record as a trace event of > > type 'cxl_poison'. > > > > When the poison list is requested by region, include the region name > > and uuid in the trace event. > > > > Signed-off-by: Alison Schofield <alison.schofield@intel.com> > > --- > > drivers/cxl/core/mbox.c | 81 +++++++++++++++++++++++++++++++++++++++++ > > drivers/cxl/cxlmem.h | 37 +++++++++++++++++++ > > 2 files changed, 118 insertions(+) > > > > diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c > > index 0c90f13870a4..88f034e97812 100644 > > --- a/drivers/cxl/core/mbox.c > > +++ b/drivers/cxl/core/mbox.c > > @@ -9,6 +9,9 @@ > > > > #include "core.h" > > > > +#define CREATE_TRACE_POINTS > > +#include <trace/events/cxl.h> > > + > > static bool cxl_raw_allow_all; > > > > /** > > @@ -752,6 +755,7 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds) > > { > > /* See CXL 2.0 Table 175 Identify Memory Device Output Payload */ > > struct cxl_mbox_identify id; > > + __le32 val = 0; > > int rc; > > > > rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_IDENTIFY, NULL, 0, &id, > > @@ -771,6 +775,9 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds) > > cxlds->lsa_size = le32_to_cpu(id.lsa_size); > > memcpy(cxlds->firmware_version, id.fw_revision, sizeof(id.fw_revision)); > > > > + memcpy(&val, id.poison_list_max_mer, 3); > > I see Jonathan already commented on the need for get_unaligned_le24() > here, seconded. > Got it. > > > + cxlds->poison_max = min_t(u32, le32_to_cpu(val), CXL_POISON_LIST_MAX); > > + > > return 0; > > } > > EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, CXL); > > @@ -835,6 +842,79 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds) > > } > > EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, CXL); > > > > +static void cxl_trace_poison(struct cxl_mbox_poison_payload_out *po, > > + struct cxl_region *cxlr, > > + const char *memdev_name, > > + const char *pcidev_name) > > Type-safety please. Pass a 'struct cxl_memdev *' and 'struct pci_dev *'. > Might need to be 'struct device *' instead of 'struct pci_dev *' > depending on if this needs to be called from cxl_test, but anything is > better than a non-specific string. > Got it. > > +{ > > + const char *region_name = cxlr ? dev_name(&cxlr->dev) : NULL; > > ...and push this conversion into the trace point. > Got that, and also pushed the decode of struct cxl_poison_record to the trace point like this: TP_PROTO(const struct device *memdev, const struct pci_dev *pcidev, const struct cxl_region *region, const struct cxl_poison_record *record, u8 flags, __le64 overflow_t), > > + struct cxl_region_params *p = cxlr ? &cxlr->params : NULL; > > + uuid_t *uuid = p ? &p->uuid : NULL; > > + u64 addr, dpa, overflow_t = 0; > > + u8 source; > > + u32 len; > > + > > + if (po->flags & CXL_POISON_FLAG_OVERFLOW) > > + overflow_t = le64_to_cpu(po->overflow_timestamp); > > + > > + for (int i = 0; i < le16_to_cpu(po->count); i++) { > > + len = le32_to_cpu(po->record[i].length) * CXL_POISON_LEN_MULT; > > + addr = le64_to_cpu(po->record[i].address); > > + source = addr & CXL_POISON_SOURCE_MASK; > > + dpa = addr & CXL_POISON_START_MASK; > > + > > + trace_cxl_poison(memdev_name, pcidev_name, region_name, uuid, > > + dpa, len, source, po->flags, overflow_t); > > + } > > +} > > + > > +int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, > > + struct cxl_region *cxlr) > > +{ > > + struct cxl_dev_state *cxlds = cxlmd->cxlds; > > + const char *memdev_name = dev_name(&cxlmd->dev); > > + const char *pcidev_name = dev_name(cxlds->dev); > > + struct cxl_mbox_poison_payload_out *po; > > + struct cxl_mbox_poison_payload_in pi; > > + int nr_records = 0; > > + int rc; > > + > > + po = kvmalloc(cxlds->payload_size, GFP_KERNEL); > > + if (!po) > > + return -ENOMEM; > > + > > + pi.offset = cpu_to_le64(offset); > > + pi.length = cpu_to_le64(len); > > + > > + rc = mutex_lock_interruptible(&cxlds->poison_list_mutex); > > So I do not know what this mutex is protecting if there is an allocation > per cxl_mem_get_poison() invocation. Although I suspect that's somewhat > wasteful. Just allocate one buffer at the beginning of time and then use > the lock to protect that buffer. Intent was a single lock on the device to protect the state of the poison list retrieval - do not allow > 1 reader. With > 1 reader software may not know if it retrieved the complete list. I'm not understanding about protecting a buffer, instead of protecting the state. Here I try to protect the state. > > Although, I wonder if this and Event handling should share locks and one > preallocated buffer? Otherwise I do think it is important for Events and > Poison handling to be able to make forward progress without needing to > allocate up to a megabyte of memory at runtime. The other payload_size > allocations are for one-off things that run at the beginning of time, > but Poison and Events run repeatedly. > > > snip
Alison Schofield wrote: > On Tue, Dec 06, 2022 at 06:41:34PM -0800, Dan Williams wrote: > > alison.schofield@ wrote: [..] > > > +int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, > > > + struct cxl_region *cxlr) > > > +{ > > > + struct cxl_dev_state *cxlds = cxlmd->cxlds; > > > + const char *memdev_name = dev_name(&cxlmd->dev); > > > + const char *pcidev_name = dev_name(cxlds->dev); > > > + struct cxl_mbox_poison_payload_out *po; > > > + struct cxl_mbox_poison_payload_in pi; > > > + int nr_records = 0; > > > + int rc; > > > + > > > + po = kvmalloc(cxlds->payload_size, GFP_KERNEL); > > > + if (!po) > > > + return -ENOMEM; > > > + > > > + pi.offset = cpu_to_le64(offset); > > > + pi.length = cpu_to_le64(len); > > > + > > > + rc = mutex_lock_interruptible(&cxlds->poison_list_mutex); > > > > So I do not know what this mutex is protecting if there is an allocation > > per cxl_mem_get_poison() invocation. Although I suspect that's somewhat > > wasteful. Just allocate one buffer at the beginning of time and then use > > the lock to protect that buffer. > > Intent was a single lock on the device to protect the state of the > poison list retrieval - do not allow > 1 reader. With > 1 reader > software may not know if it retrieved the complete list. > > I'm not understanding about protecting a buffer, instead of protecting > the state. Here I try to protect the state. Ah, sorry I read cxlds->poison_list_mutex and assumed it was serializing access to the buffer, not a state machine. I do think it would be worthwhile to make this a self contained structure with its own kdoc to explain what is going on, something like: diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index ab138004f644..02697d1d951c 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -193,6 +193,19 @@ struct cxl_endpoint_dvsec_info { struct range dvsec_range[2]; }; +/** + * struct cxl_poison_state - summary + * @payload: ... + * @lock: ... + * + * A bit more description of why state needs to be held over successive + * mbox commands... + */ +struct cxl_poison_state { + void *payload; + struct mutex lock; +}; + /** * struct cxl_dev_state - The driver device state * @@ -210,6 +223,7 @@ struct cxl_endpoint_dvsec_info { * @lsa_size: Size of Label Storage Area * (CXL 2.0 8.2.9.5.1.1 Identify Memory Device) * @mbox_mutex: Mutex to synchronize mailbox access. + * @poison: Poison list retrieval and tracing * @firmware_version: Firmware version for the memory device. * @enabled_cmds: Hardware commands found enabled in CEL. * @exclusive_cmds: Commands that are kernel-internal only @@ -244,6 +258,7 @@ struct cxl_dev_state { size_t payload_size; size_t lsa_size; struct mutex mbox_mutex; /* Protects device mailbox and firmware */ + struct cxl_poison_state poison; char firmware_version[0x10]; DECLARE_BITMAP(enabled_cmds, CXL_MEM_COMMAND_ID_MAX); DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);
On Wed, Dec 07, 2022 at 01:39:58PM -0800, Dan Williams wrote: > Alison Schofield wrote: > > On Tue, Dec 06, 2022 at 06:41:34PM -0800, Dan Williams wrote: > > > alison.schofield@ wrote: > [..] > > > > +int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, > > > > + struct cxl_region *cxlr) > > > > +{ > > > > + struct cxl_dev_state *cxlds = cxlmd->cxlds; > > > > + const char *memdev_name = dev_name(&cxlmd->dev); > > > > + const char *pcidev_name = dev_name(cxlds->dev); > > > > + struct cxl_mbox_poison_payload_out *po; > > > > + struct cxl_mbox_poison_payload_in pi; > > > > + int nr_records = 0; > > > > + int rc; > > > > + > > > > + po = kvmalloc(cxlds->payload_size, GFP_KERNEL); > > > > + if (!po) > > > > + return -ENOMEM; > > > > + > > > > + pi.offset = cpu_to_le64(offset); > > > > + pi.length = cpu_to_le64(len); > > > > + > > > > + rc = mutex_lock_interruptible(&cxlds->poison_list_mutex); > > > > > > So I do not know what this mutex is protecting if there is an allocation > > > per cxl_mem_get_poison() invocation. Although I suspect that's somewhat > > > wasteful. Just allocate one buffer at the beginning of time and then use > > > the lock to protect that buffer. > > > > Intent was a single lock on the device to protect the state of the > > poison list retrieval - do not allow > 1 reader. With > 1 reader > > software may not know if it retrieved the complete list. > > > > I'm not understanding about protecting a buffer, instead of protecting > > the state. Here I try to protect the state. > > Ah, sorry I read cxlds->poison_list_mutex and assumed it was serializing > access to the buffer, not a state machine. I do think it would be > worthwhile to make this a self contained structure with its own kdoc to > explain what is going on, something like: > OK thanks, it's getting through to me now. Will do ! > diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h > index ab138004f644..02697d1d951c 100644 > --- a/drivers/cxl/cxlmem.h > +++ b/drivers/cxl/cxlmem.h > @@ -193,6 +193,19 @@ struct cxl_endpoint_dvsec_info { > struct range dvsec_range[2]; > }; > > +/** > + * struct cxl_poison_state - summary > + * @payload: ... > + * @lock: ... > + * > + * A bit more description of why state needs to be held over successive > + * mbox commands... > + */ > +struct cxl_poison_state { > + void *payload; > + struct mutex lock; > +}; > + > /** > * struct cxl_dev_state - The driver device state > * > @@ -210,6 +223,7 @@ struct cxl_endpoint_dvsec_info { > * @lsa_size: Size of Label Storage Area > * (CXL 2.0 8.2.9.5.1.1 Identify Memory Device) > * @mbox_mutex: Mutex to synchronize mailbox access. > + * @poison: Poison list retrieval and tracing > * @firmware_version: Firmware version for the memory device. > * @enabled_cmds: Hardware commands found enabled in CEL. > * @exclusive_cmds: Commands that are kernel-internal only > @@ -244,6 +258,7 @@ struct cxl_dev_state { > size_t payload_size; > size_t lsa_size; > struct mutex mbox_mutex; /* Protects device mailbox and firmware */ > + struct cxl_poison_state poison; > char firmware_version[0x10]; > DECLARE_BITMAP(enabled_cmds, CXL_MEM_COMMAND_ID_MAX); > DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 0c90f13870a4..88f034e97812 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -9,6 +9,9 @@ #include "core.h" +#define CREATE_TRACE_POINTS +#include <trace/events/cxl.h> + static bool cxl_raw_allow_all; /** @@ -752,6 +755,7 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds) { /* See CXL 2.0 Table 175 Identify Memory Device Output Payload */ struct cxl_mbox_identify id; + __le32 val = 0; int rc; rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_IDENTIFY, NULL, 0, &id, @@ -771,6 +775,9 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds) cxlds->lsa_size = le32_to_cpu(id.lsa_size); memcpy(cxlds->firmware_version, id.fw_revision, sizeof(id.fw_revision)); + memcpy(&val, id.poison_list_max_mer, 3); + cxlds->poison_max = min_t(u32, le32_to_cpu(val), CXL_POISON_LIST_MAX); + return 0; } EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, CXL); @@ -835,6 +842,79 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds) } EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, CXL); +static void cxl_trace_poison(struct cxl_mbox_poison_payload_out *po, + struct cxl_region *cxlr, + const char *memdev_name, + const char *pcidev_name) +{ + const char *region_name = cxlr ? dev_name(&cxlr->dev) : NULL; + struct cxl_region_params *p = cxlr ? &cxlr->params : NULL; + uuid_t *uuid = p ? &p->uuid : NULL; + u64 addr, dpa, overflow_t = 0; + u8 source; + u32 len; + + if (po->flags & CXL_POISON_FLAG_OVERFLOW) + overflow_t = le64_to_cpu(po->overflow_timestamp); + + for (int i = 0; i < le16_to_cpu(po->count); i++) { + len = le32_to_cpu(po->record[i].length) * CXL_POISON_LEN_MULT; + addr = le64_to_cpu(po->record[i].address); + source = addr & CXL_POISON_SOURCE_MASK; + dpa = addr & CXL_POISON_START_MASK; + + trace_cxl_poison(memdev_name, pcidev_name, region_name, uuid, + dpa, len, source, po->flags, overflow_t); + } +} + +int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, + struct cxl_region *cxlr) +{ + struct cxl_dev_state *cxlds = cxlmd->cxlds; + const char *memdev_name = dev_name(&cxlmd->dev); + const char *pcidev_name = dev_name(cxlds->dev); + struct cxl_mbox_poison_payload_out *po; + struct cxl_mbox_poison_payload_in pi; + int nr_records = 0; + int rc; + + po = kvmalloc(cxlds->payload_size, GFP_KERNEL); + if (!po) + return -ENOMEM; + + pi.offset = cpu_to_le64(offset); + pi.length = cpu_to_le64(len); + + rc = mutex_lock_interruptible(&cxlds->poison_list_mutex); + if (rc) + goto out; + + do { + rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_POISON, &pi, + sizeof(pi), po, cxlds->payload_size); + if (rc) + break; + + if (trace_cxl_poison_enabled()) + cxl_trace_poison(po, cxlr, memdev_name, pcidev_name); + + /* Protect against an uncleared _FLAG_MORE */ + nr_records = nr_records + le16_to_cpu(po->count); + if (nr_records >= cxlds->poison_max) { + dev_dbg(&cxlmd->dev, "Max Error Records reached: %d\n", + nr_records); + break; + } + } while (po->flags & CXL_POISON_FLAG_MORE); + + mutex_unlock(&cxlds->poison_list_mutex); +out: + kvfree(po); + return rc; +} +EXPORT_SYMBOL_NS_GPL(cxl_mem_get_poison, CXL); + struct cxl_dev_state *cxl_dev_state_create(struct device *dev) { struct cxl_dev_state *cxlds; @@ -846,6 +926,7 @@ struct cxl_dev_state *cxl_dev_state_create(struct device *dev) } mutex_init(&cxlds->mbox_mutex); + mutex_init(&cxlds->poison_list_mutex); cxlds->dev = dev; return cxlds; diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 669868cc1553..49d891347e39 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -192,6 +192,8 @@ struct cxl_endpoint_dvsec_info { * (CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register) * @lsa_size: Size of Label Storage Area * (CXL 2.0 8.2.9.5.1.1 Identify Memory Device) + * @poison_max: maximum media error records held in device cache + * @poison_list_mutex: Mutex to synchronize poison list retrieval * @mbox_mutex: Mutex to synchronize mailbox access. * @firmware_version: Firmware version for the memory device. * @enabled_cmds: Hardware commands found enabled in CEL. @@ -224,6 +226,8 @@ struct cxl_dev_state { size_t payload_size; size_t lsa_size; + u32 poison_max; + struct mutex poison_list_mutex; /* Protect reads of poison list */ struct mutex mbox_mutex; /* Protects device mailbox and firmware */ char firmware_version[0x10]; DECLARE_BITMAP(enabled_cmds, CXL_MEM_COMMAND_ID_MAX); @@ -349,6 +353,37 @@ struct cxl_mbox_set_partition_info { /* Get Poison List CXL 3.0 Spec 8.2.9.8.4.1 */ +struct cxl_mbox_poison_payload_in { + __le64 offset; + __le64 length; +} __packed; + +struct cxl_mbox_poison_payload_out { + u8 flags; + u8 rsvd1; + __le64 overflow_timestamp; + __le16 count; + u8 rsvd2[0x14]; + struct cxl_poison_record { + __le64 address; + __le32 length; + __le32 rsvd; + } __packed record[]; +} __packed; + +/* + * Get Poison List address field encodes the starting + * address of poison, and the source of the poison. + */ +#define CXL_POISON_START_MASK GENMASK_ULL(63, 6) +#define CXL_POISON_SOURCE_MASK GENMASK(2, 0) + +/* Get Poison List record length is in units of 64 bytes */ +#define CXL_POISON_LEN_MULT 64 + +/* Kernel defined maximum for a list of poison errors */ +#define CXL_POISON_LIST_MAX 1024 + /* Get Poison List: Payload out flags */ #define CXL_POISON_FLAG_MORE BIT(0) #define CXL_POISON_FLAG_OVERFLOW BIT(1) @@ -395,6 +430,8 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds); struct cxl_dev_state *cxl_dev_state_create(struct device *dev); void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); +int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, + struct cxl_region *cxlr); #ifdef CONFIG_CXL_SUSPEND void cxl_mem_active_inc(void); void cxl_mem_active_dec(void);