Message ID | a77d5513c55177361749a27544409cb6a0c94dd5.1678468593.git.alison.schofield@intel.com |
---|---|
State | Superseded |
Headers | show |
Series | CXL Poison List Retrieval & Tracing | expand |
alison.schofield@ wrote: > From: Alison Schofield <alison.schofield@intel.com> > > CXL devices may support the retrieval of a device poison list. > Add a new trace event that the CXL subsystem may use to log > the media-error records returned in the poison list. > > Log each media-error record as a trace event of type 'cxl_poison'. > > Signed-off-by: Alison Schofield <alison.schofield@intel.com> > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > --- > drivers/cxl/core/mbox.c | 4 +- > drivers/cxl/core/trace.h | 84 ++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 87 insertions(+), 1 deletion(-) > > diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c > index 0da0a30511f2..77fc811bdfed 100644 > --- a/drivers/cxl/core/mbox.c > +++ b/drivers/cxl/core/mbox.c > @@ -1147,7 +1147,9 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, > if (rc) > break; > > - /* TODO TRACE the media error records */ > + for (int i = 0; i < le16_to_cpu(po->count); i++) > + trace_cxl_poison(cxlmd, cxlr, &po->record[i], > + po->flags, po->overflow_t); > > /* Protect against an uncleared _FLAG_MORE */ > nr_records = nr_records + le16_to_cpu(po->count); > diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h > index 9b8d3d997834..33a22d26e742 100644 > --- a/drivers/cxl/core/trace.h > +++ b/drivers/cxl/core/trace.h > @@ -7,6 +7,7 @@ > #define _CXL_EVENTS_H > > #include <linux/tracepoint.h> > +#include <linux/pci.h> > #include <asm-generic/unaligned.h> > > #include <cxl.h> > @@ -600,6 +601,89 @@ TRACE_EVENT(cxl_memory_module, > ) > ); > > +#define __show_poison_source(source) \ > + __print_symbolic(source, \ > + { CXL_POISON_SOURCE_UNKNOWN, "Unknown" }, \ > + { CXL_POISON_SOURCE_EXTERNAL, "External" }, \ > + { CXL_POISON_SOURCE_INTERNAL, "Internal" }, \ > + { CXL_POISON_SOURCE_INJECTED, "Injected" }, \ > + { CXL_POISON_SOURCE_VENDOR, "Vendor" }) > + > +#define show_poison_source(source) \ > + (((source > CXL_POISON_SOURCE_INJECTED) && \ > + (source != CXL_POISON_SOURCE_VENDOR)) ? "Reserved" \ > + : __show_poison_source(source)) > + > +#define show_poison_flags(flags) \ > + __print_flags(flags, "|", \ > + { CXL_POISON_FLAG_MORE, "More" }, \ > + { CXL_POISON_FLAG_OVERFLOW, "Overflow" }, \ > + { CXL_POISON_FLAG_SCANNING, "Scanning" }) > + > +#define __cxl_poison_addr(record) \ > + (le64_to_cpu(record->address)) > +#define cxl_poison_record_dpa(record) \ > + (__cxl_poison_addr(record) & CXL_POISON_START_MASK) > +#define cxl_poison_record_source(record) \ > + (__cxl_poison_addr(record) & CXL_POISON_SOURCE_MASK) > +#define cxl_poison_record_length(record) \ > + (le32_to_cpu(record->length) * CXL_POISON_LEN_MULT) > +#define cxl_poison_overflow(flags, time) \ > + (flags & CXL_POISON_FLAG_OVERFLOW ? le64_to_cpu(time) : 0) > + > +TRACE_EVENT(cxl_poison, > + > + TP_PROTO(struct cxl_memdev *cxlmd, struct cxl_region *region, > + const struct cxl_poison_record *record, > + u8 flags, __le64 overflow_t), FWIW I made event overflow a separate trace event. Will this make all of the poisons in a single GetPoison command marked with overflow in the trace buffer? Ira > + > + TP_ARGS(cxlmd, region, record, flags, overflow_t), > + > + TP_STRUCT__entry( > + __string(memdev, dev_name(&cxlmd->dev)) > + __string(host, dev_name(cxlmd->dev.parent)) > + __field(u64, serial) > + __string(region, region) > + __field(u64, overflow_t) > + __field(u64, dpa) > + __field(u32, length) > + __array(char, uuid, 16) > + __field(u8, source) > + __field(u8, flags) > + ), > + > + TP_fast_assign( > + __assign_str(memdev, dev_name(&cxlmd->dev)); > + __assign_str(host, dev_name(cxlmd->dev.parent)); > + __entry->serial = cxlmd->cxlds->serial; > + __entry->overflow_t = cxl_poison_overflow(flags, overflow_t); > + __entry->dpa = cxl_poison_record_dpa(record); > + __entry->length = cxl_poison_record_length(record); > + __entry->source = cxl_poison_record_source(record); > + __entry->flags = flags; > + if (region) { > + __assign_str(region, dev_name(®ion->dev)); > + memcpy(__entry->uuid, ®ion->params.uuid, 16); > + } else { > + __assign_str(region, ""); > + memset(__entry->uuid, 0, 16); > + } > + ), > + > + TP_printk("memdev=%s host=%s serial=%lld region=%s region_uuid=%pU dpa=0x%llx length=0x%x source=%s flags=%s overflow_time=%llu", > + __get_str(memdev), > + __get_str(host), > + __entry->serial, > + __get_str(region), > + __entry->uuid, > + __entry->dpa, > + __entry->length, > + show_poison_source(__entry->source), > + show_poison_flags(__entry->flags), > + __entry->overflow_t > + ) > +); > + > #endif /* _CXL_EVENTS_H */ > > #define TRACE_INCLUDE_FILE trace > -- > 2.37.3 >
On Mon, Mar 13, 2023 at 03:47:35PM -0700, Ira Weiny wrote: > alison.schofield@ wrote: > > From: Alison Schofield <alison.schofield@intel.com> > > > > CXL devices may support the retrieval of a device poison list. > > Add a new trace event that the CXL subsystem may use to log > > the media-error records returned in the poison list. > > > > Log each media-error record as a trace event of type 'cxl_poison'. > > > > Signed-off-by: Alison Schofield <alison.schofield@intel.com> > > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > > --- > > drivers/cxl/core/mbox.c | 4 +- > > drivers/cxl/core/trace.h | 84 ++++++++++++++++++++++++++++++++++++++++ > > 2 files changed, 87 insertions(+), 1 deletion(-) snip > > > > +#define cxl_poison_overflow(flags, time) \ > > + (flags & CXL_POISON_FLAG_OVERFLOW ? le64_to_cpu(time) : 0) > > + > > +TRACE_EVENT(cxl_poison, > > + > > + TP_PROTO(struct cxl_memdev *cxlmd, struct cxl_region *region, > > + const struct cxl_poison_record *record, > > + u8 flags, __le64 overflow_t), > > FWIW I made event overflow a separate trace event. > > Will this make all of the poisons in a single GetPoison command marked > with overflow in the trace buffer? Yes. Every record returned within a poison payload, gets the same flags and overflow_t reported in its trace events. I took a peek at what you did. (Perhaps we should have called that cxl_event_overflow). I don't think the poison reporting allows a similar, singular overflow trace event. The overflow setting means the device has overflowed its poison list and the list may be incomplete. I think we repeat the overflow state on every cxl_poison trace event until the overflow status goes away. (Scan Media) Alison > > Ira > > > + snip > >
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 0da0a30511f2..77fc811bdfed 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1147,7 +1147,9 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, if (rc) break; - /* TODO TRACE the media error records */ + for (int i = 0; i < le16_to_cpu(po->count); i++) + trace_cxl_poison(cxlmd, cxlr, &po->record[i], + po->flags, po->overflow_t); /* Protect against an uncleared _FLAG_MORE */ nr_records = nr_records + le16_to_cpu(po->count); diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index 9b8d3d997834..33a22d26e742 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -7,6 +7,7 @@ #define _CXL_EVENTS_H #include <linux/tracepoint.h> +#include <linux/pci.h> #include <asm-generic/unaligned.h> #include <cxl.h> @@ -600,6 +601,89 @@ TRACE_EVENT(cxl_memory_module, ) ); +#define __show_poison_source(source) \ + __print_symbolic(source, \ + { CXL_POISON_SOURCE_UNKNOWN, "Unknown" }, \ + { CXL_POISON_SOURCE_EXTERNAL, "External" }, \ + { CXL_POISON_SOURCE_INTERNAL, "Internal" }, \ + { CXL_POISON_SOURCE_INJECTED, "Injected" }, \ + { CXL_POISON_SOURCE_VENDOR, "Vendor" }) + +#define show_poison_source(source) \ + (((source > CXL_POISON_SOURCE_INJECTED) && \ + (source != CXL_POISON_SOURCE_VENDOR)) ? "Reserved" \ + : __show_poison_source(source)) + +#define show_poison_flags(flags) \ + __print_flags(flags, "|", \ + { CXL_POISON_FLAG_MORE, "More" }, \ + { CXL_POISON_FLAG_OVERFLOW, "Overflow" }, \ + { CXL_POISON_FLAG_SCANNING, "Scanning" }) + +#define __cxl_poison_addr(record) \ + (le64_to_cpu(record->address)) +#define cxl_poison_record_dpa(record) \ + (__cxl_poison_addr(record) & CXL_POISON_START_MASK) +#define cxl_poison_record_source(record) \ + (__cxl_poison_addr(record) & CXL_POISON_SOURCE_MASK) +#define cxl_poison_record_length(record) \ + (le32_to_cpu(record->length) * CXL_POISON_LEN_MULT) +#define cxl_poison_overflow(flags, time) \ + (flags & CXL_POISON_FLAG_OVERFLOW ? le64_to_cpu(time) : 0) + +TRACE_EVENT(cxl_poison, + + TP_PROTO(struct cxl_memdev *cxlmd, struct cxl_region *region, + const struct cxl_poison_record *record, + u8 flags, __le64 overflow_t), + + TP_ARGS(cxlmd, region, record, flags, overflow_t), + + TP_STRUCT__entry( + __string(memdev, dev_name(&cxlmd->dev)) + __string(host, dev_name(cxlmd->dev.parent)) + __field(u64, serial) + __string(region, region) + __field(u64, overflow_t) + __field(u64, dpa) + __field(u32, length) + __array(char, uuid, 16) + __field(u8, source) + __field(u8, flags) + ), + + TP_fast_assign( + __assign_str(memdev, dev_name(&cxlmd->dev)); + __assign_str(host, dev_name(cxlmd->dev.parent)); + __entry->serial = cxlmd->cxlds->serial; + __entry->overflow_t = cxl_poison_overflow(flags, overflow_t); + __entry->dpa = cxl_poison_record_dpa(record); + __entry->length = cxl_poison_record_length(record); + __entry->source = cxl_poison_record_source(record); + __entry->flags = flags; + if (region) { + __assign_str(region, dev_name(®ion->dev)); + memcpy(__entry->uuid, ®ion->params.uuid, 16); + } else { + __assign_str(region, ""); + memset(__entry->uuid, 0, 16); + } + ), + + TP_printk("memdev=%s host=%s serial=%lld region=%s region_uuid=%pU dpa=0x%llx length=0x%x source=%s flags=%s overflow_time=%llu", + __get_str(memdev), + __get_str(host), + __entry->serial, + __get_str(region), + __entry->uuid, + __entry->dpa, + __entry->length, + show_poison_source(__entry->source), + show_poison_flags(__entry->flags), + __entry->overflow_t + ) +); + #endif /* _CXL_EVENTS_H */ #define TRACE_INCLUDE_FILE trace