Message ID | 17ee0f309e4287510e4e68f2cbcfc9d111a6e69d.1665606782.git.alison.schofield@intel.com |
---|---|
State | Superseded |
Headers | show |
Series | CXL Poison List Retrieval & Tracing | expand |
On Wed, 12 Oct 2022 14:28:17 -0700
alison.schofield@intel.com wrote:

> +TRACE_EVENT(cxl_poison,
> +
> +	TP_PROTO(pid_t pid, const char *region, const char *memdev,
> +		 const char *pcidev, u64 hpa, u64 dpa, u32 length,
> +		 u8 source, u8 flags, u64 overflow_t),
> +
> +	TP_ARGS(pid, region, memdev, pcidev, hpa, dpa,
> +		length, source, flags, overflow_t),
> +
> +	TP_STRUCT__entry(
> +		__field(pid_t, pid)
> +		__string(region, region ? region : "")
> +		__string(memdev, memdev)
> +		__string(pcidev, pcidev)
> +		__field(u64, hpa)
> +		__field(u64, dpa)
> +		__field(u32, length)
> +		__field(u8, source)
> +		__field(u8, flags)
> +		__field(u64, overflow_t)

The above looks nice and compact.

> +	),
> +
> +	TP_fast_assign(
> +		__entry->pid = pid;
> +		__assign_str(region, region ? region : "");
> +		__assign_str(memdev, memdev);
> +		__assign_str(pcidev, pcidev);
> +		__entry->hpa = hpa;
> +		__entry->dpa = dpa;
> +		__entry->length = length;
> +		__entry->source = source;
> +		__entry->flags = flags;
> +		__entry->overflow_t = overflow_t;

But I wonder if it would be better to move the computation of patch 2
here?

That is, this:

> +	for (i = 0; i < le16_to_cpu(po->count); i++) {
> +		u32 len = le32_to_cpu(po->record[i].length) *
> +					CXL_POISON_LEN_MULT;
> +		u64 addr = le64_to_cpu(po->record[i].address);
> +		u8 source = addr & CXL_POISON_SOURCE_MASK;
> +		u64 dpa = addr & CXL_POISON_START_MASK;
> +		u64 hpa = 0;
> +
> +		trace_cxl_poison(current->pid, region_name,
> +				 dev_name(&cxlmd->dev),
> +				 dev_name(cxlds->dev), hpa, dpa, len,
> +				 source, po->flags, overflow_t);
> +	}

As:

	// The trace_*_enabled() is a static branch which is true when the
	// tracepoint is enabled, and a nop when not (skipping the for block)

	for (i = 0; trace_cxl_poison_enabled() && i < le16_to_cpu(po->count); i++)
		trace_cxl_poison(region_name, cxlmd, cxlds, &po->record[i]);

And then have:

	TP_fast_assign(
		u32 len = le32_to_cpu(record->length) *
					CXL_POISON_LEN_MULT;
		u64 addr = le64_to_cpu(record->address);
		u8 source = addr & CXL_POISON_SOURCE_MASK;
		u64 dpa = addr & CXL_POISON_START_MASK;
		u64 hpa = 0;

		__entry->pid = current->pid;

	{ the above isn't needed as the trace event will have common_pid = current->pid }

		__assign_str(region, region ? region : "");
		__assign_str(memdev, dev_name(&cxlmd->dev));
		__assign_str(pcidev, dev_name(cxlds->dev));
		__entry->hpa = hpa;
		__entry->dpa = dpa;
		__entry->length = length;
		__entry->source = source;
		__entry->flags = flags;
		__entry->overflow_t = overflow_t;


Or something similar. This will keep the work out of the code path.

-- Steve


> +	),
> +
> +	TP_printk("pid:%d region:%s memdev:%s pcidev:%s hpa:0x%llx dpa:0x%llx length:0x%x source:%s flags:%s overflow_time:%llu",
> +		__entry->pid,
> +		__get_str(region),
> +		__get_str(memdev),
> +		__get_str(pcidev),
> +		__entry->hpa,
> +		__entry->dpa,
> +		__entry->length,
> +		show_poison_source(__entry->source),
> +		show_poison_flags(__entry->flags),
> +		__entry->overflow_t)
> +);
> +#endif /* _CXL_TRACE_H */
On Wed, Oct 12, 2022 at 05:46:20PM -0400, Steven Rostedt wrote:
> On Wed, 12 Oct 2022 14:28:17 -0700
> alison.schofield@intel.com wrote:
>
> > +TRACE_EVENT(cxl_poison,
> > +
> > +	TP_PROTO(pid_t pid, const char *region, const char *memdev,
> > +		 const char *pcidev, u64 hpa, u64 dpa, u32 length,
> > +		 u8 source, u8 flags, u64 overflow_t),
> > +
> > +	TP_ARGS(pid, region, memdev, pcidev, hpa, dpa,
> > +		length, source, flags, overflow_t),
> > +
> > +	TP_STRUCT__entry(
> > +		__field(pid_t, pid)
> > +		__string(region, region ? region : "")
> > +		__string(memdev, memdev)
> > +		__string(pcidev, pcidev)
> > +		__field(u64, hpa)
> > +		__field(u64, dpa)
> > +		__field(u32, length)
> > +		__field(u8, source)
> > +		__field(u8, flags)
> > +		__field(u64, overflow_t)
>
> The above looks nice and compact.
>
> > +	),
> > +
> > +	TP_fast_assign(
> > +		__entry->pid = pid;
> > +		__assign_str(region, region ? region : "");
> > +		__assign_str(memdev, memdev);
> > +		__assign_str(pcidev, pcidev);
> > +		__entry->hpa = hpa;
> > +		__entry->dpa = dpa;
> > +		__entry->length = length;
> > +		__entry->source = source;
> > +		__entry->flags = flags;
> > +		__entry->overflow_t = overflow_t;
>
> But I wonder if it would be better to move the computation of patch 2
> here?
>
> That is, this:
>
> > +	for (i = 0; i < le16_to_cpu(po->count); i++) {
> > +		u32 len = le32_to_cpu(po->record[i].length) *
> > +					CXL_POISON_LEN_MULT;
> > +		u64 addr = le64_to_cpu(po->record[i].address);
> > +		u8 source = addr & CXL_POISON_SOURCE_MASK;
> > +		u64 dpa = addr & CXL_POISON_START_MASK;
> > +		u64 hpa = 0;
> > +
> > +		trace_cxl_poison(current->pid, region_name,
> > +				 dev_name(&cxlmd->dev),
> > +				 dev_name(cxlds->dev), hpa, dpa, len,
> > +				 source, po->flags, overflow_t);
> > +	}
>
> As:
>
> 	// The trace_*_enabled() is a static branch which is true when the
> 	// tracepoint is enabled, and a nop when not (skipping the for block)

Thanks for pointing it out Steve. I will take advantage of that in next
version. In this case, we'd expect, but not enforce, that the tracepoint
would be enabled, because otherwise we're just reading the data from the
device and throwing it away.

I appreciate your help here!
Alison

>
> 	for (i = 0; trace_cxl_poison_enabled() && i < le16_to_cpu(po->count); i++)
> 		trace_cxl_poison(region_name, cxlmd, cxlds, &po->record[i]);
>
> And then have:
>
> 	TP_fast_assign(
> 		u32 len = le32_to_cpu(record->length) *
> 					CXL_POISON_LEN_MULT;
> 		u64 addr = le64_to_cpu(record->address);
> 		u8 source = addr & CXL_POISON_SOURCE_MASK;
> 		u64 dpa = addr & CXL_POISON_START_MASK;
> 		u64 hpa = 0;
>
> 		__entry->pid = current->pid;
>
> 	{ the above isn't needed as the trace event will have common_pid = current->pid }

got it, thanks.

>
> 		__assign_str(region, region ? region : "");
> 		__assign_str(memdev, dev_name(&cxlmd->dev));
> 		__assign_str(pcidev, dev_name(cxlds->dev));
> 		__entry->hpa = hpa;
> 		__entry->dpa = dpa;
> 		__entry->length = length;
> 		__entry->source = source;
> 		__entry->flags = flags;
> 		__entry->overflow_t = overflow_t;
>
>
> Or something similar. This will keep the work out of the code path.
>
> -- Steve
>
>
> > +	),
> > +
> > +	TP_printk("pid:%d region:%s memdev:%s pcidev:%s hpa:0x%llx dpa:0x%llx length:0x%x source:%s flags:%s overflow_time:%llu",
> > +		__entry->pid,
> > +		__get_str(region),
> > +		__get_str(memdev),
> > +		__get_str(pcidev),
> > +		__entry->hpa,
> > +		__entry->dpa,
> > +		__entry->length,
> > +		show_poison_source(__entry->source),
> > +		show_poison_flags(__entry->flags),
> > +		__entry->overflow_t)
> > +);
> > +#endif /* _CXL_TRACE_H */
diff --git a/include/trace/events/cxl.h b/include/trace/events/cxl.h
new file mode 100644
index 000000000000..9613b0f18011
--- /dev/null
+++ b/include/trace/events/cxl.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM cxl
+
+#if !defined(_CXL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _CXL_TRACE_H
+
+#include <linux/tracepoint.h>
+
+/* CXL 8.2.9.5.4.1 Get Poison List: Poison Source */
+#define CXL_POISON_SOURCE_UNKNOWN 0
+#define CXL_POISON_SOURCE_EXTERNAL 1
+#define CXL_POISON_SOURCE_INTERNAL 2
+#define CXL_POISON_SOURCE_INJECTED 3
+#define CXL_POISON_SOURCE_VENDOR 7
+
+#define show_poison_source(source) \
+	__print_symbolic(source, \
+		{ CXL_POISON_SOURCE_UNKNOWN, "Unknown" }, \
+		{ CXL_POISON_SOURCE_EXTERNAL, "External" }, \
+		{ CXL_POISON_SOURCE_INTERNAL, "Internal" }, \
+		{ CXL_POISON_SOURCE_INJECTED, "Injected" }, \
+		{ CXL_POISON_SOURCE_VENDOR, "Vendor" })
+
+/* CXL 8.2.9.5.4.1 Get Poison List: Payload out flags */
+#define CXL_POISON_FLAG_MORE BIT(0)
+#define CXL_POISON_FLAG_OVERFLOW BIT(1)
+#define CXL_POISON_FLAG_SCANNING BIT(2)
+
+#define show_poison_flags(flags) \
+	__print_flags(flags, "|", \
+		{ CXL_POISON_FLAG_MORE, "More" }, \
+		{ CXL_POISON_FLAG_OVERFLOW, "Overflow" }, \
+		{ CXL_POISON_FLAG_SCANNING, "Scanning" })
+
+TRACE_EVENT(cxl_poison,
+
+	TP_PROTO(pid_t pid, const char *region, const char *memdev,
+		 const char *pcidev, u64 hpa, u64 dpa, u32 length,
+		 u8 source, u8 flags, u64 overflow_t),
+
+	TP_ARGS(pid, region, memdev, pcidev, hpa, dpa,
+		length, source, flags, overflow_t),
+
+	TP_STRUCT__entry(
+		__field(pid_t, pid)
+		__string(region, region ? region : "")
+		__string(memdev, memdev)
+		__string(pcidev, pcidev)
+		__field(u64, hpa)
+		__field(u64, dpa)
+		__field(u32, length)
+		__field(u8, source)
+		__field(u8, flags)
+		__field(u64, overflow_t)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+		__assign_str(region, region ? region : "");
+		__assign_str(memdev, memdev);
+		__assign_str(pcidev, pcidev);
+		__entry->hpa = hpa;
+		__entry->dpa = dpa;
+		__entry->length = length;
+		__entry->source = source;
+		__entry->flags = flags;
+		__entry->overflow_t = overflow_t;
+	),
+
+	TP_printk("pid:%d region:%s memdev:%s pcidev:%s hpa:0x%llx dpa:0x%llx length:0x%x source:%s flags:%s overflow_time:%llu",
+		__entry->pid,
+		__get_str(region),
+		__get_str(memdev),
+		__get_str(pcidev),
+		__entry->hpa,
+		__entry->dpa,
+		__entry->length,
+		show_poison_source(__entry->source),
+		show_poison_flags(__entry->flags),
+		__entry->overflow_t)
+);
+#endif /* _CXL_TRACE_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE cxl
+#include <trace/define_trace.h>