diff mbox series

[2/2] cxl: Move opcode reporting from dev_dbg() to traceevent

Message ID 169480883568.2690926.2900058137618374817.stgit@djiang5-mobl3
State New, archived
Headers show
Series [1/2] cxl: Move command enumeration from dev_dbg() to traceevent | expand

Commit Message

Dave Jiang Sept. 15, 2023, 8:13 p.m. UTC
Alison has reported that against certain hardware devices the opcode
discovery dev_dbg() can emit several hundred "unsupported by driver"
messages while parsing the CEL. Move the emission to traceevent to reduce
dmesg spamming and let software parse the output if there are interested
parties.

Reported-by: Alison Schofield <alison.schofield@intel.com>
Suggested-by: Alison Schofield <alison.schofield@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 drivers/cxl/core/mbox.c  |    7 +++----
 drivers/cxl/core/trace.h |   30 ++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 4 deletions(-)

Comments

Alison Schofield Sept. 18, 2023, 4:51 p.m. UTC | #1
On Fri, Sep 15, 2023 at 01:13:55PM -0700, Dave Jiang wrote:
> Alison has reported that against certain hardware devices the opcode
> discovery dev_dbg() can emit several hundred "unsupported by driver"
> messages while parsing the CEL. Move the emission to traceevent to reduce
> dmesg spamming and let software parse the output if there are interested
> parties.

Thanks for reducing the spew Dave.

Considering that tracing may or may not be 'on' can we just not
spew anything at this point - no dev_dbg(), no trace. Let the user
ask for the opcode list at their leisure, at which time we'd dump
it to trace log.

Is there a mechanism in place for user to ask for logs?
(cxl list -m mem1 'show me my opcodes')
Barring that, I'm assuming users can do a pass thru of this cmd
and get whatever they want.

Counterpoint - is there a subset of opcodes that we'd really like
to dev_dbg() about at this point in time?  ie missing opcodes that
are going to make the device useless.

Alison

> 
> Reported-by: Alison Schofield <alison.schofield@intel.com>
> Suggested-by: Alison Schofield <alison.schofield@intel.com>
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> ---
>  drivers/cxl/core/mbox.c  |    7 +++----
>  drivers/cxl/core/trace.h |   30 ++++++++++++++++++++++++++++++
>  2 files changed, 33 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index ab6b6c4d7a48..59089b540add 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -707,7 +707,7 @@ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
>  {
>  	struct cxl_cel_entry *cel_entry;
>  	const int cel_entries = size / sizeof(*cel_entry);
> -	struct device *dev = mds->cxlds.dev;
> +	struct cxl_memdev *cxlmd = mds->cxlds.cxlmd;
>  	int i;
>  
>  	cel_entry = (struct cxl_cel_entry *) cel;
> @@ -718,8 +718,7 @@ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
>  
>  		if (!cmd && (!cxl_is_poison_command(opcode) ||
>  			     !cxl_is_security_command(opcode))) {
> -			dev_dbg(dev,
> -				"Opcode 0x%04x unsupported by driver\n", opcode);
> +			trace_cxl_opcode(cxlmd, opcode, false);
>  			continue;
>  		}
>  
> @@ -732,7 +731,7 @@ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
>  		if (cxl_is_security_command(opcode))
>  			cxl_set_security_cmd_enabled(&mds->security, opcode);
>  
> -		dev_dbg(dev, "Opcode 0x%04x enabled\n", opcode);
> +		trace_cxl_opcode(cxlmd, opcode, true);
>  	}
>  }
>  
> diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h
> index 817c5377eca2..c48e4c836d77 100644
> --- a/drivers/cxl/core/trace.h
> +++ b/drivers/cxl/core/trace.h
> @@ -734,6 +734,36 @@ TRACE_EVENT(cxl_log_type,
>  	)
>  );
>  
> +TRACE_EVENT(cxl_opcode,
> +
> +	TP_PROTO(const struct cxl_memdev *cxlmd, u16 opcode, bool enabled),
> +
> +	TP_ARGS(cxlmd, opcode, enabled),
> +
> +	TP_STRUCT__entry(
> +		__string(memdev, dev_name(&cxlmd->dev))
> +		__string(host, dev_name(cxlmd->dev.parent))
> +		__field(u64, serial)
> +		__field(u16, opcode)
> +		__field(bool, enabled)
> +	),
> +
> +	TP_fast_assign(
> +		__assign_str(memdev, dev_name(&cxlmd->dev));
> +		__assign_str(host, dev_name(cxlmd->dev.parent));
> +		__entry->serial = cxlmd->cxlds->serial;
> +		__entry->opcode = opcode;
> +		__entry->enabled = enabled;
> +	),
> +
> +	TP_printk("memdev=%s host=%s serial=%lld opcode=%d state=%s",
> +		  __get_str(memdev),
> +		  __get_str(host),
> +		  __entry->serial,
> +		  __entry->opcode,
> +		  __entry->enabled ? "enabled" : "unsupported"
> +	)
> +);
>  #endif /* _CXL_EVENTS_H */
>  
>  #define TRACE_INCLUDE_FILE trace
> 
>
Ira Weiny Sept. 18, 2023, 9:25 p.m. UTC | #2
Alison Schofield wrote:
> On Fri, Sep 15, 2023 at 01:13:55PM -0700, Dave Jiang wrote:
> > Alison has reported that against certain hardware devices the opcode
> > discovery dev_dbg() can emit several hundred "unsupported by driver"
> > messages while parsing the CEL. Move the emission to traceevent to reduce
> > dmesg spamming and let software parse the output if there are interested
> > parties.
> 
> Thanks for reducing the spew Dave.
> 
> Considering that tracing may or may not be 'on' can we just not
> spew anything at this point - no dev_dbg(), no trace. Let the user
> ask for the opcode list at their leisure, at which time we'd dump
> it to trace log.
> 
> Is there a mechanism in place for user to ask for logs?
> (cxl list -m mem1 'show me my opcodes')
> Barring that, I'm assuming users can do a pass thru of this cmd
> and get whatever they want.
> 
> Counterpoint - is there a subset of opcodes that we'd really like
> to dev_dbg() about at this point in time?  ie missing opcodes that
> are going to make the device useless.

IMO any command which fails and is fatal to the device coming up should
trigger a dev_err() somewhere.

I don't think that is a guarantee currently though.  Even something which
we expect but does not fail the device is IMO a candidate for dev_err()
because it is unexpected behavior.

Does anyone oppose removing the dev_dbg()'s in this series?  Is anyone
looking at the list of opcodes this way?

Ira
diff mbox series

Patch

diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index ab6b6c4d7a48..59089b540add 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -707,7 +707,7 @@  static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
 {
 	struct cxl_cel_entry *cel_entry;
 	const int cel_entries = size / sizeof(*cel_entry);
-	struct device *dev = mds->cxlds.dev;
+	struct cxl_memdev *cxlmd = mds->cxlds.cxlmd;
 	int i;
 
 	cel_entry = (struct cxl_cel_entry *) cel;
@@ -718,8 +718,7 @@  static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
 
 		if (!cmd && (!cxl_is_poison_command(opcode) ||
 			     !cxl_is_security_command(opcode))) {
-			dev_dbg(dev,
-				"Opcode 0x%04x unsupported by driver\n", opcode);
+			trace_cxl_opcode(cxlmd, opcode, false);
 			continue;
 		}
 
@@ -732,7 +731,7 @@  static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
 		if (cxl_is_security_command(opcode))
 			cxl_set_security_cmd_enabled(&mds->security, opcode);
 
-		dev_dbg(dev, "Opcode 0x%04x enabled\n", opcode);
+		trace_cxl_opcode(cxlmd, opcode, true);
 	}
 }
 
diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h
index 817c5377eca2..c48e4c836d77 100644
--- a/drivers/cxl/core/trace.h
+++ b/drivers/cxl/core/trace.h
@@ -734,6 +734,36 @@  TRACE_EVENT(cxl_log_type,
 	)
 );
 
+TRACE_EVENT(cxl_opcode,
+
+	TP_PROTO(const struct cxl_memdev *cxlmd, u16 opcode, bool enabled),
+
+	TP_ARGS(cxlmd, opcode, enabled),
+
+	TP_STRUCT__entry(
+		__string(memdev, dev_name(&cxlmd->dev))
+		__string(host, dev_name(cxlmd->dev.parent))
+		__field(u64, serial)
+		__field(u16, opcode)
+		__field(bool, enabled)
+	),
+
+	TP_fast_assign(
+		__assign_str(memdev, dev_name(&cxlmd->dev));
+		__assign_str(host, dev_name(cxlmd->dev.parent));
+		__entry->serial = cxlmd->cxlds->serial;
+		__entry->opcode = opcode;
+		__entry->enabled = enabled;
+	),
+
+	TP_printk("memdev=%s host=%s serial=%lld opcode=%d state=%s",
+		  __get_str(memdev),
+		  __get_str(host),
+		  __entry->serial,
+		  __entry->opcode,
+		  __entry->enabled ? "enabled" : "unsupported"
+	)
+);
 #endif /* _CXL_EVENTS_H */
 
 #define TRACE_INCLUDE_FILE trace