diff mbox series

[2/6] qla2xxx: Add a generic tracing framework

Message ID 20220715060227.23923-3-njavali@marvell.com (mailing list archive)
State Changes Requested
Headers show
Series qla2xxx driver features | expand

Commit Message

Nilesh Javali July 15, 2022, 6:02 a.m. UTC
From: Arun Easi <aeasi@marvell.com>

Adding a message based tracing mechanism where
a rotating number of messages are captured in
a trace structure. Disable/enable/resize
operations are allowed via debugfs interfaces.

Signed-off-by: Arun Easi <aeasi@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
---
 drivers/scsi/qla2xxx/qla_dbg.c |  12 +++
 drivers/scsi/qla2xxx/qla_dbg.h | 140 +++++++++++++++++++++++++++++++++
 drivers/scsi/qla2xxx/qla_def.h |  31 ++++++++
 drivers/scsi/qla2xxx/qla_dfs.c | 102 ++++++++++++++++++++++++
 drivers/scsi/qla2xxx/qla_os.c  |   5 ++
 5 files changed, 290 insertions(+)

Comments

Daniel Wagner July 18, 2022, 8:54 a.m. UTC | #1
Hi

[adding Steven Rostedt]

On Thu, Jul 14, 2022 at 11:02:23PM -0700, Nilesh Javali wrote:
> From: Arun Easi <aeasi@marvell.com>
> 
> Adding a message based tracing mechanism where
> a rotating number of messages are captured in
> a trace structure. Disable/enable/resize
> operations are allowed via debugfs interfaces.

I really wonder why you need to this kind of infrastructure to your
driver? As far I know the qla2xxx is already able to log into
ftrace. Why can't we just use this infrastructure?

Thanks,
Daniel

> 
> Signed-off-by: Arun Easi <aeasi@marvell.com>
> Signed-off-by: Nilesh Javali <njavali@marvell.com>
> ---
>  drivers/scsi/qla2xxx/qla_dbg.c |  12 +++
>  drivers/scsi/qla2xxx/qla_dbg.h | 140 +++++++++++++++++++++++++++++++++
>  drivers/scsi/qla2xxx/qla_def.h |  31 ++++++++
>  drivers/scsi/qla2xxx/qla_dfs.c | 102 ++++++++++++++++++++++++
>  drivers/scsi/qla2xxx/qla_os.c  |   5 ++
>  5 files changed, 290 insertions(+)
> 
> diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c
> index 7cf1f78cbaee..c4ba8ac51d27 100644
> --- a/drivers/scsi/qla2xxx/qla_dbg.c
> +++ b/drivers/scsi/qla2xxx/qla_dbg.c
> @@ -2777,3 +2777,15 @@ ql_dbg_qp(uint32_t level, struct qla_qpair *qpair, int32_t id,
>  	va_end(va);
>  
>  }
> +
> +#ifdef QLA_TRACING
> +void qla_tracing_init(void)
> +{
> +	if (is_kdump_kernel())
> +		return;
> +}
> +
> +void qla_tracing_exit(void)
> +{
> +}
> +#endif /* QLA_TRACING */
> diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h
> index feeb1666227f..e0d91a1f81c0 100644
> --- a/drivers/scsi/qla2xxx/qla_dbg.h
> +++ b/drivers/scsi/qla2xxx/qla_dbg.h
> @@ -5,6 +5,7 @@
>   */
>  
>  #include "qla_def.h"
> +#include <linux/delay.h>
>  
>  /*
>   * Firmware Dump structure definition
> @@ -321,6 +322,145 @@ struct qla2xxx_fw_dump {
>  
>  extern uint ql_errlev;
>  
> +#ifdef QLA_TRACING
> +#include <linux/crash_dump.h>
> +
> +extern void qla_tracing_init(void);
> +extern void qla_tracing_exit(void);
> +
> +static inline int
> +ql_mask_match_ext(uint level, int *log_tunable)
> +{
> +	if (*log_tunable == 1)
> +		*log_tunable = QL_DBG_DEFAULT1_MASK;
> +
> +	return (level & *log_tunable) == level;
> +}
> +
> +static inline int
> +__qla_trace_get(struct qla_trace *trc)
> +{
> +	if (test_bit(QLA_TRACE_QUIESCE, &trc->flags))
> +		return -EIO;
> +	atomic_inc(&trc->ref_count);
> +	return 0;
> +}
> +
> +static inline int
> +qla_trace_get(struct qla_trace *trc)
> +{
> +	unsigned long flags;
> +	int ret;
> +
> +	spin_lock_irqsave(&trc->trc_lock, flags);
> +	ret = __qla_trace_get(trc);
> +	spin_unlock_irqrestore(&trc->trc_lock, flags);
> +
> +	return ret;
> +}
> +
> +static inline void
> +qla_trace_put(struct qla_trace *trc)
> +{
> +	wmb();
> +	atomic_dec(&trc->ref_count);
> +}
> +
> +static inline char *
> +qla_get_trace_next(struct qla_trace *trc)
> +{
> +	uint32_t t_ind;
> +	char *buf;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&trc->trc_lock, flags);
> +	if (!test_bit(QLA_TRACE_ENABLED, &trc->flags) ||
> +	    __qla_trace_get(trc)) {
> +		spin_unlock_irqrestore(&trc->trc_lock, flags);
> +		return NULL;
> +	}
> +	t_ind = trc->trace_ind = qla_trace_ind_norm(trc, trc->trace_ind + 1);
> +	spin_unlock_irqrestore(&trc->trc_lock, flags);
> +
> +	if (!t_ind)
> +		set_bit(QLA_TRACE_WRAPPED, &trc->flags);
> +
> +	buf = qla_trace_record(trc, t_ind);
> +	/* Put an end marker '>' for the next record. */
> +	qla_trace_record(trc, qla_trace_ind_norm(trc, t_ind + 1))[0] = '>';
> +
> +	return buf;
> +}
> +
> +static inline int
> +qla_trace_quiesce(struct qla_trace *trc)
> +{
> +	unsigned long flags;
> +	u32 cnt = 0;
> +	int ret = 0;
> +
> +	set_bit(QLA_TRACE_QUIESCE, &trc->flags);
> +
> +	spin_lock_irqsave(&trc->trc_lock, flags);
> +	while (atomic_read(&trc->ref_count)) {
> +		spin_unlock_irqrestore(&trc->trc_lock, flags);
> +
> +		msleep(1);
> +
> +		spin_lock_irqsave(&trc->trc_lock, flags);
> +		cnt++;
> +		if (cnt > 10 * 1000) {
> +			pr_info("qla2xxx: Trace could not be quiesced now (count=%d).",
> +				atomic_read(&trc->ref_count));
> +			/* Leave trace enabled */
> +			clear_bit(QLA_TRACE_QUIESCE, &trc->flags);
> +			ret = -EIO;
> +			break;
> +		}
> +	}
> +	spin_unlock_irqrestore(&trc->trc_lock, flags);
> +	return ret;
> +}
> +
> +static inline void
> +qla_trace_init(struct qla_trace *trc, char *name, u32 num_entries)
> +{
> +	if (trc->recs)
> +		return;
> +
> +	memset(trc, 0, sizeof(*trc));
> +
> +	trc->name = name;
> +	spin_lock_init(&trc->trc_lock);
> +	if (!num_entries)
> +		return;
> +	trc->num_entries = num_entries;
> +	trc->recs = vzalloc(trc->num_entries *
> +				sizeof(struct qla_trace_rec));
> +	if (!trc->recs)
> +		return;
> +
> +	set_bit(QLA_TRACE_ENABLED, &trc->flags);
> +}
> +
> +static inline void
> +qla_trace_uninit(struct qla_trace *trc)
> +{
> +	if (!trc->recs)
> +		return;
> +
> +	vfree(trc->recs);
> +	trc->recs = NULL;
> +	clear_bit(QLA_TRACE_ENABLED, &trc->flags);
> +}
> +
> +#else /* QLA_TRACING */
> +#define qla_trace_init(trc, name, num)
> +#define qla_trace_uninit(trc)
> +#define qla_tracing_init()
> +#define qla_tracing_exit()
> +#endif /* QLA_TRACING */
> +
>  void __attribute__((format (printf, 4, 5)))
>  ql_dbg(uint, scsi_qla_host_t *vha, uint, const char *fmt, ...);
>  void __attribute__((format (printf, 4, 5)))
> diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
> index 22274b405d01..39322105e7be 100644
> --- a/drivers/scsi/qla2xxx/qla_def.h
> +++ b/drivers/scsi/qla2xxx/qla_def.h
> @@ -35,6 +35,37 @@
>  
>  #include <uapi/scsi/fc/fc_els.h>
>  
> +#define QLA_TRACING /* Captures driver messages to buffer */
> +
> +#ifdef QLA_TRACING
> +#define QLA_TRACE_LINE_SIZE		256	/* Biggest so far is ~215 */
> +#define qla_trace_ind_norm(_trc, _ind)	((_ind) >= (_trc)->num_entries ? \
> +							0 : (_ind))
> +#define qla_trace_record(_trc, __ind)	((_trc)->recs[__ind].buf)
> +#define qla_trace_record_len		(sizeof(struct qla_trace_rec))
> +#define qla_trace_start(_trc)		qla_trace_record(_trc, 0)
> +#define qla_trace_len(_trc)		((_trc)->num_entries)
> +#define qla_trace_size(_trc)		(qla_trace_record_len * \
> +						(_trc)->num_entries)
> +#define qla_trace_cur_ind(_trc)		((_trc)->trace_ind)
> +struct qla_trace_rec {
> +	char buf[QLA_TRACE_LINE_SIZE];
> +};
> +
> +struct qla_trace {
> +#define QLA_TRACE_ENABLED	0 /* allow trace writes or not */
> +#define QLA_TRACE_WRAPPED	1
> +#define QLA_TRACE_QUIESCE	2
> +	unsigned long	flags;
> +	atomic_t	ref_count;
> +	u32		num_entries;
> +	u32		trace_ind;
> +	spinlock_t	trc_lock;
> +	char		*name;
> +	struct qla_trace_rec *recs;
> +};
> +#endif /* QLA_TRACING */
> +
>  #define QLA_DFS_DEFINE_DENTRY(_debugfs_file_name) \
>  	struct dentry *dfs_##_debugfs_file_name
>  #define QLA_DFS_ROOT_DEFINE_DENTRY(_debugfs_file_name) \
> diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c
> index c3c8b9536ef6..98c6390ad1f1 100644
> --- a/drivers/scsi/qla2xxx/qla_dfs.c
> +++ b/drivers/scsi/qla2xxx/qla_dfs.c
> @@ -489,6 +489,108 @@ qla_dfs_naqp_show(struct seq_file *s, void *unused)
>  	return 0;
>  }
>  
> +#ifdef QLA_TRACING
> +static char *trace_help = "\
> +# Format:\n\
> +#   <msec> <cpu#> <message>\n\
> +#\n\
> +# Trace control by writing:\n\
> +#   'enable' - to enable this trace\n\
> +#   'disable' - to disable this trace\n\
> +#   'resize=<nlines>' - to resize this trace to <nlines>\n\
> +#\n";
> +
> +static int
> +qla_dfs_trace_show(struct seq_file *s, void *unused)
> +{
> +	struct qla_trace *trc = s->private;
> +	char *buf;
> +	u32 t_ind = 0, i;
> +
> +	seq_puts(s, trace_help);
> +
> +	if (qla_trace_get(trc))
> +		return 0;
> +
> +	seq_printf(s, "# Trace max lines = %d, writes = %s\n#\n",
> +		   trc->num_entries, test_bit(QLA_TRACE_ENABLED,
> +		   &trc->flags) ? "enabled" : "disabled");
> +
> +	if (test_bit(QLA_TRACE_WRAPPED, &trc->flags))
> +		t_ind = qla_trace_cur_ind(trc) + 1;
> +
> +	for (i = 0; i < qla_trace_len(trc); i++, t_ind++) {
> +		t_ind = qla_trace_ind_norm(trc, t_ind);
> +		buf = qla_trace_record(trc, t_ind);
> +		if (!buf[0])
> +			continue;
> +		seq_puts(s, buf);
> +	}
> +
> +	mb();
> +	qla_trace_put(trc);
> +	return 0;
> +}
> +
> +#define string_is(_buf, _str_val) \
> +	(strncmp(_str_val, _buf, strlen(_str_val)) == 0)
> +
> +static ssize_t
> +qla_dfs_trace_write(struct file *file, const char __user *buffer,
> +		    size_t count, loff_t *pos)
> +{
> +	struct seq_file *s = file->private_data;
> +	struct qla_trace *trc = s->private;
> +	char buf[32];
> +	ssize_t ret = count;
> +
> +	memset(buf, 0, sizeof(buf));
> +	if (copy_from_user(buf, buffer, min(sizeof(buf), count)))
> +		return -EFAULT;
> +
> +	if (string_is(buf, "enable")) {
> +		if (!trc->recs) {
> +			pr_warn("qla2xxx: '%s' is empty, resize before enabling.\n",
> +					trc->name);
> +			return -EINVAL;
> +		}
> +		pr_info("qla2xxx: Enabling trace '%s'\n", trc->name);
> +		set_bit(QLA_TRACE_ENABLED, &trc->flags);
> +	} else if (string_is(buf, "disable")) {
> +		pr_info("qla2xxx: Disabling trace '%s'\n", trc->name);
> +		clear_bit(QLA_TRACE_ENABLED, &trc->flags);
> +	} else if (string_is(buf, "resize")) {
> +		u32 new_len;
> +
> +		if (sscanf(buf, "resize=%d", &new_len) != 1)
> +			return -EINVAL;
> +		if (new_len == trc->num_entries) {
> +			pr_info("qla2xxx: New trace size is same as old.\n");
> +			return count;
> +		}
> +		pr_info("qla2xxx: Changing trace '%s' size to %d\n",
> +			trc->name, new_len);
> +		if (qla_trace_quiesce(trc)) {
> +			ret = -EBUSY;
> +			goto done;
> +		}
> +		qla_trace_uninit(trc);
> +		/*
> +		 * Go through init once again to start creating traces
> +		 * based on the respective tunable.
> +		 */
> +		qla_trace_init(trc, trc->name, new_len);
> +		if (!trc->recs) {
> +			pr_warn("qla2xxx: Trace allocation failed for '%s'\n",
> +				trc->name);
> +			ret = -ENOMEM;
> +		}
> +	}
> +done:
> +	return ret;
> +}
> +#endif /* QLA_TRACING */
> +
>  /*
>   * Helper macros for setting up debugfs entries.
>   * _name: The name of the debugfs entry
> diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
> index 0bd0fd1042df..0d2397069cac 100644
> --- a/drivers/scsi/qla2xxx/qla_os.c
> +++ b/drivers/scsi/qla2xxx/qla_os.c
> @@ -8191,6 +8191,8 @@ qla2x00_module_init(void)
>  	BUILD_BUG_ON(sizeof(sw_info_t) != 32);
>  	BUILD_BUG_ON(sizeof(target_id_t) != 2);
>  
> +	qla_tracing_init();
> +
>  	/* Allocate cache for SRBs. */
>  	srb_cachep = kmem_cache_create("qla2xxx_srbs", sizeof(srb_t), 0,
>  	    SLAB_HWCACHE_ALIGN, NULL);
> @@ -8269,6 +8271,7 @@ qla2x00_module_init(void)
>  
>  destroy_cache:
>  	kmem_cache_destroy(srb_cachep);
> +	qla_tracing_exit();
>  	return ret;
>  }
>  
> @@ -8287,6 +8290,8 @@ qla2x00_module_exit(void)
>  	fc_release_transport(qla2xxx_transport_template);
>  	qlt_exit();
>  	kmem_cache_destroy(srb_cachep);
> +
> +	qla_tracing_exit();
>  }
>  
>  module_init(qla2x00_module_init);
> -- 
> 2.19.0.rc0
>
Arun Easi July 18, 2022, 7:02 p.m. UTC | #2
On Mon, 18 Jul 2022, 1:54am, Daniel Wagner wrote:

> Hi
> 
> [adding Steven Rostedt]
> 
> On Thu, Jul 14, 2022 at 11:02:23PM -0700, Nilesh Javali wrote:
> > From: Arun Easi <aeasi@marvell.com>
> > 
> > Adding a message based tracing mechanism where
> > a rotating number of messages are captured in
> > a trace structure. Disable/enable/resize
> > operations are allowed via debugfs interfaces.
> 
> I really wonder why you need to this kind of infrastructure to your
> driver? As far I know the qla2xxx is already able to log into
> ftrace. Why can't we just use this infrastructure?
> 

Many times when a problem was reported on the driver, we had to request 
for a repro with extended error logging (via driver module parameter) 
turned on. With this internal tracing in place, log messages that appear 
only with extended error logging are captured by default in the internal 
trace buffer.

AFAIK, kernel tracing requires a user initiated action to be turned on, 
like enabling individual tracepoints. Though a script (either startup or 
udev) can do this job, enabling tracepoints by default for a single 
driver, I think, may not be a preferred choice -- particularly when the 
trace buffer is shared across the kernel. That also brings the extra 
overhead of finding how this could be done across various distros.

For cases where the memory/driver size matters, there is an option to 
compile out this feature, plus choosing a lower default trace buffer size.

Regards,
-Arun
Steven Rostedt July 18, 2022, 7:22 p.m. UTC | #3
On Mon, 18 Jul 2022 12:02:26 -0700
Arun Easi <aeasi@marvell.com> wrote:

> Many times when a problem was reported on the driver, we had to request 
> for a repro with extended error logging (via driver module parameter) 
> turned on. With this internal tracing in place, log messages that appear 
> only with extended error logging are captured by default in the internal 
> trace buffer.
> 
> AFAIK, kernel tracing requires a user initiated action to be turned on, 
> like enabling individual tracepoints. Though a script (either startup or 
> udev) can do this job, enabling tracepoints by default for a single 
> driver, I think, may not be a preferred choice -- particularly when the 
> trace buffer is shared across the kernel. That also brings the extra 
> overhead of finding how this could be done across various distros.
> 
> For cases where the memory/driver size matters, there is an option to 
> compile out this feature, plus choosing a lower default trace buffer size.

You can enable an ftrace instance from inside the kernel, and make it a
compile time option.

	#include <linux/trace_events.h>
	#include <linux/trace.h>

	struct trace_array *tr;

	tr = trace_array_get_by_name("qla2xxx");
	trace_array_set_clr_event(tr, "qla", NULL, true);

And now you have trace events running:

 # cat /sys/kernel/tracing/instances/qla/trace

-- Steve
Daniel Wagner July 19, 2022, 8:25 a.m. UTC | #4
Hi Arun,

On Mon, Jul 18, 2022 at 03:22:43PM -0400, Steven Rostedt wrote:
> On Mon, 18 Jul 2022 12:02:26 -0700
> Arun Easi <aeasi@marvell.com> wrote:
> 
> > Many times when a problem was reported on the driver, we had to request 
> > for a repro with extended error logging (via driver module parameter) 
> > turned on. With this internal tracing in place, log messages that appear 
> > only with extended error logging are captured by default in the internal 
> > trace buffer.
> > 
> > AFAIK, kernel tracing requires a user initiated action to be turned on, 
> > like enabling individual tracepoints. Though a script (either startup or 
> > udev) can do this job, enabling tracepoints by default for a single 
> > driver, I think, may not be a preferred choice -- particularly when the 
> > trace buffer is shared across the kernel. That also brings the extra 
> > overhead of finding how this could be done across various distros.
> > 
> > For cases where the memory/driver size matters, there is an option to 
> > compile out this feature, plus choosing a lower default trace buffer size.

I am really asking the question why do we need to add special debugging
code to every single driver? Can't we try to make more use of generic
code and extend it if necessary?

Both FC drivers qla2xxx and lpfc are doing their own thing for
debugging/logging and I really fail to see why we can't not move towards
a more generic way. Dumping logs to the kernel log was the simplest way
but as this series shows, this is not something you can do in production
systems.

> You can enable an ftrace instance from inside the kernel, and make it a
> compile time option.
>
> 	#include <linux/trace_events.h>
> 	#include <linux/trace.h>
> 
> 	struct trace_array *tr;
> 
> 	tr = trace_array_get_by_name("qla2xxx");
> 	trace_array_set_clr_event(tr, "qla", NULL, true);
> 
> And now you have trace events running:
> 
>  # cat /sys/kernel/tracing/instances/qla/trace

Thanks Steve for the tip!

Daniel
Steven Rostedt July 19, 2022, 1:05 p.m. UTC | #5
On Tue, 19 Jul 2022 10:25:14 +0200
Daniel Wagner <dwagner@suse.de> wrote:

> > You can enable an ftrace instance from inside the kernel, and make it a
> > compile time option.
> >
> > 	#include <linux/trace_events.h>
> > 	#include <linux/trace.h>
> > 
> > 	struct trace_array *tr;
> > 
> > 	tr = trace_array_get_by_name("qla2xxx");
> > 	trace_array_set_clr_event(tr, "qla", NULL, true);
> > 
> > And now you have trace events running:
> > 
> >  # cat /sys/kernel/tracing/instances/qla/trace  

The above should be:

     # cat /sys/kernel/tracing/instances/qla2xxx/trace

as the instance name is the string sent to trace_array_get_by_name().

-- Steve
Arun Easi July 19, 2022, 9:06 p.m. UTC | #6
On Mon, 18 Jul 2022, 12:22pm, Steven Rostedt wrote:

> External Email
> 
> ----------------------------------------------------------------------
> On Mon, 18 Jul 2022 12:02:26 -0700
> Arun Easi <aeasi@marvell.com> wrote:
> 
> > Many times when a problem was reported on the driver, we had to request 
> > for a repro with extended error logging (via driver module parameter) 
> > turned on. With this internal tracing in place, log messages that appear 
> > only with extended error logging are captured by default in the internal 
> > trace buffer.
> > 
> > AFAIK, kernel tracing requires a user initiated action to be turned on, 
> > like enabling individual tracepoints. Though a script (either startup or 
> > udev) can do this job, enabling tracepoints by default for a single 
> > driver, I think, may not be a preferred choice -- particularly when the 
> > trace buffer is shared across the kernel. That also brings the extra 
> > overhead of finding how this could be done across various distros.
> > 
> > For cases where the memory/driver size matters, there is an option to 
> > compile out this feature, plus choosing a lower default trace buffer size.
> 
> You can enable an ftrace instance from inside the kernel, and make it a
> compile time option.
> 
> 	#include <linux/trace_events.h>
> 	#include <linux/trace.h>
> 
> 	struct trace_array *tr;
> 
> 	tr = trace_array_get_by_name("qla2xxx");
> 	trace_array_set_clr_event(tr, "qla", NULL, true);
> 
> And now you have trace events running:
> 
>  # cat /sys/kernel/tracing/instances/qla/trace
> 

Thanks Steve. I was not aware of this relatively newer interface. This 
looks promising.

I have a question on the behavior of this interface.

It appears by calling the above two interfaces, I get a separate instance 
of "qla" only traces, but with only the "qla"-only instance being enabled, 
leaving the main (/sys/kernel/tracing/events/qla/enable) one disabled. No 
issues there, but when I enable both of them, I get garbage values on one.

/sys/kernel/tracing/instances/qla2xxx/trace:
             cat-56106   [012] ..... 2419873.470098: ql_dbg_log: qla2xxx [0000:05:00.0]-1054:14:  Entered (null).
             cat-56106   [012] ..... 2419873.470101: ql_dbg_log: qla2xxx [0000:05:00.0]-1000:14:  Entered ×+<96>²Ü<98>^H.
             cat-56106   [012] ..... 2419873.470102: ql_dbg_log: qla2xxx [0000:05:00.0]-1006:14:  Prepare to issue mbox cmd=0xde589000.

/sys/kernel/tracing/trace:
             cat-56106   [012] ..... 2419873.470097: ql_dbg_log: qla2xxx [0000:05:00.0]-1054:14:  Entered qla2x00_get_firmware_state.
             cat-56106   [012] ..... 2419873.470100: ql_dbg_log: qla2xxx [0000:05:00.0]-1000:14:  Entered qla2x00_mailbox_command.
             cat-56106   [012] ..... 2419873.470102: ql_dbg_log: qla2xxx [0000:05:00.0]-1006:14:  Prepare to issue mbox cmd=0x69.

It appears that only one should be enabled at a time. Per my read of 
Documentation/trace/ftrace.rst, the main directory and instances have 
separate trace buffers, so I am a bit confused with the above output.

Regards,
-Arun
Steven Rostedt July 19, 2022, 9:40 p.m. UTC | #7
On Tue, 19 Jul 2022 14:06:11 -0700
Arun Easi <aeasi@marvell.com> wrote:

> It appears by calling the above two interfaces, I get a separate instance 
> of "qla" only traces, but with only the "qla"-only instance being enabled, 
> leaving the main (/sys/kernel/tracing/events/qla/enable) one disabled. No 
> issues there, but when I enable both of them, I get garbage values on one.
> 
> /sys/kernel/tracing/instances/qla2xxx/trace:
>              cat-56106   [012] ..... 2419873.470098: ql_dbg_log: qla2xxx [0000:05:00.0]-1054:14:  Entered (null).
>              cat-56106   [012] ..... 2419873.470101: ql_dbg_log: qla2xxx [0000:05:00.0]-1000:14:  Entered ×+<96>²Ü<98>^H.
>              cat-56106   [012] ..... 2419873.470102: ql_dbg_log: qla2xxx [0000:05:00.0]-1006:14:  Prepare to issue mbox cmd=0xde589000.
> 
> /sys/kernel/tracing/trace:
>              cat-56106   [012] ..... 2419873.470097: ql_dbg_log: qla2xxx [0000:05:00.0]-1054:14:  Entered qla2x00_get_firmware_state.
>              cat-56106   [012] ..... 2419873.470100: ql_dbg_log: qla2xxx [0000:05:00.0]-1000:14:  Entered qla2x00_mailbox_command.
>              cat-56106   [012] ..... 2419873.470102: ql_dbg_log: qla2xxx [0000:05:00.0]-1006:14:  Prepare to issue mbox cmd=0x69.
> 
> It appears that only one should be enabled at a time. Per my read of 
> Documentation/trace/ftrace.rst, the main directory and instances have 
> separate trace buffers, so I am a bit confused with the above output.

That's because it uses va_list, and va_list can only be used once.

I just added helpers for va_list use cases:

   https://lore.kernel.org/all/20220705224453.120955146@goodmis.org/

But it will likely suffer the same issue, but I can easily fix that with
this on top:

Not even compiled tested:

-- Steve

diff --git a/include/trace/stages/stage6_event_callback.h b/include/trace/stages/stage6_event_callback.h
index 0f51f6b3ab70..3c554a585320 100644
--- a/include/trace/stages/stage6_event_callback.h
+++ b/include/trace/stages/stage6_event_callback.h
@@ -40,7 +40,12 @@
 
 #undef __assign_vstr
 #define __assign_vstr(dst, fmt, va)					\
-	vsnprintf(__get_str(dst), TRACE_EVENT_STR_MAX, fmt, *(va))
+	do {								\
+		va_list __cp_va;					\
+		va_copy(__cp_va, *(va));				\
+		vsnprintf(__get_str(dst), TRACE_EVENT_STR_MAX, fmt, __cp_va); \
+		va_end(__cp_va);					\
+	} while (0)
 
 #undef __bitmask
 #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
Arun Easi July 19, 2022, 10:09 p.m. UTC | #8
On Tue, 19 Jul 2022, 2:40pm, Steven Rostedt wrote:

> On Tue, 19 Jul 2022 14:06:11 -0700
> Arun Easi <aeasi@marvell.com> wrote:
> 
> > It appears by calling the above two interfaces, I get a separate instance 
> > of "qla" only traces, but with only the "qla"-only instance being enabled, 
> > leaving the main (/sys/kernel/tracing/events/qla/enable) one disabled. No 
> > issues there, but when I enable both of them, I get garbage values on one.
> > 
> > /sys/kernel/tracing/instances/qla2xxx/trace:
> >              cat-56106   [012] ..... 2419873.470098: ql_dbg_log: qla2xxx [0000:05:00.0]-1054:14:  Entered (null).
> >              cat-56106   [012] ..... 2419873.470101: ql_dbg_log: qla2xxx [0000:05:00.0]-1000:14:  Entered ×+<96>²Ü<98>^H.
> >              cat-56106   [012] ..... 2419873.470102: ql_dbg_log: qla2xxx [0000:05:00.0]-1006:14:  Prepare to issue mbox cmd=0xde589000.
> > 
> > /sys/kernel/tracing/trace:
> >              cat-56106   [012] ..... 2419873.470097: ql_dbg_log: qla2xxx [0000:05:00.0]-1054:14:  Entered qla2x00_get_firmware_state.
> >              cat-56106   [012] ..... 2419873.470100: ql_dbg_log: qla2xxx [0000:05:00.0]-1000:14:  Entered qla2x00_mailbox_command.
> >              cat-56106   [012] ..... 2419873.470102: ql_dbg_log: qla2xxx [0000:05:00.0]-1006:14:  Prepare to issue mbox cmd=0x69.
> > 
> > It appears that only one should be enabled at a time. Per my read of 
> > Documentation/trace/ftrace.rst, the main directory and instances have 
> > separate trace buffers, so I am a bit confused with the above output.
> 
> That's because it uses va_list, and va_list can only be used once.

Ah, that makes sense.

> 
> I just added helpers for va_list use cases:
> 
>    https://urldefense.proofpoint.com/v2/url?u=https-3A__lore.kernel.org_all_20220705224453.120955146-40goodmis.org_&d=DwIFaQ&c=nKjWec2b6R0mOyPaz7xtfQ&r=P-q_Qkt75qFy33SvdD2nAxAyN87eO1d-mFO-lqNOomw&m=cDBXba_lNDGk7c1Qm4elDe1Co-RV8zR-c1A9xV7vc4nMcQO7iRle72qNxljHsDNA&s=74PzgMj6nmfRCLXd3oMIqt-_DZbQeukUl0nK7MzfUjs&e= 
> 
> But it will likely suffer the same issue, but I can easily fix that with
> this on top:
> 
> Not even compiled tested:
> 
> -- Steve
> 
> diff --git a/include/trace/stages/stage6_event_callback.h b/include/trace/stages/stage6_event_callback.h
> index 0f51f6b3ab70..3c554a585320 100644
> --- a/include/trace/stages/stage6_event_callback.h
> +++ b/include/trace/stages/stage6_event_callback.h
> @@ -40,7 +40,12 @@
>  
>  #undef __assign_vstr
>  #define __assign_vstr(dst, fmt, va)					\
> -	vsnprintf(__get_str(dst), TRACE_EVENT_STR_MAX, fmt, *(va))
> +	do {								\
> +		va_list __cp_va;					\
> +		va_copy(__cp_va, *(va));				\
> +		vsnprintf(__get_str(dst), TRACE_EVENT_STR_MAX, fmt, __cp_va); \
> +		va_end(__cp_va);					\
> +	} while (0)
>  
>  #undef __bitmask
>  #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
> 

This will be nice.

Thanks for your help, Steve.

Regards,
-Arun
Arun Easi July 19, 2022, 10:09 p.m. UTC | #9
Hi Daniel,

On Tue, 19 Jul 2022, 1:25am, Daniel Wagner wrote:

> External Email
> 
> ----------------------------------------------------------------------
> Hi Arun,
> 
> On Mon, Jul 18, 2022 at 03:22:43PM -0400, Steven Rostedt wrote:
> > On Mon, 18 Jul 2022 12:02:26 -0700
> > Arun Easi <aeasi@marvell.com> wrote:
> > 
> > > Many times when a problem was reported on the driver, we had to request 
> > > for a repro with extended error logging (via driver module parameter) 
> > > turned on. With this internal tracing in place, log messages that appear 
> > > only with extended error logging are captured by default in the internal 
> > > trace buffer.
> > > 
> > > AFAIK, kernel tracing requires a user initiated action to be turned on, 
> > > like enabling individual tracepoints. Though a script (either startup or 
> > > udev) can do this job, enabling tracepoints by default for a single 
> > > driver, I think, may not be a preferred choice -- particularly when the 
> > > trace buffer is shared across the kernel. That also brings the extra 
> > > overhead of finding how this could be done across various distros.
> > > 
> > > For cases where the memory/driver size matters, there is an option to 
> > > compile out this feature, plus choosing a lower default trace buffer size.
> 
> I am really asking the question why do we need to add special debugging
> code to every single driver? Can't we try to make more use of generic
> code and extend it if necessary?
> 
> Both FC drivers qla2xxx and lpfc are doing their own thing for
> debugging/logging and I really fail to see why we can't not move towards
> a more generic way. Dumping logs to the kernel log was the simplest way
> but as this series shows, this is not something you can do in production
> systems.

No contention here on a generic way. The per instance trace creation and 
enabling from within the kernel looks like such a one. Let me revisit the 
trace patches with this new info.

Regards,
-Arun

> 
> > You can enable an ftrace instance from inside the kernel, and make it a
> > compile time option.
> >
> > 	#include <linux/trace_events.h>
> > 	#include <linux/trace.h>
> > 
> > 	struct trace_array *tr;
> > 
> > 	tr = trace_array_get_by_name("qla2xxx");
> > 	trace_array_set_clr_event(tr, "qla", NULL, true);
> > 
> > And now you have trace events running:
> > 
> >  # cat /sys/kernel/tracing/instances/qla/trace
> 
> Thanks Steve for the tip!
> 
> Daniel
>
Steven Rostedt July 19, 2022, 10:17 p.m. UTC | #10
On Tue, 19 Jul 2022 15:09:30 -0700
Arun Easi <aeasi@marvell.com> wrote:

> This will be nice.
> 
> Thanks for your help, Steve.

No problem.

What a coincidence that I just finished this work earlier this month.

-- Steve
kernel test robot July 20, 2022, 4:43 a.m. UTC | #11
Hi Nilesh,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on f095c3cd1b694a73a5de276dae919f05a8dd1811]

url:    https://github.com/intel-lab-lkp/linux/commits/Nilesh-Javali/qla2xxx-driver-features/20220715-140608
base:   f095c3cd1b694a73a5de276dae919f05a8dd1811
config: mips-allyesconfig (https://download.01.org/0day-ci/archive/20220720/202207201230.esVUm6qp-lkp@intel.com/config)
compiler: mips-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/d5f3010956e8a08bd2742acc3715478d1b961178
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Nilesh-Javali/qla2xxx-driver-features/20220715-140608
        git checkout d5f3010956e8a08bd2742acc3715478d1b961178
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=mips SHELL=/bin/bash drivers/scsi/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

Note: the linux-review/Nilesh-Javali/qla2xxx-driver-features/20220715-140608 HEAD d044ae5809919c7fbaa3ca3c374d1dfd00403990 builds fine.
      It only hurts bisectability.

All error/warnings (new ones prefixed by >>):

   In file included from drivers/scsi/qla2xxx/qla_def.h:5498,
                    from drivers/scsi/qla2xxx/qla_os.c:6:
   drivers/scsi/qla2xxx/qla_dbg.h: In function 'qla_trace_init':
>> drivers/scsi/qla2xxx/qla_dbg.h:438:21: error: implicit declaration of function 'vzalloc'; did you mean 'kvzalloc'? [-Werror=implicit-function-declaration]
     438 |         trc->recs = vzalloc(trc->num_entries *
         |                     ^~~~~~~
         |                     kvzalloc
>> drivers/scsi/qla2xxx/qla_dbg.h:438:19: warning: assignment to 'struct qla_trace_rec *' from 'int' makes pointer from integer without a cast [-Wint-conversion]
     438 |         trc->recs = vzalloc(trc->num_entries *
         |                   ^
   drivers/scsi/qla2xxx/qla_dbg.h: In function 'qla_trace_uninit':
>> drivers/scsi/qla2xxx/qla_dbg.h:452:9: error: implicit declaration of function 'vfree'; did you mean 'kvfree'? [-Werror=implicit-function-declaration]
     452 |         vfree(trc->recs);
         |         ^~~~~
         |         kvfree
   In file included from drivers/scsi/qla2xxx/qla_os.c:9:
   include/linux/vmalloc.h: At top level:
>> include/linux/vmalloc.h:143:14: error: conflicting types for 'vzalloc'; have 'void *(long unsigned int)'
     143 | extern void *vzalloc(unsigned long size) __alloc_size(1);
         |              ^~~~~~~
   drivers/scsi/qla2xxx/qla_dbg.h:438:21: note: previous implicit declaration of 'vzalloc' with type 'int()'
     438 |         trc->recs = vzalloc(trc->num_entries *
         |                     ^~~~~~~
>> include/linux/vmalloc.h:163:13: warning: conflicting types for 'vfree'; have 'void(const void *)'
     163 | extern void vfree(const void *addr);
         |             ^~~~~
   drivers/scsi/qla2xxx/qla_dbg.h:452:9: note: previous implicit declaration of 'vfree' with type 'void(const void *)'
     452 |         vfree(trc->recs);
         |         ^~~~~
   cc1: some warnings being treated as errors
--
   In file included from drivers/scsi/qla2xxx/qla_def.h:5498,
                    from drivers/scsi/qla2xxx/qla_init.c:6:
   drivers/scsi/qla2xxx/qla_dbg.h: In function 'qla_trace_init':
>> drivers/scsi/qla2xxx/qla_dbg.h:438:21: error: implicit declaration of function 'vzalloc'; did you mean 'kvzalloc'? [-Werror=implicit-function-declaration]
     438 |         trc->recs = vzalloc(trc->num_entries *
         |                     ^~~~~~~
         |                     kvzalloc
>> drivers/scsi/qla2xxx/qla_dbg.h:438:19: warning: assignment to 'struct qla_trace_rec *' from 'int' makes pointer from integer without a cast [-Wint-conversion]
     438 |         trc->recs = vzalloc(trc->num_entries *
         |                   ^
   drivers/scsi/qla2xxx/qla_dbg.h: In function 'qla_trace_uninit':
>> drivers/scsi/qla2xxx/qla_dbg.h:452:9: error: implicit declaration of function 'vfree'; did you mean 'kvfree'? [-Werror=implicit-function-declaration]
     452 |         vfree(trc->recs);
         |         ^~~~~
         |         kvfree
   In file included from drivers/scsi/qla2xxx/qla_init.c:11:
   include/linux/vmalloc.h: At top level:
>> include/linux/vmalloc.h:143:14: error: conflicting types for 'vzalloc'; have 'void *(long unsigned int)'
     143 | extern void *vzalloc(unsigned long size) __alloc_size(1);
         |              ^~~~~~~
   drivers/scsi/qla2xxx/qla_dbg.h:438:21: note: previous implicit declaration of 'vzalloc' with type 'int()'
     438 |         trc->recs = vzalloc(trc->num_entries *
         |                     ^~~~~~~
>> include/linux/vmalloc.h:163:13: warning: conflicting types for 'vfree'; have 'void(const void *)'
     163 | extern void vfree(const void *addr);
         |             ^~~~~
   drivers/scsi/qla2xxx/qla_dbg.h:452:9: note: previous implicit declaration of 'vfree' with type 'void(const void *)'
     452 |         vfree(trc->recs);
         |         ^~~~~
   drivers/scsi/qla2xxx/qla_init.c: In function 'qla24xx_async_abort_cmd':
   drivers/scsi/qla2xxx/qla_init.c:171:17: warning: variable 'bail' set but not used [-Wunused-but-set-variable]
     171 |         uint8_t bail;
         |                 ^~~~
   drivers/scsi/qla2xxx/qla_init.c: In function 'qla2x00_async_tm_cmd':
   drivers/scsi/qla2xxx/qla_init.c:2023:17: warning: variable 'bail' set but not used [-Wunused-but-set-variable]
    2023 |         uint8_t bail;
         |                 ^~~~
   cc1: some warnings being treated as errors
--
   In file included from drivers/scsi/qla2xxx/qla_def.h:5498,
                    from drivers/scsi/qla2xxx/qla_mbx.c:6:
   drivers/scsi/qla2xxx/qla_dbg.h: In function 'qla_trace_init':
>> drivers/scsi/qla2xxx/qla_dbg.h:438:21: error: implicit declaration of function 'vzalloc'; did you mean 'kvzalloc'? [-Werror=implicit-function-declaration]
     438 |         trc->recs = vzalloc(trc->num_entries *
         |                     ^~~~~~~
         |                     kvzalloc
>> drivers/scsi/qla2xxx/qla_dbg.h:438:19: warning: assignment to 'struct qla_trace_rec *' from 'int' makes pointer from integer without a cast [-Wint-conversion]
     438 |         trc->recs = vzalloc(trc->num_entries *
         |                   ^
   drivers/scsi/qla2xxx/qla_dbg.h: In function 'qla_trace_uninit':
>> drivers/scsi/qla2xxx/qla_dbg.h:452:9: error: implicit declaration of function 'vfree'; did you mean 'kvfree'? [-Werror=implicit-function-declaration]
     452 |         vfree(trc->recs);
         |         ^~~~~
         |         kvfree
   cc1: some warnings being treated as errors
--
   In file included from drivers/scsi/qla2xxx/qla_def.h:5498,
                    from drivers/scsi/qla2xxx/qla_iocb.c:6:
   drivers/scsi/qla2xxx/qla_dbg.h: In function 'qla_trace_init':
>> drivers/scsi/qla2xxx/qla_dbg.h:438:21: error: implicit declaration of function 'vzalloc'; did you mean 'kvzalloc'? [-Werror=implicit-function-declaration]
     438 |         trc->recs = vzalloc(trc->num_entries *
         |                     ^~~~~~~
         |                     kvzalloc
>> drivers/scsi/qla2xxx/qla_dbg.h:438:19: warning: assignment to 'struct qla_trace_rec *' from 'int' makes pointer from integer without a cast [-Wint-conversion]
     438 |         trc->recs = vzalloc(trc->num_entries *
         |                   ^
   drivers/scsi/qla2xxx/qla_dbg.h: In function 'qla_trace_uninit':
>> drivers/scsi/qla2xxx/qla_dbg.h:452:9: error: implicit declaration of function 'vfree'; did you mean 'kvfree'? [-Werror=implicit-function-declaration]
     452 |         vfree(trc->recs);
         |         ^~~~~
         |         kvfree
   In file included from include/linux/string.h:253,
                    from include/linux/bitmap.h:11,
                    from include/linux/cpumask.h:12,
                    from include/linux/smp.h:13,
                    from arch/mips/include/asm/cpu-type.h:12,
                    from arch/mips/include/asm/timex.h:19,
                    from include/linux/timex.h:67,
                    from include/linux/time32.h:13,
                    from include/linux/time.h:60,
                    from include/linux/stat.h:19,
                    from include/linux/module.h:13,
                    from drivers/scsi/qla2xxx/qla_def.h:12:
   In function 'fortify_memcpy_chk',
       inlined from 'qla24xx_els_dcmd2_iocb' at drivers/scsi/qla2xxx/qla_iocb.c:3065:2:
   include/linux/fortify-string.h:352:25: warning: call to '__read_overflow2_field' declared with attribute warning: detected read beyond size of field (2nd parameter); maybe use struct_group()? [-Wattribute-warning]
     352 |                         __read_overflow2_field(q_size_field, size);
         |                         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors
--
   In file included from drivers/scsi/qla2xxx/qla_def.h:5498,
                    from drivers/scsi/qla2xxx/qla_dfs.c:6:
   drivers/scsi/qla2xxx/qla_dbg.h: In function 'qla_trace_init':
>> drivers/scsi/qla2xxx/qla_dbg.h:438:21: error: implicit declaration of function 'vzalloc'; did you mean 'kvzalloc'? [-Werror=implicit-function-declaration]
     438 |         trc->recs = vzalloc(trc->num_entries *
         |                     ^~~~~~~
         |                     kvzalloc
>> drivers/scsi/qla2xxx/qla_dbg.h:438:19: warning: assignment to 'struct qla_trace_rec *' from 'int' makes pointer from integer without a cast [-Wint-conversion]
     438 |         trc->recs = vzalloc(trc->num_entries *
         |                   ^
   drivers/scsi/qla2xxx/qla_dbg.h: In function 'qla_trace_uninit':
>> drivers/scsi/qla2xxx/qla_dbg.h:452:9: error: implicit declaration of function 'vfree'; did you mean 'kvfree'? [-Werror=implicit-function-declaration]
     452 |         vfree(trc->recs);
         |         ^~~~~
         |         kvfree
   drivers/scsi/qla2xxx/qla_dfs.c: At top level:
   drivers/scsi/qla2xxx/qla_dfs.c:539:1: warning: 'qla_dfs_trace_write' defined but not used [-Wunused-function]
     539 | qla_dfs_trace_write(struct file *file, const char __user *buffer,
         | ^~~~~~~~~~~~~~~~~~~
   drivers/scsi/qla2xxx/qla_dfs.c:504:1: warning: 'qla_dfs_trace_show' defined but not used [-Wunused-function]
     504 | qla_dfs_trace_show(struct seq_file *s, void *unused)
         | ^~~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors


vim +438 drivers/scsi/qla2xxx/qla_dbg.h

   424	
   425	static inline void
   426	qla_trace_init(struct qla_trace *trc, char *name, u32 num_entries)
   427	{
   428		if (trc->recs)
   429			return;
   430	
   431		memset(trc, 0, sizeof(*trc));
   432	
   433		trc->name = name;
   434		spin_lock_init(&trc->trc_lock);
   435		if (!num_entries)
   436			return;
   437		trc->num_entries = num_entries;
 > 438		trc->recs = vzalloc(trc->num_entries *
   439					sizeof(struct qla_trace_rec));
   440		if (!trc->recs)
   441			return;
   442	
   443		set_bit(QLA_TRACE_ENABLED, &trc->flags);
   444	}
   445	
   446	static inline void
   447	qla_trace_uninit(struct qla_trace *trc)
   448	{
   449		if (!trc->recs)
   450			return;
   451	
 > 452		vfree(trc->recs);
   453		trc->recs = NULL;
   454		clear_bit(QLA_TRACE_ENABLED, &trc->flags);
   455	}
   456
Steffen Maier July 23, 2022, 7:47 a.m. UTC | #12
On 7/20/22 00:09, Arun Easi wrote:
> On Tue, 19 Jul 2022, 1:25am, Daniel Wagner wrote:
>> On Mon, Jul 18, 2022 at 03:22:43PM -0400, Steven Rostedt wrote:
>>> On Mon, 18 Jul 2022 12:02:26 -0700
>>> Arun Easi <aeasi@marvell.com> wrote:
>>>
>>>> Many times when a problem was reported on the driver, we had to request
>>>> for a repro with extended error logging (via driver module parameter)
>>>> turned on. With this internal tracing in place, log messages that appear
>>>> only with extended error logging are captured by default in the internal
>>>> trace buffer.
>>>>
>>>> AFAIK, kernel tracing requires a user initiated action to be turned on,
>>>> like enabling individual tracepoints. Though a script (either startup or
>>>> udev) can do this job, enabling tracepoints by default for a single
>>>> driver, I think, may not be a preferred choice -- particularly when the
>>>> trace buffer is shared across the kernel. That also brings the extra
>>>> overhead of finding how this could be done across various distros.
>>>>
>>>> For cases where the memory/driver size matters, there is an option to
>>>> compile out this feature, plus choosing a lower default trace buffer size.
>>
>> I am really asking the question why do we need to add special debugging
>> code to every single driver? Can't we try to make more use of generic
>> code and extend it if necessary?
>>
>> Both FC drivers qla2xxx and lpfc are doing their own thing for

All three FC drivers:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/s390/scsi/zfcp_dbf.c

We have this flight recorder (multiple topical ring buffers per vHBA to avoid 
some higher frequency topics lose history of others) since a long time [maybe 
pre-dating ftrace], including crash tool support to extract it from post mortem 
kernel dumps. We use binary trace records, like tracepoints, with offline 
decoding/formatting (zfcpdbf in s390-tools package).
Other s390 kernel components share the underlying s390dbf infrastructure.

The crash extension "ftrace" is probably able to do an export from dump for the 
approach Steven suggested. I had used it with kernel function tracer. Very useful.

>> debugging/logging and I really fail to see why we can't not move towards
>> a more generic way. Dumping logs to the kernel log was the simplest way
>> but as this series shows, this is not something you can do in production
>> systems.
> 
> No contention here on a generic way. The per instance trace creation and
> enabling from within the kernel looks like such a one. Let me revisit the
> trace patches with this new info.
> 
> Regards,
> -Arun
> 
>>
>>> You can enable an ftrace instance from inside the kernel, and make it a
>>> compile time option.
>>>
>>> 	#include <linux/trace_events.h>
>>> 	#include <linux/trace.h>
>>>
>>> 	struct trace_array *tr;
>>>
>>> 	tr = trace_array_get_by_name("qla2xxx");
>>> 	trace_array_set_clr_event(tr, "qla", NULL, true);
>>>
>>> And now you have trace events running:
>>>
>>>   # cat /sys/kernel/tracing/instances/qla/trace
>>
>> Thanks Steve for the tip!
>>
>> Daniel
>>
Steven Rostedt July 25, 2022, 1:56 p.m. UTC | #13
On Sat, 23 Jul 2022 09:47:21 +0200
Steffen Maier <maier@linux.ibm.com> wrote:

> The crash extension "ftrace" is probably able to do an export from dump for the 
> approach Steven suggested. I had used it with kernel function tracer. Very useful.

Note, the crash extension is just called "trace" and you can get it here:

  https://github.com/fujitsu/crash-trace

-- Steve
diff mbox series

Patch

diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c
index 7cf1f78cbaee..c4ba8ac51d27 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.c
+++ b/drivers/scsi/qla2xxx/qla_dbg.c
@@ -2777,3 +2777,15 @@  ql_dbg_qp(uint32_t level, struct qla_qpair *qpair, int32_t id,
 	va_end(va);
 
 }
+
+#ifdef QLA_TRACING
+void qla_tracing_init(void)
+{
+	if (is_kdump_kernel())
+		return;
+}
+
+void qla_tracing_exit(void)
+{
+}
+#endif /* QLA_TRACING */
diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h
index feeb1666227f..e0d91a1f81c0 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.h
+++ b/drivers/scsi/qla2xxx/qla_dbg.h
@@ -5,6 +5,7 @@ 
  */
 
 #include "qla_def.h"
+#include <linux/delay.h>
 
 /*
  * Firmware Dump structure definition
@@ -321,6 +322,145 @@  struct qla2xxx_fw_dump {
 
 extern uint ql_errlev;
 
+#ifdef QLA_TRACING
+#include <linux/crash_dump.h>
+
+extern void qla_tracing_init(void);
+extern void qla_tracing_exit(void);
+
+static inline int
+ql_mask_match_ext(uint level, int *log_tunable)
+{
+	if (*log_tunable == 1)
+		*log_tunable = QL_DBG_DEFAULT1_MASK;
+
+	return (level & *log_tunable) == level;
+}
+
+static inline int
+__qla_trace_get(struct qla_trace *trc)
+{
+	if (test_bit(QLA_TRACE_QUIESCE, &trc->flags))
+		return -EIO;
+	atomic_inc(&trc->ref_count);
+	return 0;
+}
+
+static inline int
+qla_trace_get(struct qla_trace *trc)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&trc->trc_lock, flags);
+	ret = __qla_trace_get(trc);
+	spin_unlock_irqrestore(&trc->trc_lock, flags);
+
+	return ret;
+}
+
+static inline void
+qla_trace_put(struct qla_trace *trc)
+{
+	wmb();
+	atomic_dec(&trc->ref_count);
+}
+
+static inline char *
+qla_get_trace_next(struct qla_trace *trc)
+{
+	uint32_t t_ind;
+	char *buf;
+	unsigned long flags;
+
+	spin_lock_irqsave(&trc->trc_lock, flags);
+	if (!test_bit(QLA_TRACE_ENABLED, &trc->flags) ||
+	    __qla_trace_get(trc)) {
+		spin_unlock_irqrestore(&trc->trc_lock, flags);
+		return NULL;
+	}
+	t_ind = trc->trace_ind = qla_trace_ind_norm(trc, trc->trace_ind + 1);
+	spin_unlock_irqrestore(&trc->trc_lock, flags);
+
+	if (!t_ind)
+		set_bit(QLA_TRACE_WRAPPED, &trc->flags);
+
+	buf = qla_trace_record(trc, t_ind);
+	/* Put an end marker '>' for the next record. */
+	qla_trace_record(trc, qla_trace_ind_norm(trc, t_ind + 1))[0] = '>';
+
+	return buf;
+}
+
+static inline int
+qla_trace_quiesce(struct qla_trace *trc)
+{
+	unsigned long flags;
+	u32 cnt = 0;
+	int ret = 0;
+
+	set_bit(QLA_TRACE_QUIESCE, &trc->flags);
+
+	spin_lock_irqsave(&trc->trc_lock, flags);
+	while (atomic_read(&trc->ref_count)) {
+		spin_unlock_irqrestore(&trc->trc_lock, flags);
+
+		msleep(1);
+
+		spin_lock_irqsave(&trc->trc_lock, flags);
+		cnt++;
+		if (cnt > 10 * 1000) {
+			pr_info("qla2xxx: Trace could not be quiesced now (count=%d).",
+				atomic_read(&trc->ref_count));
+			/* Leave trace enabled */
+			clear_bit(QLA_TRACE_QUIESCE, &trc->flags);
+			ret = -EIO;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&trc->trc_lock, flags);
+	return ret;
+}
+
+static inline void
+qla_trace_init(struct qla_trace *trc, char *name, u32 num_entries)
+{
+	if (trc->recs)
+		return;
+
+	memset(trc, 0, sizeof(*trc));
+
+	trc->name = name;
+	spin_lock_init(&trc->trc_lock);
+	if (!num_entries)
+		return;
+	trc->num_entries = num_entries;
+	trc->recs = vzalloc(trc->num_entries *
+				sizeof(struct qla_trace_rec));
+	if (!trc->recs)
+		return;
+
+	set_bit(QLA_TRACE_ENABLED, &trc->flags);
+}
+
+static inline void
+qla_trace_uninit(struct qla_trace *trc)
+{
+	if (!trc->recs)
+		return;
+
+	vfree(trc->recs);
+	trc->recs = NULL;
+	clear_bit(QLA_TRACE_ENABLED, &trc->flags);
+}
+
+#else /* QLA_TRACING */
+#define qla_trace_init(trc, name, num)
+#define qla_trace_uninit(trc)
+#define qla_tracing_init()
+#define qla_tracing_exit()
+#endif /* QLA_TRACING */
+
 void __attribute__((format (printf, 4, 5)))
 ql_dbg(uint, scsi_qla_host_t *vha, uint, const char *fmt, ...);
 void __attribute__((format (printf, 4, 5)))
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 22274b405d01..39322105e7be 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -35,6 +35,37 @@ 
 
 #include <uapi/scsi/fc/fc_els.h>
 
+#define QLA_TRACING /* Captures driver messages to buffer */
+
+#ifdef QLA_TRACING
+#define QLA_TRACE_LINE_SIZE		256	/* Biggest so far is ~215 */
+#define qla_trace_ind_norm(_trc, _ind)	((_ind) >= (_trc)->num_entries ? \
+							0 : (_ind))
+#define qla_trace_record(_trc, __ind)	((_trc)->recs[__ind].buf)
+#define qla_trace_record_len		(sizeof(struct qla_trace_rec))
+#define qla_trace_start(_trc)		qla_trace_record(_trc, 0)
+#define qla_trace_len(_trc)		((_trc)->num_entries)
+#define qla_trace_size(_trc)		(qla_trace_record_len * \
+						(_trc)->num_entries)
+#define qla_trace_cur_ind(_trc)		((_trc)->trace_ind)
+struct qla_trace_rec {
+	char buf[QLA_TRACE_LINE_SIZE];
+};
+
+struct qla_trace {
+#define QLA_TRACE_ENABLED	0 /* allow trace writes or not */
+#define QLA_TRACE_WRAPPED	1
+#define QLA_TRACE_QUIESCE	2
+	unsigned long	flags;
+	atomic_t	ref_count;
+	u32		num_entries;
+	u32		trace_ind;
+	spinlock_t	trc_lock;
+	char		*name;
+	struct qla_trace_rec *recs;
+};
+#endif /* QLA_TRACING */
+
 #define QLA_DFS_DEFINE_DENTRY(_debugfs_file_name) \
 	struct dentry *dfs_##_debugfs_file_name
 #define QLA_DFS_ROOT_DEFINE_DENTRY(_debugfs_file_name) \
diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c
index c3c8b9536ef6..98c6390ad1f1 100644
--- a/drivers/scsi/qla2xxx/qla_dfs.c
+++ b/drivers/scsi/qla2xxx/qla_dfs.c
@@ -489,6 +489,108 @@  qla_dfs_naqp_show(struct seq_file *s, void *unused)
 	return 0;
 }
 
+#ifdef QLA_TRACING
+static char *trace_help = "\
+# Format:\n\
+#   <msec> <cpu#> <message>\n\
+#\n\
+# Trace control by writing:\n\
+#   'enable' - to enable this trace\n\
+#   'disable' - to disable this trace\n\
+#   'resize=<nlines>' - to resize this trace to <nlines>\n\
+#\n";
+
+static int
+qla_dfs_trace_show(struct seq_file *s, void *unused)
+{
+	struct qla_trace *trc = s->private;
+	char *buf;
+	u32 t_ind = 0, i;
+
+	seq_puts(s, trace_help);
+
+	if (qla_trace_get(trc))
+		return 0;
+
+	seq_printf(s, "# Trace max lines = %d, writes = %s\n#\n",
+		   trc->num_entries, test_bit(QLA_TRACE_ENABLED,
+		   &trc->flags) ? "enabled" : "disabled");
+
+	if (test_bit(QLA_TRACE_WRAPPED, &trc->flags))
+		t_ind = qla_trace_cur_ind(trc) + 1;
+
+	for (i = 0; i < qla_trace_len(trc); i++, t_ind++) {
+		t_ind = qla_trace_ind_norm(trc, t_ind);
+		buf = qla_trace_record(trc, t_ind);
+		if (!buf[0])
+			continue;
+		seq_puts(s, buf);
+	}
+
+	mb();
+	qla_trace_put(trc);
+	return 0;
+}
+
+#define string_is(_buf, _str_val) \
+	(strncmp(_str_val, _buf, strlen(_str_val)) == 0)
+
+static ssize_t
+qla_dfs_trace_write(struct file *file, const char __user *buffer,
+		    size_t count, loff_t *pos)
+{
+	struct seq_file *s = file->private_data;
+	struct qla_trace *trc = s->private;
+	char buf[32];
+	ssize_t ret = count;
+
+	memset(buf, 0, sizeof(buf));
+	if (copy_from_user(buf, buffer, min(sizeof(buf), count)))
+		return -EFAULT;
+
+	if (string_is(buf, "enable")) {
+		if (!trc->recs) {
+			pr_warn("qla2xxx: '%s' is empty, resize before enabling.\n",
+					trc->name);
+			return -EINVAL;
+		}
+		pr_info("qla2xxx: Enabling trace '%s'\n", trc->name);
+		set_bit(QLA_TRACE_ENABLED, &trc->flags);
+	} else if (string_is(buf, "disable")) {
+		pr_info("qla2xxx: Disabling trace '%s'\n", trc->name);
+		clear_bit(QLA_TRACE_ENABLED, &trc->flags);
+	} else if (string_is(buf, "resize")) {
+		u32 new_len;
+
+		if (sscanf(buf, "resize=%d", &new_len) != 1)
+			return -EINVAL;
+		if (new_len == trc->num_entries) {
+			pr_info("qla2xxx: New trace size is same as old.\n");
+			return count;
+		}
+		pr_info("qla2xxx: Changing trace '%s' size to %d\n",
+			trc->name, new_len);
+		if (qla_trace_quiesce(trc)) {
+			ret = -EBUSY;
+			goto done;
+		}
+		qla_trace_uninit(trc);
+		/*
+		 * Go through init once again to start creating traces
+		 * based on the respective tunable.
+		 */
+		qla_trace_init(trc, trc->name, new_len);
+		if (!trc->recs) {
+			pr_warn("qla2xxx: Trace allocation failed for '%s'\n",
+				trc->name);
+			ret = -ENOMEM;
+		}
+	}
+done:
+	return ret;
+}
+#endif /* QLA_TRACING */
+
 /*
  * Helper macros for setting up debugfs entries.
  * _name: The name of the debugfs entry
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 0bd0fd1042df..0d2397069cac 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -8191,6 +8191,8 @@  qla2x00_module_init(void)
 	BUILD_BUG_ON(sizeof(sw_info_t) != 32);
 	BUILD_BUG_ON(sizeof(target_id_t) != 2);
 
+	qla_tracing_init();
+
 	/* Allocate cache for SRBs. */
 	srb_cachep = kmem_cache_create("qla2xxx_srbs", sizeof(srb_t), 0,
 	    SLAB_HWCACHE_ALIGN, NULL);
@@ -8269,6 +8271,7 @@  qla2x00_module_init(void)
 
 destroy_cache:
 	kmem_cache_destroy(srb_cachep);
+	qla_tracing_exit();
 	return ret;
 }
 
@@ -8287,6 +8290,8 @@  qla2x00_module_exit(void)
 	fc_release_transport(qla2xxx_transport_template);
 	qlt_exit();
 	kmem_cache_destroy(srb_cachep);
+
+	qla_tracing_exit();
 }
 
 module_init(qla2x00_module_init);