diff mbox series

[v2] block: introduce block_rq_error tracepoint

Message ID 20200120222618.1456-1-xiyou.wangcong@gmail.com (mailing list archive)
State New, archived
Headers show
Series [v2] block: introduce block_rq_error tracepoint | expand

Commit Message

Cong Wang Jan. 20, 2020, 10:26 p.m. UTC
Currently, rasdaemon uses the existing tracepoint block_rq_complete
and filters out non-error cases in order to capture block disk errors.

But there are a few problems with this approach:

1. Even kernel trace filter could do the filtering work, there is
   still some overhead after we enable this tracepoint.

2. The filter is merely based on errno, which does not align with kernel
   logic to check the errors for print_req_error().

3. block_rq_complete only provides dev major and minor to identify
   the block device, it is not convenient to use in user-space.

So introduce a new tracepoint block_rq_error just for the error case
and provides the device name for convenience too. With this patch,
rasdaemon could switch to block_rq_error.

Cc: Jens Axboe <axboe@kernel.dk>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
---
 block/blk-core.c             |  4 +++-
 include/trace/events/block.h | 43 ++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 1 deletion(-)

Comments

Steven Rostedt Jan. 22, 2020, midnight UTC | #1
On Mon, 20 Jan 2020 14:26:18 -0800
Cong Wang <xiyou.wangcong@gmail.com> wrote:

> ---
>  block/blk-core.c             |  4 +++-
>  include/trace/events/block.h | 43 ++++++++++++++++++++++++++++++++++++
>  2 files changed, 46 insertions(+), 1 deletion(-)
> 
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 089e890ab208..0c7ad70d06be 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -1450,8 +1450,10 @@ bool blk_update_request(struct request *req, blk_status_t error,
>  #endif
>  
>  	if (unlikely(error && !blk_rq_is_passthrough(req) &&
> -		     !(req->rq_flags & RQF_QUIET)))
> +		     !(req->rq_flags & RQF_QUIET))) {
> +		trace_block_rq_error(req, blk_status_to_errno(error), nr_bytes);
>  		print_req_error(req, error, __func__);
> +	}
>  
>  	blk_account_io_completion(req, nr_bytes);
>  
> diff --git a/include/trace/events/block.h b/include/trace/events/block.h
> index 81b43f5bdf23..37e99be19536 100644
> --- a/include/trace/events/block.h
> +++ b/include/trace/events/block.h
> @@ -145,6 +145,49 @@ TRACE_EVENT(block_rq_complete,
>  		  __entry->nr_sector, __entry->error)
>  );
>  
> +/**
> + * block_rq_error - block IO operation error reported by device driver
> + * @rq: block operations request
> + * @error: status code
> + * @nr_bytes: number of completed bytes
> + *
> + * The block_rq_error tracepoint event indicates that some portion
> + * of operation request has failed as reported by the device driver.
> + */
> +TRACE_EVENT(block_rq_error,
> +
> +	TP_PROTO(struct request *rq, int error, unsigned int nr_bytes),
> +
> +	TP_ARGS(rq, error, nr_bytes),
> +
> +	TP_STRUCT__entry(
> +		__field(  dev_t,	dev			)
> +		__dynamic_array( char,  name,	DISK_NAME_LEN	)

Hmm, looks like I need to go and do a clean up of the kernel, and
educate people on how to use dynamic arrays :-/

The "len" field of a __dynamic_array() is to be a function to determine
the length needed for each instance of an event. By having a constant
there, it will be the same for every events, plus the meta data to hold
the "dynamic" part of the array. This would be much better to simple
use __array() instead.

But as you use "__assign_str()" below, then it's expected that name is
a nul terminated string. In which case, you want to define this as:

		__string( name, rq->rq_disk ? rq->rq_disk->disk_name : "?"  )


> +		__field(  sector_t,	sector			)
> +		__field(  unsigned int,	nr_sector		)
> +		__field(  int,		error			)
> +		__array(  char,		rwbs,	RWBS_LEN	)
> +		__dynamic_array( char,	cmd,	1		)

Not sure what you are doing with cmd. It appears to be always hard
coded as an empty string?

-- Steve


> +	),
> +
> +	TP_fast_assign(
> +		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
> +		__assign_str(name,   rq->rq_disk ? rq->rq_disk->disk_name : "?");
> +		__entry->sector    = blk_rq_pos(rq);
> +		__entry->nr_sector = nr_bytes >> 9;
> +		__entry->error     = error;
> +
> +		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, nr_bytes);
> +		__get_str(cmd)[0] = '\0';
> +	),
> +
> +	TP_printk("%d,%d %s %s (%s) %llu + %u [%d]",
> +		  MAJOR(__entry->dev), MINOR(__entry->dev),
> +		  __get_str(name), __entry->rwbs, __get_str(cmd),
> +		  (unsigned long long)__entry->sector,
> +		  __entry->nr_sector, __entry->error)
> +);
> +
>  DECLARE_EVENT_CLASS(block_rq,
>  
>  	TP_PROTO(struct request_queue *q, struct request *rq),
Cong Wang Jan. 26, 2020, 8:26 p.m. UTC | #2
On Tue, Jan 21, 2020 at 4:01 PM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Mon, 20 Jan 2020 14:26:18 -0800
> Cong Wang <xiyou.wangcong@gmail.com> wrote:
> > +/**
> > + * block_rq_error - block IO operation error reported by device driver
> > + * @rq: block operations request
> > + * @error: status code
> > + * @nr_bytes: number of completed bytes
> > + *
> > + * The block_rq_error tracepoint event indicates that some portion
> > + * of operation request has failed as reported by the device driver.
> > + */
> > +TRACE_EVENT(block_rq_error,
> > +
> > +     TP_PROTO(struct request *rq, int error, unsigned int nr_bytes),
> > +
> > +     TP_ARGS(rq, error, nr_bytes),
> > +
> > +     TP_STRUCT__entry(
> > +             __field(  dev_t,        dev                     )
> > +             __dynamic_array( char,  name,   DISK_NAME_LEN   )
>
> Hmm, looks like I need to go and do a clean up of the kernel, and
> educate people on how to use dynamic arrays :-/

Yeah.

>
> The "len" field of a __dynamic_array() is to be a function to determine
> the length needed for each instance of an event. By having a constant
> there, it will be the same for every events, plus the meta data to hold
> the "dynamic" part of the array. This would be much better to simple
> use __array() instead.
>
> But as you use "__assign_str()" below, then it's expected that name is
> a nul terminated string. In which case, you want to define this as:
>
>                 __string( name, rq->rq_disk ? rq->rq_disk->disk_name : "?"  )

Ah, I wanted to use string() but all the existing users initialize/assign
them twice: once in TP_STRUCT__entry() and once in TP_fast_assign().


>
>
> > +             __field(  sector_t,     sector                  )
> > +             __field(  unsigned int, nr_sector               )
> > +             __field(  int,          error                   )
> > +             __array(  char,         rwbs,   RWBS_LEN        )
> > +             __dynamic_array( char,  cmd,    1               )
>
> Not sure what you are doing with cmd. It appears to be always hard
> coded as an empty string?

It is supposed to be a string of one-char commands. This is a
copy-n-paste from existing block_rq_requeue(). I don't know why
its length is 1, but yeah it looks wrong to me too.

Thanks!
diff mbox series

Patch

diff --git a/block/blk-core.c b/block/blk-core.c
index 089e890ab208..0c7ad70d06be 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1450,8 +1450,10 @@  bool blk_update_request(struct request *req, blk_status_t error,
 #endif
 
 	if (unlikely(error && !blk_rq_is_passthrough(req) &&
-		     !(req->rq_flags & RQF_QUIET)))
+		     !(req->rq_flags & RQF_QUIET))) {
+		trace_block_rq_error(req, blk_status_to_errno(error), nr_bytes);
 		print_req_error(req, error, __func__);
+	}
 
 	blk_account_io_completion(req, nr_bytes);
 
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 81b43f5bdf23..37e99be19536 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -145,6 +145,49 @@  TRACE_EVENT(block_rq_complete,
 		  __entry->nr_sector, __entry->error)
 );
 
+/**
+ * block_rq_error - block IO operation error reported by device driver
+ * @rq: block operations request
+ * @error: status code
+ * @nr_bytes: number of completed bytes
+ *
+ * The block_rq_error tracepoint event indicates that some portion
+ * of operation request has failed as reported by the device driver.
+ */
+TRACE_EVENT(block_rq_error,
+
+	TP_PROTO(struct request *rq, int error, unsigned int nr_bytes),
+
+	TP_ARGS(rq, error, nr_bytes),
+
+	TP_STRUCT__entry(
+		__field(  dev_t,	dev			)
+		__dynamic_array( char,  name,	DISK_NAME_LEN	)
+		__field(  sector_t,	sector			)
+		__field(  unsigned int,	nr_sector		)
+		__field(  int,		error			)
+		__array(  char,		rwbs,	RWBS_LEN	)
+		__dynamic_array( char,	cmd,	1		)
+	),
+
+	TP_fast_assign(
+		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__assign_str(name,   rq->rq_disk ? rq->rq_disk->disk_name : "?");
+		__entry->sector    = blk_rq_pos(rq);
+		__entry->nr_sector = nr_bytes >> 9;
+		__entry->error     = error;
+
+		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, nr_bytes);
+		__get_str(cmd)[0] = '\0';
+	),
+
+	TP_printk("%d,%d %s %s (%s) %llu + %u [%d]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __get_str(name), __entry->rwbs, __get_str(cmd),
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->error)
+);
+
 DECLARE_EVENT_CLASS(block_rq,
 
 	TP_PROTO(struct request_queue *q, struct request *rq),