diff mbox series

[1/9] btrfs: add a trace point for reserve tickets

Message ID 35017faea237f88452785b208e4fe36002b46fc9.1601495426.git.josef@toxicpanda.com (mailing list archive)
State New, archived
Headers show
Series Improve preemptive ENOSPC flushing | expand

Commit Message

Josef Bacik Sept. 30, 2020, 8:01 p.m. UTC
While debugging a ENOSPC related performance problem I needed to see the
time difference between start and end of a reserve ticket, so add a
tracepoint where we add the ticket, and where we end the ticket.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
---
 fs/btrfs/space-info.c        |  3 +++
 include/trace/events/btrfs.h | 40 ++++++++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+)

Comments

Nikolay Borisov Oct. 1, 2020, 5:54 a.m. UTC | #1
On 30.09.20 г. 23:01 ч., Josef Bacik wrote:
> While debugging a ENOSPC related performance problem I needed to see the
> time difference between start and end of a reserve ticket, so add a
> tracepoint where we add the ticket, and where we end the ticket.
> 
> Signed-off-by: Josef Bacik <josef@toxicpanda.com>

I wonder can't we get away with a single tracepoint - simply record the
queue time when we create the ticket in __reserve_bytes and then have a
single tracepoint in handle_reserve_ticket which calculates the
difference between curr_time and the written one while the ticket was
queued? IMO it will be more user friendly for manual inspection, I
assume you have analyzed the duration with a bpf script rather than
looking at every pair of start/end events manually? Also won't it be
useful to also have the flush type printed in the end event so that at
least we know what is the type of the ticket ? Let's not forget
tracepoint from the POV of Linus are more or less ABI so it's preferable
if we get them right from the beginning.

> ---
>  fs/btrfs/space-info.c        |  3 +++
>  include/trace/events/btrfs.h | 40 ++++++++++++++++++++++++++++++++++++
>  2 files changed, 43 insertions(+)
> 
> diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
> index 64099565ab8f..40726c8888f7 100644
> --- a/fs/btrfs/space-info.c
> +++ b/fs/btrfs/space-info.c
> @@ -1279,6 +1279,7 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
>  	 * space wasn't reserved at all).
>  	 */
>  	ASSERT(!(ticket->bytes == 0 && ticket->error));
> +	trace_btrfs_end_reserve_ticket(fs_info, ticket->error);
>  	return ret;
>  }
>  
> @@ -1380,6 +1381,8 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
>  			list_add_tail(&ticket.list,
>  				      &space_info->priority_tickets);
>  		}
> +		trace_btrfs_add_reserve_ticket(fs_info, space_info->flags,
> +					       flush, orig_bytes);
>  	} else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
>  		used += orig_bytes;
>  		/*
> diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
> index ecd24c719de4..68d1622623c7 100644
> --- a/include/trace/events/btrfs.h
> +++ b/include/trace/events/btrfs.h
> @@ -2025,6 +2025,46 @@ TRACE_EVENT(btrfs_convert_extent_bit,
>  		  __print_flags(__entry->clear_bits, "|", EXTENT_FLAGS))
>  );
>  
> +TRACE_EVENT(btrfs_add_reserve_ticket,
> +	TP_PROTO(const struct btrfs_fs_info *fs_info, u64 flags, int flush,
> +		 u64 bytes),
> +
> +	TP_ARGS(fs_info, flags, flush, bytes),
> +
> +	TP_STRUCT__entry_btrfs(
> +		__field(	u64,	flags	)
> +		__field(	int,	flush	)
> +		__field(	u64,	bytes	)
> +	),
> +
> +	TP_fast_assign_btrfs(fs_info,
> +		__entry->flags	= flags;
> +		__entry->flush	= flush;
> +		__entry->bytes	= bytes;
> +	),
> +
> +	TP_printk_btrfs("flags=%s flush=%s bytes=%llu",
> +			__print_symbolic(__entry->flush, FLUSH_ACTIONS),
> +			__print_flags(__entry->flags, "|", BTRFS_GROUP_FLAGS),
> +			__entry->bytes)
> +);
> +
> +TRACE_EVENT(btrfs_end_reserve_ticket,
> +	TP_PROTO(const struct btrfs_fs_info *fs_info, int error),
> +
> +	TP_ARGS(fs_info, error),
> +
> +	TP_STRUCT__entry_btrfs(
> +		__field(	int,	error	)
> +	),
> +
> +	TP_fast_assign_btrfs(fs_info,
> +		__entry->error	= error;
> +	),
> +
> +	TP_printk_btrfs("error=%d", __entry->error)
> +);
> +
>  DECLARE_EVENT_CLASS(btrfs_sleep_tree_lock,
>  	TP_PROTO(const struct extent_buffer *eb, u64 start_ns),
>  
>
Josef Bacik Oct. 1, 2020, 9:33 p.m. UTC | #2
On 10/1/20 1:54 AM, Nikolay Borisov wrote:
> 
> 
> On 30.09.20 г. 23:01 ч., Josef Bacik wrote:
>> While debugging a ENOSPC related performance problem I needed to see the
>> time difference between start and end of a reserve ticket, so add a
>> tracepoint where we add the ticket, and where we end the ticket.
>>
>> Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> 
> I wonder can't we get away with a single tracepoint - simply record the
> queue time when we create the ticket in __reserve_bytes and then have a
> single tracepoint in handle_reserve_ticket which calculates the
> difference between curr_time and the written one while the ticket was
> queued? IMO it will be more user friendly for manual inspection, I
> assume you have analyzed the duration with a bpf script rather than
> looking at every pair of start/end events manually? Also won't it be
> useful to also have the flush type printed in the end event so that at
> least we know what is the type of the ticket ? Let's not forget
> tracepoint from the POV of Linus are more or less ABI so it's preferable
> if we get them right from the beginning.

I'm kind of opposed to tracking time for tracepoints.  Yes I used bpf, but it's 
not hard to do, you can just do something like

bpftrace -e 'tracepoint:btrfs:btrfs_add_reserve_ticket {@start[pid] = time();} \
	tracepoint:btrfs:btrfs_end_reserve_ticket { \
		@times = hist(time() - @start[pid]); }'

nothing overly complicated, and leaves all the computation work to the thing 
doing the tracing, not in the kernel itself.

I didn't want to bring the flush type into the end one because I have to store 
it in the ticket, and you can get it from the start trace point.  However it 
probably would be useful if we want to just catch error events, so I'll add 
that.  Thanks,

Josef
diff mbox series

Patch

diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 64099565ab8f..40726c8888f7 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -1279,6 +1279,7 @@  static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
 	 * space wasn't reserved at all).
 	 */
 	ASSERT(!(ticket->bytes == 0 && ticket->error));
+	trace_btrfs_end_reserve_ticket(fs_info, ticket->error);
 	return ret;
 }
 
@@ -1380,6 +1381,8 @@  static int __reserve_bytes(struct btrfs_fs_info *fs_info,
 			list_add_tail(&ticket.list,
 				      &space_info->priority_tickets);
 		}
+		trace_btrfs_add_reserve_ticket(fs_info, space_info->flags,
+					       flush, orig_bytes);
 	} else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
 		used += orig_bytes;
 		/*
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index ecd24c719de4..68d1622623c7 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -2025,6 +2025,46 @@  TRACE_EVENT(btrfs_convert_extent_bit,
 		  __print_flags(__entry->clear_bits, "|", EXTENT_FLAGS))
 );
 
+TRACE_EVENT(btrfs_add_reserve_ticket,
+	TP_PROTO(const struct btrfs_fs_info *fs_info, u64 flags, int flush,
+		 u64 bytes),
+
+	TP_ARGS(fs_info, flags, flush, bytes),
+
+	TP_STRUCT__entry_btrfs(
+		__field(	u64,	flags	)
+		__field(	int,	flush	)
+		__field(	u64,	bytes	)
+	),
+
+	TP_fast_assign_btrfs(fs_info,
+		__entry->flags	= flags;
+		__entry->flush	= flush;
+		__entry->bytes	= bytes;
+	),
+
+	TP_printk_btrfs("flags=%s flush=%s bytes=%llu",
+			__print_symbolic(__entry->flush, FLUSH_ACTIONS),
+			__print_flags(__entry->flags, "|", BTRFS_GROUP_FLAGS),
+			__entry->bytes)
+);
+
+TRACE_EVENT(btrfs_end_reserve_ticket,
+	TP_PROTO(const struct btrfs_fs_info *fs_info, int error),
+
+	TP_ARGS(fs_info, error),
+
+	TP_STRUCT__entry_btrfs(
+		__field(	int,	error	)
+	),
+
+	TP_fast_assign_btrfs(fs_info,
+		__entry->error	= error;
+	),
+
+	TP_printk_btrfs("error=%d", __entry->error)
+);
+
 DECLARE_EVENT_CLASS(btrfs_sleep_tree_lock,
 	TP_PROTO(const struct extent_buffer *eb, u64 start_ns),