diff mbox series

[RFC,05/39] blktrace: add trace note APIs

Message ID 20210225070231.21136-6-chaitanya.kulkarni@wdc.com (mailing list archive)
State New
Headers show
Series blktrace: add block trace extension support | expand

Commit Message

Chaitanya Kulkarni Feb. 25, 2021, 7:01 a.m. UTC
Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
---
 kernel/trace/blktrace.c | 113 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)

Comments

Damien Le Moal Feb. 26, 2021, 4:39 a.m. UTC | #1
No commit message. Add one please.

On 2021/02/25 16:03, Chaitanya Kulkarni wrote:
> Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
> ---
>  kernel/trace/blktrace.c | 113 ++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 113 insertions(+)
> 
> diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
> index e45bbfcb5daf..4871934b9717 100644
> --- a/kernel/trace/blktrace.c
> +++ b/kernel/trace/blktrace.c
> @@ -114,6 +114,52 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
>  	}
>  }
>  
> +static void trace_note_ext(struct blk_trace_ext *bt, pid_t pid, u64 action,
> +			   const void *data, size_t len, u64 cgid, u32 ioprio)
> +{
> +	struct blk_io_trace_ext *t;
> +	struct ring_buffer_event *event = NULL;
> +	struct trace_buffer *buffer = NULL;
> +	int pc = 0;
> +	int cpu = smp_processor_id();
> +	bool blk_tracer = blk_tracer_enabled;
> +	ssize_t cgid_len = cgid ? sizeof(cgid) : 0;
> +
> +	if (blk_tracer) {
> +		buffer = blk_tr->array_buffer.buffer;
> +		pc = preempt_count();
> +		event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
> +						  sizeof(*t) + len + cgid_len,
> +						  0, pc);
> +		if (!event)
> +			return;
> +		t = ring_buffer_event_data(event);
> +		goto record_it;
> +	}
> +
> +	if (!bt->rchan)
> +		return;
> +
> +	t = relay_reserve(bt->rchan, sizeof(*t) + len + cgid_len);
> +	if (t) {
> +		t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION_EXT;
> +		t->time = ktime_to_ns(ktime_get());
> +record_it:
> +		t->device = bt->dev;
> +		t->action = action | (cgid ? __BLK_TN_CGROUP : 0);
> +		t->ioprio = ioprio;
> +		t->pid = pid;
> +		t->cpu = cpu;
> +		t->pdu_len = len + cgid_len;
> +		if (cgid_len)
> +			memcpy((void *)t + sizeof(*t), &cgid, cgid_len);
> +		memcpy((void *) t + sizeof(*t) + cgid_len, data, len);
> +
> +		if (blk_tracer)
> +			trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc);
> +	}
> +}
> +
>  /*
>   * Send out a notify for this process, if we haven't done so since a trace
>   * started
> @@ -132,6 +178,20 @@ static void trace_note_tsk(struct task_struct *tsk)
>  	spin_unlock_irqrestore(&running_trace_lock, flags);
>  }
>  
> +static void trace_note_tsk_ext(struct task_struct *tsk, u32 ioprio)
> +{
> +	unsigned long flags;
> +	struct blk_trace_ext *bt;
> +
> +	tsk->btrace_seq = blktrace_seq;
> +	spin_lock_irqsave(&running_trace_ext_lock, flags);
> +	list_for_each_entry(bt, &running_trace_ext_list, running_ext_list) {
> +		trace_note_ext(bt, tsk->pid, BLK_TN_PROCESS_EXT, tsk->comm,
> +			   sizeof(tsk->comm), 0, ioprio);
> +	}
> +	spin_unlock_irqrestore(&running_trace_ext_lock, flags);
> +}
> +
>  static void trace_note_time(struct blk_trace *bt)
>  {
>  	struct timespec64 now;
> @@ -148,6 +208,22 @@ static void trace_note_time(struct blk_trace *bt)
>  	local_irq_restore(flags);
>  }
>  
> +static void trace_note_time_ext(struct blk_trace_ext *bt)
> +{
> +	struct timespec64 now;
> +	unsigned long flags;
> +	u32 words[2];
> +
> +	/* need to check user space to see if this breaks in y2038 or y2106 */
> +	ktime_get_real_ts64(&now);
> +	words[0] = (u32)now.tv_sec;
> +	words[1] = now.tv_nsec;
> +
> +	local_irq_save(flags);
> +	trace_note_ext(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words), 0, 0);
> +	local_irq_restore(flags);
> +}
> +
>  void __trace_note_message(struct blk_trace *bt, struct blkcg *blkcg,
>  	const char *fmt, ...)
>  {
> @@ -185,6 +261,43 @@ void __trace_note_message(struct blk_trace *bt, struct blkcg *blkcg,
>  }
>  EXPORT_SYMBOL_GPL(__trace_note_message);
>  
> +void __trace_note_message_ext(struct blk_trace_ext *bt, struct blkcg *blkcg,
> +	const char *fmt, ...)
> +{
> +	int n;
> +	va_list args;
> +	unsigned long flags;
> +	char *buf;
> +
> +	if (unlikely(bt->trace_state != Blktrace_running &&
> +		     !blk_tracer_enabled))
> +		return;
> +
> +	/*
> +	 * If the BLK_TC_NOTIFY action mask isn't set, don't send any note
> +	 * message to the trace.
> +	 */
> +	if (!(bt->act_mask & BLK_TC_NOTIFY))
> +		return;
> +
> +	local_irq_save(flags);
> +	buf = this_cpu_ptr(bt->msg_data);
> +	va_start(args, fmt);
> +	n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
> +	va_end(args);
> +
> +	if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
> +		blkcg = NULL;
> +#ifdef CONFIG_BLK_CGROUP
> +	trace_note_ext(bt, 0, BLK_TN_MESSAGE_EXT, buf, n,
> +		blkcg ? cgroup_id(blkcg->css.cgroup) : 1, 0);
> +#else
> +	trace_note_ext(bt, 0, BLK_TN_MESSAGE_EXT, buf, n, 0, 0);
> +#endif
> +	local_irq_restore(flags);
> +}
> +EXPORT_SYMBOL_GPL(__trace_note_message_ext);
> +
>  static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
>  			 pid_t pid)
>  {
> 

I fail to see why the xxx_ext functions need a different blk_trace_ext
structure. It seems that everything should work with a modified blk_trace
structure. With such an approach, a lot of the xxx_ext functions in here may not
be necessary at all. Simply change the interface of the existing note functions.
There are not that many call sites to change, right?
diff mbox series

Patch

diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index e45bbfcb5daf..4871934b9717 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -114,6 +114,52 @@  static void trace_note(struct blk_trace *bt, pid_t pid, int action,
 	}
 }
 
+static void trace_note_ext(struct blk_trace_ext *bt, pid_t pid, u64 action,
+			   const void *data, size_t len, u64 cgid, u32 ioprio)
+{
+	struct blk_io_trace_ext *t;
+	struct ring_buffer_event *event = NULL;
+	struct trace_buffer *buffer = NULL;
+	int pc = 0;
+	int cpu = smp_processor_id();
+	bool blk_tracer = blk_tracer_enabled;
+	ssize_t cgid_len = cgid ? sizeof(cgid) : 0;
+
+	if (blk_tracer) {
+		buffer = blk_tr->array_buffer.buffer;
+		pc = preempt_count();
+		event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
+						  sizeof(*t) + len + cgid_len,
+						  0, pc);
+		if (!event)
+			return;
+		t = ring_buffer_event_data(event);
+		goto record_it;
+	}
+
+	if (!bt->rchan)
+		return;
+
+	t = relay_reserve(bt->rchan, sizeof(*t) + len + cgid_len);
+	if (t) {
+		t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION_EXT;
+		t->time = ktime_to_ns(ktime_get());
+record_it:
+		t->device = bt->dev;
+		t->action = action | (cgid ? __BLK_TN_CGROUP : 0);
+		t->ioprio = ioprio;
+		t->pid = pid;
+		t->cpu = cpu;
+		t->pdu_len = len + cgid_len;
+		if (cgid_len)
+			memcpy((void *)t + sizeof(*t), &cgid, cgid_len);
+		memcpy((void *) t + sizeof(*t) + cgid_len, data, len);
+
+		if (blk_tracer)
+			trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc);
+	}
+}
+
 /*
  * Send out a notify for this process, if we haven't done so since a trace
  * started
@@ -132,6 +178,20 @@  static void trace_note_tsk(struct task_struct *tsk)
 	spin_unlock_irqrestore(&running_trace_lock, flags);
 }
 
+static void trace_note_tsk_ext(struct task_struct *tsk, u32 ioprio)
+{
+	unsigned long flags;
+	struct blk_trace_ext *bt;
+
+	tsk->btrace_seq = blktrace_seq;
+	spin_lock_irqsave(&running_trace_ext_lock, flags);
+	list_for_each_entry(bt, &running_trace_ext_list, running_ext_list) {
+		trace_note_ext(bt, tsk->pid, BLK_TN_PROCESS_EXT, tsk->comm,
+			   sizeof(tsk->comm), 0, ioprio);
+	}
+	spin_unlock_irqrestore(&running_trace_ext_lock, flags);
+}
+
 static void trace_note_time(struct blk_trace *bt)
 {
 	struct timespec64 now;
@@ -148,6 +208,22 @@  static void trace_note_time(struct blk_trace *bt)
 	local_irq_restore(flags);
 }
 
+static void trace_note_time_ext(struct blk_trace_ext *bt)
+{
+	struct timespec64 now;
+	unsigned long flags;
+	u32 words[2];
+
+	/* need to check user space to see if this breaks in y2038 or y2106 */
+	ktime_get_real_ts64(&now);
+	words[0] = (u32)now.tv_sec;
+	words[1] = now.tv_nsec;
+
+	local_irq_save(flags);
+	trace_note_ext(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words), 0, 0);
+	local_irq_restore(flags);
+}
+
 void __trace_note_message(struct blk_trace *bt, struct blkcg *blkcg,
 	const char *fmt, ...)
 {
@@ -185,6 +261,43 @@  void __trace_note_message(struct blk_trace *bt, struct blkcg *blkcg,
 }
 EXPORT_SYMBOL_GPL(__trace_note_message);
 
+void __trace_note_message_ext(struct blk_trace_ext *bt, struct blkcg *blkcg,
+	const char *fmt, ...)
+{
+	int n;
+	va_list args;
+	unsigned long flags;
+	char *buf;
+
+	if (unlikely(bt->trace_state != Blktrace_running &&
+		     !blk_tracer_enabled))
+		return;
+
+	/*
+	 * If the BLK_TC_NOTIFY action mask isn't set, don't send any note
+	 * message to the trace.
+	 */
+	if (!(bt->act_mask & BLK_TC_NOTIFY))
+		return;
+
+	local_irq_save(flags);
+	buf = this_cpu_ptr(bt->msg_data);
+	va_start(args, fmt);
+	n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
+	va_end(args);
+
+	if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
+		blkcg = NULL;
+#ifdef CONFIG_BLK_CGROUP
+	trace_note_ext(bt, 0, BLK_TN_MESSAGE_EXT, buf, n,
+		blkcg ? cgroup_id(blkcg->css.cgroup) : 1, 0);
+#else
+	trace_note_ext(bt, 0, BLK_TN_MESSAGE_EXT, buf, n, 0, 0);
+#endif
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(__trace_note_message_ext);
+
 static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
 			 pid_t pid)
 {