diff mbox series

[v9] io_uring: Statistics of the true utilization of sq threads.

Message ID 20240219064241.20531-1-xiaobing.li@samsung.com (mailing list archive)
State New
Headers show
Series [v9] io_uring: Statistics of the true utilization of sq threads. | expand

Commit Message

Xiaobing Li Feb. 19, 2024, 6:42 a.m. UTC
Count the running time and actual IO processing time of the sqpoll
thread, and output the statistical data to fdinfo.

Variable description:
"work_time" in the code represents the sum of the jiffies of the sq
thread actually processing IO, that is, how many milliseconds it
actually takes to process IO. "total_time" represents the total time
that the sq thread has elapsed from the beginning of the loop to the
current time point, that is, how many milliseconds it has spent in
total.

The test tool is fio, and its parameters are as follows:
[global]
ioengine=io_uring
direct=1
group_reporting
bs=128k
norandommap=1
randrepeat=0
refill_buffers
ramp_time=30s
time_based
runtime=1m
clocksource=clock_gettime
overwrite=1
log_avg_msec=1000
numjobs=1

[disk0]
filename=/dev/nvme0n1
rw=read
iodepth=16
hipri
sqthread_poll=1

The test results are as follows:
Every 2.0s: cat /proc/9230/fdinfo/6 | grep -E Sq
SqMask: 0x3
SqHead: 3197153
SqTail: 3197153
CachedSqHead:   3197153
SqThread:       9231
SqThreadCpu:    11
SqTotalTime:    18099614
SqWorkTime:     16748316

The test results corresponding to different iodepths are as follows:
|-----------|-------|-------|-------|------|-------|
|   iodepth |   1   |   4   |   8   |  16  |  64   |
|-----------|-------|-------|-------|------|-------|
|utilization| 2.9%  | 8.8%  | 10.9% | 92.9%| 84.4% |
|-----------|-------|-------|-------|------|-------|
|    idle   | 97.1% | 91.2% | 89.1% | 7.1% | 15.6% |
|-----------|-------|-------|-------|------|-------|

Signed-off-by: Xiaobing Li <xiaobing.li@samsung.com>

---

changes:
v9:
 - Modified the encoding format

v8:
 - Get the work time of the sqpoll thread through getrusage

v7:
 - Get the total time of the sqpoll thread through getrusage
 - The working time of the sqpoll thread is obtained through ktime_get()

v6:
 - Replace the percentages in the fdinfo output with the actual running
time and the time actually processing IO

v5:
 - list the changes in each iteration.

v4:
 - Resubmit the patch based on removing sq->lock

v3:
 - output actual working time as a percentage of total time
 - detailed description of the meaning of each variable
 - added test results

v2:
 - output the total statistical time and work time to fdinfo

v1:
 - initial version
 - Statistics of total time and work time
 
 io_uring/fdinfo.c |  7 +++++++
 io_uring/sqpoll.c | 17 ++++++++++++++++-
 io_uring/sqpoll.h |  1 +
 3 files changed, 24 insertions(+), 1 deletion(-)

Comments

Xiaobing Li Feb. 21, 2024, 2:04 a.m. UTC | #1
On 2/19/24 14:42, Xiaobing Li wrote:
>diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
>index 976e9500f651..37afc5bac279 100644
>--- a/io_uring/fdinfo.c
>+++ b/io_uring/fdinfo.c
>@@ -55,6 +55,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
> 	struct io_ring_ctx *ctx = f->private_data;
> 	struct io_overflow_cqe *ocqe;
> 	struct io_rings *r = ctx->rings;
>+	struct rusage sq_usage;
> 	unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1;
> 	unsigned int sq_head = READ_ONCE(r->sq.head);
> 	unsigned int sq_tail = READ_ONCE(r->sq.tail);
>@@ -64,6 +65,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
> 	unsigned int sq_shift = 0;
> 	unsigned int sq_entries, cq_entries;
> 	int sq_pid = -1, sq_cpu = -1;
>+	u64 sq_total_time = 0, sq_work_time = 0;
> 	bool has_lock;
> 	unsigned int i;
> 
>@@ -147,10 +149,15 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
> 
> 		sq_pid = sq->task_pid;
> 		sq_cpu = sq->sq_cpu;
>+		getrusage(sq->thread, RUSAGE_SELF, &sq_usage);
>+		sq_total_time = sq_usage.ru_stime.tv_sec * 1000000 + sq_usage.ru_stime.tv_usec;
>+		sq_work_time = sq->work_time;
> 	}
> 
> 	seq_printf(m, "SqThread:\t%d\n", sq_pid);
> 	seq_printf(m, "SqThreadCpu:\t%d\n", sq_cpu);
>+	seq_printf(m, "SqTotalTime:\t%llu\n", sq_total_time);
>+	seq_printf(m, "SqWorkTime:\t%llu\n", sq_work_time);
> 	seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files);
> 	for (i = 0; has_lock && i < ctx->nr_user_files; i++) {
> 		struct file *f = io_file_from_index(&ctx->file_table, i);
>diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
>index 65b5dbe3c850..006d7fc9cf92 100644
>--- a/io_uring/sqpoll.c
>+++ b/io_uring/sqpoll.c
>@@ -219,10 +219,22 @@ static bool io_sqd_handle_event(struct io_sq_data *sqd)
> 	return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
> }
> 
>+static void io_sq_update_worktime(struct io_sq_data *sqd, struct rusage *start)
>+{
>+		struct rusage end;
>+
>+		getrusage(current, RUSAGE_SELF, &end);
>+		end.ru_stime.tv_sec -= start->ru_stime.tv_sec;
>+		end.ru_stime.tv_usec -= start->ru_stime.tv_usec;
>+
>+		sqd->work_time += end.ru_stime.tv_usec + end.ru_stime.tv_sec * 1000000;
>+}
>+
> static int io_sq_thread(void *data)
> {
> 	struct io_sq_data *sqd = data;
> 	struct io_ring_ctx *ctx;
>+	struct rusage start;
> 	unsigned long timeout = 0;
> 	char buf[TASK_COMM_LEN];
> 	DEFINE_WAIT(wait);
>@@ -251,6 +263,7 @@ static int io_sq_thread(void *data)
> 		}
> 
> 		cap_entries = !list_is_singular(&sqd->ctx_list);
>+		getrusage(current, RUSAGE_SELF, &start);
> 		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
> 			int ret = __io_sq_thread(ctx, cap_entries);
> 
>@@ -261,8 +274,10 @@ static int io_sq_thread(void *data)
> 			sqt_spin = true;
> 
> 		if (sqt_spin || !time_after(jiffies, timeout)) {
>-			if (sqt_spin)
>+			if (sqt_spin) {
>+				io_sq_update_worktime(sqd, &start);
> 				timeout = jiffies + sqd->sq_thread_idle;
>+			}
> 			if (unlikely(need_resched())) {
> 				mutex_unlock(&sqd->lock);
> 				cond_resched();
>diff --git a/io_uring/sqpoll.h b/io_uring/sqpoll.h
>index 8df37e8c9149..4171666b1cf4 100644
>--- a/io_uring/sqpoll.h
>+++ b/io_uring/sqpoll.h
>@@ -16,6 +16,7 @@ struct io_sq_data {
> 	pid_t			task_pid;
> 	pid_t			task_tgid;
> 
>+	u64			work_time;
> 	unsigned long		state;
> 	struct completion	exited;
> };
 
Hi, Jens
I have modified the code according to your suggestions.
Do you have any other comments?

--
Xiaobing Li
Jens Axboe Feb. 21, 2024, 5:28 p.m. UTC | #2
On 2/20/24 7:04 PM, Xiaobing Li wrote:
> On 2/19/24 14:42, Xiaobing Li wrote:
>> diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
>> index 976e9500f651..37afc5bac279 100644
>> --- a/io_uring/fdinfo.c
>> +++ b/io_uring/fdinfo.c
>> @@ -55,6 +55,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
>> 	struct io_ring_ctx *ctx = f->private_data;
>> 	struct io_overflow_cqe *ocqe;
>> 	struct io_rings *r = ctx->rings;
>> +	struct rusage sq_usage;
>> 	unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1;
>> 	unsigned int sq_head = READ_ONCE(r->sq.head);
>> 	unsigned int sq_tail = READ_ONCE(r->sq.tail);
>> @@ -64,6 +65,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
>> 	unsigned int sq_shift = 0;
>> 	unsigned int sq_entries, cq_entries;
>> 	int sq_pid = -1, sq_cpu = -1;
>> +	u64 sq_total_time = 0, sq_work_time = 0;
>> 	bool has_lock;
>> 	unsigned int i;
>>
>> @@ -147,10 +149,15 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
>>
>> 		sq_pid = sq->task_pid;
>> 		sq_cpu = sq->sq_cpu;
>> +		getrusage(sq->thread, RUSAGE_SELF, &sq_usage);
>> +		sq_total_time = sq_usage.ru_stime.tv_sec * 1000000 + sq_usage.ru_stime.tv_usec;
>> +		sq_work_time = sq->work_time;
>> 	}
>>
>> 	seq_printf(m, "SqThread:\t%d\n", sq_pid);
>> 	seq_printf(m, "SqThreadCpu:\t%d\n", sq_cpu);
>> +	seq_printf(m, "SqTotalTime:\t%llu\n", sq_total_time);
>> +	seq_printf(m, "SqWorkTime:\t%llu\n", sq_work_time);
>> 	seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files);
>> 	for (i = 0; has_lock && i < ctx->nr_user_files; i++) {
>> 		struct file *f = io_file_from_index(&ctx->file_table, i);
>> diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
>> index 65b5dbe3c850..006d7fc9cf92 100644
>> --- a/io_uring/sqpoll.c
>> +++ b/io_uring/sqpoll.c
>> @@ -219,10 +219,22 @@ static bool io_sqd_handle_event(struct io_sq_data *sqd)
>> 	return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
>> }
>>
>> +static void io_sq_update_worktime(struct io_sq_data *sqd, struct rusage *start)
>> +{
>> +		struct rusage end;
>> +
>> +		getrusage(current, RUSAGE_SELF, &end);
>> +		end.ru_stime.tv_sec -= start->ru_stime.tv_sec;
>> +		end.ru_stime.tv_usec -= start->ru_stime.tv_usec;
>> +
>> +		sqd->work_time += end.ru_stime.tv_usec + end.ru_stime.tv_sec * 1000000;
>> +}
>> +
>> static int io_sq_thread(void *data)
>> {
>> 	struct io_sq_data *sqd = data;
>> 	struct io_ring_ctx *ctx;
>> +	struct rusage start;
>> 	unsigned long timeout = 0;
>> 	char buf[TASK_COMM_LEN];
>> 	DEFINE_WAIT(wait);
>> @@ -251,6 +263,7 @@ static int io_sq_thread(void *data)
>> 		}
>>
>> 		cap_entries = !list_is_singular(&sqd->ctx_list);
>> +		getrusage(current, RUSAGE_SELF, &start);
>> 		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
>> 			int ret = __io_sq_thread(ctx, cap_entries);
>>
>> @@ -261,8 +274,10 @@ static int io_sq_thread(void *data)
>> 			sqt_spin = true;
>>
>> 		if (sqt_spin || !time_after(jiffies, timeout)) {
>> -			if (sqt_spin)
>> +			if (sqt_spin) {
>> +				io_sq_update_worktime(sqd, &start);
>> 				timeout = jiffies + sqd->sq_thread_idle;
>> +			}
>> 			if (unlikely(need_resched())) {
>> 				mutex_unlock(&sqd->lock);
>> 				cond_resched();
>> diff --git a/io_uring/sqpoll.h b/io_uring/sqpoll.h
>> index 8df37e8c9149..4171666b1cf4 100644
>> --- a/io_uring/sqpoll.h
>> +++ b/io_uring/sqpoll.h
>> @@ -16,6 +16,7 @@ struct io_sq_data {
>> 	pid_t			task_pid;
>> 	pid_t			task_tgid;
>>
>> +	u64			work_time;
>> 	unsigned long		state;
>> 	struct completion	exited;
>> };
>  
> Hi, Jens
> I have modified the code according to your suggestions.
> Do you have any other comments?

Out of town this week, I'll check next week. But from a quick look,
looks much better now.
Xiaobing Li Feb. 27, 2024, 5:45 a.m. UTC | #3
On 2/21/24 10:28, Jens Axboe wrote:
>On 2/20/24 7:04 PM, Xiaobing Li wrote:
>> On 2/19/24 14:42, Xiaobing Li wrote:
>>> diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
>>> index 976e9500f651..37afc5bac279 100644
>>> --- a/io_uring/fdinfo.c
>>> +++ b/io_uring/fdinfo.c
>>> @@ -55,6 +55,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
>>> 	struct io_ring_ctx *ctx = f->private_data;
>>> 	struct io_overflow_cqe *ocqe;
>>> 	struct io_rings *r = ctx->rings;
>>> +	struct rusage sq_usage;
>>> 	unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1;
>>> 	unsigned int sq_head = READ_ONCE(r->sq.head);
>>> 	unsigned int sq_tail = READ_ONCE(r->sq.tail);
>>> @@ -64,6 +65,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
>>> 	unsigned int sq_shift = 0;
>>> 	unsigned int sq_entries, cq_entries;
>>> 	int sq_pid = -1, sq_cpu = -1;
>>> +	u64 sq_total_time = 0, sq_work_time = 0;
>>> 	bool has_lock;
>>> 	unsigned int i;
>>>
>>> @@ -147,10 +149,15 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
>>>
>>> 		sq_pid = sq->task_pid;
>>> 		sq_cpu = sq->sq_cpu;
>>> +		getrusage(sq->thread, RUSAGE_SELF, &sq_usage);
>>> +		sq_total_time = sq_usage.ru_stime.tv_sec * 1000000 + sq_usage.ru_stime.tv_usec;
>>> +		sq_work_time = sq->work_time;
>>> 	}
>>>
>>> 	seq_printf(m, "SqThread:\t%d\n", sq_pid);
>>> 	seq_printf(m, "SqThreadCpu:\t%d\n", sq_cpu);
>>> +	seq_printf(m, "SqTotalTime:\t%llu\n", sq_total_time);
>>> +	seq_printf(m, "SqWorkTime:\t%llu\n", sq_work_time);
>>> 	seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files);
>>> 	for (i = 0; has_lock && i < ctx->nr_user_files; i++) {
>>> 		struct file *f = io_file_from_index(&ctx->file_table, i);
>>> diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
>>> index 65b5dbe3c850..006d7fc9cf92 100644
>>> --- a/io_uring/sqpoll.c
>>> +++ b/io_uring/sqpoll.c
>>> @@ -219,10 +219,22 @@ static bool io_sqd_handle_event(struct io_sq_data *sqd)
>>> 	return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
>>> }
>>>
>>> +static void io_sq_update_worktime(struct io_sq_data *sqd, struct rusage *start)
>>> +{
>>> +		struct rusage end;
>>> +
>>> +		getrusage(current, RUSAGE_SELF, &end);
>>> +		end.ru_stime.tv_sec -= start->ru_stime.tv_sec;
>>> +		end.ru_stime.tv_usec -= start->ru_stime.tv_usec;
>>> +
>>> +		sqd->work_time += end.ru_stime.tv_usec + end.ru_stime.tv_sec * 1000000;
>>> +}
>>> +
>>> static int io_sq_thread(void *data)
>>> {
>>> 	struct io_sq_data *sqd = data;
>>> 	struct io_ring_ctx *ctx;
>>> +	struct rusage start;
>>> 	unsigned long timeout = 0;
>>> 	char buf[TASK_COMM_LEN];
>>> 	DEFINE_WAIT(wait);
>>> @@ -251,6 +263,7 @@ static int io_sq_thread(void *data)
>>> 		}
>>>
>>> 		cap_entries = !list_is_singular(&sqd->ctx_list);
>>> +		getrusage(current, RUSAGE_SELF, &start);
>>> 		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
>>> 			int ret = __io_sq_thread(ctx, cap_entries);
>>>
>>> @@ -261,8 +274,10 @@ static int io_sq_thread(void *data)
>>> 			sqt_spin = true;
>>>
>>> 		if (sqt_spin || !time_after(jiffies, timeout)) {
>>> -			if (sqt_spin)
>>> +			if (sqt_spin) {
>>> +				io_sq_update_worktime(sqd, &start);
>>> 				timeout = jiffies + sqd->sq_thread_idle;
>>> +			}
>>> 			if (unlikely(need_resched())) {
>>> 				mutex_unlock(&sqd->lock);
>>> 				cond_resched();
>>> diff --git a/io_uring/sqpoll.h b/io_uring/sqpoll.h
>>> index 8df37e8c9149..4171666b1cf4 100644
>>> --- a/io_uring/sqpoll.h
>>> +++ b/io_uring/sqpoll.h
>>> @@ -16,6 +16,7 @@ struct io_sq_data {
>>> 	pid_t			task_pid;
>>> 	pid_t			task_tgid;
>>>
>>> +	u64			work_time;
>>> 	unsigned long		state;
>>> 	struct completion	exited;
>>> };
>>  
>> Hi, Jens
>> I have modified the code according to your suggestions.
>> Do you have any other comments?
>
>Out of town this week, I'll check next week. But from a quick look,
>looks much better now.
 
Hi, Jens
Do you have time to check now?

--
Xiaobing Li
Jens Axboe Feb. 27, 2024, 2:36 p.m. UTC | #4
On 2/26/24 10:45 PM, Xiaobing Li wrote:
> On 2/21/24 10:28, Jens Axboe wrote:
>> On 2/20/24 7:04 PM, Xiaobing Li wrote:
>>> On 2/19/24 14:42, Xiaobing Li wrote:
>>>> diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
>>>> index 976e9500f651..37afc5bac279 100644
>>>> --- a/io_uring/fdinfo.c
>>>> +++ b/io_uring/fdinfo.c
>>>> @@ -55,6 +55,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
>>>> 	struct io_ring_ctx *ctx = f->private_data;
>>>> 	struct io_overflow_cqe *ocqe;
>>>> 	struct io_rings *r = ctx->rings;
>>>> +	struct rusage sq_usage;
>>>> 	unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1;
>>>> 	unsigned int sq_head = READ_ONCE(r->sq.head);
>>>> 	unsigned int sq_tail = READ_ONCE(r->sq.tail);
>>>> @@ -64,6 +65,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
>>>> 	unsigned int sq_shift = 0;
>>>> 	unsigned int sq_entries, cq_entries;
>>>> 	int sq_pid = -1, sq_cpu = -1;
>>>> +	u64 sq_total_time = 0, sq_work_time = 0;
>>>> 	bool has_lock;
>>>> 	unsigned int i;
>>>>
>>>> @@ -147,10 +149,15 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
>>>>
>>>> 		sq_pid = sq->task_pid;
>>>> 		sq_cpu = sq->sq_cpu;
>>>> +		getrusage(sq->thread, RUSAGE_SELF, &sq_usage);
>>>> +		sq_total_time = sq_usage.ru_stime.tv_sec * 1000000 + sq_usage.ru_stime.tv_usec;
>>>> +		sq_work_time = sq->work_time;
>>>> 	}
>>>>
>>>> 	seq_printf(m, "SqThread:\t%d\n", sq_pid);
>>>> 	seq_printf(m, "SqThreadCpu:\t%d\n", sq_cpu);
>>>> +	seq_printf(m, "SqTotalTime:\t%llu\n", sq_total_time);
>>>> +	seq_printf(m, "SqWorkTime:\t%llu\n", sq_work_time);
>>>> 	seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files);
>>>> 	for (i = 0; has_lock && i < ctx->nr_user_files; i++) {
>>>> 		struct file *f = io_file_from_index(&ctx->file_table, i);
>>>> diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
>>>> index 65b5dbe3c850..006d7fc9cf92 100644
>>>> --- a/io_uring/sqpoll.c
>>>> +++ b/io_uring/sqpoll.c
>>>> @@ -219,10 +219,22 @@ static bool io_sqd_handle_event(struct io_sq_data *sqd)
>>>> 	return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
>>>> }
>>>>
>>>> +static void io_sq_update_worktime(struct io_sq_data *sqd, struct rusage *start)
>>>> +{
>>>> +		struct rusage end;
>>>> +
>>>> +		getrusage(current, RUSAGE_SELF, &end);
>>>> +		end.ru_stime.tv_sec -= start->ru_stime.tv_sec;
>>>> +		end.ru_stime.tv_usec -= start->ru_stime.tv_usec;
>>>> +
>>>> +		sqd->work_time += end.ru_stime.tv_usec + end.ru_stime.tv_sec * 1000000;
>>>> +}
>>>> +
>>>> static int io_sq_thread(void *data)
>>>> {
>>>> 	struct io_sq_data *sqd = data;
>>>> 	struct io_ring_ctx *ctx;
>>>> +	struct rusage start;
>>>> 	unsigned long timeout = 0;
>>>> 	char buf[TASK_COMM_LEN];
>>>> 	DEFINE_WAIT(wait);
>>>> @@ -251,6 +263,7 @@ static int io_sq_thread(void *data)
>>>> 		}
>>>>
>>>> 		cap_entries = !list_is_singular(&sqd->ctx_list);
>>>> +		getrusage(current, RUSAGE_SELF, &start);
>>>> 		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
>>>> 			int ret = __io_sq_thread(ctx, cap_entries);
>>>>
>>>> @@ -261,8 +274,10 @@ static int io_sq_thread(void *data)
>>>> 			sqt_spin = true;
>>>>
>>>> 		if (sqt_spin || !time_after(jiffies, timeout)) {
>>>> -			if (sqt_spin)
>>>> +			if (sqt_spin) {
>>>> +				io_sq_update_worktime(sqd, &start);
>>>> 				timeout = jiffies + sqd->sq_thread_idle;
>>>> +			}
>>>> 			if (unlikely(need_resched())) {
>>>> 				mutex_unlock(&sqd->lock);
>>>> 				cond_resched();
>>>> diff --git a/io_uring/sqpoll.h b/io_uring/sqpoll.h
>>>> index 8df37e8c9149..4171666b1cf4 100644
>>>> --- a/io_uring/sqpoll.h
>>>> +++ b/io_uring/sqpoll.h
>>>> @@ -16,6 +16,7 @@ struct io_sq_data {
>>>> 	pid_t			task_pid;
>>>> 	pid_t			task_tgid;
>>>>
>>>> +	u64			work_time;
>>>> 	unsigned long		state;
>>>> 	struct completion	exited;
>>>> };
>>>  
>>> Hi, Jens
>>> I have modified the code according to your suggestions.
>>> Do you have any other comments?
>>
>> Out of town this week, I'll check next week. But from a quick look,
>> looks much better now.
>  
> Hi, Jens
> Do you have time to check now?

Can I ask you to resend it against for-6.9/io_uring? For some reason I
don't see the original patch on the list.
Xiaobing Li Feb. 28, 2024, 6:30 a.m. UTC | #5
On 2/27/24 14:36, Jens Axboe wrote:
>On 2/26/24 10:45 PM, Xiaobing Li wrote:
>> On 2/21/24 10:28, Jens Axboe wrote:
>>> On 2/20/24 7:04 PM, Xiaobing Li wrote:
>>>> On 2/19/24 14:42, Xiaobing Li wrote:
>>>>> diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
>>>>> index 976e9500f651..37afc5bac279 100644
>>>>> --- a/io_uring/fdinfo.c
>>>>> +++ b/io_uring/fdinfo.c
>>>>> @@ -55,6 +55,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
>>>>> 	struct io_ring_ctx *ctx = f->private_data;
>>>>> 	struct io_overflow_cqe *ocqe;
>>>>> 	struct io_rings *r = ctx->rings;
>>>>> +	struct rusage sq_usage;
>>>>> 	unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1;
>>>>> 	unsigned int sq_head = READ_ONCE(r->sq.head);
>>>>> 	unsigned int sq_tail = READ_ONCE(r->sq.tail);
>>>>> @@ -64,6 +65,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
>>>>> 	unsigned int sq_shift = 0;
>>>>> 	unsigned int sq_entries, cq_entries;
>>>>> 	int sq_pid = -1, sq_cpu = -1;
>>>>> +	u64 sq_total_time = 0, sq_work_time = 0;
>>>>> 	bool has_lock;
>>>>> 	unsigned int i;
>>>>>
>>>>> @@ -147,10 +149,15 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
>>>>>
>>>>> 		sq_pid = sq->task_pid;
>>>>> 		sq_cpu = sq->sq_cpu;
>>>>> +		getrusage(sq->thread, RUSAGE_SELF, &sq_usage);
>>>>> +		sq_total_time = sq_usage.ru_stime.tv_sec * 1000000 + sq_usage.ru_stime.tv_usec;
>>>>> +		sq_work_time = sq->work_time;
>>>>> 	}
>>>>>
>>>>> 	seq_printf(m, "SqThread:\t%d\n", sq_pid);
>>>>> 	seq_printf(m, "SqThreadCpu:\t%d\n", sq_cpu);
>>>>> +	seq_printf(m, "SqTotalTime:\t%llu\n", sq_total_time);
>>>>> +	seq_printf(m, "SqWorkTime:\t%llu\n", sq_work_time);
>>>>> 	seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files);
>>>>> 	for (i = 0; has_lock && i < ctx->nr_user_files; i++) {
>>>>> 		struct file *f = io_file_from_index(&ctx->file_table, i);
>>>>> diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
>>>>> index 65b5dbe3c850..006d7fc9cf92 100644
>>>>> --- a/io_uring/sqpoll.c
>>>>> +++ b/io_uring/sqpoll.c
>>>>> @@ -219,10 +219,22 @@ static bool io_sqd_handle_event(struct io_sq_data *sqd)
>>>>> 	return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
>>>>> }
>>>>>
>>>>> +static void io_sq_update_worktime(struct io_sq_data *sqd, struct rusage *start)
>>>>> +{
>>>>> +		struct rusage end;
>>>>> +
>>>>> +		getrusage(current, RUSAGE_SELF, &end);
>>>>> +		end.ru_stime.tv_sec -= start->ru_stime.tv_sec;
>>>>> +		end.ru_stime.tv_usec -= start->ru_stime.tv_usec;
>>>>> +
>>>>> +		sqd->work_time += end.ru_stime.tv_usec + end.ru_stime.tv_sec * 1000000;
>>>>> +}
>>>>> +
>>>>> static int io_sq_thread(void *data)
>>>>> {
>>>>> 	struct io_sq_data *sqd = data;
>>>>> 	struct io_ring_ctx *ctx;
>>>>> +	struct rusage start;
>>>>> 	unsigned long timeout = 0;
>>>>> 	char buf[TASK_COMM_LEN];
>>>>> 	DEFINE_WAIT(wait);
>>>>> @@ -251,6 +263,7 @@ static int io_sq_thread(void *data)
>>>>> 		}
>>>>>
>>>>> 		cap_entries = !list_is_singular(&sqd->ctx_list);
>>>>> +		getrusage(current, RUSAGE_SELF, &start);
>>>>> 		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
>>>>> 			int ret = __io_sq_thread(ctx, cap_entries);
>>>>>
>>>>> @@ -261,8 +274,10 @@ static int io_sq_thread(void *data)
>>>>> 			sqt_spin = true;
>>>>>
>>>>> 		if (sqt_spin || !time_after(jiffies, timeout)) {
>>>>> -			if (sqt_spin)
>>>>> +			if (sqt_spin) {
>>>>> +				io_sq_update_worktime(sqd, &start);
>>>>> 				timeout = jiffies + sqd->sq_thread_idle;
>>>>> +			}
>>>>> 			if (unlikely(need_resched())) {
>>>>> 				mutex_unlock(&sqd->lock);
>>>>> 				cond_resched();
>>>>> diff --git a/io_uring/sqpoll.h b/io_uring/sqpoll.h
>>>>> index 8df37e8c9149..4171666b1cf4 100644
>>>>> --- a/io_uring/sqpoll.h
>>>>> +++ b/io_uring/sqpoll.h
>>>>> @@ -16,6 +16,7 @@ struct io_sq_data {
>>>>> 	pid_t			task_pid;
>>>>> 	pid_t			task_tgid;
>>>>>
>>>>> +	u64			work_time;
>>>>> 	unsigned long		state;
>>>>> 	struct completion	exited;
>>>>> };
>>>>  
>>>> Hi, Jens
>>>> I have modified the code according to your suggestions.
>>>> Do you have any other comments?
>>>
>>> Out of town this week, I'll check next week. But from a quick look,
>>> looks much better now.
>>  
>> Hi, Jens
>> Do you have time to check now?
>
>Can I ask you to resend it against for-6.9/io_uring? For some reason I
>don't see the original patch on the list.
 
ok, I'll resend a v10.

--
Xiaobing Li
diff mbox series

Patch

diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
index 976e9500f651..37afc5bac279 100644
--- a/io_uring/fdinfo.c
+++ b/io_uring/fdinfo.c
@@ -55,6 +55,7 @@  __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
 	struct io_ring_ctx *ctx = f->private_data;
 	struct io_overflow_cqe *ocqe;
 	struct io_rings *r = ctx->rings;
+	struct rusage sq_usage;
 	unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1;
 	unsigned int sq_head = READ_ONCE(r->sq.head);
 	unsigned int sq_tail = READ_ONCE(r->sq.tail);
@@ -64,6 +65,7 @@  __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
 	unsigned int sq_shift = 0;
 	unsigned int sq_entries, cq_entries;
 	int sq_pid = -1, sq_cpu = -1;
+	u64 sq_total_time = 0, sq_work_time = 0;
 	bool has_lock;
 	unsigned int i;
 
@@ -147,10 +149,15 @@  __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
 
 		sq_pid = sq->task_pid;
 		sq_cpu = sq->sq_cpu;
+		getrusage(sq->thread, RUSAGE_SELF, &sq_usage);
+		sq_total_time = sq_usage.ru_stime.tv_sec * 1000000 + sq_usage.ru_stime.tv_usec;
+		sq_work_time = sq->work_time;
 	}
 
 	seq_printf(m, "SqThread:\t%d\n", sq_pid);
 	seq_printf(m, "SqThreadCpu:\t%d\n", sq_cpu);
+	seq_printf(m, "SqTotalTime:\t%llu\n", sq_total_time);
+	seq_printf(m, "SqWorkTime:\t%llu\n", sq_work_time);
 	seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files);
 	for (i = 0; has_lock && i < ctx->nr_user_files; i++) {
 		struct file *f = io_file_from_index(&ctx->file_table, i);
diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index 65b5dbe3c850..006d7fc9cf92 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -219,10 +219,22 @@  static bool io_sqd_handle_event(struct io_sq_data *sqd)
 	return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
 }
 
+static void io_sq_update_worktime(struct io_sq_data *sqd, struct rusage *start)
+{
+		struct rusage end;
+
+		getrusage(current, RUSAGE_SELF, &end);
+		end.ru_stime.tv_sec -= start->ru_stime.tv_sec;
+		end.ru_stime.tv_usec -= start->ru_stime.tv_usec;
+
+		sqd->work_time += end.ru_stime.tv_usec + end.ru_stime.tv_sec * 1000000;
+}
+
 static int io_sq_thread(void *data)
 {
 	struct io_sq_data *sqd = data;
 	struct io_ring_ctx *ctx;
+	struct rusage start;
 	unsigned long timeout = 0;
 	char buf[TASK_COMM_LEN];
 	DEFINE_WAIT(wait);
@@ -251,6 +263,7 @@  static int io_sq_thread(void *data)
 		}
 
 		cap_entries = !list_is_singular(&sqd->ctx_list);
+		getrusage(current, RUSAGE_SELF, &start);
 		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
 			int ret = __io_sq_thread(ctx, cap_entries);
 
@@ -261,8 +274,10 @@  static int io_sq_thread(void *data)
 			sqt_spin = true;
 
 		if (sqt_spin || !time_after(jiffies, timeout)) {
-			if (sqt_spin)
+			if (sqt_spin) {
+				io_sq_update_worktime(sqd, &start);
 				timeout = jiffies + sqd->sq_thread_idle;
+			}
 			if (unlikely(need_resched())) {
 				mutex_unlock(&sqd->lock);
 				cond_resched();
diff --git a/io_uring/sqpoll.h b/io_uring/sqpoll.h
index 8df37e8c9149..4171666b1cf4 100644
--- a/io_uring/sqpoll.h
+++ b/io_uring/sqpoll.h
@@ -16,6 +16,7 @@  struct io_sq_data {
 	pid_t			task_pid;
 	pid_t			task_tgid;
 
+	u64			work_time;
 	unsigned long		state;
 	struct completion	exited;
 };