diff mbox series

[RFC] block: stop wait rcu once we can ensure no io while elevator init

Message ID 20210720063147.966670-1-yangerkun@huawei.com (mailing list archive)
State New, archived
Headers show
Series [RFC] block: stop wait rcu once we can ensure no io while elevator init | expand

Commit Message

yangerkun July 20, 2021, 6:31 a.m. UTC
'commit 737eb78e82d5 ("block: Delay default elevator initialization")'
delay elevator init to fix some problem for special device like SMR.
Also, the commit add the logic to ensure no IO can happened while
blk_mq_init_sched. However, blk_mq_freeze_queue/blk_mq_quiesce_queue
will add RCU Grace period which can lead some overhead(about 36 loop
device try to mount which each Grace period around 20ms).

For loop device, no io can happened while add_disk, so it's safe to skip
this step. Add flag QUEUE_FLAG_NO_INIT_IO to identify this case.

Signed-off-by: yangerkun <yangerkun@huawei.com>
---
 block/elevator.c       | 14 ++++++++++----
 drivers/block/loop.c   |  5 +++++
 include/linux/blkdev.h |  2 ++
 3 files changed, 17 insertions(+), 4 deletions(-)

Comments

yangerkun July 20, 2021, 2:32 p.m. UTC | #1
Test with latest kernel, this will trigger a warning:

[    5.155702] ------------[ cut here ]------------
[    5.156305] WARNING: CPU: 1 PID: 1 at block/mq-deadline-main.c:600 
dd_init_sched+0x207/0x240
[    5.157876] Modules linked in:
[    5.158270] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 
5.14.0-rc2-00003-gec7ef05d2bff-dirty #104
[    5.159379] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), 
BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 
04/01/2014
[    5.160999] RIP: 0010:dd_init_sched+0x207/0x240
[    5.161556] Code: 48 89 ef e8 2b 47 91 ff 48 83 05 1b 15 ef 0b 01 eb 
b6 48 83 05 e1 14 ef 0b 01 41 bd f4 ff ff ff eb de 48 83 05 99 14 ef 0b 
01 <0f> 0b 48 83 05 97 14 ef 0b 01 48 8f
[    5.163812] RSP: 0000:ffffc90000013d20 EFLAGS: 00010202
[    5.164467] RAX: ffff8881060dd200 RBX: ffff888112b18008 RCX: 
0000000000000004
[    5.165315] RDX: 0000000000000001 RSI: 0000000000000206 RDI: 
ffffffff8da3bf64
[    5.166160] RBP: ffffffff8a909c60 R08: ffff888103b67648 R09: 
0000000000000005
[    5.167014] R10: ffff888000000000 R11: 0000160000000000 R12: 
0000000000000001
[    5.167852] R13: ffffffff8a909c60 R14: ffff888104c629a0 R15: 
0000000000000000
[    5.168711] FS:  0000000000000000(0000) GS:ffff88813bc40000(0000) 
knlGS:0000000000000000
[    5.169684] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    5.170365] CR2: 0000000000000000 CR3: 000000000340a000 CR4: 
00000000000006e0
[    5.171204] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 
0000000000000000
[    5.172045] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 
0000000000000400
[    5.172883] Call Trace:
[    5.173183]  blk_mq_init_sched+0x1b3/0x480
[    5.173677]  elevator_init_mq+0x189/0x2b0
[    5.174156]  __device_add_disk+0x2ff/0x520
[    5.174645]  device_add_disk+0x17/0x20
[    5.175094]  loop_add+0x2e7/0x360
[    5.175490]  loop_init+0x135/0x160
[    5.176443]  ? max_loop_setup+0x28/0x28
[    5.176912]  do_one_initcall+0x71/0x330
[    5.177370]  kernel_init_freeable+0x3aa/0x44a
[    5.177900]  ? rest_init+0x150/0x150
[    5.178334]  kernel_init+0x26/0x230
[    5.178760]  ret_from_fork+0x1f/0x30
[    5.179193] ---[ end trace 669dc68f99fcdd5a ]---
[    5.195376] loop: module loaded


08a9ad8bf607388d768a341957d53eae64250c2d ("block/mq-deadline: Add cgroup 
support") add the follow logical:

static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
{
         struct deadline_data *dd;
         struct elevator_queue *eq;
         enum dd_prio prio;
         int ret = -ENOMEM;

         /*
          * Initialization would be very tricky if the queue is not frozen,
          * hence the warning statement below.
          */
         WARN_ON_ONCE(!percpu_ref_is_zero(&q->q_usage_counter)); <=== 
Trigger this






在 2021/7/20 14:31, yangerkun 写道:
> 'commit 737eb78e82d5 ("block: Delay default elevator initialization")'
> delay elevator init to fix some problem for special device like SMR.
> Also, the commit add the logic to ensure no IO can happened while
> blk_mq_init_sched. However, blk_mq_freeze_queue/blk_mq_quiesce_queue
> will add RCU Grace period which can lead some overhead(about 36 loop
> device try to mount which each Grace period around 20ms).
> 
> For loop device, no io can happened while add_disk, so it's safe to skip
> this step. Add flag QUEUE_FLAG_NO_INIT_IO to identify this case.
> 
> Signed-off-by: yangerkun <yangerkun@huawei.com>
> ---
>   block/elevator.c       | 14 ++++++++++----
>   drivers/block/loop.c   |  5 +++++
>   include/linux/blkdev.h |  2 ++
>   3 files changed, 17 insertions(+), 4 deletions(-)
> 
> diff --git a/block/elevator.c b/block/elevator.c
> index 52ada14cfe45..ddf24afb999e 100644
> --- a/block/elevator.c
> +++ b/block/elevator.c
> @@ -672,6 +672,7 @@ void elevator_init_mq(struct request_queue *q)
>   {
>   	struct elevator_type *e;
>   	int err;
> +	bool no_init_io;
>   
>   	if (!elv_support_iosched(q))
>   		return;
> @@ -688,13 +689,18 @@ void elevator_init_mq(struct request_queue *q)
>   	if (!e)
>   		return;
>   
> -	blk_mq_freeze_queue(q);
> -	blk_mq_quiesce_queue(q);
> +	no_init_io = blk_queue_no_init_io(q);
> +	if (!no_init_io) {
> +		blk_mq_freeze_queue(q);
> +		blk_mq_quiesce_queue(q);
> +	}
>   
>   	err = blk_mq_init_sched(q, e);
>   
> -	blk_mq_unquiesce_queue(q);
> -	blk_mq_unfreeze_queue(q);
> +	if (!no_init_io) {
> +		blk_mq_unquiesce_queue(q);
> +		blk_mq_unfreeze_queue(q);
> +	}
>   
>   	if (err) {
>   		pr_warn("\"%s\" elevator initialization failed, "
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index f37b9e3d833c..4667273bf071 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -2326,6 +2326,11 @@ static int loop_add(int i)
>   	disk->private_data	= lo;
>   	disk->queue		= lo->lo_queue;
>   	sprintf(disk->disk_name, "loop%d", i);
> +	/*
> +	 * There won't be io before add_disk, QUEUE_FLAG_NO_INIT_IO can help
> +	 * to save time while elevator_init_mq.
> +	 */
> +	blk_queue_flag_set(QUEUE_FLAG_NO_INIT_IO, lo->lo_queue);
>   	add_disk(disk);
>   	mutex_unlock(&loop_ctl_mutex);
>   	return i;
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 3177181c4326..b070c902d8c9 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -603,6 +603,7 @@ struct request_queue {
>   #define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
>   #define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
>   #define QUEUE_FLAG_NOWAIT       29	/* device supports NOWAIT */
> +#define QUEUE_FLAG_NO_INIT_IO	30	/* no IO can happen while elevator_init_mq */
>   
>   #define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
>   				 (1 << QUEUE_FLAG_SAME_COMP) |		\
> @@ -649,6 +650,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
>   #define blk_queue_fua(q)	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
>   #define blk_queue_registered(q)	test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
>   #define blk_queue_nowait(q)	test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
> +#define blk_queue_no_init_io(q)	test_bit(QUEUE_FLAG_NO_INIT_IO, &(q)->queue_flags)
>   
>   extern void blk_set_pm_only(struct request_queue *q);
>   extern void blk_clear_pm_only(struct request_queue *q);
>
diff mbox series

Patch

diff --git a/block/elevator.c b/block/elevator.c
index 52ada14cfe45..ddf24afb999e 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -672,6 +672,7 @@  void elevator_init_mq(struct request_queue *q)
 {
 	struct elevator_type *e;
 	int err;
+	bool no_init_io;
 
 	if (!elv_support_iosched(q))
 		return;
@@ -688,13 +689,18 @@  void elevator_init_mq(struct request_queue *q)
 	if (!e)
 		return;
 
-	blk_mq_freeze_queue(q);
-	blk_mq_quiesce_queue(q);
+	no_init_io = blk_queue_no_init_io(q);
+	if (!no_init_io) {
+		blk_mq_freeze_queue(q);
+		blk_mq_quiesce_queue(q);
+	}
 
 	err = blk_mq_init_sched(q, e);
 
-	blk_mq_unquiesce_queue(q);
-	blk_mq_unfreeze_queue(q);
+	if (!no_init_io) {
+		blk_mq_unquiesce_queue(q);
+		blk_mq_unfreeze_queue(q);
+	}
 
 	if (err) {
 		pr_warn("\"%s\" elevator initialization failed, "
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f37b9e3d833c..4667273bf071 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -2326,6 +2326,11 @@  static int loop_add(int i)
 	disk->private_data	= lo;
 	disk->queue		= lo->lo_queue;
 	sprintf(disk->disk_name, "loop%d", i);
+	/*
+	 * There won't be io before add_disk, QUEUE_FLAG_NO_INIT_IO can help
+	 * to save time while elevator_init_mq.
+	 */
+	blk_queue_flag_set(QUEUE_FLAG_NO_INIT_IO, lo->lo_queue);
 	add_disk(disk);
 	mutex_unlock(&loop_ctl_mutex);
 	return i;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3177181c4326..b070c902d8c9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -603,6 +603,7 @@  struct request_queue {
 #define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
 #define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
 #define QUEUE_FLAG_NOWAIT       29	/* device supports NOWAIT */
+#define QUEUE_FLAG_NO_INIT_IO	30	/* no IO can happen while elevator_init_mq */
 
 #define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_SAME_COMP) |		\
@@ -649,6 +650,7 @@  bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 #define blk_queue_fua(q)	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
 #define blk_queue_registered(q)	test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
 #define blk_queue_nowait(q)	test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
+#define blk_queue_no_init_io(q)	test_bit(QUEUE_FLAG_NO_INIT_IO, &(q)->queue_flags)
 
 extern void blk_set_pm_only(struct request_queue *q);
 extern void blk_clear_pm_only(struct request_queue *q);