diff mbox series

[-next] blk-wbt: call rq_qos_add() after wb_normal is initialized

Message ID 20220913105749.3086243-1-yukuai1@huaweicloud.com (mailing list archive)
State New, archived
Headers show
Series [-next] blk-wbt: call rq_qos_add() after wb_normal is initialized | expand

Commit Message

Yu Kuai Sept. 13, 2022, 10:57 a.m. UTC
From: Yu Kuai <yukuai3@huawei.com>

Our test found a problem that wbt inflight counter is negative, which
will cause io hang(noted that this problem doesn't exist in mainline):

t1: device create	t2: issue io
add_disk
 blk_register_queue
  wbt_enable_default
   wbt_init
    rq_qos_add
    // wb_normal is still 0
			/*
			 * in mainline, disk can't be opened before
			 * bdev_add(), however, in old kernels, disk
			 * can be opened before blk_register_queue().
			 */
			blkdev_issue_flush
                        // disk size is 0, however, it's not checked
                         submit_bio_wait
                          submit_bio
                           blk_mq_submit_bio
                            rq_qos_throttle
                             wbt_wait
			      bio_to_wbt_flags
                               rwb_enabled
			       // wb_normal is 0, inflight is not increased

    wbt_queue_depth_changed(&rwb->rqos);
     wbt_update_limits
     // wb_normal is initialized
                            rq_qos_track
                             wbt_track
                              rq->wbt_flags |= bio_to_wbt_flags(rwb, bio);
			      // wb_normal is not 0,wbt_flags will be set
t3: io completion
blk_mq_free_request
 rq_qos_done
  wbt_done
   wbt_is_tracked
   // return true
   __wbt_done
    wbt_rqw_done
     atomic_dec_return(&rqw->inflight);
     // inflight is decreased

commit 8235b5c1e8c1 ("block: call bdev_add later in device_add_disk") can
avoid this problem, however it's better to fix this problem in wbt:

1) Lower kernel can't backport this patch due to lots of refactor.
2) Root cause is that wbt call rq_qos_add() before wb_normal is
initialized.

Fixes: e34cbd307477 ("blk-wbt: add general throttling mechanism")
Cc: <stable@vger.kernel.org>
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-wbt.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

Comments

Yu Kuai Sept. 21, 2022, 9:45 a.m. UTC | #1
在 2022/09/13 18:57, Yu Kuai 写道:
> From: Yu Kuai <yukuai3@huawei.com>
> 
> Our test found a problem that wbt inflight counter is negative, which
> will cause io hang(noted that this problem doesn't exist in mainline):
> 
> t1: device create	t2: issue io
> add_disk
>   blk_register_queue
>    wbt_enable_default
>     wbt_init
>      rq_qos_add
>      // wb_normal is still 0
> 			/*
> 			 * in mainline, disk can't be opened before
> 			 * bdev_add(), however, in old kernels, disk
> 			 * can be opened before blk_register_queue().
> 			 */
> 			blkdev_issue_flush
>                          // disk size is 0, however, it's not checked
>                           submit_bio_wait
>                            submit_bio
>                             blk_mq_submit_bio
>                              rq_qos_throttle
>                               wbt_wait
> 			      bio_to_wbt_flags
>                                 rwb_enabled
> 			       // wb_normal is 0, inflight is not increased
> 
>      wbt_queue_depth_changed(&rwb->rqos);
>       wbt_update_limits
>       // wb_normal is initialized
>                              rq_qos_track
>                               wbt_track
>                                rq->wbt_flags |= bio_to_wbt_flags(rwb, bio);
> 			      // wb_normal is not 0,wbt_flags will be set
> t3: io completion
> blk_mq_free_request
>   rq_qos_done
>    wbt_done
>     wbt_is_tracked
>     // return true
>     __wbt_done
>      wbt_rqw_done
>       atomic_dec_return(&rqw->inflight);
>       // inflight is decreased
> 
> commit 8235b5c1e8c1 ("block: call bdev_add later in device_add_disk") can
> avoid this problem, however it's better to fix this problem in wbt:
> 
> 1) Lower kernel can't backport this patch due to lots of refactor.
> 2) Root cause is that wbt call rq_qos_add() before wb_normal is
> initialized.
> 
friendly ping ...
> Fixes: e34cbd307477 ("blk-wbt: add general throttling mechanism")
> Cc: <stable@vger.kernel.org>
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>   block/blk-wbt.c | 9 ++++-----
>   1 file changed, 4 insertions(+), 5 deletions(-)
> 
> diff --git a/block/blk-wbt.c b/block/blk-wbt.c
> index a9982000b667..246467926253 100644
> --- a/block/blk-wbt.c
> +++ b/block/blk-wbt.c
> @@ -843,6 +843,10 @@ int wbt_init(struct request_queue *q)
>   	rwb->enable_state = WBT_STATE_ON_DEFAULT;
>   	rwb->wc = 1;
>   	rwb->rq_depth.default_depth = RWB_DEF_DEPTH;
> +	rwb->min_lat_nsec = wbt_default_latency_nsec(q);
> +
> +	wbt_queue_depth_changed(&rwb->rqos);
> +	wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
>   
>   	/*
>   	 * Assign rwb and add the stats callback.
> @@ -853,11 +857,6 @@ int wbt_init(struct request_queue *q)
>   
>   	blk_stat_add_callback(q, rwb->cb);
>   
> -	rwb->min_lat_nsec = wbt_default_latency_nsec(q);
> -
> -	wbt_queue_depth_changed(&rwb->rqos);
> -	wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
> -
>   	return 0;
>   
>   err_free:
>
Jens Axboe Sept. 21, 2022, 2:37 p.m. UTC | #2
On Tue, 13 Sep 2022 18:57:49 +0800, Yu Kuai wrote:
> From: Yu Kuai <yukuai3@huawei.com>
> 
> Our test found a problem that wbt inflight counter is negative, which
> will cause io hang(noted that this problem doesn't exist in mainline):
> 
> t1: device create	t2: issue io
> add_disk
>  blk_register_queue
>   wbt_enable_default
>    wbt_init
>     rq_qos_add
>     // wb_normal is still 0
> 			/*
> 			 * in mainline, disk can't be opened before
> 			 * bdev_add(), however, in old kernels, disk
> 			 * can be opened before blk_register_queue().
> 			 */
> 			blkdev_issue_flush
>                         // disk size is 0, however, it's not checked
>                          submit_bio_wait
>                           submit_bio
>                            blk_mq_submit_bio
>                             rq_qos_throttle
>                              wbt_wait
> 			      bio_to_wbt_flags
>                                rwb_enabled
> 			       // wb_normal is 0, inflight is not increased
> 
> [...]

Applied, thanks!

[1/1] blk-wbt: call rq_qos_add() after wb_normal is initialized
      commit: 8c5035dfbb9475b67c82b3fdb7351236525bf52b

Best regards,
diff mbox series

Patch

diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index a9982000b667..246467926253 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -843,6 +843,10 @@  int wbt_init(struct request_queue *q)
 	rwb->enable_state = WBT_STATE_ON_DEFAULT;
 	rwb->wc = 1;
 	rwb->rq_depth.default_depth = RWB_DEF_DEPTH;
+	rwb->min_lat_nsec = wbt_default_latency_nsec(q);
+
+	wbt_queue_depth_changed(&rwb->rqos);
+	wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
 
 	/*
 	 * Assign rwb and add the stats callback.
@@ -853,11 +857,6 @@  int wbt_init(struct request_queue *q)
 
 	blk_stat_add_callback(q, rwb->cb);
 
-	rwb->min_lat_nsec = wbt_default_latency_nsec(q);
-
-	wbt_queue_depth_changed(&rwb->rqos);
-	wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
-
 	return 0;
 
 err_free: