
[v2,12/12] block: mq-deadline: Handle requeued requests correctly

Message ID: 20230407235822.1672286-13-bvanassche@acm.org
State: New, archived
Series: Submit zoned writes in order

Commit Message

Bart Van Assche April 7, 2023, 11:58 p.m. UTC
If a zoned write is requeued with an LBA that is lower than the LBA of
already inserted zoned writes, make sure that it is submitted first.
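
For example (sector numbers chosen for illustration): if sequential writes
to sectors 0, 8 and 16 of a zone are submitted, the write to sector 0 fails
and is requeued while the writes to sectors 8 and 16 are still in the
scheduler, then the requeued write must be dispatched first, otherwise the
zone would be written out of LBA order.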

Cc: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Mike Snitzer <snitzer@kernel.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 block/mq-deadline.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

Comments

Damien Le Moal April 10, 2023, 8:32 a.m. UTC | #1
On 4/8/23 08:58, Bart Van Assche wrote:
> If a zoned write is requeued with an LBA that is lower than the LBA of
> already inserted zoned writes, make sure that it is submitted first.
> 
> Cc: Damien Le Moal <damien.lemoal@opensource.wdc.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Ming Lei <ming.lei@redhat.com>
> Cc: Mike Snitzer <snitzer@kernel.org>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  block/mq-deadline.c | 26 +++++++++++++++++++++++++-
>  1 file changed, 25 insertions(+), 1 deletion(-)
> 
> diff --git a/block/mq-deadline.c b/block/mq-deadline.c
> index d49e20d3011d..c536b499a60f 100644
> --- a/block/mq-deadline.c
> +++ b/block/mq-deadline.c
> @@ -162,8 +162,19 @@ static void
>  deadline_add_rq_rb(struct dd_per_prio *per_prio, struct request *rq)
>  {
>  	struct rb_root *root = deadline_rb_root(per_prio, rq);
> +	struct request **next_rq = &per_prio->next_rq[rq_data_dir(rq)];
>  
>  	elv_rb_add(root, rq);
> +	if (*next_rq == NULL || !blk_queue_is_zoned(rq->q))
> +		return;

A blank line here would be nice.

> +	/*
> +	 * If a request got requeued or requests have been submitted out of
> +	 * order, make sure that per zone the request with the lowest LBA is

Requests being submitted out of order would be a bug in the issuer code, so I
would not even mention this here; focus instead on explaining that a requeue
may cause inserts to be seen out of order.

> +	 * submitted first.
> +	 */
> +	if (blk_rq_pos(rq) < blk_rq_pos(*next_rq) &&
> +	    blk_rq_zone_no(rq) == blk_rq_zone_no(*next_rq))
> +		*next_rq = rq;
>  }
>  
>  static inline void
> @@ -822,6 +833,8 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
>  		list_add(&rq->queuelist, &per_prio->dispatch);
>  		rq->fifo_time = jiffies;
>  	} else {
> +		struct list_head *insert_before;
> +
>  		deadline_add_rq_rb(per_prio, rq);
>  
>  		if (rq_mergeable(rq)) {
> @@ -834,7 +847,18 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
>  		 * set expire time and add to fifo list
>  		 */
>  		rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
> -		list_add_tail(&rq->queuelist, &per_prio->fifo_list[data_dir]);
> +		insert_before = &per_prio->fifo_list[data_dir];
> +		if (blk_rq_is_seq_zoned_write(rq)) {
> +			const unsigned int zno = blk_rq_zone_no(rq);
> +			struct request *rq2 = rq;
> +
> +			while ((rq2 = deadline_earlier_request(rq2)) &&
> +			       blk_rq_zone_no(rq2) == zno &&
> +			       blk_rq_pos(rq2) > blk_rq_pos(rq)) {
> +				insert_before = &rq2->queuelist;
> +			}
> +		}
> +		list_add_tail(&rq->queuelist, insert_before);

This seems incorrect: the fifo list is ordered by arrival time, so always
queuing at the tail is the right thing to do. What I think you want to do here
is: when we choose the oldest request as the next one to dispatch (to
implement aging), get the first request for the target zone of that oldest
request. But why do that on insert? This should be in the dispatch code, in
deadline_fifo_request(), no?
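
For illustration, a minimal sketch of this dispatch-side alternative (not part
of the series; dd_lowest_lba_in_zone() is a hypothetical name): after
deadline_fifo_request() has picked the oldest request, scan the FIFO list for
the lowest-LBA write in that request's zone:

/*
 * Hypothetical helper: return the request to dispatch instead of
 * @oldest, i.e. the lowest-LBA sequential zoned write in the same
 * zone. Note that this walks the entire FIFO list on every call.
 */
static struct request *
dd_lowest_lba_in_zone(struct dd_per_prio *per_prio, enum dd_data_dir data_dir,
		      struct request *oldest)
{
	struct request *rq, *best = oldest;

	if (!blk_rq_is_seq_zoned_write(oldest))
		return oldest;

	list_for_each_entry(rq, &per_prio->fifo_list[data_dir], queuelist)
		if (blk_rq_zone_no(rq) == blk_rq_zone_no(oldest) &&
		    blk_rq_pos(rq) < blk_rq_pos(best))
			best = rq;

	return best;
}

The walk over the entire FIFO list on every dispatch is the cost Bart points
out in his reply below.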
Bart Van Assche April 10, 2023, 5:31 p.m. UTC | #2
On 4/10/23 01:32, Damien Le Moal wrote:
> On 4/8/23 08:58, Bart Van Assche wrote:
>> @@ -834,7 +847,18 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
>>   		 * set expire time and add to fifo list
>>   		 */
>>   		rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
>> -		list_add_tail(&rq->queuelist, &per_prio->fifo_list[data_dir]);
>> +		insert_before = &per_prio->fifo_list[data_dir];
>> +		if (blk_rq_is_seq_zoned_write(rq)) {
>> +			const unsigned int zno = blk_rq_zone_no(rq);
>> +			struct request *rq2 = rq;
>> +
>> +			while ((rq2 = deadline_earlier_request(rq2)) &&
>> +			       blk_rq_zone_no(rq2) == zno &&
>> +			       blk_rq_pos(rq2) > blk_rq_pos(rq)) {
>> +				insert_before = &rq2->queuelist;
>> +			}
>> +		}
>> +		list_add_tail(&rq->queuelist, insert_before);
> 
> This seems incorrect: the fifo list is ordered by arrival time, so always
> queuing at the tail is the right thing to do. What I think you want to do here
> is: when we choose the oldest request as the next one to dispatch (to
> implement aging), get the first request for the target zone of that oldest
> request. But why do that on insert? This should be in the dispatch code, in
> deadline_fifo_request(), no?

Hi Damien,

If the dispatch code had to look up the zoned write with the lowest LBA, it 
would have to iterate over the entire FIFO list. The loop above performs 
much less work on average: if no requeuing happens, the expression 
'blk_rq_pos(rq2) > blk_rq_pos(rq)' evaluates to false the first time it is 
evaluated, so the loop body is never executed.

Thanks,

Bart.
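
To make the behavior of the insertion loop concrete, here is a standalone
userspace model (sector and zone numbers are made up, and the kernel code
uses struct list_head plus list_add_tail() instead of this hand-rolled list):

#include <stdio.h>

struct rq {
	unsigned long long pos;	/* start sector, cf. blk_rq_pos() */
	unsigned int zone;	/* zone number, cf. blk_rq_zone_no() */
	struct rq *prev, *next;
};

/* Circular list head, like the scheduler's per-priority FIFO list. */
static struct rq fifo = { .prev = &fifo, .next = &fifo };

static void insert_zoned_write(struct rq *rq)
{
	struct rq *pos = fifo.prev;	/* start at the FIFO tail */

	/*
	 * Model of the loop in dd_insert_request(): walk back past
	 * writes for the same zone that start at a higher sector, so
	 * that the zone stays in LBA order even after a requeue. When
	 * no requeue happened, the condition is false immediately and
	 * the request is queued at the tail as before.
	 */
	while (pos != &fifo && pos->zone == rq->zone && pos->pos > rq->pos)
		pos = pos->prev;

	/* Insert rq right after 'pos', i.e. before the skipped writes. */
	rq->prev = pos;
	rq->next = pos->next;
	pos->next->prev = rq;
	pos->next = rq;
}

int main(void)
{
	struct rq a = { .pos = 8, .zone = 5 };
	struct rq b = { .pos = 16, .zone = 5 };
	struct rq c = { .pos = 24, .zone = 5 };
	struct rq requeued = { .pos = 0, .zone = 5 };
	struct rq *rq;

	insert_zoned_write(&a);		/* arrives first */
	insert_zoned_write(&b);
	insert_zoned_write(&c);
	insert_zoned_write(&requeued);	/* requeued, lowest LBA */

	for (rq = fifo.next; rq != &fifo; rq = rq->next)
		printf("zone %u sector %llu\n", rq->zone, rq->pos);
	return 0;
}

Running this prints the zone 5 writes at sectors 0, 8, 16 and 24 in that
order: the requeued write moved ahead of the same-zone writes that arrived
earlier, and the three inserts without a requeue each took zero loop
iterations.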

Patch

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index d49e20d3011d..c536b499a60f 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -162,8 +162,19 @@ static void
 deadline_add_rq_rb(struct dd_per_prio *per_prio, struct request *rq)
 {
 	struct rb_root *root = deadline_rb_root(per_prio, rq);
+	struct request **next_rq = &per_prio->next_rq[rq_data_dir(rq)];
 
 	elv_rb_add(root, rq);
+	if (*next_rq == NULL || !blk_queue_is_zoned(rq->q))
+		return;
+	/*
+	 * If a request got requeued or requests have been submitted out of
+	 * order, make sure that per zone the request with the lowest LBA is
+	 * submitted first.
+	 */
+	if (blk_rq_pos(rq) < blk_rq_pos(*next_rq) &&
+	    blk_rq_zone_no(rq) == blk_rq_zone_no(*next_rq))
+		*next_rq = rq;
 }
 
 static inline void
@@ -822,6 +833,8 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 		list_add(&rq->queuelist, &per_prio->dispatch);
 		rq->fifo_time = jiffies;
 	} else {
+		struct list_head *insert_before;
+
 		deadline_add_rq_rb(per_prio, rq);
 
 		if (rq_mergeable(rq)) {
@@ -834,7 +847,18 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 		 * set expire time and add to fifo list
 		 */
 		rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
-		list_add_tail(&rq->queuelist, &per_prio->fifo_list[data_dir]);
+		insert_before = &per_prio->fifo_list[data_dir];
+		if (blk_rq_is_seq_zoned_write(rq)) {
+			const unsigned int zno = blk_rq_zone_no(rq);
+			struct request *rq2 = rq;
+
+			while ((rq2 = deadline_earlier_request(rq2)) &&
+			       blk_rq_zone_no(rq2) == zno &&
+			       blk_rq_pos(rq2) > blk_rq_pos(rq)) {
+				insert_before = &rq2->queuelist;
+			}
+		}
+		list_add_tail(&rq->queuelist, insert_before);
 	}
 }
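
To illustrate the deadline_add_rq_rb() hunk with hypothetical sector numbers:
if per_prio->next_rq[DD_WRITE] points at a write to sector 24584 of zone 3 and
a write to sector 24576 of the same zone is requeued and re-inserted, the
requeued request becomes the new next request, so the zone is again written in
LBA order. A lower-LBA write to a different zone leaves next_rq untouched
because the zone numbers differ.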