diff mbox series

[v2,11/12] block: mq-deadline: Fix a race condition related to zoned writes

Message ID 20230407235822.1672286-12-bvanassche@acm.org (mailing list archive)
State New, archived
Headers show
Series Submit zoned writes in order | expand

Commit Message

Bart Van Assche April 7, 2023, 11:58 p.m. UTC
Let deadline_next_request() only consider the first zoned write per
zone. This patch fixes a race condition between deadline_next_request()
and completion of zoned writes.

Cc: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Mike Snitzer <snitzer@kernel.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 block/mq-deadline.c    | 24 +++++++++++++++++++++---
 include/linux/blk-mq.h |  5 +++++
 2 files changed, 26 insertions(+), 3 deletions(-)

Comments

Damien Le Moal April 10, 2023, 8:16 a.m. UTC | #1
On 4/8/23 08:58, Bart Van Assche wrote:
> Let deadline_next_request() only consider the first zoned write per
> zone. This patch fixes a race condition between deadline_next_request()
> and completion of zoned writes.
> 
> Cc: Damien Le Moal <damien.lemoal@opensource.wdc.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Ming Lei <ming.lei@redhat.com>
> Cc: Mike Snitzer <snitzer@kernel.org>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  block/mq-deadline.c    | 24 +++++++++++++++++++++---
>  include/linux/blk-mq.h |  5 +++++
>  2 files changed, 26 insertions(+), 3 deletions(-)
> 
> diff --git a/block/mq-deadline.c b/block/mq-deadline.c
> index 8c2bc9fdcf8c..d49e20d3011d 100644
> --- a/block/mq-deadline.c
> +++ b/block/mq-deadline.c
> @@ -389,12 +389,30 @@ deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
>  	 */
>  	spin_lock_irqsave(&dd->zone_lock, flags);
>  	while (rq) {
> +		unsigned int zno = blk_rq_zone_no(rq);
> +
>  		if (blk_req_can_dispatch_to_zone(rq))
>  			break;
> -		if (blk_queue_nonrot(q))
> -			rq = deadline_latter_request(rq);
> -		else
> +
> +		WARN_ON_ONCE(!blk_queue_is_zoned(q));

I do not think this WARN is useful as blk_req_can_dispatch_to_zone() will always
return true for regular block devices.

> +
> +		if (!blk_queue_nonrot(q)) {
>  			rq = deadline_skip_seq_writes(dd, rq);
> +			if (!rq)
> +				break;
> +			rq = deadline_earlier_request(rq);
> +			if (WARN_ON_ONCE(!rq))
> +				break;

I do not understand why this is needed.

> +		}
> +
> +		/*
> +		 * Skip all other write requests for the zone with zone number
> +		 * 'zno'. This prevents this function from selecting a zoned
> +		 * write that is not the first write for a given zone.
> +		 */
> +		while ((rq = deadline_latter_request(rq)) &&
> +		       blk_rq_zone_no(rq) == zno)
> +			;
>  	}
>  	spin_unlock_irqrestore(&dd->zone_lock, flags);
>  
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index e62feb17af96..515dfd04d736 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -1193,6 +1193,11 @@ static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
>  	return !blk_req_zone_is_write_locked(rq);
>  }
>  #else /* CONFIG_BLK_DEV_ZONED */
> +static inline unsigned int blk_rq_zone_no(struct request *rq)
> +{
> +	return 0;
> +}
> +
>  static inline bool blk_req_needs_zone_write_lock(struct request *rq)
>  {
>  	return false;
Bart Van Assche April 10, 2023, 5:23 p.m. UTC | #2
On 4/10/23 01:16, Damien Le Moal wrote:
> On 4/8/23 08:58, Bart Van Assche wrote:
>> +		WARN_ON_ONCE(!blk_queue_is_zoned(q));
> 
> I do not think this WARN is useful as blk_req_can_dispatch_to_zone() will always
> return true for regular block devices.

Hi Damien,

I will leave it out.

Are you OK with adding a blk_rq_zone_no() definition for the case where
CONFIG_BLK_DEV_ZONED is not defined (as has been done in this patch), or
would you prefer that I surround the code blocks added by this patch that
call blk_rq_zone_no() with #ifdef CONFIG_BLK_DEV_ZONED / #endif?

>> +
>> +		if (!blk_queue_nonrot(q)) {
>>   			rq = deadline_skip_seq_writes(dd, rq);
>> +			if (!rq)
>> +				break;
>> +			rq = deadline_earlier_request(rq);
>> +			if (WARN_ON_ONCE(!rq))
>> +				break;
> 
> I do not understand why this is needed.

Are you perhaps referring to the deadline_earlier_request() call? The
while loop below always advances 'rq' by at least one request. The
deadline_earlier_request() call compensates for this by first going back
to the previous request.

Thanks,

Bart.
diff mbox series

Patch

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 8c2bc9fdcf8c..d49e20d3011d 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -389,12 +389,30 @@  deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 	 */
 	spin_lock_irqsave(&dd->zone_lock, flags);
 	while (rq) {
+		unsigned int zno = blk_rq_zone_no(rq);
+
 		if (blk_req_can_dispatch_to_zone(rq))
 			break;
-		if (blk_queue_nonrot(q))
-			rq = deadline_latter_request(rq);
-		else
+
+		WARN_ON_ONCE(!blk_queue_is_zoned(q));
+
+		if (!blk_queue_nonrot(q)) {
 			rq = deadline_skip_seq_writes(dd, rq);
+			if (!rq)
+				break;
+			rq = deadline_earlier_request(rq);
+			if (WARN_ON_ONCE(!rq))
+				break;
+		}
+
+		/*
+		 * Skip all other write requests for the zone with zone number
+		 * 'zno'. This prevents this function from selecting a zoned
+		 * write that is not the first write for a given zone.
+		 */
+		while ((rq = deadline_latter_request(rq)) &&
+		       blk_rq_zone_no(rq) == zno)
+			;
 	}
 	spin_unlock_irqrestore(&dd->zone_lock, flags);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index e62feb17af96..515dfd04d736 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -1193,6 +1193,11 @@  static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
 	return !blk_req_zone_is_write_locked(rq);
 }
 #else /* CONFIG_BLK_DEV_ZONED */
+static inline unsigned int blk_rq_zone_no(struct request *rq)
+{
+	return 0;
+}
+
 static inline bool blk_req_needs_zone_write_lock(struct request *rq)
 {
 	return false;