diff mbox series

[v7,2/7] block/mq-deadline: Only use zone locking if necessary

Message ID 20230809202355.1171455-3-bvanassche@acm.org (mailing list archive)
State Superseded
Headers show
Series Improve performance for zoned UFS devices | expand

Commit Message

Bart Van Assche Aug. 9, 2023, 8:23 p.m. UTC
Measurements have shown that limiting the queue depth to one per zone for
zoned writes has a significant negative performance impact on zoned UFS
devices. Hence this patch that disables zone locking by the mq-deadline
scheduler if the storage controller preserves the command order. This
patch is based on the following assumptions:
- It happens infrequently that zoned write requests are reordered by the
  block layer.
- The I/O priority of all write requests is the same per zone.
- Either no I/O scheduler is used or an I/O scheduler is used that
  serializes write requests per zone.

Cc: Damien Le Moal <dlemoal@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 block/mq-deadline.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

Comments

Damien Le Moal Aug. 10, 2023, 1:36 a.m. UTC | #1
On 8/10/23 05:23, Bart Van Assche wrote:
> Measurements have shown that limiting the queue depth to one per zone for
> zoned writes has a significant negative performance impact on zoned UFS
> devices. Hence this patch that disables zone locking by the mq-deadline
> scheduler if the storage controller preserves the command order. This
> patch is based on the following assumptions:
> - It happens infrequently that zoned write requests are reordered by the
>   block layer.
> - The I/O priority of all write requests is the same per zone.
> - Either no I/O scheduler is used or an I/O scheduler is used that
>   serializes write requests per zone.
> 
> Cc: Damien Le Moal <dlemoal@kernel.org>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Ming Lei <ming.lei@redhat.com>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  block/mq-deadline.c | 24 ++++++++++++++++++------
>  1 file changed, 18 insertions(+), 6 deletions(-)
> 
> diff --git a/block/mq-deadline.c b/block/mq-deadline.c
> index f958e79277b8..cd2504205ff8 100644
> --- a/block/mq-deadline.c
> +++ b/block/mq-deadline.c
> @@ -338,6 +338,16 @@ static struct request *deadline_skip_seq_writes(struct deadline_data *dd,
>  	return rq;
>  }
>  
> +/*
> + * Whether or not to use zone write locking. Not using zone write locking for
> + * sequential write required zones is only safe if the block driver preserves
> + * the request order.
> + */
> +static bool dd_use_zone_write_locking(struct request_queue *q)
> +{
> +	return q->limits.use_zone_write_lock && blk_queue_is_zoned(q);

use_zone_write_lock should be true ONLY if the queue is zoned.
So the "&& blk_queue_is_zoned(q)" seems unnecessary to me.
This little helper could be moved to be generic in blkdev.h too.

> +}
> +
>  /*
>   * For the specified data direction, return the next request to
>   * dispatch using arrival ordered lists.
> @@ -353,7 +363,7 @@ deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
>  		return NULL;
>  
>  	rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
> -	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
> +	if (data_dir == DD_READ || !dd_use_zone_write_locking(rq->q))
>  		return rq;
>  
>  	/*
> @@ -398,7 +408,7 @@ deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
>  	if (!rq)
>  		return NULL;
>  
> -	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
> +	if (data_dir == DD_READ || !dd_use_zone_write_locking(rq->q))
>  		return rq;
>  
>  	/*
> @@ -526,8 +536,9 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
>  	}
>  
>  	/*
> -	 * For a zoned block device, if we only have writes queued and none of
> -	 * them can be dispatched, rq will be NULL.
> +	 * For a zoned block device that requires write serialization, if we
> +	 * only have writes queued and none of them can be dispatched, rq will
> +	 * be NULL.
>  	 */
>  	if (!rq)
>  		return NULL;
> @@ -552,7 +563,8 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
>  	/*
>  	 * If the request needs its target zone locked, do it.
>  	 */
> -	blk_req_zone_write_lock(rq);
> +	if (dd_use_zone_write_locking(rq->q))
> +		blk_req_zone_write_lock(rq);
>  	rq->rq_flags |= RQF_STARTED;
>  	return rq;
>  }
> @@ -934,7 +946,7 @@ static void dd_finish_request(struct request *rq)
>  
>  	atomic_inc(&per_prio->stats.completed);
>  
> -	if (blk_queue_is_zoned(q)) {
> +	if (dd_use_zone_write_locking(rq->q)) {
>  		unsigned long flags;
>  
>  		spin_lock_irqsave(&dd->zone_lock, flags);
Bart Van Assche Aug. 10, 2023, 2 p.m. UTC | #2
On 8/9/23 18:36, Damien Le Moal wrote:
> On 8/10/23 05:23, Bart Van Assche wrote:
>> +static bool dd_use_zone_write_locking(struct request_queue *q)
>> +{
>> +	return q->limits.use_zone_write_lock && blk_queue_is_zoned(q);
> 
> use_zone_write_lock should be true ONLY if the queue is zoned.
> So the "&& blk_queue_is_zoned(q)" seems unnecessary to me.
> This little helper could be moved to be generic in blkdev.h too.

Hi Damien,

use_zone_write_lock should be set by the block driver (e.g. a SCSI
LLD) before I/O starts. The zone model information is retrieved by
submitting I/O. It is not clear to me how to set use_zone_write_lock
to true only for zoned block devices before I/O starts since I/O is
required to retrieve information about the zone model.

Thanks,

Bart.
Damien Le Moal Aug. 11, 2023, 12:45 a.m. UTC | #3
On 8/10/23 23:00, Bart Van Assche wrote:
> On 8/9/23 18:36, Damien Le Moal wrote:
>> On 8/10/23 05:23, Bart Van Assche wrote:
>>> +static bool dd_use_zone_write_locking(struct request_queue *q)
>>> +{
>>> +	return q->limits.use_zone_write_lock && blk_queue_is_zoned(q);
>>
>> use_zone_write_lock should be true ONLY if the queue is zoned.
>> So the "&& blk_queue_is_zoned(q)" seems unnecessary to me.
>> This little helper could be moved to be generic in blkdev.h too.
> 
> Hi Damien,
> 
> use_zone_write_lock should be set by the block driver (e.g. a SCSI
> LLD) before I/O starts. The zone model information is retrieved by
> submitting I/O. It is not clear to me how to set use_zone_write_lock
> to true only for zoned block devices before I/O starts since I/O is
> required to retrieve information about the zone model.

See my other email. Once you know that the drive is zoned, then
use_zone_write_lock can default to true. That is trivial to do in
disk_set_zoned(), which is called by all drivers. I proposed adding an argument
to this function to override the default, thus allowing a device driver to set
it to false.

The limit default set to true as you have it in your current patch does not make
sense to me. It should be false by default and turned on only for zoned drives
that require zone write locking (e.g. SMR HDDs). With that, mq-deadline can even
be simplified to not even look at the q zoned model and simply use
q->limits.use_zone_write_lock to determine if locking a zone is needed or not.
That would be a lot cleaner.

Note that device scan and revalidation never write to the device. So that can be
done safely regardless of the current value for the use_zone_write_lock limit.
Bart Van Assche Aug. 11, 2023, 3:49 p.m. UTC | #4
On 8/10/23 17:45, Damien Le Moal wrote:
> With that, mq-deadline can even be simplified to not even look at the
> q zoned model and simply use q->limits.use_zone_write_lock to
> determine if locking a zone is needed or not.

Hi Damien,

I think implementing the above proposal requires splitting 
'use_zone_write_lock' into two variables:
(a) One member variable that indicates whether the zone write lock
     should be used.
(b) Another member variable that indicates whether or not the LLD
     preserves the order of SCSI commands.

Member variable (b) should be set by the LLD and member variable (a) can 
be set by disk_set_zoned().

Do you want me to implement this approach?

Thanks,

Bart.
Damien Le Moal Aug. 12, 2023, 2:49 a.m. UTC | #5
On 8/12/23 00:49, Bart Van Assche wrote:
> On 8/10/23 17:45, Damien Le Moal wrote:
>> With that, mq-deadline can even be simplified to not even look at the
>> q zoned model and simply use q->limits.use_zone_write_lock to
>> determine if locking a zone is needed or not.
> 
> Hi Damien,
> 
> I think implementing the above proposal requires splitting 
> 'use_zone_write_lock' into two variables:
> (a) One member variable that indicates whether the zone write lock
>      should be used.
> (b) Another member variable that indicates whether or not the LLD
>      preserves the order of SCSI commands.
> 
> Member variable (b) should be set by the LLD and member variable (a) can 
> be set by disk_set_zoned().
> 
> Do you want me to implement this approach?
> 

Looking at your v8, this seems better.
I will have a more in-depth look Monday.
diff mbox series

Patch

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index f958e79277b8..cd2504205ff8 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -338,6 +338,16 @@  static struct request *deadline_skip_seq_writes(struct deadline_data *dd,
 	return rq;
 }
 
+/*
+ * Whether or not to use zone write locking. Not using zone write locking for
+ * sequential write required zones is only safe if the block driver preserves
+ * the request order.
+ */
+static bool dd_use_zone_write_locking(struct request_queue *q)
+{
+	return q->limits.use_zone_write_lock && blk_queue_is_zoned(q);
+}
+
 /*
  * For the specified data direction, return the next request to
  * dispatch using arrival ordered lists.
@@ -353,7 +363,7 @@  deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 		return NULL;
 
 	rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
-	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
+	if (data_dir == DD_READ || !dd_use_zone_write_locking(rq->q))
 		return rq;
 
 	/*
@@ -398,7 +408,7 @@  deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 	if (!rq)
 		return NULL;
 
-	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
+	if (data_dir == DD_READ || !dd_use_zone_write_locking(rq->q))
 		return rq;
 
 	/*
@@ -526,8 +536,9 @@  static struct request *__dd_dispatch_request(struct deadline_data *dd,
 	}
 
 	/*
-	 * For a zoned block device, if we only have writes queued and none of
-	 * them can be dispatched, rq will be NULL.
+	 * For a zoned block device that requires write serialization, if we
+	 * only have writes queued and none of them can be dispatched, rq will
+	 * be NULL.
 	 */
 	if (!rq)
 		return NULL;
@@ -552,7 +563,8 @@  static struct request *__dd_dispatch_request(struct deadline_data *dd,
 	/*
 	 * If the request needs its target zone locked, do it.
 	 */
-	blk_req_zone_write_lock(rq);
+	if (dd_use_zone_write_locking(rq->q))
+		blk_req_zone_write_lock(rq);
 	rq->rq_flags |= RQF_STARTED;
 	return rq;
 }
@@ -934,7 +946,7 @@  static void dd_finish_request(struct request *rq)
 
 	atomic_inc(&per_prio->stats.completed);
 
-	if (blk_queue_is_zoned(q)) {
+	if (dd_use_zone_write_locking(rq->q)) {
 		unsigned long flags;
 
 		spin_lock_irqsave(&dd->zone_lock, flags);