[v2,2/5] block/mq-deadline: Only use zone locking if necessary

Message ID: 20230710180210.1582299-3-bvanassche@acm.org
State: New, archived
Series: Enable zoned write pipelining for UFS devices

Commit Message

Bart Van Assche July 10, 2023, 6:01 p.m. UTC
Measurements have shown that limiting the queue depth to one for zoned
writes has a significant negative performance impact on zoned UFS devices.
Hence this patch, which disables zone locking in the mq-deadline scheduler
for storage controllers that support pipelining zoned writes. This patch is
based on the following assumptions:
- Applications submit write requests to sequential write required zones
  in order.
- The block layer only infrequently reorders zoned write requests.
- The storage controller does not reorder write requests that have been
  submitted to the same hardware queue. This is the case for UFS: the
  UFSHCI specification requires that UFS controllers process requests in
  order per hardware queue.
- The I/O priority of all pipelined write requests is the same per zone.
- Either no I/O scheduler is used or an I/O scheduler is used that
  submits write requests per zone in LBA order.

Cc: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 block/blk-zoned.c   |  3 ++-
 block/mq-deadline.c | 14 +++++++++-----
 2 files changed, 11 insertions(+), 6 deletions(-)

Comments

Damien Le Moal July 18, 2023, 6:38 a.m. UTC | #1
On 7/11/23 03:01, Bart Van Assche wrote:
> Measurements have shown that limiting the queue depth to one for zoned
> writes has a significant negative performance impact on zoned UFS devices.
> Hence this patch, which disables zone locking in the mq-deadline scheduler
> for storage controllers that support pipelining zoned writes. This patch is
> based on the following assumptions:
> - Applications submit write requests to sequential write required zones
>   in order.
> - The block layer only infrequently reorders zoned write requests.
> - The storage controller does not reorder write requests that have been
>   submitted to the same hardware queue. This is the case for UFS: the
>   UFSHCI specification requires that UFS controllers process requests in
>   order per hardware queue.
> - The I/O priority of all pipelined write requests is the same per zone.
> - Either no I/O scheduler is used or an I/O scheduler is used that
>   submits write requests per zone in LBA order.
> 
> Cc: Damien Le Moal <damien.lemoal@wdc.com>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  block/blk-zoned.c   |  3 ++-
>  block/mq-deadline.c | 14 +++++++++-----
>  2 files changed, 11 insertions(+), 6 deletions(-)
> 
> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
> index 0f9f97cdddd9..59560d1657e3 100644
> --- a/block/blk-zoned.c
> +++ b/block/blk-zoned.c
> @@ -504,7 +504,8 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
>  		break;
>  	case BLK_ZONE_TYPE_SEQWRITE_REQ:
>  	case BLK_ZONE_TYPE_SEQWRITE_PREF:
> -		if (!args->seq_zones_wlock) {
> +		if (!blk_queue_pipeline_zoned_writes(q) &&
> +		    !args->seq_zones_wlock) {

I think that this change should go into the first patch, no?

>  			args->seq_zones_wlock =
>  				blk_alloc_zone_bitmap(q->node, args->nr_zones);
>  			if (!args->seq_zones_wlock)
> diff --git a/block/mq-deadline.c b/block/mq-deadline.c
> index 6aa5daf7ae32..0bed2bdeed89 100644
> --- a/block/mq-deadline.c
> +++ b/block/mq-deadline.c
> @@ -353,7 +353,8 @@ deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
>  		return NULL;
>  
>  	rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
> -	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
> +	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q) ||
> +	    blk_queue_pipeline_zoned_writes(rq->q))

What about using blk_req_needs_zone_write_lock() ?

>  		return rq;
>  
>  	/*
> @@ -398,7 +399,8 @@ deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
>  	if (!rq)
>  		return NULL;
>  
> -	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
> +	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q) ||
> +	    blk_queue_pipeline_zoned_writes(rq->q))

Same.

>  		return rq;
>  
>  	/*
> @@ -526,8 +528,9 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
>  	}
>  
>  	/*
> -	 * For a zoned block device, if we only have writes queued and none of
> -	 * them can be dispatched, rq will be NULL.
> +	 * For a zoned block device that requires write serialization, if we
> +	 * only have writes queued and none of them can be dispatched, rq will
> +	 * be NULL.
>  	 */
>  	if (!rq)
>  		return NULL;
> @@ -933,7 +936,8 @@ static void dd_finish_request(struct request *rq)
>  
>  	atomic_inc(&per_prio->stats.completed);
>  
> -	if (blk_queue_is_zoned(q)) {
> +	if (blk_queue_is_zoned(q) &&
> +	    !blk_queue_pipeline_zoned_writes(q)) {

And again here.

>  		unsigned long flags;
>  
>  		spin_lock_irqsave(&dd->zone_lock, flags);
Bart Van Assche July 18, 2023, 10:38 p.m. UTC | #2
On 7/17/23 23:38, Damien Le Moal wrote:
> On 7/11/23 03:01, Bart Van Assche wrote:
>> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
>> index 0f9f97cdddd9..59560d1657e3 100644
>> --- a/block/blk-zoned.c
>> +++ b/block/blk-zoned.c
>> @@ -504,7 +504,8 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
>>   		break;
>>   	case BLK_ZONE_TYPE_SEQWRITE_REQ:
>>   	case BLK_ZONE_TYPE_SEQWRITE_PREF:
>> -		if (!args->seq_zones_wlock) {
>> +		if (!blk_queue_pipeline_zoned_writes(q) &&
>> +		    !args->seq_zones_wlock) {
> 
> I think that this change should go into the first patch, no ?

That's a good point. I will move this change into the first patch.

Thanks,

Bart.
Bart Van Assche July 24, 2023, 9:39 p.m. UTC | #3
On 7/17/23 23:38, Damien Le Moal wrote:
> On 7/11/23 03:01, Bart Van Assche wrote:
>> diff --git a/block/mq-deadline.c b/block/mq-deadline.c
>> index 6aa5daf7ae32..0bed2bdeed89 100644
>> --- a/block/mq-deadline.c
>> +++ b/block/mq-deadline.c
>> @@ -353,7 +353,8 @@ deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
>>   		return NULL;
>>   
>>   	rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
>> -	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
>> +	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q) ||
>> +	    blk_queue_pipeline_zoned_writes(rq->q))
> 
> What about using blk_req_needs_zone_write_lock() ?

Hmm ... how would using blk_req_needs_zone_write_lock() improve the 
generated code? blk_queue_pipeline_zoned_writes() can be inlined and 
only tests a single bit (a request queue flag) while 
blk_req_needs_zone_write_lock() cannot be inlined by the compiler 
because it is defined in a .c file. Additionally, 
blk_req_needs_zone_write_lock() has to dereference more pointers than 
blk_queue_pipeline_zoned_writes(). From block/blk-zoned.c:

bool blk_req_needs_zone_write_lock(struct request *rq)
{
	if (!rq->q->disk->seq_zones_wlock)
		return false;

	return blk_rq_is_seq_zoned_write(rq);
}
EXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock);
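
By contrast, blk_queue_pipeline_zoned_writes() boils down to a single
flag test, along these lines. This is only a sketch modeled on the other
blk_queue_*() helpers in include/linux/blkdev.h; the actual definition
is introduced by patch 1/5 of this series, so the flag name below is an
assumption:

/* Sketch only; QUEUE_FLAG_PIPELINE_ZONED_WRITES is an assumed name. */
#define blk_queue_pipeline_zoned_writes(q) \
	test_bit(QUEUE_FLAG_PIPELINE_ZONED_WRITES, &(q)->queue_flags)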

Thanks,

Bart.

Patch

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 0f9f97cdddd9..59560d1657e3 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -504,7 +504,8 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
 		break;
 	case BLK_ZONE_TYPE_SEQWRITE_REQ:
 	case BLK_ZONE_TYPE_SEQWRITE_PREF:
-		if (!args->seq_zones_wlock) {
+		if (!blk_queue_pipeline_zoned_writes(q) &&
+		    !args->seq_zones_wlock) {
 			args->seq_zones_wlock =
 				blk_alloc_zone_bitmap(q->node, args->nr_zones);
 			if (!args->seq_zones_wlock)
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 6aa5daf7ae32..0bed2bdeed89 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -353,7 +353,8 @@ deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 		return NULL;
 
 	rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
-	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
+	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q) ||
+	    blk_queue_pipeline_zoned_writes(rq->q))
 		return rq;
 
 	/*
@@ -398,7 +399,8 @@ deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 	if (!rq)
 		return NULL;
 
-	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
+	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q) ||
+	    blk_queue_pipeline_zoned_writes(rq->q))
 		return rq;
 
 	/*
@@ -526,8 +528,9 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
 	}
 
 	/*
-	 * For a zoned block device, if we only have writes queued and none of
-	 * them can be dispatched, rq will be NULL.
+	 * For a zoned block device that requires write serialization, if we
+	 * only have writes queued and none of them can be dispatched, rq will
+	 * be NULL.
 	 */
 	if (!rq)
 		return NULL;
@@ -933,7 +936,8 @@ static void dd_finish_request(struct request *rq)
 
 	atomic_inc(&per_prio->stats.completed);
 
-	if (blk_queue_is_zoned(q)) {
+	if (blk_queue_is_zoned(q) &&
+	    !blk_queue_pipeline_zoned_writes(q)) {
 		unsigned long flags;
 
 		spin_lock_irqsave(&dd->zone_lock, flags);
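
For context, a host controller driver whose hardware queues preserve the
submission order of requests would opt into this behavior by setting the
corresponding request queue flag, e.g. at initialization time. A minimal
sketch, assuming the flag name introduced by patch 1/5 of this series:

static void example_enable_zoned_write_pipelining(struct request_queue *q)
{
	/*
	 * Hypothetical opt-in; QUEUE_FLAG_PIPELINE_ZONED_WRITES is an
	 * assumed name from patch 1/5 of this series.
	 */
	blk_queue_flag_set(QUEUE_FLAG_PIPELINE_ZONED_WRITES, q);
}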