diff mbox series

block/blk-ioprio: Skip zoned writes that are not append operations

Message ID 20231211231451.1452979-1-bvanassche@acm.org (mailing list archive)
State New, archived
Headers show
Series block/blk-ioprio: Skip zoned writes that are not append operations | expand

Commit Message

Bart Van Assche Dec. 11, 2023, 11:14 p.m. UTC
If REQ_OP_WRITE or REQ_OP_WRITE_ZEROES operations for the same zone
originate from different cgroups that could result in different
priorities being assigned to these operations. Do not modify the I/O
priority of these write operations to prevent that these would be
executed in the wrong order when using the mq-deadline I/O
scheduler.

Cc: Damien Le Moal <dlemoal@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 block/blk-ioprio.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

Comments

Damien Le Moal Dec. 11, 2023, 11:31 p.m. UTC | #1
On 12/12/23 08:14, Bart Van Assche wrote:
> If REQ_OP_WRITE or REQ_OP_WRITE_ZEROES operations for the same zone
> originate from different cgroups that could result in different
> priorities being assigned to these operations. Do not modify the I/O
> priority of these write operations to prevent that these would be
> executed in the wrong order when using the mq-deadline I/O

...to prevent them from being executed in the wrong...

> scheduler.
> 
> Cc: Damien Le Moal <dlemoal@kernel.org>
> Cc: Christoph Hellwig <hch@lst.de>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  block/blk-ioprio.c | 11 +++++++++++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/block/blk-ioprio.c b/block/blk-ioprio.c
> index 4051fada01f1..09ce083a0e3a 100644
> --- a/block/blk-ioprio.c
> +++ b/block/blk-ioprio.c
> @@ -192,6 +192,17 @@ void blkcg_set_ioprio(struct bio *bio)
>  	if (!blkcg || blkcg->prio_policy == POLICY_NO_CHANGE)
>  		return;
>  
> +	/*
> +	 * If REQ_OP_WRITE or REQ_OP_WRITE_ZEROES operations for the same zone
> +	 * originate from different cgroups that could result in different
> +	 * priorities being assigned to these operations. Do not modify the I/O
> +	 * priority of these write operations to prevent that these would be
> +	 * executed in the wrong order when using the mq-deadline I/O
> +	 * scheduler.
> +	 */
> +	if (bdev_op_is_zoned_write(bio->bi_bdev, bio_op(bio)))

Ideally, we want the bio equivalent of blk_rq_is_seq_zoned_write() here so that
writes to conventional zones are not affected (these can be reordered).

> +		return;
> +
>  	if (blkcg->prio_policy == POLICY_PROMOTE_TO_RT ||
>  	    blkcg->prio_policy == POLICY_NONE_TO_RT) {
>  		/*
Bart Van Assche Dec. 12, 2023, 12:11 a.m. UTC | #2
On 12/11/23 15:31, Damien Le Moal wrote:
> On 12/12/23 08:14, Bart Van Assche wrote:
>> +	/*
>> +	 * If REQ_OP_WRITE or REQ_OP_WRITE_ZEROES operations for the same zone
>> +	 * originate from different cgroups that could result in different
>> +	 * priorities being assigned to these operations. Do not modify the I/O
>> +	 * priority of these write operations to prevent that these would be
>> +	 * executed in the wrong order when using the mq-deadline I/O
>> +	 * scheduler.
>> +	 */
>> +	if (bdev_op_is_zoned_write(bio->bi_bdev, bio_op(bio)))
> 
> Ideally, we want the bio equivalent of blk_rq_is_seq_zoned_write() here so that
> writes to conventional zones are not affected (these can be reordered).
  How about the patch below?

Thanks,

Bart.


[PATCH] block/blk-ioprio: Skip zoned writes that are not append operations

If REQ_OP_WRITE or REQ_OP_WRITE_ZEROES operations for the same zone
originate from different cgroups that could result in different priorities
being assigned to these operations. Do not modify the I/O priority of
these write operations to prevent them from being executed in the wrong
order when using the mq-deadline I/O scheduler.

Cc: Damien Le Moal <dlemoal@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
  block/blk-ioprio.c     | 11 +++++++++++
  include/linux/blk-mq.h | 17 +++++++++++++++++
  2 files changed, 28 insertions(+)

diff --git a/block/blk-ioprio.c b/block/blk-ioprio.c
index 4051fada01f1..96b46d34e3d6 100644
--- a/block/blk-ioprio.c
+++ b/block/blk-ioprio.c
@@ -192,6 +192,17 @@ void blkcg_set_ioprio(struct bio *bio)
  	if (!blkcg || blkcg->prio_policy == POLICY_NO_CHANGE)
  		return;

+	/*
+	 * If REQ_OP_WRITE or REQ_OP_WRITE_ZEROES operations for the same zone
+	 * originate from different cgroups that could result in different
+	 * priorities being assigned to these operations. Do not modify the I/O
+	 * priority of these write operations to prevent them from being
+	 * executed in the wrong order when using the mq-deadline I/O
+	 * scheduler.
+	 */
+	if (blk_bio_is_seq_zoned_write(bio))
+		return;
+
  	if (blkcg->prio_policy == POLICY_PROMOTE_TO_RT ||
  	    blkcg->prio_policy == POLICY_NONE_TO_RT) {
  		/*
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 1ab3081c82ed..90907d9001c0 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -1149,6 +1149,18 @@ static inline unsigned int blk_rq_zone_no(struct request *rq)
  	return disk_zone_no(rq->q->disk, blk_rq_pos(rq));
  }

+/**
+ * blk_bio_is_seq_zoned_write() - Check if @bio requires write serialization.
+ * @bio: Bio to examine.
+ *
+ * Note: REQ_OP_ZONE_APPEND bios do not require serialization.
+ */
+static inline bool blk_bio_is_seq_zoned_write(struct bio *bio)
+{
+	return op_needs_zoned_write_locking(bio_op(bio)) &&
+		disk_zone_is_seq(bio->bi_bdev->bd_disk, bio->bi_iter.bi_sector);
+}
+
  static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
  {
  	return disk_zone_is_seq(rq->q->disk, blk_rq_pos(rq));
@@ -1196,6 +1208,11 @@ static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
  	return !blk_req_zone_is_write_locked(rq);
  }
  #else /* CONFIG_BLK_DEV_ZONED */
+static inline bool blk_bio_is_seq_zoned_write(struct bio *bio)
+{
+	return false;
+}
+
  static inline bool blk_rq_is_seq_zoned_write(struct request *rq)
  {
  	return false;
Damien Le Moal Dec. 12, 2023, 10:08 a.m. UTC | #3
On 12/12/23 09:11, Bart Van Assche wrote:
> On 12/11/23 15:31, Damien Le Moal wrote:
>> On 12/12/23 08:14, Bart Van Assche wrote:
>>> +	/*
>>> +	 * If REQ_OP_WRITE or REQ_OP_WRITE_ZEROES operations for the same zone
>>> +	 * originate from different cgroups that could result in different
>>> +	 * priorities being assigned to these operations. Do not modify the I/O
>>> +	 * priority of these write operations to prevent that these would be
>>> +	 * executed in the wrong order when using the mq-deadline I/O
>>> +	 * scheduler.
>>> +	 */
>>> +	if (bdev_op_is_zoned_write(bio->bi_bdev, bio_op(bio)))
>>
>> Ideally, we want the bio equivalent of blk_rq_is_seq_zoned_write() here so that
>> writes to conventional zones are not affected (these can be reordered).
>   How about the patch below?
> 
> Thanks,
> 
> Bart.
> 
> 
> [PATCH] block/blk-ioprio: Skip zoned writes that are not append operations
> 
> If REQ_OP_WRITE or REQ_OP_WRITE_ZEROES operations for the same zone
> originate from different cgroups that could result in different priorities
> being assigned to these operations. Do not modify the I/O priority of
> these write operations to prevent them from being executed in the wrong
> order when using the mq-deadline I/O scheduler.
> 
> Cc: Damien Le Moal <dlemoal@kernel.org>
> Cc: Christoph Hellwig <hch@lst.de>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>   block/blk-ioprio.c     | 11 +++++++++++
>   include/linux/blk-mq.h | 17 +++++++++++++++++
>   2 files changed, 28 insertions(+)
> 
> diff --git a/block/blk-ioprio.c b/block/blk-ioprio.c
> index 4051fada01f1..96b46d34e3d6 100644
> --- a/block/blk-ioprio.c
> +++ b/block/blk-ioprio.c
> @@ -192,6 +192,17 @@ void blkcg_set_ioprio(struct bio *bio)
>   	if (!blkcg || blkcg->prio_policy == POLICY_NO_CHANGE)
>   		return;
> 
> +	/*
> +	 * If REQ_OP_WRITE or REQ_OP_WRITE_ZEROES operations for the same zone
> +	 * originate from different cgroups that could result in different
> +	 * priorities being assigned to these operations. Do not modify the I/O
> +	 * priority of these write operations to prevent that these would be
> +	 * executed in the wrong order when using the mq-deadline I/O
> +	 * scheduler.
> +	 */
> +	if (blk_bio_is_seq_zoned_write(bio))
> +		return;
> +
>   	if (blkcg->prio_policy == POLICY_PROMOTE_TO_RT ||
>   	    blkcg->prio_policy == POLICY_NONE_TO_RT) {
>   		/*
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index 1ab3081c82ed..90907d9001c0 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -1149,6 +1149,18 @@ static inline unsigned int blk_rq_zone_no(struct request *rq)
>   	return disk_zone_no(rq->q->disk, blk_rq_pos(rq));
>   }
> 
> +/**
> + * blk_bio_is_seq_zoned_write() - Check if @bio requires write serialization.
> + * @bio: Bio to examine.
> + *
> + * Note: REQ_OP_ZONE_APPEND bios do not require serialization.
> + */
> +static inline bool blk_bio_is_seq_zoned_write(struct bio *bio)
> +{
> +	return op_needs_zoned_write_locking(bio_op(bio)) &&
> +		disk_zone_is_seq(bio->bi_disk, bio.bi_iter.bi_sector);

Given that disk_zone_is_seq() always returns false for regular devices, I think
reversing the test order is better:

	return disk_zone_is_seq(bio->bi_bdev->bd_disk, bio->bi_iter.bi_sector) &&
	       op_needs_zoned_write_locking(bio_op(bio));

> +}
> +
>   static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
>   {
>   	return disk_zone_is_seq(rq->q->disk, blk_rq_pos(rq));
> @@ -1196,6 +1208,11 @@ static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
>   	return !blk_req_zone_is_write_locked(rq);
>   }
>   #else /* CONFIG_BLK_DEV_ZONED */
> +static inline bool blk_bio_is_seq_zoned_write(struct bio *bio)
> +{
> +	return false;
> +}
> +
>   static inline bool blk_rq_is_seq_zoned_write(struct request *rq)
>   {
>   	return false;
>
diff mbox series

Patch

diff --git a/block/blk-ioprio.c b/block/blk-ioprio.c
index 4051fada01f1..09ce083a0e3a 100644
--- a/block/blk-ioprio.c
+++ b/block/blk-ioprio.c
@@ -192,6 +192,17 @@  void blkcg_set_ioprio(struct bio *bio)
 	if (!blkcg || blkcg->prio_policy == POLICY_NO_CHANGE)
 		return;
 
+	/*
+	 * If REQ_OP_WRITE or REQ_OP_WRITE_ZEROES operations for the same zone
+	 * originate from different cgroups that could result in different
+	 * priorities being assigned to these operations. Do not modify the I/O
+	 * priority of these write operations to prevent that these would be
+	 * executed in the wrong order when using the mq-deadline I/O
+	 * scheduler.
+	 */
+	if (bdev_op_is_zoned_write(bio->bi_bdev, bio_op(bio)))
+		return;
+
 	if (blkcg->prio_policy == POLICY_PROMOTE_TO_RT ||
 	    blkcg->prio_policy == POLICY_NONE_TO_RT) {
 		/*