diff mbox series

[V4,11/12] block: add poll_capable method to support bio-based IO polling

Message ID 20210329152622.173035-12-ming.lei@redhat.com (mailing list archive)
State New, archived
Headers show
Series block: support bio based io polling | expand

Commit Message

Ming Lei March 29, 2021, 3:26 p.m. UTC
From: Jeffle Xu <jefflexu@linux.alibaba.com>

This method can be used to check whether a bio-based device supports IO
polling. For mq devices, checking for a hw queue in polling mode is
adequate, while the sanity check must be implementation-specific for
bio-based devices. For example, a dm device needs to check whether all
underlying devices are capable of IO polling.

Though a bio-based device may have done the sanity check during the
device initialization phase, caching the result of this sanity check
(such as by caching it in the queue_flags) may not work, because for dm
devices, users could change the state of the underlying devices through
'/sys/block/<dev>/io_poll', bypassing the dm device above. In this case,
the cached result of the initial sanity check could be out-of-date.
Thus the sanity check needs to be done every time 'io_poll' is to be
modified.

Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
---
 block/blk-sysfs.c      | 14 +++++++++++---
 include/linux/blkdev.h |  1 +
 2 files changed, 12 insertions(+), 3 deletions(-)

Comments

Hannes Reinecke March 30, 2021, 6:26 a.m. UTC | #1
On 3/29/21 5:26 PM, Ming Lei wrote:
> From: Jeffle Xu <jefflexu@linux.alibaba.com>
> 
> This method can be used to check if bio-based device supports IO polling
> or not. For mq devices, checking for hw queue in polling mode is
> adequate, while the sanity check shall be implementation specific for
> bio-based devices. For example, dm device needs to check if all
> underlying devices are capable of IO polling.
> 
> Though bio-based device may have done the sanity check during the
> device initialization phase, cacheing the result of this sanity check
> (such as by cacheing in the queue_flags) may not work. Because for dm
> devices, users could change the state of the underlying devices through
> '/sys/block/<dev>/io_poll', bypassing the dm device above. In this case,
> the cached result of the very beginning sanity check could be
> out-of-date. Thus the sanity check needs to be done every time 'io_poll'
> is to be modified.
> 
> Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
> ---
>   block/blk-sysfs.c      | 14 +++++++++++---
>   include/linux/blkdev.h |  1 +
>   2 files changed, 12 insertions(+), 3 deletions(-)
> 
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index db3268d41274..c8e7e4af66cb 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -426,9 +426,17 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page,
>   	unsigned long poll_on;
>   	ssize_t ret;
>   
> -	if (!q->tag_set || q->tag_set->nr_maps <= HCTX_TYPE_POLL ||
> -	    !q->tag_set->map[HCTX_TYPE_POLL].nr_queues)
> -		return -EINVAL;
> +	if (queue_is_mq(q)) {
> +		if (!q->tag_set || q->tag_set->nr_maps <= HCTX_TYPE_POLL ||
> +		    !q->tag_set->map[HCTX_TYPE_POLL].nr_queues)
> +			return -EINVAL;
> +	} else {
> +		struct gendisk *disk = queue_to_disk(q);
> +
> +		if (!disk->fops->poll_capable ||
> +		    !disk->fops->poll_capable(disk))
> +			return -EINVAL;
> +	}
>   
>   	ret = queue_var_store(&poll_on, page, count);
>   	if (ret < 0)
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index bfab74b45f15..a46f975f2a2f 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -1881,6 +1881,7 @@ struct block_device_operations {
>   	int (*report_zones)(struct gendisk *, sector_t sector,
>   			unsigned int nr_zones, report_zones_cb cb, void *data);
>   	char *(*devnode)(struct gendisk *disk, umode_t *mode);
> +	bool (*poll_capable)(struct gendisk *disk);
>   	struct module *owner;
>   	const struct pr_ops *pr_ops;
>   };
> 
I really wonder how this would work for nvme multipath; but I guess it 
doesn't change the current situation.

So:

Reviewed-by: Hannes Reinecke <hare@suse.de>

Cheers,

Hannes
Jingbo Xu March 30, 2021, 6:50 a.m. UTC | #2
On 3/30/21 2:26 PM, Hannes Reinecke wrote:
> On 3/29/21 5:26 PM, Ming Lei wrote:
>> From: Jeffle Xu <jefflexu@linux.alibaba.com>
>>
>> This method can be used to check if bio-based device supports IO polling
>> or not. For mq devices, checking for hw queue in polling mode is
>> adequate, while the sanity check shall be implementation specific for
>> bio-based devices. For example, dm device needs to check if all
>> underlying devices are capable of IO polling.
>>
>> Though bio-based device may have done the sanity check during the
>> device initialization phase, cacheing the result of this sanity check
>> (such as by cacheing in the queue_flags) may not work. Because for dm
>> devices, users could change the state of the underlying devices through
>> '/sys/block/<dev>/io_poll', bypassing the dm device above. In this case,
>> the cached result of the very beginning sanity check could be
>> out-of-date. Thus the sanity check needs to be done every time 'io_poll'
>> is to be modified.
>>
>> Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
>> ---
>>   block/blk-sysfs.c      | 14 +++++++++++---
>>   include/linux/blkdev.h |  1 +
>>   2 files changed, 12 insertions(+), 3 deletions(-)
>>
>> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
>> index db3268d41274..c8e7e4af66cb 100644
>> --- a/block/blk-sysfs.c
>> +++ b/block/blk-sysfs.c
>> @@ -426,9 +426,17 @@ static ssize_t queue_poll_store(struct
>> request_queue *q, const char *page,
>>       unsigned long poll_on;
>>       ssize_t ret;
>>   -    if (!q->tag_set || q->tag_set->nr_maps <= HCTX_TYPE_POLL ||
>> -        !q->tag_set->map[HCTX_TYPE_POLL].nr_queues)
>> -        return -EINVAL;
>> +    if (queue_is_mq(q)) {
>> +        if (!q->tag_set || q->tag_set->nr_maps <= HCTX_TYPE_POLL ||
>> +            !q->tag_set->map[HCTX_TYPE_POLL].nr_queues)
>> +            return -EINVAL;
>> +    } else {
>> +        struct gendisk *disk = queue_to_disk(q);
>> +
>> +        if (!disk->fops->poll_capable ||
>> +            !disk->fops->poll_capable(disk))
>> +            return -EINVAL;
>> +    }
>>         ret = queue_var_store(&poll_on, page, count);
>>       if (ret < 0)
>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>> index bfab74b45f15..a46f975f2a2f 100644
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -1881,6 +1881,7 @@ struct block_device_operations {
>>       int (*report_zones)(struct gendisk *, sector_t sector,
>>               unsigned int nr_zones, report_zones_cb cb, void *data);
>>       char *(*devnode)(struct gendisk *disk, umode_t *mode);
>> +    bool (*poll_capable)(struct gendisk *disk);
>>       struct module *owner;
>>       const struct pr_ops *pr_ops;
>>   };
>>
> I really wonder how this would work for nvme multipath; but I guess it
> doesn't change the current situation.

I wonder whether md/dm devices, which are built upon other devices
('virtual devices', in other words), should at least be distinguished
from other 'original' bio-based devices (e.g., nvme multipath) then.
Maybe with one extra flag or something.
Ming Lei March 30, 2021, 8:49 a.m. UTC | #3
On Tue, Mar 30, 2021 at 02:50:51PM +0800, JeffleXu wrote:
> 
> 
> On 3/30/21 2:26 PM, Hannes Reinecke wrote:
> > On 3/29/21 5:26 PM, Ming Lei wrote:
> >> From: Jeffle Xu <jefflexu@linux.alibaba.com>
> >>
> >> This method can be used to check if bio-based device supports IO polling
> >> or not. For mq devices, checking for hw queue in polling mode is
> >> adequate, while the sanity check shall be implementation specific for
> >> bio-based devices. For example, dm device needs to check if all
> >> underlying devices are capable of IO polling.
> >>
> >> Though bio-based device may have done the sanity check during the
> >> device initialization phase, cacheing the result of this sanity check
> >> (such as by cacheing in the queue_flags) may not work. Because for dm
> >> devices, users could change the state of the underlying devices through
> >> '/sys/block/<dev>/io_poll', bypassing the dm device above. In this case,
> >> the cached result of the very beginning sanity check could be
> >> out-of-date. Thus the sanity check needs to be done every time 'io_poll'
> >> is to be modified.
> >>
> >> Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
> >> ---
> >>   block/blk-sysfs.c      | 14 +++++++++++---
> >>   include/linux/blkdev.h |  1 +
> >>   2 files changed, 12 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> >> index db3268d41274..c8e7e4af66cb 100644
> >> --- a/block/blk-sysfs.c
> >> +++ b/block/blk-sysfs.c
> >> @@ -426,9 +426,17 @@ static ssize_t queue_poll_store(struct
> >> request_queue *q, const char *page,
> >>       unsigned long poll_on;
> >>       ssize_t ret;
> >>   -    if (!q->tag_set || q->tag_set->nr_maps <= HCTX_TYPE_POLL ||
> >> -        !q->tag_set->map[HCTX_TYPE_POLL].nr_queues)
> >> -        return -EINVAL;
> >> +    if (queue_is_mq(q)) {
> >> +        if (!q->tag_set || q->tag_set->nr_maps <= HCTX_TYPE_POLL ||
> >> +            !q->tag_set->map[HCTX_TYPE_POLL].nr_queues)
> >> +            return -EINVAL;
> >> +    } else {
> >> +        struct gendisk *disk = queue_to_disk(q);
> >> +
> >> +        if (!disk->fops->poll_capable ||
> >> +            !disk->fops->poll_capable(disk))
> >> +            return -EINVAL;
> >> +    }
> >>         ret = queue_var_store(&poll_on, page, count);
> >>       if (ret < 0)
> >> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> >> index bfab74b45f15..a46f975f2a2f 100644
> >> --- a/include/linux/blkdev.h
> >> +++ b/include/linux/blkdev.h
> >> @@ -1881,6 +1881,7 @@ struct block_device_operations {
> >>       int (*report_zones)(struct gendisk *, sector_t sector,
> >>               unsigned int nr_zones, report_zones_cb cb, void *data);
> >>       char *(*devnode)(struct gendisk *disk, umode_t *mode);
> >> +    bool (*poll_capable)(struct gendisk *disk);
> >>       struct module *owner;
> >>       const struct pr_ops *pr_ops;
> >>   };
> >>
> > I really wonder how this would work for nvme multipath; but I guess it
> > doesn't change the current situation.

It should work for nvme multipath since the approach covers this case,
and the bio submitted to the underlying NVMe device is marked with
REQ_HIPRI and REQ_POLL_CTX too.

> 
> I wonder, at least, md/dm, which is built upon other devices, or
> 'virtual device' in other words, should be distinguished from other
> 'original' bio-based device (e.g., nvme multipath) then. Maybe one extra
> flag or something.

There is REQ_NVME_MPATH, but I'm not sure we need to deal with that.


Thanks,
Ming
diff mbox series

Patch

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index db3268d41274..c8e7e4af66cb 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -426,9 +426,17 @@  static ssize_t queue_poll_store(struct request_queue *q, const char *page,
 	unsigned long poll_on;
 	ssize_t ret;
 
-	if (!q->tag_set || q->tag_set->nr_maps <= HCTX_TYPE_POLL ||
-	    !q->tag_set->map[HCTX_TYPE_POLL].nr_queues)
-		return -EINVAL;
+	if (queue_is_mq(q)) {
+		if (!q->tag_set || q->tag_set->nr_maps <= HCTX_TYPE_POLL ||
+		    !q->tag_set->map[HCTX_TYPE_POLL].nr_queues)
+			return -EINVAL;
+	} else {
+		struct gendisk *disk = queue_to_disk(q);
+
+		if (!disk->fops->poll_capable ||
+		    !disk->fops->poll_capable(disk))
+			return -EINVAL;
+	}
 
 	ret = queue_var_store(&poll_on, page, count);
 	if (ret < 0)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index bfab74b45f15..a46f975f2a2f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1881,6 +1881,7 @@  struct block_device_operations {
 	int (*report_zones)(struct gendisk *, sector_t sector,
 			unsigned int nr_zones, report_zones_cb cb, void *data);
 	char *(*devnode)(struct gendisk *disk, umode_t *mode);
+	bool (*poll_capable)(struct gendisk *disk);
 	struct module *owner;
 	const struct pr_ops *pr_ops;
 };