diff mbox series

[2/6] block: add support for selecting all zones

Message ID 20200625122152.17359-3-javier@javigon.com (mailing list archive)
State New, archived
Headers show
Series ZNS: Extra features for current patches | expand

Commit Message

Javier González June 25, 2020, 12:21 p.m. UTC
From: Javier González <javier.gonz@samsung.com>

Add flag to allow selecting all zones on a single zone management
operation

Signed-off-by: Javier González <javier.gonz@samsung.com>
Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
---
 block/blk-zoned.c             | 3 +++
 include/linux/blk_types.h     | 3 ++-
 include/uapi/linux/blkzoned.h | 9 +++++++++
 3 files changed, 14 insertions(+), 1 deletion(-)

Comments

Damien Le Moal June 26, 2020, 1:27 a.m. UTC | #1
On 2020/06/25 21:22, Javier González wrote:
> From: Javier González <javier.gonz@samsung.com>
> 
> Add flag to allow selecting all zones on a single zone management
> operation
> 
> Signed-off-by: Javier González <javier.gonz@samsung.com>
> Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> ---
>  block/blk-zoned.c             | 3 +++
>  include/linux/blk_types.h     | 3 ++-
>  include/uapi/linux/blkzoned.h | 9 +++++++++
>  3 files changed, 14 insertions(+), 1 deletion(-)
> 
> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
> index e87c60004dc5..29194388a1bb 100644
> --- a/block/blk-zoned.c
> +++ b/block/blk-zoned.c
> @@ -420,6 +420,9 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>  		return -ENOTTY;
>  	}
>  
> +	if (zmgmt.flags & BLK_ZONE_SELECT_ALL)
> +		op |= REQ_ZONE_ALL;
> +
>  	return blkdev_zone_mgmt(bdev, op, zmgmt.sector, zmgmt.nr_sectors,
>  				GFP_KERNEL);
>  }
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index ccb895f911b1..16b57fb2b99c 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -351,6 +351,7 @@ enum req_flag_bits {
>  	 * work item to avoid such priority inversions.
>  	 */
>  	__REQ_CGROUP_PUNT,
> +	__REQ_ZONE_ALL,		/* apply zone operation to all zones */
>  
>  	/* command specific flags for REQ_OP_WRITE_ZEROES: */
>  	__REQ_NOUNMAP,		/* do not free blocks when zeroing */
> @@ -378,7 +379,7 @@ enum req_flag_bits {
>  #define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
>  #define REQ_NOWAIT		(1ULL << __REQ_NOWAIT)
>  #define REQ_CGROUP_PUNT		(1ULL << __REQ_CGROUP_PUNT)
> -
> +#define REQ_ZONE_ALL		(1ULL << __REQ_ZONE_ALL)
>  #define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
>  #define REQ_HIPRI		(1ULL << __REQ_HIPRI)
>  
> diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
> index 07b5fde21d9f..a8c89fe58f97 100644
> --- a/include/uapi/linux/blkzoned.h
> +++ b/include/uapi/linux/blkzoned.h
> @@ -157,6 +157,15 @@ enum blk_zone_action {
>  	BLK_ZONE_MGMT_RESET	= 0x4,
>  };
>  
> +/**
> + * enum blk_zone_mgmt_flags - Flags for blk_zone_mgmt
> + *
> + * BLK_ZONE_SELECT_ALL: Select all zones for current zone action
> + */
> +enum blk_zone_mgmt_flags {
> +	BLK_ZONE_SELECT_ALL	= 1 << 0,
> +};
> +
>  /**
>   * struct blk_zone_mgmt - Extended zoned management
>   *
> 

NACK.

Details:
1) REQ_OP_ZONE_RESET together with REQ_ZONE_ALL is the same as
REQ_OP_ZONE_RESET_ALL, isn't it ? You are duplicating a functionality that
already exists.
2) The patch introduces REQ_ZONE_ALL at the block layer only without defining
how it ties into SCSI and NVMe driver use of it. Is REQ_ZONE_ALL indicating that
the zone management commands are to be executed with the ALL bit set ? If yes,
that will break device-mapper. See the special code for handling
REQ_OP_ZONE_RESET_ALL. That code is in place for a reason: the target block
device may not be an entire physical device. In that case, applying a zone
management command to all zones of the physical drive is wrong.
3) REQ_ZONE_ALL seems completely equivalent to specifying a sector range of [0
.. drive capacity]. So what is the point ? The current interface handles that.
That is how we chose between REQ_OP_ZONE_RESET and REQ_OP_ZONE_RESET_ALL right now.
4) Without any in-kernel user, I do not see the point. And for applications, I
do not see any good use case for doing open all, close all, offline all or
finish all. If you have any such good use case, please elaborate.
Javier González June 26, 2020, 5:58 a.m. UTC | #2
On 26.06.2020 01:27, Damien Le Moal wrote:
>On 2020/06/25 21:22, Javier González wrote:
>> From: Javier González <javier.gonz@samsung.com>
>>
>> Add flag to allow selecting all zones on a single zone management
>> operation
>>
>> Signed-off-by: Javier González <javier.gonz@samsung.com>
>> Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
>> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
>> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
>> ---
>>  block/blk-zoned.c             | 3 +++
>>  include/linux/blk_types.h     | 3 ++-
>>  include/uapi/linux/blkzoned.h | 9 +++++++++
>>  3 files changed, 14 insertions(+), 1 deletion(-)
>>
>> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
>> index e87c60004dc5..29194388a1bb 100644
>> --- a/block/blk-zoned.c
>> +++ b/block/blk-zoned.c
>> @@ -420,6 +420,9 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>>  		return -ENOTTY;
>>  	}
>>
>> +	if (zmgmt.flags & BLK_ZONE_SELECT_ALL)
>> +		op |= REQ_ZONE_ALL;
>> +
>>  	return blkdev_zone_mgmt(bdev, op, zmgmt.sector, zmgmt.nr_sectors,
>>  				GFP_KERNEL);
>>  }
>> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
>> index ccb895f911b1..16b57fb2b99c 100644
>> --- a/include/linux/blk_types.h
>> +++ b/include/linux/blk_types.h
>> @@ -351,6 +351,7 @@ enum req_flag_bits {
>>  	 * work item to avoid such priority inversions.
>>  	 */
>>  	__REQ_CGROUP_PUNT,
>> +	__REQ_ZONE_ALL,		/* apply zone operation to all zones */
>>
>>  	/* command specific flags for REQ_OP_WRITE_ZEROES: */
>>  	__REQ_NOUNMAP,		/* do not free blocks when zeroing */
>> @@ -378,7 +379,7 @@ enum req_flag_bits {
>>  #define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
>>  #define REQ_NOWAIT		(1ULL << __REQ_NOWAIT)
>>  #define REQ_CGROUP_PUNT		(1ULL << __REQ_CGROUP_PUNT)
>> -
>> +#define REQ_ZONE_ALL		(1ULL << __REQ_ZONE_ALL)
>>  #define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
>>  #define REQ_HIPRI		(1ULL << __REQ_HIPRI)
>>
>> diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
>> index 07b5fde21d9f..a8c89fe58f97 100644
>> --- a/include/uapi/linux/blkzoned.h
>> +++ b/include/uapi/linux/blkzoned.h
>> @@ -157,6 +157,15 @@ enum blk_zone_action {
>>  	BLK_ZONE_MGMT_RESET	= 0x4,
>>  };
>>
>> +/**
>> + * enum blk_zone_mgmt_flags - Flags for blk_zone_mgmt
>> + *
>> + * BLK_ZONE_SELECT_ALL: Select all zones for current zone action
>> + */
>> +enum blk_zone_mgmt_flags {
>> +	BLK_ZONE_SELECT_ALL	= 1 << 0,
>> +};
>> +
>>  /**
>>   * struct blk_zone_mgmt - Extended zoned management
>>   *
>>
>
>NACK.
>
>Details:
>1) REQ_OP_ZONE_RESET together with REQ_ZONE_ALL is the same as
>REQ_OP_ZONE_RESET_ALL, isn't it ? You are duplicating a functionality that
>already exists.
>2) The patch introduces REQ_ZONE_ALL at the block layer only without defining
>how it ties into SCSI and NVMe driver use of it. Is REQ_ZONE_ALL indicating that
>the zone management commands are to be executed with the ALL bit set ? If yes,
>that will break device-mapper. See the special code for handling
>REQ_OP_ZONE_RESET_ALL. That code is in place for a reason: the target block
>device may not be an entire physical device. In that case, applying a zone
>management command to all zones of the physical drive is wrong.
>3) REQ_ZONE_ALL seems completely equivalent to specifying a sector range of [0
>.. drive capacity]. So what is the point ? The current interface handles that.
>That is how we chose between REQ_OP_ZONE_RESET and REQ_OP_ZONE_RESET_ALL right now.
>4) Without any in-kernel user, I do not see the point. And for applications, I
>do not see any good use case for doing open all, close all, offline all or
>finish all. If you have any such good use case, please elaborate.
>

The main use if reset all, but without having to look through all zones,
as it imposes an overhead when we have a large number of zones. Having
the possibility to offload it to HW is more efficient.

I had not thought about the device mapper use case. Would it be an
option to translate this into REQ_OP_ZONE_RESET_ALL when we have a
device mapper (or any other case where this might break) and then leave
the bit go to the driver if it applies to the whole device?

Javier
Damien Le Moal June 26, 2020, 6:35 a.m. UTC | #3
On 2020/06/26 14:58, Javier González wrote:
> On 26.06.2020 01:27, Damien Le Moal wrote:
>> On 2020/06/25 21:22, Javier González wrote:
>>> From: Javier González <javier.gonz@samsung.com>
>>>
>>> Add flag to allow selecting all zones on a single zone management
>>> operation
>>>
>>> Signed-off-by: Javier González <javier.gonz@samsung.com>
>>> Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
>>> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
>>> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
>>> ---
>>>  block/blk-zoned.c             | 3 +++
>>>  include/linux/blk_types.h     | 3 ++-
>>>  include/uapi/linux/blkzoned.h | 9 +++++++++
>>>  3 files changed, 14 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
>>> index e87c60004dc5..29194388a1bb 100644
>>> --- a/block/blk-zoned.c
>>> +++ b/block/blk-zoned.c
>>> @@ -420,6 +420,9 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>>>  		return -ENOTTY;
>>>  	}
>>>
>>> +	if (zmgmt.flags & BLK_ZONE_SELECT_ALL)
>>> +		op |= REQ_ZONE_ALL;
>>> +
>>>  	return blkdev_zone_mgmt(bdev, op, zmgmt.sector, zmgmt.nr_sectors,
>>>  				GFP_KERNEL);
>>>  }
>>> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
>>> index ccb895f911b1..16b57fb2b99c 100644
>>> --- a/include/linux/blk_types.h
>>> +++ b/include/linux/blk_types.h
>>> @@ -351,6 +351,7 @@ enum req_flag_bits {
>>>  	 * work item to avoid such priority inversions.
>>>  	 */
>>>  	__REQ_CGROUP_PUNT,
>>> +	__REQ_ZONE_ALL,		/* apply zone operation to all zones */
>>>
>>>  	/* command specific flags for REQ_OP_WRITE_ZEROES: */
>>>  	__REQ_NOUNMAP,		/* do not free blocks when zeroing */
>>> @@ -378,7 +379,7 @@ enum req_flag_bits {
>>>  #define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
>>>  #define REQ_NOWAIT		(1ULL << __REQ_NOWAIT)
>>>  #define REQ_CGROUP_PUNT		(1ULL << __REQ_CGROUP_PUNT)
>>> -
>>> +#define REQ_ZONE_ALL		(1ULL << __REQ_ZONE_ALL)
>>>  #define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
>>>  #define REQ_HIPRI		(1ULL << __REQ_HIPRI)
>>>
>>> diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
>>> index 07b5fde21d9f..a8c89fe58f97 100644
>>> --- a/include/uapi/linux/blkzoned.h
>>> +++ b/include/uapi/linux/blkzoned.h
>>> @@ -157,6 +157,15 @@ enum blk_zone_action {
>>>  	BLK_ZONE_MGMT_RESET	= 0x4,
>>>  };
>>>
>>> +/**
>>> + * enum blk_zone_mgmt_flags - Flags for blk_zone_mgmt
>>> + *
>>> + * BLK_ZONE_SELECT_ALL: Select all zones for current zone action
>>> + */
>>> +enum blk_zone_mgmt_flags {
>>> +	BLK_ZONE_SELECT_ALL	= 1 << 0,
>>> +};
>>> +
>>>  /**
>>>   * struct blk_zone_mgmt - Extended zoned management
>>>   *
>>>
>>
>> NACK.
>>
>> Details:
>> 1) REQ_OP_ZONE_RESET together with REQ_ZONE_ALL is the same as
>> REQ_OP_ZONE_RESET_ALL, isn't it ? You are duplicating a functionality that
>> already exists.
>> 2) The patch introduces REQ_ZONE_ALL at the block layer only without defining
>> how it ties into SCSI and NVMe driver use of it. Is REQ_ZONE_ALL indicating that
>> the zone management commands are to be executed with the ALL bit set ? If yes,
>> that will break device-mapper. See the special code for handling
>> REQ_OP_ZONE_RESET_ALL. That code is in place for a reason: the target block
>> device may not be an entire physical device. In that case, applying a zone
>> management command to all zones of the physical drive is wrong.
>> 3) REQ_ZONE_ALL seems completely equivalent to specifying a sector range of [0
>> .. drive capacity]. So what is the point ? The current interface handles that.
>> That is how we chose between REQ_OP_ZONE_RESET and REQ_OP_ZONE_RESET_ALL right now.
>> 4) Without any in-kernel user, I do not see the point. And for applications, I
>> do not see any good use case for doing open all, close all, offline all or
>> finish all. If you have any such good use case, please elaborate.
>>
> 
> The main use if reset all, but without having to look through all zones,
> as it imposes an overhead when we have a large number of zones. Having
> the possibility to offload it to HW is more efficient.
> 
> I had not thought about the device mapper use case. Would it be an
> option to translate this into REQ_OP_ZONE_RESET_ALL when we have a
> device mapper (or any other case where this might break) and then leave
> the bit go to the driver if it applies to the whole device?

But REQ_OP_ZONE_RESET_ALL is already implemented and supported and will reset
all zones of a drive using a single command if the ioctl is called for the
entire sector range of a physical drive. For device mapper with a partial
mapping, the per zone reset loop will be used. If you have no other use case for
the REQ_ZONE_ALL flag, what is the point here ? Reset is already optimized for
the all zones case

> 
> Javier
>
Javier González June 26, 2020, 6:52 a.m. UTC | #4
On 26.06.2020 06:35, Damien Le Moal wrote:
>On 2020/06/26 14:58, Javier González wrote:
>> On 26.06.2020 01:27, Damien Le Moal wrote:
>>> On 2020/06/25 21:22, Javier González wrote:
>>>> From: Javier González <javier.gonz@samsung.com>
>>>>
>>>> Add flag to allow selecting all zones on a single zone management
>>>> operation
>>>>
>>>> Signed-off-by: Javier González <javier.gonz@samsung.com>
>>>> Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
>>>> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
>>>> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
>>>> ---
>>>>  block/blk-zoned.c             | 3 +++
>>>>  include/linux/blk_types.h     | 3 ++-
>>>>  include/uapi/linux/blkzoned.h | 9 +++++++++
>>>>  3 files changed, 14 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
>>>> index e87c60004dc5..29194388a1bb 100644
>>>> --- a/block/blk-zoned.c
>>>> +++ b/block/blk-zoned.c
>>>> @@ -420,6 +420,9 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>>>>  		return -ENOTTY;
>>>>  	}
>>>>
>>>> +	if (zmgmt.flags & BLK_ZONE_SELECT_ALL)
>>>> +		op |= REQ_ZONE_ALL;
>>>> +
>>>>  	return blkdev_zone_mgmt(bdev, op, zmgmt.sector, zmgmt.nr_sectors,
>>>>  				GFP_KERNEL);
>>>>  }
>>>> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
>>>> index ccb895f911b1..16b57fb2b99c 100644
>>>> --- a/include/linux/blk_types.h
>>>> +++ b/include/linux/blk_types.h
>>>> @@ -351,6 +351,7 @@ enum req_flag_bits {
>>>>  	 * work item to avoid such priority inversions.
>>>>  	 */
>>>>  	__REQ_CGROUP_PUNT,
>>>> +	__REQ_ZONE_ALL,		/* apply zone operation to all zones */
>>>>
>>>>  	/* command specific flags for REQ_OP_WRITE_ZEROES: */
>>>>  	__REQ_NOUNMAP,		/* do not free blocks when zeroing */
>>>> @@ -378,7 +379,7 @@ enum req_flag_bits {
>>>>  #define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
>>>>  #define REQ_NOWAIT		(1ULL << __REQ_NOWAIT)
>>>>  #define REQ_CGROUP_PUNT		(1ULL << __REQ_CGROUP_PUNT)
>>>> -
>>>> +#define REQ_ZONE_ALL		(1ULL << __REQ_ZONE_ALL)
>>>>  #define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
>>>>  #define REQ_HIPRI		(1ULL << __REQ_HIPRI)
>>>>
>>>> diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
>>>> index 07b5fde21d9f..a8c89fe58f97 100644
>>>> --- a/include/uapi/linux/blkzoned.h
>>>> +++ b/include/uapi/linux/blkzoned.h
>>>> @@ -157,6 +157,15 @@ enum blk_zone_action {
>>>>  	BLK_ZONE_MGMT_RESET	= 0x4,
>>>>  };
>>>>
>>>> +/**
>>>> + * enum blk_zone_mgmt_flags - Flags for blk_zone_mgmt
>>>> + *
>>>> + * BLK_ZONE_SELECT_ALL: Select all zones for current zone action
>>>> + */
>>>> +enum blk_zone_mgmt_flags {
>>>> +	BLK_ZONE_SELECT_ALL	= 1 << 0,
>>>> +};
>>>> +
>>>>  /**
>>>>   * struct blk_zone_mgmt - Extended zoned management
>>>>   *
>>>>
>>>
>>> NACK.
>>>
>>> Details:
>>> 1) REQ_OP_ZONE_RESET together with REQ_ZONE_ALL is the same as
>>> REQ_OP_ZONE_RESET_ALL, isn't it ? You are duplicating a functionality that
>>> already exists.
>>> 2) The patch introduces REQ_ZONE_ALL at the block layer only without defining
>>> how it ties into SCSI and NVMe driver use of it. Is REQ_ZONE_ALL indicating that
>>> the zone management commands are to be executed with the ALL bit set ? If yes,
>>> that will break device-mapper. See the special code for handling
>>> REQ_OP_ZONE_RESET_ALL. That code is in place for a reason: the target block
>>> device may not be an entire physical device. In that case, applying a zone
>>> management command to all zones of the physical drive is wrong.
>>> 3) REQ_ZONE_ALL seems completely equivalent to specifying a sector range of [0
>>> .. drive capacity]. So what is the point ? The current interface handles that.
>>> That is how we chose between REQ_OP_ZONE_RESET and REQ_OP_ZONE_RESET_ALL right now.
>>> 4) Without any in-kernel user, I do not see the point. And for applications, I
>>> do not see any good use case for doing open all, close all, offline all or
>>> finish all. If you have any such good use case, please elaborate.
>>>
>>
>> The main use if reset all, but without having to look through all zones,
>> as it imposes an overhead when we have a large number of zones. Having
>> the possibility to offload it to HW is more efficient.
>>
>> I had not thought about the device mapper use case. Would it be an
>> option to translate this into REQ_OP_ZONE_RESET_ALL when we have a
>> device mapper (or any other case where this might break) and then leave
>> the bit go to the driver if it applies to the whole device?
>
>But REQ_OP_ZONE_RESET_ALL is already implemented and supported and will reset
>all zones of a drive using a single command if the ioctl is called for the
>entire sector range of a physical drive. For device mapper with a partial
>mapping, the per zone reset loop will be used. If you have no other use case for
>the REQ_ZONE_ALL flag, what is the point here ? Reset is already optimized for
>the all zones case

OK. I might have missed this. I thought we were sending several commands
instead of a single reset with the bit. I will check again. Thanks for
pointing at this.

Javier
Damien Le Moal June 26, 2020, 7:06 a.m. UTC | #5
On 2020/06/26 15:52, Javier González wrote:
> On 26.06.2020 06:35, Damien Le Moal wrote:
>> On 2020/06/26 14:58, Javier González wrote:
>>> On 26.06.2020 01:27, Damien Le Moal wrote:
>>>> On 2020/06/25 21:22, Javier González wrote:
>>>>> From: Javier González <javier.gonz@samsung.com>
>>>>>
>>>>> Add flag to allow selecting all zones on a single zone management
>>>>> operation
>>>>>
>>>>> Signed-off-by: Javier González <javier.gonz@samsung.com>
>>>>> Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
>>>>> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
>>>>> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
>>>>> ---
>>>>>  block/blk-zoned.c             | 3 +++
>>>>>  include/linux/blk_types.h     | 3 ++-
>>>>>  include/uapi/linux/blkzoned.h | 9 +++++++++
>>>>>  3 files changed, 14 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
>>>>> index e87c60004dc5..29194388a1bb 100644
>>>>> --- a/block/blk-zoned.c
>>>>> +++ b/block/blk-zoned.c
>>>>> @@ -420,6 +420,9 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>>>>>  		return -ENOTTY;
>>>>>  	}
>>>>>
>>>>> +	if (zmgmt.flags & BLK_ZONE_SELECT_ALL)
>>>>> +		op |= REQ_ZONE_ALL;
>>>>> +
>>>>>  	return blkdev_zone_mgmt(bdev, op, zmgmt.sector, zmgmt.nr_sectors,
>>>>>  				GFP_KERNEL);
>>>>>  }
>>>>> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
>>>>> index ccb895f911b1..16b57fb2b99c 100644
>>>>> --- a/include/linux/blk_types.h
>>>>> +++ b/include/linux/blk_types.h
>>>>> @@ -351,6 +351,7 @@ enum req_flag_bits {
>>>>>  	 * work item to avoid such priority inversions.
>>>>>  	 */
>>>>>  	__REQ_CGROUP_PUNT,
>>>>> +	__REQ_ZONE_ALL,		/* apply zone operation to all zones */
>>>>>
>>>>>  	/* command specific flags for REQ_OP_WRITE_ZEROES: */
>>>>>  	__REQ_NOUNMAP,		/* do not free blocks when zeroing */
>>>>> @@ -378,7 +379,7 @@ enum req_flag_bits {
>>>>>  #define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
>>>>>  #define REQ_NOWAIT		(1ULL << __REQ_NOWAIT)
>>>>>  #define REQ_CGROUP_PUNT		(1ULL << __REQ_CGROUP_PUNT)
>>>>> -
>>>>> +#define REQ_ZONE_ALL		(1ULL << __REQ_ZONE_ALL)
>>>>>  #define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
>>>>>  #define REQ_HIPRI		(1ULL << __REQ_HIPRI)
>>>>>
>>>>> diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
>>>>> index 07b5fde21d9f..a8c89fe58f97 100644
>>>>> --- a/include/uapi/linux/blkzoned.h
>>>>> +++ b/include/uapi/linux/blkzoned.h
>>>>> @@ -157,6 +157,15 @@ enum blk_zone_action {
>>>>>  	BLK_ZONE_MGMT_RESET	= 0x4,
>>>>>  };
>>>>>
>>>>> +/**
>>>>> + * enum blk_zone_mgmt_flags - Flags for blk_zone_mgmt
>>>>> + *
>>>>> + * BLK_ZONE_SELECT_ALL: Select all zones for current zone action
>>>>> + */
>>>>> +enum blk_zone_mgmt_flags {
>>>>> +	BLK_ZONE_SELECT_ALL	= 1 << 0,
>>>>> +};
>>>>> +
>>>>>  /**
>>>>>   * struct blk_zone_mgmt - Extended zoned management
>>>>>   *
>>>>>
>>>>
>>>> NACK.
>>>>
>>>> Details:
>>>> 1) REQ_OP_ZONE_RESET together with REQ_ZONE_ALL is the same as
>>>> REQ_OP_ZONE_RESET_ALL, isn't it ? You are duplicating a functionality that
>>>> already exists.
>>>> 2) The patch introduces REQ_ZONE_ALL at the block layer only without defining
>>>> how it ties into SCSI and NVMe driver use of it. Is REQ_ZONE_ALL indicating that
>>>> the zone management commands are to be executed with the ALL bit set ? If yes,
>>>> that will break device-mapper. See the special code for handling
>>>> REQ_OP_ZONE_RESET_ALL. That code is in place for a reason: the target block
>>>> device may not be an entire physical device. In that case, applying a zone
>>>> management command to all zones of the physical drive is wrong.
>>>> 3) REQ_ZONE_ALL seems completely equivalent to specifying a sector range of [0
>>>> .. drive capacity]. So what is the point ? The current interface handles that.
>>>> That is how we chose between REQ_OP_ZONE_RESET and REQ_OP_ZONE_RESET_ALL right now.
>>>> 4) Without any in-kernel user, I do not see the point. And for applications, I
>>>> do not see any good use case for doing open all, close all, offline all or
>>>> finish all. If you have any such good use case, please elaborate.
>>>>
>>>
>>> The main use if reset all, but without having to look through all zones,
>>> as it imposes an overhead when we have a large number of zones. Having
>>> the possibility to offload it to HW is more efficient.
>>>
>>> I had not thought about the device mapper use case. Would it be an
>>> option to translate this into REQ_OP_ZONE_RESET_ALL when we have a
>>> device mapper (or any other case where this might break) and then leave
>>> the bit go to the driver if it applies to the whole device?
>>
>> But REQ_OP_ZONE_RESET_ALL is already implemented and supported and will reset
>> all zones of a drive using a single command if the ioctl is called for the
>> entire sector range of a physical drive. For device mapper with a partial
>> mapping, the per zone reset loop will be used. If you have no other use case for
>> the REQ_ZONE_ALL flag, what is the point here ? Reset is already optimized for
>> the all zones case
> 
> OK. I might have missed this. I thought we were sending several commands
> instead of a single reset with the bit. I will check again. Thanks for
> pointing at this.

In block/blk-zoned.c, there is:

static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
                                                sector_t sector,
                                                sector_t nr_sectors)
{
        if (!blk_queue_zone_resetall(bdev_get_queue(bdev)))
                return false;

        /*
         * REQ_OP_ZONE_RESET_ALL can be executed only if the number of sectors
         * of the applicable zone range is the entire disk.
         */
        return !sector && nr_sectors == get_capacity(bdev->bd_disk);
}

And:

int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
                     sector_t sector, sector_t nr_sectors,
                     gfp_t gfp_mask)
{
	...

	while (sector < end_sector) {
                bio = blk_next_bio(bio, 0, gfp_mask);
                bio_set_dev(bio, bdev);

                /*
                 * Special case for the zone reset operation that reset all
                 * zones, this is useful for applications like mkfs.
                 */
                if (op == REQ_OP_ZONE_RESET &&
                    blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) {
                        bio->bi_opf = REQ_OP_ZONE_RESET_ALL;
                        break;
			^^^^^ This means one command only...

                }

                bio->bi_opf = op | REQ_SYNC;
                bio->bi_iter.bi_sector = sector;
                sector += zone_sectors;

                /* This may take a while, so be nice to others */
                cond_resched();
        }

        ret = submit_bio_wait(bio);
        bio_put(bio);

        return ret;
}

And see scsi/sd_zbc.c and zns.c. REQ_OP_ZONE_RESET_ALL end up setting the ALL
bit for reset command.


> 
> Javier
>
diff mbox series

Patch

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index e87c60004dc5..29194388a1bb 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -420,6 +420,9 @@  int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
 		return -ENOTTY;
 	}
 
+	if (zmgmt.flags & BLK_ZONE_SELECT_ALL)
+		op |= REQ_ZONE_ALL;
+
 	return blkdev_zone_mgmt(bdev, op, zmgmt.sector, zmgmt.nr_sectors,
 				GFP_KERNEL);
 }
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index ccb895f911b1..16b57fb2b99c 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -351,6 +351,7 @@  enum req_flag_bits {
 	 * work item to avoid such priority inversions.
 	 */
 	__REQ_CGROUP_PUNT,
+	__REQ_ZONE_ALL,		/* apply zone operation to all zones */
 
 	/* command specific flags for REQ_OP_WRITE_ZEROES: */
 	__REQ_NOUNMAP,		/* do not free blocks when zeroing */
@@ -378,7 +379,7 @@  enum req_flag_bits {
 #define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
 #define REQ_NOWAIT		(1ULL << __REQ_NOWAIT)
 #define REQ_CGROUP_PUNT		(1ULL << __REQ_CGROUP_PUNT)
-
+#define REQ_ZONE_ALL		(1ULL << __REQ_ZONE_ALL)
 #define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
 #define REQ_HIPRI		(1ULL << __REQ_HIPRI)
 
diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
index 07b5fde21d9f..a8c89fe58f97 100644
--- a/include/uapi/linux/blkzoned.h
+++ b/include/uapi/linux/blkzoned.h
@@ -157,6 +157,15 @@  enum blk_zone_action {
 	BLK_ZONE_MGMT_RESET	= 0x4,
 };
 
+/**
+ * enum blk_zone_mgmt_flags - Flags for blk_zone_mgmt
+ *
+ * BLK_ZONE_SELECT_ALL: Select all zones for current zone action
+ */
+enum blk_zone_mgmt_flags {
+	BLK_ZONE_SELECT_ALL	= 1 << 0,
+};
+
 /**
  * struct blk_zone_mgmt - Extended zoned management
  *