Message ID | 20200625122152.17359-3-javier@javigon.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | ZNS: Extra features for current patches | expand |
On 2020/06/25 21:22, Javier González wrote: > From: Javier González <javier.gonz@samsung.com> > > Add flag to allow selecting all zones on a single zone management > operation > > Signed-off-by: Javier González <javier.gonz@samsung.com> > Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com> > Signed-off-by: Kanchan Joshi <joshi.k@samsung.com> > Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com> > --- > block/blk-zoned.c | 3 +++ > include/linux/blk_types.h | 3 ++- > include/uapi/linux/blkzoned.h | 9 +++++++++ > 3 files changed, 14 insertions(+), 1 deletion(-) > > diff --git a/block/blk-zoned.c b/block/blk-zoned.c > index e87c60004dc5..29194388a1bb 100644 > --- a/block/blk-zoned.c > +++ b/block/blk-zoned.c > @@ -420,6 +420,9 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, > return -ENOTTY; > } > > + if (zmgmt.flags & BLK_ZONE_SELECT_ALL) > + op |= REQ_ZONE_ALL; > + > return blkdev_zone_mgmt(bdev, op, zmgmt.sector, zmgmt.nr_sectors, > GFP_KERNEL); > } > diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h > index ccb895f911b1..16b57fb2b99c 100644 > --- a/include/linux/blk_types.h > +++ b/include/linux/blk_types.h > @@ -351,6 +351,7 @@ enum req_flag_bits { > * work item to avoid such priority inversions. > */ > __REQ_CGROUP_PUNT, > + __REQ_ZONE_ALL, /* apply zone operation to all zones */ > > /* command specific flags for REQ_OP_WRITE_ZEROES: */ > __REQ_NOUNMAP, /* do not free blocks when zeroing */ > @@ -378,7 +379,7 @@ enum req_flag_bits { > #define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) > #define REQ_NOWAIT (1ULL << __REQ_NOWAIT) > #define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT) > - > +#define REQ_ZONE_ALL (1ULL << __REQ_ZONE_ALL) > #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) > #define REQ_HIPRI (1ULL << __REQ_HIPRI) > > diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h > index 07b5fde21d9f..a8c89fe58f97 100644 > --- a/include/uapi/linux/blkzoned.h > +++ b/include/uapi/linux/blkzoned.h > @@ -157,6 +157,15 @@ enum blk_zone_action { > BLK_ZONE_MGMT_RESET = 0x4, > }; > > +/** > + * enum blk_zone_mgmt_flags - Flags for blk_zone_mgmt > + * > + * BLK_ZONE_SELECT_ALL: Select all zones for current zone action > + */ > +enum blk_zone_mgmt_flags { > + BLK_ZONE_SELECT_ALL = 1 << 0, > +}; > + > /** > * struct blk_zone_mgmt - Extended zoned management > * > NACK. Details: 1) REQ_OP_ZONE_RESET together with REQ_ZONE_ALL is the same as REQ_OP_ZONE_RESET_ALL, isn't it ? You are duplicating a functionality that already exists. 2) The patch introduces REQ_ZONE_ALL at the block layer only without defining how it ties into SCSI and NVMe driver use of it. Is REQ_ZONE_ALL indicating that the zone management commands are to be executed with the ALL bit set ? If yes, that will break device-mapper. See the special code for handling REQ_OP_ZONE_RESET_ALL. That code is in place for a reason: the target block device may not be an entire physical device. In that case, applying a zone management command to all zones of the physical drive is wrong. 3) REQ_ZONE_ALL seems completely equivalent to specifying a sector range of [0 .. drive capacity]. So what is the point ? The current interface handles that. That is how we chose between REQ_OP_ZONE_RESET and REQ_OP_ZONE_RESET_ALL right now. 4) Without any in-kernel user, I do not see the point. And for applications, I do not see any good use case for doing open all, close all, offline all or finish all. If you have any such good use case, please elaborate.
On 26.06.2020 01:27, Damien Le Moal wrote: >On 2020/06/25 21:22, Javier González wrote: >> From: Javier González <javier.gonz@samsung.com> >> >> Add flag to allow selecting all zones on a single zone management >> operation >> >> Signed-off-by: Javier González <javier.gonz@samsung.com> >> Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com> >> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com> >> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com> >> --- >> block/blk-zoned.c | 3 +++ >> include/linux/blk_types.h | 3 ++- >> include/uapi/linux/blkzoned.h | 9 +++++++++ >> 3 files changed, 14 insertions(+), 1 deletion(-) >> >> diff --git a/block/blk-zoned.c b/block/blk-zoned.c >> index e87c60004dc5..29194388a1bb 100644 >> --- a/block/blk-zoned.c >> +++ b/block/blk-zoned.c >> @@ -420,6 +420,9 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, >> return -ENOTTY; >> } >> >> + if (zmgmt.flags & BLK_ZONE_SELECT_ALL) >> + op |= REQ_ZONE_ALL; >> + >> return blkdev_zone_mgmt(bdev, op, zmgmt.sector, zmgmt.nr_sectors, >> GFP_KERNEL); >> } >> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h >> index ccb895f911b1..16b57fb2b99c 100644 >> --- a/include/linux/blk_types.h >> +++ b/include/linux/blk_types.h >> @@ -351,6 +351,7 @@ enum req_flag_bits { >> * work item to avoid such priority inversions. >> */ >> __REQ_CGROUP_PUNT, >> + __REQ_ZONE_ALL, /* apply zone operation to all zones */ >> >> /* command specific flags for REQ_OP_WRITE_ZEROES: */ >> __REQ_NOUNMAP, /* do not free blocks when zeroing */ >> @@ -378,7 +379,7 @@ enum req_flag_bits { >> #define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) >> #define REQ_NOWAIT (1ULL << __REQ_NOWAIT) >> #define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT) >> - >> +#define REQ_ZONE_ALL (1ULL << __REQ_ZONE_ALL) >> #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) >> #define REQ_HIPRI (1ULL << __REQ_HIPRI) >> >> diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h >> index 07b5fde21d9f..a8c89fe58f97 100644 >> --- a/include/uapi/linux/blkzoned.h >> +++ b/include/uapi/linux/blkzoned.h >> @@ -157,6 +157,15 @@ enum blk_zone_action { >> BLK_ZONE_MGMT_RESET = 0x4, >> }; >> >> +/** >> + * enum blk_zone_mgmt_flags - Flags for blk_zone_mgmt >> + * >> + * BLK_ZONE_SELECT_ALL: Select all zones for current zone action >> + */ >> +enum blk_zone_mgmt_flags { >> + BLK_ZONE_SELECT_ALL = 1 << 0, >> +}; >> + >> /** >> * struct blk_zone_mgmt - Extended zoned management >> * >> > >NACK. > >Details: >1) REQ_OP_ZONE_RESET together with REQ_ZONE_ALL is the same as >REQ_OP_ZONE_RESET_ALL, isn't it ? You are duplicating a functionality that >already exists. >2) The patch introduces REQ_ZONE_ALL at the block layer only without defining >how it ties into SCSI and NVMe driver use of it. Is REQ_ZONE_ALL indicating that >the zone management commands are to be executed with the ALL bit set ? If yes, >that will break device-mapper. See the special code for handling >REQ_OP_ZONE_RESET_ALL. That code is in place for a reason: the target block >device may not be an entire physical device. In that case, applying a zone >management command to all zones of the physical drive is wrong. >3) REQ_ZONE_ALL seems completely equivalent to specifying a sector range of [0 >.. drive capacity]. So what is the point ? The current interface handles that. >That is how we chose between REQ_OP_ZONE_RESET and REQ_OP_ZONE_RESET_ALL right now. >4) Without any in-kernel user, I do not see the point. And for applications, I >do not see any good use case for doing open all, close all, offline all or >finish all. If you have any such good use case, please elaborate. > The main use if reset all, but without having to look through all zones, as it imposes an overhead when we have a large number of zones. Having the possibility to offload it to HW is more efficient. I had not thought about the device mapper use case. Would it be an option to translate this into REQ_OP_ZONE_RESET_ALL when we have a device mapper (or any other case where this might break) and then leave the bit go to the driver if it applies to the whole device? Javier
On 2020/06/26 14:58, Javier González wrote: > On 26.06.2020 01:27, Damien Le Moal wrote: >> On 2020/06/25 21:22, Javier González wrote: >>> From: Javier González <javier.gonz@samsung.com> >>> >>> Add flag to allow selecting all zones on a single zone management >>> operation >>> >>> Signed-off-by: Javier González <javier.gonz@samsung.com> >>> Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com> >>> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com> >>> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com> >>> --- >>> block/blk-zoned.c | 3 +++ >>> include/linux/blk_types.h | 3 ++- >>> include/uapi/linux/blkzoned.h | 9 +++++++++ >>> 3 files changed, 14 insertions(+), 1 deletion(-) >>> >>> diff --git a/block/blk-zoned.c b/block/blk-zoned.c >>> index e87c60004dc5..29194388a1bb 100644 >>> --- a/block/blk-zoned.c >>> +++ b/block/blk-zoned.c >>> @@ -420,6 +420,9 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, >>> return -ENOTTY; >>> } >>> >>> + if (zmgmt.flags & BLK_ZONE_SELECT_ALL) >>> + op |= REQ_ZONE_ALL; >>> + >>> return blkdev_zone_mgmt(bdev, op, zmgmt.sector, zmgmt.nr_sectors, >>> GFP_KERNEL); >>> } >>> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h >>> index ccb895f911b1..16b57fb2b99c 100644 >>> --- a/include/linux/blk_types.h >>> +++ b/include/linux/blk_types.h >>> @@ -351,6 +351,7 @@ enum req_flag_bits { >>> * work item to avoid such priority inversions. >>> */ >>> __REQ_CGROUP_PUNT, >>> + __REQ_ZONE_ALL, /* apply zone operation to all zones */ >>> >>> /* command specific flags for REQ_OP_WRITE_ZEROES: */ >>> __REQ_NOUNMAP, /* do not free blocks when zeroing */ >>> @@ -378,7 +379,7 @@ enum req_flag_bits { >>> #define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) >>> #define REQ_NOWAIT (1ULL << __REQ_NOWAIT) >>> #define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT) >>> - >>> +#define REQ_ZONE_ALL (1ULL << __REQ_ZONE_ALL) >>> #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) >>> #define REQ_HIPRI (1ULL << __REQ_HIPRI) >>> >>> diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h >>> index 07b5fde21d9f..a8c89fe58f97 100644 >>> --- a/include/uapi/linux/blkzoned.h >>> +++ b/include/uapi/linux/blkzoned.h >>> @@ -157,6 +157,15 @@ enum blk_zone_action { >>> BLK_ZONE_MGMT_RESET = 0x4, >>> }; >>> >>> +/** >>> + * enum blk_zone_mgmt_flags - Flags for blk_zone_mgmt >>> + * >>> + * BLK_ZONE_SELECT_ALL: Select all zones for current zone action >>> + */ >>> +enum blk_zone_mgmt_flags { >>> + BLK_ZONE_SELECT_ALL = 1 << 0, >>> +}; >>> + >>> /** >>> * struct blk_zone_mgmt - Extended zoned management >>> * >>> >> >> NACK. >> >> Details: >> 1) REQ_OP_ZONE_RESET together with REQ_ZONE_ALL is the same as >> REQ_OP_ZONE_RESET_ALL, isn't it ? You are duplicating a functionality that >> already exists. >> 2) The patch introduces REQ_ZONE_ALL at the block layer only without defining >> how it ties into SCSI and NVMe driver use of it. Is REQ_ZONE_ALL indicating that >> the zone management commands are to be executed with the ALL bit set ? If yes, >> that will break device-mapper. See the special code for handling >> REQ_OP_ZONE_RESET_ALL. That code is in place for a reason: the target block >> device may not be an entire physical device. In that case, applying a zone >> management command to all zones of the physical drive is wrong. >> 3) REQ_ZONE_ALL seems completely equivalent to specifying a sector range of [0 >> .. drive capacity]. So what is the point ? The current interface handles that. >> That is how we chose between REQ_OP_ZONE_RESET and REQ_OP_ZONE_RESET_ALL right now. >> 4) Without any in-kernel user, I do not see the point. And for applications, I >> do not see any good use case for doing open all, close all, offline all or >> finish all. If you have any such good use case, please elaborate. >> > > The main use if reset all, but without having to look through all zones, > as it imposes an overhead when we have a large number of zones. Having > the possibility to offload it to HW is more efficient. > > I had not thought about the device mapper use case. Would it be an > option to translate this into REQ_OP_ZONE_RESET_ALL when we have a > device mapper (or any other case where this might break) and then leave > the bit go to the driver if it applies to the whole device? But REQ_OP_ZONE_RESET_ALL is already implemented and supported and will reset all zones of a drive using a single command if the ioctl is called for the entire sector range of a physical drive. For device mapper with a partial mapping, the per zone reset loop will be used. If you have no other use case for the REQ_ZONE_ALL flag, what is the point here ? Reset is already optimized for the all zones case > > Javier >
On 26.06.2020 06:35, Damien Le Moal wrote: >On 2020/06/26 14:58, Javier González wrote: >> On 26.06.2020 01:27, Damien Le Moal wrote: >>> On 2020/06/25 21:22, Javier González wrote: >>>> From: Javier González <javier.gonz@samsung.com> >>>> >>>> Add flag to allow selecting all zones on a single zone management >>>> operation >>>> >>>> Signed-off-by: Javier González <javier.gonz@samsung.com> >>>> Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com> >>>> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com> >>>> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com> >>>> --- >>>> block/blk-zoned.c | 3 +++ >>>> include/linux/blk_types.h | 3 ++- >>>> include/uapi/linux/blkzoned.h | 9 +++++++++ >>>> 3 files changed, 14 insertions(+), 1 deletion(-) >>>> >>>> diff --git a/block/blk-zoned.c b/block/blk-zoned.c >>>> index e87c60004dc5..29194388a1bb 100644 >>>> --- a/block/blk-zoned.c >>>> +++ b/block/blk-zoned.c >>>> @@ -420,6 +420,9 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, >>>> return -ENOTTY; >>>> } >>>> >>>> + if (zmgmt.flags & BLK_ZONE_SELECT_ALL) >>>> + op |= REQ_ZONE_ALL; >>>> + >>>> return blkdev_zone_mgmt(bdev, op, zmgmt.sector, zmgmt.nr_sectors, >>>> GFP_KERNEL); >>>> } >>>> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h >>>> index ccb895f911b1..16b57fb2b99c 100644 >>>> --- a/include/linux/blk_types.h >>>> +++ b/include/linux/blk_types.h >>>> @@ -351,6 +351,7 @@ enum req_flag_bits { >>>> * work item to avoid such priority inversions. >>>> */ >>>> __REQ_CGROUP_PUNT, >>>> + __REQ_ZONE_ALL, /* apply zone operation to all zones */ >>>> >>>> /* command specific flags for REQ_OP_WRITE_ZEROES: */ >>>> __REQ_NOUNMAP, /* do not free blocks when zeroing */ >>>> @@ -378,7 +379,7 @@ enum req_flag_bits { >>>> #define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) >>>> #define REQ_NOWAIT (1ULL << __REQ_NOWAIT) >>>> #define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT) >>>> - >>>> +#define REQ_ZONE_ALL (1ULL << __REQ_ZONE_ALL) >>>> #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) >>>> #define REQ_HIPRI (1ULL << __REQ_HIPRI) >>>> >>>> diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h >>>> index 07b5fde21d9f..a8c89fe58f97 100644 >>>> --- a/include/uapi/linux/blkzoned.h >>>> +++ b/include/uapi/linux/blkzoned.h >>>> @@ -157,6 +157,15 @@ enum blk_zone_action { >>>> BLK_ZONE_MGMT_RESET = 0x4, >>>> }; >>>> >>>> +/** >>>> + * enum blk_zone_mgmt_flags - Flags for blk_zone_mgmt >>>> + * >>>> + * BLK_ZONE_SELECT_ALL: Select all zones for current zone action >>>> + */ >>>> +enum blk_zone_mgmt_flags { >>>> + BLK_ZONE_SELECT_ALL = 1 << 0, >>>> +}; >>>> + >>>> /** >>>> * struct blk_zone_mgmt - Extended zoned management >>>> * >>>> >>> >>> NACK. >>> >>> Details: >>> 1) REQ_OP_ZONE_RESET together with REQ_ZONE_ALL is the same as >>> REQ_OP_ZONE_RESET_ALL, isn't it ? You are duplicating a functionality that >>> already exists. >>> 2) The patch introduces REQ_ZONE_ALL at the block layer only without defining >>> how it ties into SCSI and NVMe driver use of it. Is REQ_ZONE_ALL indicating that >>> the zone management commands are to be executed with the ALL bit set ? If yes, >>> that will break device-mapper. See the special code for handling >>> REQ_OP_ZONE_RESET_ALL. That code is in place for a reason: the target block >>> device may not be an entire physical device. In that case, applying a zone >>> management command to all zones of the physical drive is wrong. >>> 3) REQ_ZONE_ALL seems completely equivalent to specifying a sector range of [0 >>> .. drive capacity]. So what is the point ? The current interface handles that. >>> That is how we chose between REQ_OP_ZONE_RESET and REQ_OP_ZONE_RESET_ALL right now. >>> 4) Without any in-kernel user, I do not see the point. And for applications, I >>> do not see any good use case for doing open all, close all, offline all or >>> finish all. If you have any such good use case, please elaborate. >>> >> >> The main use if reset all, but without having to look through all zones, >> as it imposes an overhead when we have a large number of zones. Having >> the possibility to offload it to HW is more efficient. >> >> I had not thought about the device mapper use case. Would it be an >> option to translate this into REQ_OP_ZONE_RESET_ALL when we have a >> device mapper (or any other case where this might break) and then leave >> the bit go to the driver if it applies to the whole device? > >But REQ_OP_ZONE_RESET_ALL is already implemented and supported and will reset >all zones of a drive using a single command if the ioctl is called for the >entire sector range of a physical drive. For device mapper with a partial >mapping, the per zone reset loop will be used. If you have no other use case for >the REQ_ZONE_ALL flag, what is the point here ? Reset is already optimized for >the all zones case OK. I might have missed this. I thought we were sending several commands instead of a single reset with the bit. I will check again. Thanks for pointing at this. Javier
On 2020/06/26 15:52, Javier González wrote: > On 26.06.2020 06:35, Damien Le Moal wrote: >> On 2020/06/26 14:58, Javier González wrote: >>> On 26.06.2020 01:27, Damien Le Moal wrote: >>>> On 2020/06/25 21:22, Javier González wrote: >>>>> From: Javier González <javier.gonz@samsung.com> >>>>> >>>>> Add flag to allow selecting all zones on a single zone management >>>>> operation >>>>> >>>>> Signed-off-by: Javier González <javier.gonz@samsung.com> >>>>> Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com> >>>>> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com> >>>>> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com> >>>>> --- >>>>> block/blk-zoned.c | 3 +++ >>>>> include/linux/blk_types.h | 3 ++- >>>>> include/uapi/linux/blkzoned.h | 9 +++++++++ >>>>> 3 files changed, 14 insertions(+), 1 deletion(-) >>>>> >>>>> diff --git a/block/blk-zoned.c b/block/blk-zoned.c >>>>> index e87c60004dc5..29194388a1bb 100644 >>>>> --- a/block/blk-zoned.c >>>>> +++ b/block/blk-zoned.c >>>>> @@ -420,6 +420,9 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, >>>>> return -ENOTTY; >>>>> } >>>>> >>>>> + if (zmgmt.flags & BLK_ZONE_SELECT_ALL) >>>>> + op |= REQ_ZONE_ALL; >>>>> + >>>>> return blkdev_zone_mgmt(bdev, op, zmgmt.sector, zmgmt.nr_sectors, >>>>> GFP_KERNEL); >>>>> } >>>>> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h >>>>> index ccb895f911b1..16b57fb2b99c 100644 >>>>> --- a/include/linux/blk_types.h >>>>> +++ b/include/linux/blk_types.h >>>>> @@ -351,6 +351,7 @@ enum req_flag_bits { >>>>> * work item to avoid such priority inversions. >>>>> */ >>>>> __REQ_CGROUP_PUNT, >>>>> + __REQ_ZONE_ALL, /* apply zone operation to all zones */ >>>>> >>>>> /* command specific flags for REQ_OP_WRITE_ZEROES: */ >>>>> __REQ_NOUNMAP, /* do not free blocks when zeroing */ >>>>> @@ -378,7 +379,7 @@ enum req_flag_bits { >>>>> #define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) >>>>> #define REQ_NOWAIT (1ULL << __REQ_NOWAIT) >>>>> #define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT) >>>>> - >>>>> +#define REQ_ZONE_ALL (1ULL << __REQ_ZONE_ALL) >>>>> #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) >>>>> #define REQ_HIPRI (1ULL << __REQ_HIPRI) >>>>> >>>>> diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h >>>>> index 07b5fde21d9f..a8c89fe58f97 100644 >>>>> --- a/include/uapi/linux/blkzoned.h >>>>> +++ b/include/uapi/linux/blkzoned.h >>>>> @@ -157,6 +157,15 @@ enum blk_zone_action { >>>>> BLK_ZONE_MGMT_RESET = 0x4, >>>>> }; >>>>> >>>>> +/** >>>>> + * enum blk_zone_mgmt_flags - Flags for blk_zone_mgmt >>>>> + * >>>>> + * BLK_ZONE_SELECT_ALL: Select all zones for current zone action >>>>> + */ >>>>> +enum blk_zone_mgmt_flags { >>>>> + BLK_ZONE_SELECT_ALL = 1 << 0, >>>>> +}; >>>>> + >>>>> /** >>>>> * struct blk_zone_mgmt - Extended zoned management >>>>> * >>>>> >>>> >>>> NACK. >>>> >>>> Details: >>>> 1) REQ_OP_ZONE_RESET together with REQ_ZONE_ALL is the same as >>>> REQ_OP_ZONE_RESET_ALL, isn't it ? You are duplicating a functionality that >>>> already exists. >>>> 2) The patch introduces REQ_ZONE_ALL at the block layer only without defining >>>> how it ties into SCSI and NVMe driver use of it. Is REQ_ZONE_ALL indicating that >>>> the zone management commands are to be executed with the ALL bit set ? If yes, >>>> that will break device-mapper. See the special code for handling >>>> REQ_OP_ZONE_RESET_ALL. That code is in place for a reason: the target block >>>> device may not be an entire physical device. In that case, applying a zone >>>> management command to all zones of the physical drive is wrong. >>>> 3) REQ_ZONE_ALL seems completely equivalent to specifying a sector range of [0 >>>> .. drive capacity]. So what is the point ? The current interface handles that. >>>> That is how we chose between REQ_OP_ZONE_RESET and REQ_OP_ZONE_RESET_ALL right now. >>>> 4) Without any in-kernel user, I do not see the point. And for applications, I >>>> do not see any good use case for doing open all, close all, offline all or >>>> finish all. If you have any such good use case, please elaborate. >>>> >>> >>> The main use if reset all, but without having to look through all zones, >>> as it imposes an overhead when we have a large number of zones. Having >>> the possibility to offload it to HW is more efficient. >>> >>> I had not thought about the device mapper use case. Would it be an >>> option to translate this into REQ_OP_ZONE_RESET_ALL when we have a >>> device mapper (or any other case where this might break) and then leave >>> the bit go to the driver if it applies to the whole device? >> >> But REQ_OP_ZONE_RESET_ALL is already implemented and supported and will reset >> all zones of a drive using a single command if the ioctl is called for the >> entire sector range of a physical drive. For device mapper with a partial >> mapping, the per zone reset loop will be used. If you have no other use case for >> the REQ_ZONE_ALL flag, what is the point here ? Reset is already optimized for >> the all zones case > > OK. I might have missed this. I thought we were sending several commands > instead of a single reset with the bit. I will check again. Thanks for > pointing at this. In block/blk-zoned.c, there is: static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev, sector_t sector, sector_t nr_sectors) { if (!blk_queue_zone_resetall(bdev_get_queue(bdev))) return false; /* * REQ_OP_ZONE_RESET_ALL can be executed only if the number of sectors * of the applicable zone range is the entire disk. */ return !sector && nr_sectors == get_capacity(bdev->bd_disk); } And: int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, sector_t sector, sector_t nr_sectors, gfp_t gfp_mask) { ... while (sector < end_sector) { bio = blk_next_bio(bio, 0, gfp_mask); bio_set_dev(bio, bdev); /* * Special case for the zone reset operation that reset all * zones, this is useful for applications like mkfs. */ if (op == REQ_OP_ZONE_RESET && blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) { bio->bi_opf = REQ_OP_ZONE_RESET_ALL; break; ^^^^^ This means one command only... } bio->bi_opf = op | REQ_SYNC; bio->bi_iter.bi_sector = sector; sector += zone_sectors; /* This may take a while, so be nice to others */ cond_resched(); } ret = submit_bio_wait(bio); bio_put(bio); return ret; } And see scsi/sd_zbc.c and zns.c. REQ_OP_ZONE_RESET_ALL end up setting the ALL bit for reset command. > > Javier >
diff --git a/block/blk-zoned.c b/block/blk-zoned.c index e87c60004dc5..29194388a1bb 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -420,6 +420,9 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, return -ENOTTY; } + if (zmgmt.flags & BLK_ZONE_SELECT_ALL) + op |= REQ_ZONE_ALL; + return blkdev_zone_mgmt(bdev, op, zmgmt.sector, zmgmt.nr_sectors, GFP_KERNEL); } diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index ccb895f911b1..16b57fb2b99c 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -351,6 +351,7 @@ enum req_flag_bits { * work item to avoid such priority inversions. */ __REQ_CGROUP_PUNT, + __REQ_ZONE_ALL, /* apply zone operation to all zones */ /* command specific flags for REQ_OP_WRITE_ZEROES: */ __REQ_NOUNMAP, /* do not free blocks when zeroing */ @@ -378,7 +379,7 @@ enum req_flag_bits { #define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) #define REQ_NOWAIT (1ULL << __REQ_NOWAIT) #define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT) - +#define REQ_ZONE_ALL (1ULL << __REQ_ZONE_ALL) #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) #define REQ_HIPRI (1ULL << __REQ_HIPRI) diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h index 07b5fde21d9f..a8c89fe58f97 100644 --- a/include/uapi/linux/blkzoned.h +++ b/include/uapi/linux/blkzoned.h @@ -157,6 +157,15 @@ enum blk_zone_action { BLK_ZONE_MGMT_RESET = 0x4, }; +/** + * enum blk_zone_mgmt_flags - Flags for blk_zone_mgmt + * + * BLK_ZONE_SELECT_ALL: Select all zones for current zone action + */ +enum blk_zone_mgmt_flags { + BLK_ZONE_SELECT_ALL = 1 << 0, +}; + /** * struct blk_zone_mgmt - Extended zoned management *