Message ID | 20240530054035.491497-4-dlemoal@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Zone write plugging and DM zone fixes | expand |
On Thu, May 30, 2024 at 02:40:34PM +0900, Damien Le Moal wrote: > A zoned device may have a last sequential write required zone that is > smaller than other zones. However, all tests to check if a zone write > plug write offset exceeds the zone capacity use the same capacity > value stored in the gendisk zone_capacity field. This is incorrect for a > zoned device with a last runt (smaller) zone. > > Add the new field last_zone_capacity to struct gendisk to store the > capacity of the last zone of the device. blk_revalidate_seq_zone() and > blk_revalidate_conv_zone() are both modified to get this value when > disk_zone_is_last() returns true. Similarly to zone_capacity, the value > is first stored using the last_zone_capacity field of struct > blk_revalidate_zone_args. Once zone revalidation of all zones is done, > this is used to set the gendisk last_zone_capacity field. > > The checks to determine if a zone is full or if a sector offset in a > zone exceeds the zone capacity in disk_should_remove_zone_wplug(), > disk_zone_wplug_abort_unaligned(), blk_zone_write_plug_init_request(), > and blk_zone_wplug_prepare_bio() are modified to use the new helper > functions disk_zone_is_full() and disk_zone_wplug_is_full(). > disk_zone_is_full() uses the zone index to determine if the zone being > tested is the last one of the disk and uses the either the disk > zone_capacity or last_zone_capacity accordingly. 
> > Fixes: dd291d77cc90 ("block: Introduce zone write plugging") > Signed-off-by: Damien Le Moal <dlemoal@kernel.org> > --- > block/blk-zoned.c | 35 +++++++++++++++++++++++++++-------- > include/linux/blkdev.h | 1 + > 2 files changed, 28 insertions(+), 8 deletions(-) > > diff --git a/block/blk-zoned.c b/block/blk-zoned.c > index 402a50a1ac4d..52abebf56027 100644 > --- a/block/blk-zoned.c > +++ b/block/blk-zoned.c > @@ -455,6 +455,20 @@ static bool disk_zone_is_last(struct gendisk *disk, struct blk_zone *zone) > return zone->start + zone->len >= get_capacity(disk); > } > > +static bool disk_zone_is_full(struct gendisk *disk, > + unsigned int zno, unsigned int offset_in_zone) Why not just call the third parameter wp? > +{ > + if (zno < disk->nr_zones - 1) > + return offset_in_zone >= disk->zone_capacity; > + return offset_in_zone >= disk->last_zone_capacity; > +} > + > +static bool disk_zone_wplug_is_full(struct gendisk *disk, > + struct blk_zone_wplug *zwplug) > +{ > + return disk_zone_is_full(disk, zwplug->zone_no, zwplug->wp_offset); > +} > + > static bool disk_insert_zone_wplug(struct gendisk *disk, > struct blk_zone_wplug *zwplug) > { > @@ -548,7 +562,7 @@ static inline bool disk_should_remove_zone_wplug(struct gendisk *disk, > return false; > > /* We can remove zone write plugs for zones that are empty or full. 
*/ > - return !zwplug->wp_offset || zwplug->wp_offset >= disk->zone_capacity; > + return !zwplug->wp_offset || disk_zone_wplug_is_full(disk, zwplug); > } > > static void disk_remove_zone_wplug(struct gendisk *disk, > @@ -669,13 +683,12 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug) > static void disk_zone_wplug_abort_unaligned(struct gendisk *disk, > struct blk_zone_wplug *zwplug) > { > - unsigned int zone_capacity = disk->zone_capacity; > unsigned int wp_offset = zwplug->wp_offset; > struct bio_list bl = BIO_EMPTY_LIST; > struct bio *bio; > > while ((bio = bio_list_pop(&zwplug->bio_list))) { > - if (wp_offset >= zone_capacity || > + if (disk_zone_is_full(disk, zwplug->zone_no, wp_offset) || Why don't you use disk_zone_wplug_is_full() here? > (bio_op(bio) != REQ_OP_ZONE_APPEND && > bio_offset_from_zone_start(bio) != wp_offset)) { > blk_zone_wplug_bio_io_error(zwplug, bio); > @@ -914,7 +927,6 @@ void blk_zone_write_plug_init_request(struct request *req) > sector_t req_back_sector = blk_rq_pos(req) + blk_rq_sectors(req); > struct request_queue *q = req->q; > struct gendisk *disk = q->disk; > - unsigned int zone_capacity = disk->zone_capacity; > struct blk_zone_wplug *zwplug = > disk_get_zone_wplug(disk, blk_rq_pos(req)); > unsigned long flags; > @@ -938,7 +950,7 @@ void blk_zone_write_plug_init_request(struct request *req) > * into the back of the request. > */ > spin_lock_irqsave(&zwplug->lock, flags); > - while (zwplug->wp_offset < zone_capacity) { > + while (!disk_zone_wplug_is_full(disk, zwplug)) { > bio = bio_list_peek(&zwplug->bio_list); > if (!bio) > break; > @@ -984,7 +996,7 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug, > * We know such BIO will fail, and that would potentially overflow our > * write pointer offset beyond the end of the zone. 
> */ > - if (zwplug->wp_offset >= disk->zone_capacity) > + if (disk_zone_wplug_is_full(disk, zwplug)) > goto err; > > if (bio_op(bio) == REQ_OP_ZONE_APPEND) { > @@ -1561,6 +1573,7 @@ void disk_free_zone_resources(struct gendisk *disk) > kfree(disk->conv_zones_bitmap); > disk->conv_zones_bitmap = NULL; > disk->zone_capacity = 0; > + disk->last_zone_capacity = 0; > disk->nr_zones = 0; > } > > @@ -1605,6 +1618,7 @@ struct blk_revalidate_zone_args { > unsigned long *conv_zones_bitmap; > unsigned int nr_zones; > unsigned int zone_capacity; > + unsigned int last_zone_capacity; > sector_t sector; > }; > > @@ -1622,6 +1636,7 @@ static int disk_update_zone_resources(struct gendisk *disk, > > disk->nr_zones = args->nr_zones; > disk->zone_capacity = args->zone_capacity; > + disk->last_zone_capacity = args->last_zone_capacity; > swap(disk->conv_zones_bitmap, args->conv_zones_bitmap); > if (disk->conv_zones_bitmap) > nr_conv_zones = bitmap_weight(disk->conv_zones_bitmap, > @@ -1673,6 +1688,9 @@ static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx, > return -ENODEV; > } > > + if (disk_zone_is_last(disk, zone)) > + args->last_zone_capacity = zone->capacity; > + > if (!disk_need_zone_resources(disk)) > return 0; > > @@ -1703,8 +1721,9 @@ static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx, > */ > if (!args->zone_capacity) > args->zone_capacity = zone->capacity; > - if (!disk_zone_is_last(disk, zone) && > - zone->capacity != args->zone_capacity) { > + if (disk_zone_is_last(disk, zone)) { > + args->last_zone_capacity = zone->capacity; > + } else if (zone->capacity != args->zone_capacity) { > pr_warn("%s: Invalid variable zone capacity\n", > disk->disk_name); > return -ENODEV; > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h > index aefdda9f4ec7..24c36929920b 100644 > --- a/include/linux/blkdev.h > +++ b/include/linux/blkdev.h > @@ -186,6 +186,7 @@ struct gendisk { > */ > unsigned int nr_zones; > unsigned int zone_capacity; 
> + unsigned int last_zone_capacity; > unsigned long *conv_zones_bitmap; > unsigned int zone_wplugs_hash_bits; > spinlock_t zone_wplugs_lock; > -- > 2.45.1 >
On 5/30/24 16:37, Niklas Cassel wrote: [...] >> +static bool disk_zone_is_full(struct gendisk *disk, >> + unsigned int zno, unsigned int offset_in_zone) > > Why not just call the third parameter wp? Because it does not have to be a plug write pointer. And even then, zone write plugging uses offsets within a zone as write pointer values :) [...] >> static void disk_remove_zone_wplug(struct gendisk *disk, >> @@ -669,13 +683,12 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug) >> static void disk_zone_wplug_abort_unaligned(struct gendisk *disk, >> struct blk_zone_wplug *zwplug) >> { >> - unsigned int zone_capacity = disk->zone_capacity; >> unsigned int wp_offset = zwplug->wp_offset; >> struct bio_list bl = BIO_EMPTY_LIST; >> struct bio *bio; >> >> while ((bio = bio_list_pop(&zwplug->bio_list))) { >> - if (wp_offset >= zone_capacity || >> + if (disk_zone_is_full(disk, zwplug->zone_no, wp_offset) || > > Why don't you use disk_zone_wplug_is_full() here? Because this function does not modify the zone write plug write offset: the check is done against the local wp_offset variable, whereas disk_zone_wplug_is_full() tests zwplug->wp_offset. So we cannot use it here.
On Thu, May 30, 2024 at 02:40:34PM +0900, Damien Le Moal wrote: > A zoned device may have a last sequential write required zone that is > smaller than other zones. However, all tests to check if a zone write > plug write offset exceeds the zone capacity use the same capacity > value stored in the gendisk zone_capacity field. This is incorrect for a > zoned device with a last runt (smaller) zone. > > Add the new field last_zone_capacity to struct gendisk to store the > capacity of the last zone of the device. blk_revalidate_seq_zone() and > blk_revalidate_conv_zone() are both modified to get this value when > disk_zone_is_last() returns true. Similarly to zone_capacity, the value > is first stored using the last_zone_capacity field of struct > blk_revalidate_zone_args. Once zone revalidation of all zones is done, > this is used to set the gendisk last_zone_capacity field. > > The checks to determine if a zone is full or if a sector offset in a > zone exceeds the zone capacity in disk_should_remove_zone_wplug(), > disk_zone_wplug_abort_unaligned(), blk_zone_write_plug_init_request(), > and blk_zone_wplug_prepare_bio() are modified to use the new helper > functions disk_zone_is_full() and disk_zone_wplug_is_full(). > disk_zone_is_full() uses the zone index to determine if the zone being > tested is the last one of the disk and uses the either the disk > zone_capacity or last_zone_capacity accordingly. > > Fixes: dd291d77cc90 ("block: Introduce zone write plugging") > Signed-off-by: Damien Le Moal <dlemoal@kernel.org> > --- Reviewed-by: Niklas Cassel <cassel@kernel.org>
On 5/29/24 22:40, Damien Le Moal wrote: > A zoned device may have a last sequential write required zone that is > smaller than other zones. However, all tests to check if a zone write > plug write offset exceeds the zone capacity use the same capacity > value stored in the gendisk zone_capacity field. This is incorrect for a > zoned device with a last runt (smaller) zone. > > Add the new field last_zone_capacity to struct gendisk to store the > capacity of the last zone of the device. blk_revalidate_seq_zone() and > blk_revalidate_conv_zone() are both modified to get this value when > disk_zone_is_last() returns true. Similarly to zone_capacity, the value > is first stored using the last_zone_capacity field of struct > blk_revalidate_zone_args. Once zone revalidation of all zones is done, > this is used to set the gendisk last_zone_capacity field. > > The checks to determine if a zone is full or if a sector offset in a > zone exceeds the zone capacity in disk_should_remove_zone_wplug(), > disk_zone_wplug_abort_unaligned(), blk_zone_write_plug_init_request(), > and blk_zone_wplug_prepare_bio() are modified to use the new helper > functions disk_zone_is_full() and disk_zone_wplug_is_full(). > disk_zone_is_full() uses the zone index to determine if the zone being > tested is the last one of the disk and uses the either the disk > zone_capacity or last_zone_capacity accordingly. Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
On 5/30/24 07:40, Damien Le Moal wrote: > A zoned device may have a last sequential write required zone that is > smaller than other zones. However, all tests to check if a zone write > plug write offset exceeds the zone capacity use the same capacity > value stored in the gendisk zone_capacity field. This is incorrect for a > zoned device with a last runt (smaller) zone. > > Add the new field last_zone_capacity to struct gendisk to store the > capacity of the last zone of the device. blk_revalidate_seq_zone() and > blk_revalidate_conv_zone() are both modified to get this value when > disk_zone_is_last() returns true. Similarly to zone_capacity, the value > is first stored using the last_zone_capacity field of struct > blk_revalidate_zone_args. Once zone revalidation of all zones is done, > this is used to set the gendisk last_zone_capacity field. > > The checks to determine if a zone is full or if a sector offset in a > zone exceeds the zone capacity in disk_should_remove_zone_wplug(), > disk_zone_wplug_abort_unaligned(), blk_zone_write_plug_init_request(), > and blk_zone_wplug_prepare_bio() are modified to use the new helper > functions disk_zone_is_full() and disk_zone_wplug_is_full(). > disk_zone_is_full() uses the zone index to determine if the zone being > tested is the last one of the disk and uses the either the disk > zone_capacity or last_zone_capacity accordingly. > > Fixes: dd291d77cc90 ("block: Introduce zone write plugging") > Signed-off-by: Damien Le Moal <dlemoal@kernel.org> > --- > block/blk-zoned.c | 35 +++++++++++++++++++++++++++-------- > include/linux/blkdev.h | 1 + > 2 files changed, 28 insertions(+), 8 deletions(-) > Reviewed-by: Hannes Reinecke <hare@suse.de> Cheers, Hannes
diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 402a50a1ac4d..52abebf56027 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -455,6 +455,20 @@ static bool disk_zone_is_last(struct gendisk *disk, struct blk_zone *zone) return zone->start + zone->len >= get_capacity(disk); } +static bool disk_zone_is_full(struct gendisk *disk, + unsigned int zno, unsigned int offset_in_zone) +{ + if (zno < disk->nr_zones - 1) + return offset_in_zone >= disk->zone_capacity; + return offset_in_zone >= disk->last_zone_capacity; +} + +static bool disk_zone_wplug_is_full(struct gendisk *disk, + struct blk_zone_wplug *zwplug) +{ + return disk_zone_is_full(disk, zwplug->zone_no, zwplug->wp_offset); +} + static bool disk_insert_zone_wplug(struct gendisk *disk, struct blk_zone_wplug *zwplug) { @@ -548,7 +562,7 @@ static inline bool disk_should_remove_zone_wplug(struct gendisk *disk, return false; /* We can remove zone write plugs for zones that are empty or full. */ - return !zwplug->wp_offset || zwplug->wp_offset >= disk->zone_capacity; + return !zwplug->wp_offset || disk_zone_wplug_is_full(disk, zwplug); } static void disk_remove_zone_wplug(struct gendisk *disk, @@ -669,13 +683,12 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug) static void disk_zone_wplug_abort_unaligned(struct gendisk *disk, struct blk_zone_wplug *zwplug) { - unsigned int zone_capacity = disk->zone_capacity; unsigned int wp_offset = zwplug->wp_offset; struct bio_list bl = BIO_EMPTY_LIST; struct bio *bio; while ((bio = bio_list_pop(&zwplug->bio_list))) { - if (wp_offset >= zone_capacity || + if (disk_zone_is_full(disk, zwplug->zone_no, wp_offset) || (bio_op(bio) != REQ_OP_ZONE_APPEND && bio_offset_from_zone_start(bio) != wp_offset)) { blk_zone_wplug_bio_io_error(zwplug, bio); @@ -914,7 +927,6 @@ void blk_zone_write_plug_init_request(struct request *req) sector_t req_back_sector = blk_rq_pos(req) + blk_rq_sectors(req); struct request_queue *q = req->q; struct gendisk *disk = q->disk; - 
unsigned int zone_capacity = disk->zone_capacity; struct blk_zone_wplug *zwplug = disk_get_zone_wplug(disk, blk_rq_pos(req)); unsigned long flags; @@ -938,7 +950,7 @@ void blk_zone_write_plug_init_request(struct request *req) * into the back of the request. */ spin_lock_irqsave(&zwplug->lock, flags); - while (zwplug->wp_offset < zone_capacity) { + while (!disk_zone_wplug_is_full(disk, zwplug)) { bio = bio_list_peek(&zwplug->bio_list); if (!bio) break; @@ -984,7 +996,7 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug, * We know such BIO will fail, and that would potentially overflow our * write pointer offset beyond the end of the zone. */ - if (zwplug->wp_offset >= disk->zone_capacity) + if (disk_zone_wplug_is_full(disk, zwplug)) goto err; if (bio_op(bio) == REQ_OP_ZONE_APPEND) { @@ -1561,6 +1573,7 @@ void disk_free_zone_resources(struct gendisk *disk) kfree(disk->conv_zones_bitmap); disk->conv_zones_bitmap = NULL; disk->zone_capacity = 0; + disk->last_zone_capacity = 0; disk->nr_zones = 0; } @@ -1605,6 +1618,7 @@ struct blk_revalidate_zone_args { unsigned long *conv_zones_bitmap; unsigned int nr_zones; unsigned int zone_capacity; + unsigned int last_zone_capacity; sector_t sector; }; @@ -1622,6 +1636,7 @@ static int disk_update_zone_resources(struct gendisk *disk, disk->nr_zones = args->nr_zones; disk->zone_capacity = args->zone_capacity; + disk->last_zone_capacity = args->last_zone_capacity; swap(disk->conv_zones_bitmap, args->conv_zones_bitmap); if (disk->conv_zones_bitmap) nr_conv_zones = bitmap_weight(disk->conv_zones_bitmap, @@ -1673,6 +1688,9 @@ static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx, return -ENODEV; } + if (disk_zone_is_last(disk, zone)) + args->last_zone_capacity = zone->capacity; + if (!disk_need_zone_resources(disk)) return 0; @@ -1703,8 +1721,9 @@ static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx, */ if (!args->zone_capacity) args->zone_capacity = zone->capacity; - if 
(!disk_zone_is_last(disk, zone) && - zone->capacity != args->zone_capacity) { + if (disk_zone_is_last(disk, zone)) { + args->last_zone_capacity = zone->capacity; + } else if (zone->capacity != args->zone_capacity) { pr_warn("%s: Invalid variable zone capacity\n", disk->disk_name); return -ENODEV; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index aefdda9f4ec7..24c36929920b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -186,6 +186,7 @@ struct gendisk { */ unsigned int nr_zones; unsigned int zone_capacity; + unsigned int last_zone_capacity; unsigned long *conv_zones_bitmap; unsigned int zone_wplugs_hash_bits; spinlock_t zone_wplugs_lock;
A zoned device may have a last sequential write required zone that is smaller than other zones. However, all tests to check if a zone write plug write offset exceeds the zone capacity use the same capacity value stored in the gendisk zone_capacity field. This is incorrect for a zoned device with a last runt (smaller) zone. Add the new field last_zone_capacity to struct gendisk to store the capacity of the last zone of the device. blk_revalidate_seq_zone() and blk_revalidate_conv_zone() are both modified to get this value when disk_zone_is_last() returns true. Similarly to zone_capacity, the value is first stored using the last_zone_capacity field of struct blk_revalidate_zone_args. Once zone revalidation of all zones is done, this is used to set the gendisk last_zone_capacity field. The checks to determine if a zone is full or if a sector offset in a zone exceeds the zone capacity in disk_should_remove_zone_wplug(), disk_zone_wplug_abort_unaligned(), blk_zone_write_plug_init_request(), and blk_zone_wplug_prepare_bio() are modified to use the new helper functions disk_zone_is_full() and disk_zone_wplug_is_full(). disk_zone_is_full() uses the zone index to determine if the zone being tested is the last one of the disk and uses either the disk zone_capacity or last_zone_capacity accordingly. Fixes: dd291d77cc90 ("block: Introduce zone write plugging") Signed-off-by: Damien Le Moal <dlemoal@kernel.org> --- block/blk-zoned.c | 35 +++++++++++++++++++++++++++-------- include/linux/blkdev.h | 1 + 2 files changed, 28 insertions(+), 8 deletions(-)