[v2,3/5] dm: handle REQ_OP_ZONE_RESET_ALL

Message ID	20240704052816.623865-4-dlemoal@kernel.org (mailing list archive)
State	Not Applicable
Headers	show Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 55847747F; Thu, 4 Jul 2024 05:28:27 +0000 (UTC) From: Damien Le Moal <dlemoal@kernel.org> To: Jens Axboe <axboe@kernel.dk>, linux-block@vger.kernel.org, dm-devel@lists.linux.dev, Mike Snitzer <snitzer@kernel.org>, Mikulas Patocka <mpatocka@redhat.com>, linux-scsi@vger.kernel.org, "Martin K . Petersen" <martin.petersen@oracle.com>, Ming Lei <ming.lei@redhat.com>, "Michael S . Tsirkin" <mst@redhat.com>, Jason Wang <jasowang@redhat.com>, Christoph Hellwig <hch@lst.de> Subject: [PATCH v2 3/5] dm: handle REQ_OP_ZONE_RESET_ALL Date: Thu, 4 Jul 2024 14:28:14 +0900 Message-ID: <20240704052816.623865-4-dlemoal@kernel.org> In-Reply-To: <20240704052816.623865-1-dlemoal@kernel.org> References: <20240704052816.623865-1-dlemoal@kernel.org> Precedence: bulk MIME-Version: 1.0 Content-Transfer-Encoding: 8bit
Series	Remove zone reset all emulation \| expand [v2,0/5] Remove zone reset all emulation [v2,1/5] null_blk: Introduce the zone_full parameter [v2,2/5] dm: Refactor is_abnormal_io() [v2,3/5] dm: handle REQ_OP_ZONE_RESET_ALL [v2,4/5] block: Remove REQ_OP_ZONE_RESET_ALL emulation [v2,5/5] block: Remove blk_alloc_zone_bitmap()

diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c index 4d37e53b50ee..c0d41c36e06e 100644 --- a/drivers/md/dm-zone.c +++ b/drivers/md/dm-zone.c @@ -292,10 +292,12 @@ static int device_get_zone_resource_limits(struct dm_target *ti, /* * If the target does not map all sequential zones, the limits - * will not be reliable. + * will not be reliable and we cannot use REQ_OP_ZONE_RESET_ALL. */ - if (zc.target_nr_seq_zones < zc.total_nr_seq_zones) + if (zc.target_nr_seq_zones < zc.total_nr_seq_zones) { zlim->reliable_limits = false; + ti->zone_reset_all_supported = false; + } /* * If the target maps less sequential zones than the limit values, then @@ -353,6 +355,14 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q, for (unsigned int i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i); + /* + * Assume that the target can accept REQ_OP_ZONE_RESET_ALL. + * device_get_zone_resource_limits() may adjust this if one of + * the device used by the target does not have all its + * sequential write required zones mapped. + */ + ti->zone_reset_all_supported = true; + if (!ti->type->iterate_devices || ti->type->iterate_devices(ti, device_get_zone_resource_limits, &zlim)) { @@ -420,3 +430,39 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone) return; } + +static int dm_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx, + void *data) +{ + /* + * For an all-zones reset, ignore conventional, empty, read-only + * and offline zones. + */ + switch (zone->cond) { + case BLK_ZONE_COND_NOT_WP: + case BLK_ZONE_COND_EMPTY: + case BLK_ZONE_COND_READONLY: + case BLK_ZONE_COND_OFFLINE: + return 0; + default: + set_bit(idx, (unsigned long *)data); + return 0; + } +} + +int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t, + sector_t sector, unsigned int nr_zones, + unsigned long *need_reset) +{ + int ret; + + ret = dm_blk_do_report_zones(md, t, sector, nr_zones, + dm_zone_need_reset_cb, need_reset); + if (ret != nr_zones) { + DMERR("Get %s zone reset bitmap failed\n", + md->disk->disk_name); + return -EIO; + } + + return 0; +} diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0d80caccbd9e..4b1b69e576a5 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1606,6 +1606,7 @@ static bool is_abnormal_io(struct bio *bio) case REQ_OP_DISCARD: case REQ_OP_SECURE_ERASE: case REQ_OP_WRITE_ZEROES: + case REQ_OP_ZONE_RESET_ALL: return true; default: return false; @@ -1774,6 +1775,119 @@ static inline bool dm_zone_plug_bio(struct mapped_device *md, struct bio *bio) { return dm_emulate_zone_append(md) && blk_zone_plug_bio(bio, 0); } + +static blk_status_t __send_zone_reset_all_emulated(struct clone_info *ci, + struct dm_target *ti) +{ + struct bio_list blist = BIO_EMPTY_LIST; + struct mapped_device *md = ci->io->md; + unsigned int zone_sectors = md->disk->queue->limits.chunk_sectors; + unsigned long *need_reset; + unsigned int i, nr_zones, nr_reset; + unsigned int num_bios = 0; + blk_status_t sts = BLK_STS_OK; + sector_t sector = ti->begin; + struct bio *clone; + int ret; + + nr_zones = ti->len >> ilog2(zone_sectors); + need_reset = bitmap_zalloc(nr_zones, GFP_NOIO); + if (!need_reset) + return BLK_STS_RESOURCE; + + ret = dm_zone_get_reset_bitmap(md, ci->map, ti->begin, + nr_zones, need_reset); + if (ret) { + sts = BLK_STS_IOERR; + goto free_bitmap; + } + + /* If we have no zone to reset, we are done. */ + nr_reset = bitmap_weight(need_reset, nr_zones); + if (!nr_reset) + goto free_bitmap; + + atomic_add(nr_zones, &ci->io->io_count); + + for (i = 0; i < nr_zones; i++) { + + if (!test_bit(i, need_reset)) { + sector += zone_sectors; + continue; + } + + if (bio_list_empty(&blist)) { + /* This may take a while, so be nice to others */ + if (num_bios) + cond_resched(); + + /* + * We may need to reset thousands of zones, so let's + * not go crazy with the clone allocation. + */ + alloc_multiple_bios(&blist, ci, ti, min(nr_reset, 32), + NULL, GFP_NOIO); + } + + /* Get a clone and change it to a regular reset operation. */ + clone = bio_list_pop(&blist); + clone->bi_opf &= ~REQ_OP_MASK; + clone->bi_opf |= REQ_OP_ZONE_RESET | REQ_SYNC; + clone->bi_iter.bi_sector = sector; + clone->bi_iter.bi_size = 0; + __map_bio(clone); + + sector += zone_sectors; + num_bios++; + nr_reset--; + } + + WARN_ON_ONCE(!bio_list_empty(&blist)); + atomic_sub(nr_zones - num_bios, &ci->io->io_count); + ci->sector_count = 0; + +free_bitmap: + bitmap_free(need_reset); + + return sts; +} + +static void __send_zone_reset_all_native(struct clone_info *ci, + struct dm_target *ti) +{ + unsigned int bios; + + atomic_add(1, &ci->io->io_count); + bios = __send_duplicate_bios(ci, ti, 1, NULL, GFP_NOIO); + atomic_sub(1 - bios, &ci->io->io_count); + + ci->sector_count = 0; +} + +static blk_status_t __send_zone_reset_all(struct clone_info *ci) +{ + struct dm_table *t = ci->map; + blk_status_t sts = BLK_STS_OK; + + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); + + if (ti->zone_reset_all_supported) { + __send_zone_reset_all_native(ci, ti); + continue; + } + + sts = __send_zone_reset_all_emulated(ci, ti); + if (sts != BLK_STS_OK) + break; + } + + /* Release the reference that alloc_io() took for submission. */ + atomic_sub(1, &ci->io->io_count); + + return sts; +} + #else static inline bool dm_zone_bio_needs_split(struct mapped_device *md, struct bio *bio) @@ -1784,6 +1898,10 @@ static inline bool dm_zone_plug_bio(struct mapped_device *md, struct bio *bio) { return false; } +static blk_status_t __send_zone_reset_all(struct clone_info *ci) +{ + return BLK_STS_NOTSUPP; +} #endif /* @@ -1797,9 +1915,14 @@ static void dm_split_and_process_bio(struct mapped_device *md, blk_status_t error = BLK_STS_OK; bool is_abnormal, need_split; - need_split = is_abnormal = is_abnormal_io(bio); - if (static_branch_unlikely(&zoned_enabled)) - need_split = is_abnormal || dm_zone_bio_needs_split(md, bio); + is_abnormal = is_abnormal_io(bio); + if (static_branch_unlikely(&zoned_enabled)) { + /* Special case REQ_OP_ZONE_RESET_ALL as it cannot be split. */ + need_split = (bio_op(bio) != REQ_OP_ZONE_RESET_ALL) && + (is_abnormal || dm_zone_bio_needs_split(md, bio)); + } else { + need_split = is_abnormal; + } if (unlikely(need_split)) { /* @@ -1840,6 +1963,12 @@ static void dm_split_and_process_bio(struct mapped_device *md, goto out; } + if (static_branch_unlikely(&zoned_enabled) && + (bio_op(bio) == REQ_OP_ZONE_RESET_ALL)) { + error = __send_zone_reset_all(&ci); + goto out; + } + error = __split_and_process_bio(&ci); if (error || !ci.sector_count) goto out; diff --git a/drivers/md/dm.h b/drivers/md/dm.h index c984ecb64b1e..cc466ad5cb1d 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -110,6 +110,9 @@ int dm_blk_report_zones(struct gendisk *disk, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); bool dm_is_zone_write(struct mapped_device *md, struct bio *bio); int dm_zone_map_bio(struct dm_target_io *io); +int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t, + sector_t sector, unsigned int nr_zones, + unsigned long *need_reset); #else #define dm_blk_report_zones NULL static inline bool dm_is_zone_write(struct mapped_device *md, struct bio *bio) diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 82b2195efaca..15d28164bbbd 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -357,6 +357,13 @@ struct dm_target { */ bool discards_supported:1; + /* + * Automatically set by dm-core if this target supports + * REQ_OP_ZONE_RESET_ALL. Otherwise, this operation will be emulated + * using REQ_OP_ZONE_RESET. Target drivers must not set this manually. + */ + bool zone_reset_all_supported:1; + /* * Set if this target requires that discards be split on * 'max_discard_sectors' boundaries.

[v2,3/5] dm: handle REQ_OP_ZONE_RESET_ALL

Commit Message

Patch