diff mbox series

[v2,08/28] block: Use a mempool to allocate zone write plugs

Message ID 20240325044452.3125418-9-dlemoal@kernel.org (mailing list archive)
State New, archived
Headers show
Series Zone write plugging | expand

Commit Message

Damien Le Moal March 25, 2024, 4:44 a.m. UTC
Allocating zone write plugs using a struct kmem_cache does not guarantee
that enough write plugs can be allocated to simultaneously write up to
the maximum number of active zones of a zoned block device.

Avoid such memory allocation issues by using a mempool with a size equal
to the disk's maximum number of open zones or maximum number of active
zones, whichever is larger. For zoned devices that do not have open or
active zone limits, a default of 128 is used as the mempool size. If a
change to the zone limits is detected, the mempool is resized in
blk_revalidate_disk_zones().

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
---
 block/blk-zoned.c      | 62 ++++++++++++++++++++++++++++++------------
 include/linux/blkdev.h |  3 ++
 2 files changed, 47 insertions(+), 18 deletions(-)

Comments

Hannes Reinecke March 27, 2024, 7:19 a.m. UTC | #1
On 3/25/24 05:44, Damien Le Moal wrote:
> Allocating zone write plugs using a struct kmem_cache does not guarantee
> that enough write plugs can be allocated to simultaneously write up to
> the maximum number of active zones of a zoned block device.
> 
> Avoid any issue with memory allocation by using a mempool with a size
> equal to the disk maximum number of open zones or maximum number of
> active zones, whichever is larger. For zoned devices that do not have
> open or active zone limits, the default 128 is used as the mempool size.
> If a change to the zone limits is detected, the mempool is resized in
> blk_revalidate_disk_zones().
> 
> Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
> ---
>   block/blk-zoned.c      | 62 ++++++++++++++++++++++++++++++------------
>   include/linux/blkdev.h |  3 ++
>   2 files changed, 47 insertions(+), 18 deletions(-)
> 
Why isn't this part of the previous patch?
But that shouldn't hold off the patchset, so:

Reviewed-by: Hannes Reinecke <hare@suse.de>

Cheers,

Hannes
Damien Le Moal March 27, 2024, 7:22 a.m. UTC | #2
On 3/27/24 16:19, Hannes Reinecke wrote:
> On 3/25/24 05:44, Damien Le Moal wrote:
>> Allocating zone write plugs using a struct kmem_cache does not guarantee
>> that enough write plugs can be allocated to simultaneously write up to
>> the maximum number of active zones of a zoned block device.
>>
>> Avoid any issue with memory allocation by using a mempool with a size
>> equal to the disk maximum number of open zones or maximum number of
>> active zones, whichever is larger. For zoned devices that do not have
>> open or active zone limits, the default 128 is used as the mempool size.
>> If a change to the zone limits is detected, the mempool is resized in
>> blk_revalidate_disk_zones().
>>
>> Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
>> ---
>>   block/blk-zoned.c      | 62 ++++++++++++++++++++++++++++++------------
>>   include/linux/blkdev.h |  3 ++
>>   2 files changed, 47 insertions(+), 18 deletions(-)
>>
> Why isn't this part of the previous patch?

I tried to reduce the patch size...

> But that shouldn't hold off the patchset, so:
> 
> Reviewed-by: Hannes Reinecke <hare@suse.de>

Thanks. But I am about to send V3, in which I removed the mempool and replaced
it with a simple free list...

> 
> Cheers,
> 
> Hannes
diff mbox series

Patch

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 39e66d64ea55..4e93293b1233 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -568,13 +568,14 @@  static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk,
 	return zwplug;
 }
 
-static inline void disk_put_zone_wplug(struct blk_zone_wplug *zwplug)
+static inline void disk_put_zone_wplug(struct gendisk *disk,
+				       struct blk_zone_wplug *zwplug)
 {
 	if (atomic_dec_and_test(&zwplug->ref)) {
 		WARN_ON_ONCE(!bio_list_empty(&zwplug->bio_list));
 		WARN_ON_ONCE(!list_empty(&zwplug->err));
 
-		kmem_cache_free(blk_zone_wplugs_cachep, zwplug);
+		mempool_free(zwplug, disk->zone_wplugs_pool);
 	}
 }
 
@@ -599,14 +600,14 @@  static struct blk_zone_wplug *disk_get_zone_wplug_locked(struct gendisk *disk,
 		spin_lock_irqsave(&zwplug->lock, *flags);
 		if (zwplug->flags & BLK_ZONE_WPLUG_FREEING) {
 			spin_unlock_irqrestore(&zwplug->lock, *flags);
-			disk_put_zone_wplug(zwplug);
+			disk_put_zone_wplug(disk, zwplug);
 			goto again;
 		}
 		return zwplug;
 	}
 
 	/* Allocate and insert a new zone write plug. */
-	zwplug = kmem_cache_alloc(blk_zone_wplugs_cachep, gfp_mask);
+	zwplug = mempool_alloc(disk->zone_wplugs_pool, gfp_mask);
 	if (!zwplug)
 		return NULL;
 
@@ -629,7 +630,7 @@  static struct blk_zone_wplug *disk_get_zone_wplug_locked(struct gendisk *disk,
 	 */
 	if (!disk_insert_zone_wplug(disk, zwplug)) {
 		spin_unlock_irqrestore(&zwplug->lock, *flags);
-		kmem_cache_free(blk_zone_wplugs_cachep, zwplug);
+		mempool_free(zwplug, disk->zone_wplugs_pool);
 		goto again;
 	}
 
@@ -659,13 +660,14 @@  static inline void blk_zone_wplug_bio_io_error(struct bio *bio)
 	blk_queue_exit(q);
 }
 
-static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
+static void disk_zone_wplug_abort(struct gendisk *disk,
+				  struct blk_zone_wplug *zwplug)
 {
 	struct bio *bio;
 
 	while ((bio = bio_list_pop(&zwplug->bio_list))) {
 		blk_zone_wplug_bio_io_error(bio);
-		disk_put_zone_wplug(zwplug);
+		disk_put_zone_wplug(disk, zwplug);
 	}
 }
 
@@ -681,7 +683,7 @@  static void disk_zone_wplug_abort_unaligned(struct gendisk *disk,
 		if (wp_offset >= zone_capacity ||
 		     bio_offset_from_zone_start(bio) != wp_offset) {
 			blk_zone_wplug_bio_io_error(bio);
-			disk_put_zone_wplug(zwplug);
+			disk_put_zone_wplug(disk, zwplug);
 			continue;
 		}
 
@@ -718,7 +720,7 @@  static void disk_zone_wplug_set_wp_offset(struct gendisk *disk,
 
 	/* Update the zone write pointer and abort all plugged BIOs. */
 	zwplug->wp_offset = wp_offset;
-	disk_zone_wplug_abort(zwplug);
+	disk_zone_wplug_abort(disk, zwplug);
 
 	/*
 	 * Updating the write pointer offset puts back the zone
@@ -765,7 +767,7 @@  static bool blk_zone_wplug_handle_reset_or_finish(struct bio *bio,
 	zwplug = disk_get_zone_wplug(disk, bio->bi_iter.bi_sector);
 	if (zwplug) {
 		disk_zone_wplug_set_wp_offset(disk, zwplug, wp_offset);
-		disk_put_zone_wplug(zwplug);
+		disk_put_zone_wplug(disk, zwplug);
 	}
 
 	return false;
@@ -787,7 +789,7 @@  static bool blk_zone_wplug_handle_reset_all(struct bio *bio)
 		zwplug = disk_get_zone_wplug(disk, sector);
 		if (zwplug) {
 			disk_zone_wplug_set_wp_offset(disk, zwplug, 0);
-			disk_put_zone_wplug(zwplug);
+			disk_put_zone_wplug(disk, zwplug);
 		}
 	}
 
@@ -1158,7 +1160,7 @@  void blk_zone_write_plug_bio_endio(struct bio *bio)
 	if (bio->bi_bdev->bd_has_submit_bio)
 		disk_zone_wplug_unplug_bio(disk, zwplug);
 
-	disk_put_zone_wplug(zwplug);
+	disk_put_zone_wplug(disk, zwplug);
 }
 
 void blk_zone_write_plug_complete_request(struct request *req)
@@ -1171,7 +1173,7 @@  void blk_zone_write_plug_complete_request(struct request *req)
 
 	disk_zone_wplug_unplug_bio(disk, zwplug);
 
-	disk_put_zone_wplug(zwplug);
+	disk_put_zone_wplug(disk, zwplug);
 }
 
 static void blk_zone_wplug_bio_work(struct work_struct *work)
@@ -1284,7 +1286,7 @@  static void disk_zone_wplug_handle_error(struct gendisk *disk,
 		 * plugged BIOs as otherwise we could endup waiting forever on
 		 * plugged BIOs to complete if there is a queue freeze on-going.
 		 */
-		disk_zone_wplug_abort(zwplug);
+		disk_zone_wplug_abort(disk, zwplug);
 		goto unplug;
 	}
 
@@ -1325,7 +1327,7 @@  static void disk_zone_wplugs_work(struct work_struct *work)
 		spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
 
 		disk_zone_wplug_handle_error(disk, zwplug);
-		disk_put_zone_wplug(zwplug);
+		disk_put_zone_wplug(disk, zwplug);
 
 		spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
 	}
@@ -1353,7 +1355,7 @@  static void disk_free_zone_wplugs(struct gendisk *disk)
 					     struct blk_zone_wplug, node);
 			blk_get_zone_wplug(zwplug);
 			disk_remove_zone_wplug(disk, zwplug);
-			disk_put_zone_wplug(zwplug);
+			disk_put_zone_wplug(disk, zwplug);
 		}
 	}
 }
@@ -1369,7 +1371,7 @@  void disk_init_zone_resources(struct gendisk *disk)
  * For the size of a disk zone write plug hash table, use the disk maximum
  * open zones and maximum active zones limits, but do not exceed 4KB (512 hlist
  * head entries), that is, 9 bits. For a disk that has no limits, default to
- * 128 zones to hash.
+ * 128 zones for the mempool size and the hash size.
  */
 #define BLK_ZONE_MAX_WPLUG_HASH_BITS		9
 #define BLK_ZONE_DEFAULT_WPLUG_HASH_SIZE	128
@@ -1391,6 +1393,17 @@  static int disk_alloc_zone_resources(struct gendisk *disk,
 	for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++)
 		INIT_HLIST_HEAD(&disk->zone_wplugs_hash[i]);
 
+	disk->zone_wplugs_pool =
+		mempool_create_slab_pool(hash_size, blk_zone_wplugs_cachep);
+	if (!disk->zone_wplugs_pool) {
+		kfree(disk->zone_wplugs_hash);
+		disk->zone_wplugs_hash = NULL;
+		disk->zone_wplugs_hash_bits = 0;
+		return -ENOMEM;
+	}
+
+	disk->zone_wplugs_pool_size = hash_size;
+
 	return 0;
 }
 
@@ -1404,6 +1417,10 @@  void disk_free_zone_resources(struct gendisk *disk)
 	disk->zone_wplugs_hash = NULL;
 	disk->zone_wplugs_hash_bits = 0;
 
+	mempool_destroy(disk->zone_wplugs_pool);
+	disk->zone_wplugs_pool = NULL;
+	disk->zone_wplugs_pool_size = 0;
+
 	kfree(disk->conv_zones_bitmap);
 	disk->conv_zones_bitmap = NULL;
 	kfree(disk->seq_zones_wlock);
@@ -1418,6 +1435,7 @@  static int disk_revalidate_zone_resources(struct gendisk *disk,
 {
 	struct queue_limits *lim = &disk->queue->limits;
 	unsigned int hash_size;
+	int ret;
 
 	hash_size = max(lim->max_open_zones, lim->max_active_zones);
 	if (!hash_size)
@@ -1427,6 +1445,14 @@  static int disk_revalidate_zone_resources(struct gendisk *disk,
 	if (!disk->zone_wplugs_hash)
 		return disk_alloc_zone_resources(disk, hash_size);
 
+	/* Resize the memory pool if needed. */
+	if (disk->zone_wplugs_pool_size != hash_size) {
+		ret = mempool_resize(disk->zone_wplugs_pool, hash_size);
+		if (ret)
+			return ret;
+		disk->zone_wplugs_pool_size = hash_size;
+	}
+
 	return 0;
 }
 
@@ -1526,7 +1552,7 @@  static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
 			if (!zwplug)
 				return -ENOMEM;
 			spin_unlock_irqrestore(&zwplug->lock, flags);
-			disk_put_zone_wplug(zwplug);
+			disk_put_zone_wplug(disk, zwplug);
 		}
 
 		break;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e9b670be338b..68c60039a7ea 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -25,6 +25,7 @@ 
 #include <linux/uuid.h>
 #include <linux/xarray.h>
 #include <linux/file.h>
+#include <linux/mempool.h>
 
 struct module;
 struct request_queue;
@@ -194,6 +195,8 @@  struct gendisk {
 	unsigned int		zone_capacity;
 	unsigned long		*conv_zones_bitmap;
 	unsigned long		*seq_zones_wlock;
+	unsigned int		zone_wplugs_pool_size;
+	mempool_t		*zone_wplugs_pool;
 	unsigned int            zone_wplugs_hash_bits;
 	spinlock_t              zone_wplugs_lock;
 	struct hlist_head       *zone_wplugs_hash;