@@ -852,6 +852,8 @@ static void __blk_release_queue(struct work_struct *work)
if (q->queue_tags)
__blk_queue_free_tags(q);
+ blk_queue_free_zone_bitmaps(q);
+
if (!q->mq_ops) {
if (q->exit_rq_fn)
q->exit_rq_fn(q, q->fq->flush_rq);
@@ -12,6 +12,7 @@
#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
#include "blk.h"
@@ -359,3 +360,138 @@ int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
GFP_KERNEL);
}
+
+static inline unsigned long *blk_alloc_zone_bitmap(int node,
+ unsigned int nr_zones)
+{
+ return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
+ GFP_NOIO, node);
+}
+
+/*
+ * Allocate an array of struct blk_zone to get nr_zones zone information.
+ * The allocated array may be smaller than nr_zones.
+ */
+static struct blk_zone *blk_alloc_zones(int node, unsigned int *nr_zones)
+{
+ size_t size = *nr_zones * sizeof(struct blk_zone);
+ struct page *page;
+ int order;
+
+ for (order = get_order(size); order >= 0; order--) {
+ page = alloc_pages_node(node, GFP_NOIO | __GFP_ZERO, order);
+ if (page) {
+ *nr_zones = min_t(unsigned int, *nr_zones,
+ (PAGE_SIZE << order) / sizeof(struct blk_zone));
+ return page_address(page);
+ }
+ }
+
+ return NULL;
+}
+
+void blk_queue_free_zone_bitmaps(struct request_queue *q)
+{
+ kfree(q->seq_zones_bitmap);
+ q->seq_zones_bitmap = NULL;
+ kfree(q->seq_zones_wlock);
+ q->seq_zones_wlock = NULL;
+}
+
+/**
+ * blk_revalidate_disk_zones - (re)allocate and initialize zone bitmaps
+ * @disk: Target disk
+ *
+ * Helper function for low-level device drivers to (re)allocate and initialize
+ * a disk's request queue zone bitmaps. This function should normally be called
+ * from the disk ->revalidate method. For BIO based queues, no zone bitmap
+ * is allocated.
+ */
+int blk_revalidate_disk_zones(struct gendisk *disk)
+{
+ struct request_queue *q = disk->queue;
+ unsigned int nr_zones = __blkdev_nr_zones(q, get_capacity(disk));
+ unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
+ unsigned int i, rep_nr_zones = 0, z = 0, nrz;
+ struct blk_zone *zones = NULL;
+ sector_t sector = 0;
+ int ret = 0;
+
+ /*
+ * BIO based queues do not use a scheduler, so only q->nr_zones
+ * needs to be updated for the sysfs exposed value to be correct.
+ */
+ if (!queue_is_rq_based(q)) {
+ q->nr_zones = nr_zones;
+ return 0;
+ }
+
+ if (!blk_queue_is_zoned(q) || !nr_zones) {
+ nr_zones = 0;
+ goto update;
+ }
+
+ /* Allocate bitmaps */
+ ret = -ENOMEM;
+ seq_zones_wlock = blk_alloc_zone_bitmap(q->node, nr_zones);
+ if (!seq_zones_wlock)
+ goto out;
+ seq_zones_bitmap = blk_alloc_zone_bitmap(q->node, nr_zones);
+ if (!seq_zones_bitmap)
+ goto out;
+
+ /* Get zone information and initialize seq_zones_bitmap */
+ rep_nr_zones = nr_zones;
+ zones = blk_alloc_zones(q->node, &rep_nr_zones);
+ if (!zones)
+ goto out;
+
+ while (z < nr_zones) {
+ nrz = min(nr_zones - z, rep_nr_zones);
+ ret = blk_report_zones(disk, sector, zones, &nrz, GFP_NOIO);
+ if (ret)
+ goto out;
+ if (!nrz)
+ break;
+ for (i = 0; i < nrz; i++) {
+ if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL)
+ set_bit(z, seq_zones_bitmap);
+ z++;
+ }
+ sector += nrz * blk_queue_zone_sectors(q);
+ }
+
+ if (WARN_ON(z != nr_zones)) {
+ ret = -EIO;
+ goto out;
+ }
+
+update:
+ /*
+ * Install the new bitmaps, making sure the queue is stopped and
+ * all I/Os are completed (i.e. a scheduler is not referencing the
+ * bitmaps).
+ */
+ blk_mq_freeze_queue(q);
+ q->nr_zones = nr_zones;
+ swap(q->seq_zones_wlock, seq_zones_wlock);
+ swap(q->seq_zones_bitmap, seq_zones_bitmap);
+ blk_mq_unfreeze_queue(q);
+
+out:
+ free_pages((unsigned long)zones,
+ get_order(rep_nr_zones * sizeof(struct blk_zone)));
+ kfree(seq_zones_wlock);
+ kfree(seq_zones_bitmap);
+
+ if (ret) {
+ pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
+ blk_mq_freeze_queue(q);
+ blk_queue_free_zone_bitmaps(q);
+ blk_mq_unfreeze_queue(q);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);
+
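The kernel-doc above states that low-level drivers are expected to call blk_revalidate_disk_zones() from their revalidate path once the capacity and zone size (chunk_sectors) are known; the null_blk and sd hunks below do exactly that. As a minimal sketch of such a caller, the mydrv_* names and struct fields below are hypothetical and not part of this patch; only the block layer calls are taken from it:

#include <linux/blkdev.h>
#include <linux/genhd.h>

/* Hypothetical driver state, for illustration only. */
struct mydrv_dev {
	sector_t	nr_sectors;	/* device capacity in 512 B sectors */
	unsigned int	zone_sectors;	/* zone size in 512 B sectors */
};

static int mydrv_revalidate(struct gendisk *disk)
{
	struct mydrv_dev *dev = disk->private_data;
	int ret;

	/* Capacity and zone size must be set before revalidating zones. */
	set_capacity(disk, dev->nr_sectors);
	blk_queue_chunk_sectors(disk->queue, dev->zone_sectors);

	/*
	 * (Re)allocate q->seq_zones_bitmap and q->seq_zones_wlock and update
	 * q->nr_zones. For BIO based queues only q->nr_zones is updated.
	 */
	ret = blk_revalidate_disk_zones(disk);
	if (ret)
		pr_warn("%s: zone revalidation failed %d\n",
			disk->disk_name, ret);
	return ret;
}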
@@ -423,4 +423,10 @@ static inline int blk_iolatency_init(struct request_queue *q) { return 0; }
struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp);
+#ifdef CONFIG_BLK_DEV_ZONED
+void blk_queue_free_zone_bitmaps(struct request_queue *q);
+#else
+static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {}
+#endif
+
#endif /* BLK_INTERNAL_H */
@@ -1613,6 +1613,13 @@ static int null_gendisk_register(struct nullb *nullb)
disk->queue = nullb->q;
strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
+ if (nullb->dev->zoned) {
+ int ret = blk_revalidate_disk_zones(disk);
+
+ if (ret != 0)
+ return ret;
+ }
+
add_disk(disk);
return 0;
}
@@ -1937,6 +1937,16 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
*/
if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random))
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
+
+ /*
+ * For a zoned target, the number of zones must be updated so that the
+ * correct value is exposed in sysfs queue/nr_zones. For a BIO based
+ * target, this is all that is needed. For a request based target, the
+ * queue zone bitmaps must also be updated.
+ * blk_revalidate_disk_zones() handles both cases.
+ */
+ if (blk_queue_is_zoned(q))
+ blk_revalidate_disk_zones(t->md->disk);
}
unsigned int dm_table_get_num_targets(struct dm_table *t)
@@ -3413,8 +3413,6 @@ static int sd_remove(struct device *dev)
del_gendisk(sdkp->disk);
sd_shutdown(dev);
- sd_zbc_remove(sdkp);
-
free_opal_dev(sdkp->opal_dev);
blk_register_region(devt, SD_MINORS, NULL,
@@ -76,7 +76,6 @@ struct scsi_disk {
#ifdef CONFIG_BLK_DEV_ZONED
u32 nr_zones;
u32 zone_blocks;
- u32 zone_shift;
u32 zones_optimal_open;
u32 zones_optimal_nonseq;
u32 zones_max_open;
@@ -271,7 +270,6 @@ static inline int sd_is_zoned(struct scsi_disk *sdkp)
#ifdef CONFIG_BLK_DEV_ZONED
extern int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buffer);
-extern void sd_zbc_remove(struct scsi_disk *sdkp);
extern void sd_zbc_print_zones(struct scsi_disk *sdkp);
extern int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd);
extern void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
@@ -288,8 +286,6 @@ static inline int sd_zbc_read_zones(struct scsi_disk *sdkp,
return 0;
}
-static inline void sd_zbc_remove(struct scsi_disk *sdkp) {}
-
static inline void sd_zbc_print_zones(struct scsi_disk *sdkp) {}
static inline int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd)
@@ -425,191 +425,10 @@ static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks)
return ret;
}
-/**
- * sd_zbc_alloc_zone_bitmap - Allocate a zone bitmap (one bit per zone).
- * @nr_zones: Number of zones to allocate space for.
- * @numa_node: NUMA node to allocate the memory from.
- */
-static inline unsigned long *
-sd_zbc_alloc_zone_bitmap(u32 nr_zones, int numa_node)
-{
- return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
- GFP_KERNEL, numa_node);
-}
-
-/**
- * sd_zbc_get_seq_zones - Parse report zones reply to identify sequential zones
- * @sdkp: disk used
- * @buf: report reply buffer
- * @buflen: length of @buf
- * @zone_shift: logarithm base 2 of the number of blocks in a zone
- * @seq_zones_bitmap: bitmap of sequential zones to set
- *
- * Parse reported zone descriptors in @buf to identify sequential zones and
- * set the reported zone bit in @seq_zones_bitmap accordingly.
- * Since read-only and offline zones cannot be written, do not
- * mark them as sequential in the bitmap.
- * Return the LBA after the last zone reported.
- */
-static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
- unsigned int buflen, u32 zone_shift,
- unsigned long *seq_zones_bitmap)
-{
- sector_t lba, next_lba = sdkp->capacity;
- unsigned int buf_len, list_length;
- unsigned char *rec;
- u8 type, cond;
-
- list_length = get_unaligned_be32(&buf[0]) + 64;
- buf_len = min(list_length, buflen);
- rec = buf + 64;
-
- while (rec < buf + buf_len) {
- type = rec[0] & 0x0f;
- cond = (rec[1] >> 4) & 0xf;
- lba = get_unaligned_be64(&rec[16]);
- if (type != ZBC_ZONE_TYPE_CONV &&
- cond != ZBC_ZONE_COND_READONLY &&
- cond != ZBC_ZONE_COND_OFFLINE)
- set_bit(lba >> zone_shift, seq_zones_bitmap);
- next_lba = lba + get_unaligned_be64(&rec[8]);
- rec += 64;
- }
-
- return next_lba;
-}
-
-/**
- * sd_zbc_setup_seq_zones_bitmap - Initialize a seq zone bitmap.
- * @sdkp: target disk
- * @zone_shift: logarithm base 2 of the number of blocks in a zone
- * @nr_zones: number of zones to set up a seq zone bitmap for
- *
- * Allocate a zone bitmap and initialize it by identifying sequential zones.
- */
-static unsigned long *
-sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp, u32 zone_shift,
- u32 nr_zones)
-{
- struct request_queue *q = sdkp->disk->queue;
- unsigned long *seq_zones_bitmap;
- sector_t lba = 0;
- unsigned char *buf;
- int ret = -ENOMEM;
-
- seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(nr_zones, q->node);
- if (!seq_zones_bitmap)
- return ERR_PTR(-ENOMEM);
-
- buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
- if (!buf)
- goto out;
-
- while (lba < sdkp->capacity) {
- ret = sd_zbc_do_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, lba,
- true);
- if (ret)
- goto out;
- lba = sd_zbc_get_seq_zones(sdkp, buf, SD_ZBC_BUF_SIZE,
- zone_shift, seq_zones_bitmap);
- }
-
- if (lba != sdkp->capacity) {
- /* Something went wrong */
- ret = -EIO;
- }
-
-out:
- kfree(buf);
- if (ret) {
- kfree(seq_zones_bitmap);
- return ERR_PTR(ret);
- }
- return seq_zones_bitmap;
-}
-
-static void sd_zbc_cleanup(struct scsi_disk *sdkp)
-{
- struct request_queue *q = sdkp->disk->queue;
-
- kfree(q->seq_zones_bitmap);
- q->seq_zones_bitmap = NULL;
-
- kfree(q->seq_zones_wlock);
- q->seq_zones_wlock = NULL;
-
- q->nr_zones = 0;
-}
-
-static int sd_zbc_setup(struct scsi_disk *sdkp, u32 zone_blocks)
-{
- struct request_queue *q = sdkp->disk->queue;
- u32 zone_shift = ilog2(zone_blocks);
- u32 nr_zones;
- int ret;
-
- /* chunk_sectors indicates the zone size */
- blk_queue_chunk_sectors(q,
- logical_to_sectors(sdkp->device, zone_blocks));
- nr_zones = round_up(sdkp->capacity, zone_blocks) >> zone_shift;
-
- /*
- * Initialize the device request queue information if the number
- * of zones changed.
- */
- if (nr_zones != sdkp->nr_zones || nr_zones != q->nr_zones) {
- unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
- size_t zone_bitmap_size;
-
- if (nr_zones) {
- seq_zones_wlock = sd_zbc_alloc_zone_bitmap(nr_zones,
- q->node);
- if (!seq_zones_wlock) {
- ret = -ENOMEM;
- goto err;
- }
-
- seq_zones_bitmap = sd_zbc_setup_seq_zones_bitmap(sdkp,
- zone_shift, nr_zones);
- if (IS_ERR(seq_zones_bitmap)) {
- ret = PTR_ERR(seq_zones_bitmap);
- kfree(seq_zones_wlock);
- goto err;
- }
- }
- zone_bitmap_size = BITS_TO_LONGS(nr_zones) *
- sizeof(unsigned long);
- blk_mq_freeze_queue(q);
- if (q->nr_zones != nr_zones) {
- /* READ16/WRITE16 is mandatory for ZBC disks */
- sdkp->device->use_16_for_rw = 1;
- sdkp->device->use_10_for_rw = 0;
-
- sdkp->zone_blocks = zone_blocks;
- sdkp->zone_shift = zone_shift;
- sdkp->nr_zones = nr_zones;
- q->nr_zones = nr_zones;
- swap(q->seq_zones_wlock, seq_zones_wlock);
- swap(q->seq_zones_bitmap, seq_zones_bitmap);
- } else if (memcmp(q->seq_zones_bitmap, seq_zones_bitmap,
- zone_bitmap_size) != 0) {
- memcpy(q->seq_zones_bitmap, seq_zones_bitmap,
- zone_bitmap_size);
- }
- blk_mq_unfreeze_queue(q);
- kfree(seq_zones_wlock);
- kfree(seq_zones_bitmap);
- }
-
- return 0;
-
-err:
- sd_zbc_cleanup(sdkp);
- return ret;
-}
-
int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
{
+ struct gendisk *disk = sdkp->disk;
+ unsigned int nr_zones;
u32 zone_blocks;
int ret;
@@ -634,24 +453,39 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
goto err;
/* The drive satisfies the kernel restrictions: set it up */
- ret = sd_zbc_setup(sdkp, zone_blocks);
- if (ret)
- goto err;
+ blk_queue_chunk_sectors(sdkp->disk->queue,
+ logical_to_sectors(sdkp->device, zone_blocks));
+ nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
+
+ /* READ16/WRITE16 is mandatory for ZBC disks */
+ sdkp->device->use_16_for_rw = 1;
+ sdkp->device->use_10_for_rw = 0;
+
+ /*
+ * Revalidate the disk zone bitmaps once the device capacity is known,
+ * that is, on the second revalidate execution during disk scan and on
+ * every normal revalidate, and only if something changed.
+ */
+ if (sdkp->first_scan)
+ return 0;
+ if (sdkp->zone_blocks != zone_blocks ||
+ sdkp->nr_zones != nr_zones ||
+ disk->queue->nr_zones != nr_zones) {
+ ret = blk_revalidate_disk_zones(disk);
+ if (ret != 0)
+ goto err;
+ sdkp->zone_blocks = zone_blocks;
+ sdkp->nr_zones = nr_zones;
+ }
return 0;
err:
sdkp->capacity = 0;
- sd_zbc_cleanup(sdkp);
return ret;
}
-void sd_zbc_remove(struct scsi_disk *sdkp)
-{
- sd_zbc_cleanup(sdkp);
-}
-
void sd_zbc_print_zones(struct scsi_disk *sdkp)
{
if (!sd_is_zoned(sdkp) || !sdkp->capacity)
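
The nr_zones computation in sd_zbc_read_zones() above rounds the capacity up so that a smaller last zone still counts as a zone. A worked example with assumed values (not taken from the patch):

#include <linux/kernel.h>	/* round_up() */
#include <linux/log2.h>		/* ilog2() */
#include <linux/types.h>	/* u32, sector_t */

/* Illustrative values only: 256 MiB zones on a 512 B logical block drive. */
static unsigned int example_nr_zones(void)
{
	u32 zone_blocks = 524288;	/* 2^19 logical blocks per zone */
	sector_t capacity = 10000000;	/* logical blocks reported by the drive */

	/*
	 * Same formula as sd_zbc_read_zones():
	 * round_up(10000000, 524288) = 10485760, and 10485760 >> 19 = 20,
	 * i.e. 19 full zones plus one partial last zone.
	 */
	return round_up(capacity, zone_blocks) >> ilog2(zone_blocks);
}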
@@ -402,6 +402,7 @@ extern int blkdev_report_zones(struct block_device *bdev,
unsigned int *nr_zones, gfp_t gfp_mask);
extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors,
sector_t nr_sectors, gfp_t gfp_mask);
+extern int blk_revalidate_disk_zones(struct gendisk *disk);
extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg);
@@ -414,6 +415,12 @@ static inline unsigned int blkdev_nr_zones(struct block_device *bdev)
{
return 0;
}
+
+static inline int blk_revalidate_disk_zones(struct gendisk *disk)
+{
+ return 0;
+}
+
static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
fmode_t mode, unsigned int cmd,
unsigned long arg)
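
Once installed by blk_revalidate_disk_zones(), q->nr_zones and q->seq_zones_bitmap are consumed through the zone helpers already present in blkdev.h, blk_queue_zone_no() and blk_queue_zone_is_seq(). The sketch below only illustrates that consumption; the mydrv_ helper is hypothetical and not part of this patch:

#include <linux/blkdev.h>

/*
 * Sketch: decide whether a write to 'sector' targets a sequential write
 * required zone, using the bitmap populated by blk_revalidate_disk_zones().
 */
static bool mydrv_write_needs_ordering(struct request_queue *q,
				       sector_t sector)
{
	/* Conventional zones accept random writes; nothing special to do. */
	if (!blk_queue_zone_is_seq(q, sector))
		return false;

	pr_debug("sector %llu is in sequential zone %u\n",
		 (unsigned long long)sector, blk_queue_zone_no(q, sector));
	return true;
}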