@@ -901,7 +901,7 @@ static void __blk_release_queue(struct work_struct *work)
blk_exit_queue(q);
- blk_queue_free_zone_bitmaps(q);
+ blk_queue_free_zone_resources(q);
if (queue_is_mq(q))
blk_mq_release(q);
@@ -344,18 +344,65 @@ static inline unsigned long *blk_alloc_zone_bitmap(int node,
GFP_NOIO, node);
}
-void blk_queue_free_zone_bitmaps(struct request_queue *q)
+static inline unsigned int *blk_alloc_zone_wp_ofst(unsigned int nr_zones)
+{
+ return kvcalloc(nr_zones, sizeof(unsigned int), GFP_NOIO);
+}
+
+void blk_queue_free_zone_resources(struct request_queue *q)
{
kfree(q->conv_zones_bitmap);
q->conv_zones_bitmap = NULL;
kfree(q->seq_zones_wlock);
q->seq_zones_wlock = NULL;
+ kvfree(q->seq_zones_wp_ofst);
+ q->seq_zones_wp_ofst = NULL;
}
+/**
+ * blk_get_zone_wp_offset - Calculate a zone write pointer offset
+ * @zone: Target zone
+ * @wp_ofst: Calculated write pointer offset
+ *
+ * Helper function for low-level device drivers to obtain a zone write pointer
+ * position relative to the zone start sector (write pointer offset). The write
+ * pointer offset depends on the zone condition. If the zone has an invalid
+ * condition, -ENODEV is returned.
+ */
+int blk_get_zone_wp_offset(struct blk_zone *zone, unsigned int *wp_ofst)
+{
+ switch (zone->cond) {
+ case BLK_ZONE_COND_EMPTY:
+ *wp_ofst = 0;
+ return 0;
+ case BLK_ZONE_COND_IMP_OPEN:
+ case BLK_ZONE_COND_EXP_OPEN:
+ case BLK_ZONE_COND_CLOSED:
+ *wp_ofst = zone->wp - zone->start;
+ return 0;
+ case BLK_ZONE_COND_FULL:
+ *wp_ofst = zone->len;
+ return 0;
+ case BLK_ZONE_COND_NOT_WP:
+ case BLK_ZONE_COND_OFFLINE:
+ case BLK_ZONE_COND_READONLY:
+ /*
+ * Conventional, offline and read-only zones do not have a valid
+ * write pointer. Use 0 as a dummy value.
+ */
+ *wp_ofst = 0;
+ return 0;
+ default:
+ return -ENODEV;
+ }
+}
+EXPORT_SYMBOL_GPL(blk_get_zone_wp_offset);
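For illustration only (not part of the patch), a minimal sketch of how a driver or stacked consumer might use the exported helper from a report_zones_cb to seed its own write pointer cache; foo_dev, foo_report_zones_cb and foo_load_wp_offsets are hypothetical names:

#include <linux/blkdev.h>

/* Hypothetical per-device state, not part of this patch. */
struct foo_dev {
	unsigned int *zones_wp_ofst;	/* one entry per zone */
};

/* report_zones_cb: cache the write pointer offset of each reported zone. */
static int foo_report_zones_cb(struct blk_zone *zone, unsigned int idx,
			       void *data)
{
	struct foo_dev *foo = data;

	return blk_get_zone_wp_offset(zone, &foo->zones_wp_ofst[idx]);
}

/* Walk nr_zones zones of bdev and fill foo->zones_wp_ofst. */
static int foo_load_wp_offsets(struct block_device *bdev, struct foo_dev *foo,
			       unsigned int nr_zones)
{
	int ret;

	ret = blkdev_report_zones(bdev, 0, nr_zones, foo_report_zones_cb, foo);
	return ret < 0 ? ret : 0;
}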
+
struct blk_revalidate_zone_args {
struct gendisk *disk;
unsigned long *conv_zones_bitmap;
unsigned long *seq_zones_wlock;
+ unsigned int *seq_zones_wp_ofst;
unsigned int nr_zones;
sector_t zone_sectors;
sector_t sector;
@@ -371,6 +418,7 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
struct gendisk *disk = args->disk;
struct request_queue *q = disk->queue;
sector_t capacity = get_capacity(disk);
+ int ret;
/*
* All zones must have the same size, with the exception of an eventual
@@ -406,6 +454,13 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
return -ENODEV;
}
+ if (blk_queue_zone_wp_ofst(q) && !args->seq_zones_wp_ofst) {
+ args->seq_zones_wp_ofst =
+ blk_alloc_zone_wp_ofst(args->nr_zones);
+ if (!args->seq_zones_wp_ofst)
+ return -ENOMEM;
+ }
+
/* Check zone type */
switch (zone->type) {
case BLK_ZONE_TYPE_CONVENTIONAL:
@@ -432,6 +487,14 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
return -ENODEV;
}
+ if (args->seq_zones_wp_ofst) {
+ /* Initialize the zone write pointer offset */
+ ret = blk_get_zone_wp_offset(zone,
+ &args->seq_zones_wp_ofst[idx]);
+ if (ret)
+ return ret;
+ }
+
args->sector += zone->len;
return 0;
}
@@ -489,16 +552,18 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
q->nr_zones = args.nr_zones;
swap(q->seq_zones_wlock, args.seq_zones_wlock);
swap(q->conv_zones_bitmap, args.conv_zones_bitmap);
+ swap(q->seq_zones_wp_ofst, args.seq_zones_wp_ofst);
ret = 0;
} else {
pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
- blk_queue_free_zone_bitmaps(q);
+ blk_queue_free_zone_resources(q);
}
blk_mq_unfreeze_queue(q);
out:
kfree(args.seq_zones_wlock);
kfree(args.conv_zones_bitmap);
+ kvfree(args.seq_zones_wp_ofst);
return ret;
}
EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);
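The write pointer offset array is only allocated when the queue has QUEUE_FLAG_ZONE_WP_OFST set (see the blkdev.h hunk below). As a hedged sketch of the expected driver-side sequence, assuming a hypothetical foo_setup_zone_emulation() helper:

#include <linux/blkdev.h>

/*
 * Ask the block layer to maintain per-zone write pointer offsets, then
 * revalidate so that blk_revalidate_zone_cb() allocates and populates the
 * array that gets swapped into q->seq_zones_wp_ofst.
 */
static int foo_setup_zone_emulation(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;

	blk_queue_flag_set(QUEUE_FLAG_ZONE_WP_OFST, q);

	return blk_revalidate_disk_zones(disk);
}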
@@ -370,9 +370,9 @@ static inline int blk_iolatency_init(struct request_queue *q) { return 0; }
struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp);
#ifdef CONFIG_BLK_DEV_ZONED
-void blk_queue_free_zone_bitmaps(struct request_queue *q);
+void blk_queue_free_zone_resources(struct request_queue *q);
#else
-static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {}
+static inline void blk_queue_free_zone_resources(struct request_queue *q) {}
#endif
void part_dec_in_flight(struct request_queue *q, struct hd_struct *part,
@@ -363,6 +363,7 @@ extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
sector_t sectors, sector_t nr_sectors,
gfp_t gfp_mask);
extern int blk_revalidate_disk_zones(struct gendisk *disk);
+int blk_get_zone_wp_offset(struct blk_zone *zone, unsigned int *wp_ofst);
extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg);
@@ -499,14 +500,17 @@ struct request_queue {
/*
* Zoned block device information for request dispatch control.
* nr_zones is the total number of zones of the device. This is always
- * 0 for regular block devices. conv_zones_bitmap is a bitmap of nr_zones
- * bits which indicates if a zone is conventional (bit set) or
+ * 0 for regular block devices. conv_zones_bitmap is a bitmap of
+ * nr_zones bits which indicates if a zone is conventional (bit set) or
* sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones
* bits which indicates if a zone is write locked, that is, if a write
- * request targeting the zone was dispatched. All three fields are
- * initialized by the low level device driver (e.g. scsi/sd.c).
- * Stacking drivers (device mappers) may or may not initialize
- * these fields.
+ * request targeting the zone was dispatched. seq_zones_wp_ofst is an
+ * array of nr_zones write pointer offsets relative to the zone start
+ * sector, initialized only for LLDs that need to emulate zone append
+ * writes using regular writes. All fields are initialized by
+ * blk_revalidate_disk_zones() when it is called by the low-level
+ * device driver (e.g. scsi/sd.c). Stacking drivers (device mappers)
+ * may or may not initialize these fields.
*
* Reads of this information must be protected with blk_queue_enter() /
* blk_queue_exit(). Modifying this information is only allowed while
@@ -516,6 +520,7 @@ struct request_queue {
unsigned int nr_zones;
unsigned long *conv_zones_bitmap;
unsigned long *seq_zones_wlock;
+ unsigned int *seq_zones_wp_ofst;
#endif /* CONFIG_BLK_DEV_ZONED */
/*
@@ -613,6 +618,7 @@ struct request_queue {
#define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */
#define QUEUE_FLAG_ZONE_RESETALL 26 /* supports Zone Reset All */
#define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */
+#define QUEUE_FLAG_ZONE_WP_OFST 28 /* queue needs zone wp offsets */
#define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_SAME_COMP))
@@ -647,6 +653,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
#else
#define blk_queue_rq_alloc_time(q) false
#endif
+#define blk_queue_zone_wp_ofst(q) \
+ test_bit(QUEUE_FLAG_ZONE_WP_OFST, &(q)->queue_flags)
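Tying the new macro to the locking rule documented in struct request_queue above (reads of the zone information must be protected with blk_queue_enter()/blk_queue_exit()), a hedged sketch of a reader; foo_zone_wp_ofst() is a hypothetical helper, not part of this patch:

#include <linux/blkdev.h>

/* Look up the cached write pointer offset of the zone containing sector. */
static int foo_zone_wp_ofst(struct request_queue *q, sector_t sector,
			    unsigned int *wp_ofst)
{
	unsigned int zno = blk_queue_zone_no(q, sector);
	int ret;

	if (!blk_queue_zone_wp_ofst(q))
		return -EOPNOTSUPP;

	ret = blk_queue_enter(q, 0);
	if (ret)
		return ret;

	if (q->seq_zones_wp_ofst)
		*wp_ofst = q->seq_zones_wp_ofst[zno];
	else
		ret = -EOPNOTSUPP;

	blk_queue_exit(q);
	return ret;
}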
#define blk_noretry_request(rq) \
((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \