@@ -88,6 +88,14 @@ config BLK_DEV_INTEGRITY
T10/SCSI Data Integrity Field or the T13/ATA External Path
Protection. If in doubt, say N.
+config BLK_DEV_ZONED
+ bool "Zoned block device support"
+ ---help---
+ Block layer zoned block device support. This option enables
+ support for ZAC/ZBC host-managed and host-aware zoned block devices.
+
+ Say yes here if you have a ZAC or ZBC storage device.
+
config BLK_DEV_THROTTLING
bool "Block layer bio throttling support"
depends on BLK_CGROUP=y
@@ -22,4 +22,5 @@ obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o
obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
+obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o
@@ -590,6 +590,8 @@ void blk_cleanup_queue(struct request_queue *q)
blk_mq_free_queue(q);
percpu_ref_exit(&q->q_usage_counter);
+ blk_drop_zones(q);
+
spin_lock_irq(lock);
if (q->queue_lock != &q->__queue_lock)
q->queue_lock = &q->__queue_lock;
@@ -728,6 +730,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
#endif
INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);
+ blk_init_zones(q);
+
kobject_init(&q->kobj, &blk_queue_ktype);
mutex_init(&q->sysfs_lock);
new file mode 100644
@@ -0,0 +1,338 @@
+/*
+ * Zoned block device handling
+ *
+ * Copyright (c) 2015, Hannes Reinecke
+ * Copyright (c) 2015, SUSE Linux GmbH
+ *
+ * Copyright (c) 2016, Damien Le Moal
+ * Copyright (c) 2016, Western Digital
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/rbtree.h>
+#include <linux/blkdev.h>
+
+void blk_init_zones(struct request_queue *q)
+{
+ spin_lock_init(&q->zones_lock);
+ q->zones = RB_ROOT;
+}
+
+/**
+ * blk_drop_zones - Empty a zoned device zone tree.
+ * @q: queue of the zoned device to operate on
+ *
+ * Free all zone descriptors added to the queue zone tree.
+ */
+void blk_drop_zones(struct request_queue *q)
+{
+ struct rb_root *root = &q->zones;
+ struct blk_zone *zone, *next;
+
+ rbtree_postorder_for_each_entry_safe(zone, next, root, node)
+ kfree(zone);
+ q->zones = RB_ROOT;
+}
+EXPORT_SYMBOL_GPL(blk_drop_zones);
+
+/**
+ * blk_insert_zone - Add a new zone struct to the queue RB-tree.
+ * @q: queue of the zoned device to operate on
+ * @new_zone: The zone struct to add
+ *
+ * Add @new_zone to the zone tree of @q if no zone overlapping the same
+ * sector range is already present, and return NULL. Otherwise, do not
+ * insert @new_zone and return the overlapping zone already in the tree.
+ */
+struct blk_zone *blk_insert_zone(struct request_queue *q,
+ struct blk_zone *new_zone)
+{
+ struct rb_root *root = &q->zones;
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+ struct blk_zone *zone = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&q->zones_lock, flags);
+
+ /* Figure out where to put the new node */
+ while (*new) {
+ zone = container_of(*new, struct blk_zone, node);
+ parent = *new;
+ if (new_zone->start + new_zone->len <= zone->start)
+ new = &((*new)->rb_left);
+ else if (new_zone->start >= zone->start + zone->len)
+ new = &((*new)->rb_right);
+ else
+ /* Return existing zone */
+ break;
+ zone = NULL;
+ }
+
+ if (!zone) {
+ /* No existing zone: add new node and rebalance tree */
+ rb_link_node(&new_zone->node, parent, new);
+ rb_insert_color(&new_zone->node, root);
+ }
+
+ spin_unlock_irqrestore(&q->zones_lock, flags);
+
+ return zone;
+}
+EXPORT_SYMBOL_GPL(blk_insert_zone);
+
+/**
+ * blk_lookup_zone - Search for the zone containing a sector.
+ * @q: queue of the zoned device tree to search
+ * @sector: A sector within the zone to search for
+ *
+ * Search the zone containing @sector in the zone tree owned
+ * by @q. NULL is returned if the zone is not found. Since this
+ * can be called concurrently with blk_insert_zone during device
+ * initialization, the tree traversal is protected using the
+ * zones_lock of the queue.
+ */
+struct blk_zone *blk_lookup_zone(struct request_queue *q, sector_t sector)
+{
+ struct rb_root *root = &q->zones;
+ struct rb_node *node = root->rb_node;
+ struct blk_zone *zone = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&q->zones_lock, flags);
+
+ while (node) {
+ zone = container_of(node, struct blk_zone, node);
+ if (sector < zone->start)
+ node = node->rb_left;
+ else if (sector >= zone->start + zone->len)
+ node = node->rb_right;
+ else
+ break;
+ zone = NULL;
+ }
+
+ spin_unlock_irqrestore(&q->zones_lock, flags);
+
+ return zone;
+}
+EXPORT_SYMBOL_GPL(blk_lookup_zone);
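
The two helpers above cover the whole life cycle of the zone cache: a driver inserts descriptors as it parses a zone report, and later callers look them up by sector. Below is a minimal sketch of such a driver-side population path; the function name, field values and error policy are assumptions for illustration, not part of this patch.

#include <linux/blkdev.h>
#include <linux/slab.h>

/* Hypothetical helper: add one parsed zone descriptor to the cache. */
static int example_add_zone(struct request_queue *q, sector_t start,
			    sector_t len, sector_t wp,
			    unsigned int type, unsigned int cond)
{
	struct blk_zone *zone, *old;

	zone = kzalloc(sizeof(*zone), GFP_KERNEL);
	if (!zone)
		return -ENOMEM;

	zone->start = start;
	zone->len = len;
	zone->wp = wp;
	zone->type = type;
	zone->cond = cond;

	old = blk_insert_zone(q, zone);
	if (old) {
		/* A zone overlapping this sector range is already cached. */
		kfree(zone);
		return -EEXIST;
	}

	return 0;
}

A subsequent blk_lookup_zone(q, sector) then returns this descriptor for any sector inside the zone, or NULL if the sector is not covered by any cached zone.
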
+
+/*
+ * Execute a zone operation (REQ_OP_ZONE*).
+ */
+static int blkdev_issue_zone_operation(struct block_device *bdev,
+ unsigned int op,
+ sector_t sector, sector_t nr_sects,
+ gfp_t gfp_mask)
+{
+ struct bio *bio;
+ int ret;
+
+ if (!bdev_zoned(bdev))
+ return -EOPNOTSUPP;
+
+ /*
+ * Make sure bi_size does not overflow because
+ * of some weird very large zone size.
+ */
+ if (nr_sects && (unsigned long long)nr_sects << 9 > UINT_MAX)
+ return -EINVAL;
+
+ bio = bio_alloc(gfp_mask, 1);
+ if (!bio)
+ return -ENOMEM;
+
+ bio->bi_iter.bi_sector = sector;
+ bio->bi_iter.bi_size = nr_sects << 9;
+ bio->bi_vcnt = 0;
+ bio->bi_bdev = bdev;
+ bio_set_op_attrs(bio, op, 0);
+
+ ret = submit_bio_wait(bio);
+
+ bio_put(bio);
+
+ return ret;
+}
+
+/**
+ * blkdev_update_zones - Force an update of a device zone information
+ * @bdev: Target block device
+ * @gfp_mask: Memory allocation flags (for bio_alloc)
+ *
+ * Force an update of the information of all zones of @bdev. This call
+ * does not block waiting for the update to complete: on return, the
+ * zones are only marked as being under update. Waiting for the update
+ * of a zone to complete can be done on a per-zone basis using
+ * blk_wait_for_zone_update.
+ */
+int blkdev_update_zones(struct block_device *bdev,
+ gfp_t gfp_mask)
+{
+ return blkdev_issue_zone_operation(bdev, REQ_OP_ZONE_REPORT,
+ 0, 0, gfp_mask);
+}
+
+/*
+ * Wait for a zone update to complete.
+ */
+static void __blk_wait_for_zone_update(struct blk_zone *zone)
+{
+ might_sleep();
+ if (test_bit(BLK_ZONE_IN_UPDATE, &zone->flags))
+ wait_on_bit_io(&zone->flags, BLK_ZONE_IN_UPDATE,
+ TASK_UNINTERRUPTIBLE);
+}
+
+/**
+ * blk_wait_for_zone_update - Wait for a zone information update
+ * @zone: The zone to wait for
+ *
+ * This must be called with the zone lock held. If @zone is not
+ * under update, returns immediately. Otherwise, wait for the
+ * update flag to be cleared on completion of the zone information
+ * update by the device driver.
+ */
+void blk_wait_for_zone_update(struct blk_zone *zone)
+{
+ WARN_ON_ONCE(!test_bit(BLK_ZONE_LOCKED, &zone->flags));
+ while (test_bit(BLK_ZONE_IN_UPDATE, &zone->flags)) {
+ blk_unlock_zone(zone);
+ __blk_wait_for_zone_update(zone);
+ blk_lock_zone(zone);
+ }
+}
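
As the kernel-doc above notes, blk_wait_for_zone_update() drops and re-takes the zone lock while sleeping, so a caller must not assume the zone state it saw before blocking is still current. A hedged sketch of the expected calling pattern (the helper name is made up for illustration):

/* Hypothetical caller: read a stable write pointer from the cache. */
static sector_t example_zone_wp(struct blk_zone *zone)
{
	sector_t wp;

	blk_lock_zone(zone);
	/* May drop and re-acquire the zone lock while sleeping. */
	blk_wait_for_zone_update(zone);
	wp = zone->wp;
	blk_unlock_zone(zone);

	return wp;
}
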
+
+/**
+ * blkdev_report_zone - Get zone information
+ * @bdev: Target block device
+ * @sector: A sector of the zone to report
+ * @update: Force an update of the zone information
+ * @gfp_mask: Memory allocation flags (for bio_alloc)
+ *
+ * Return the zone descriptor containing @sector from the zone cache.
+ * If @update is true, first issue a report zones operation for the zone
+ * and wait for its information to be updated.
+ */
+struct blk_zone *blkdev_report_zone(struct block_device *bdev,
+ sector_t sector,
+ bool update,
+ gfp_t gfp_mask)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+ struct blk_zone *zone;
+ int ret;
+
+ zone = blk_lookup_zone(q, sector);
+ if (!zone)
+ return ERR_PTR(-ENXIO);
+
+ if (update) {
+ ret = blkdev_issue_zone_operation(bdev, REQ_OP_ZONE_REPORT,
+ zone->start, zone->len,
+ gfp_mask);
+ if (ret)
+ return ERR_PTR(ret);
+ __blk_wait_for_zone_update(zone);
+ }
+
+ return zone;
+}
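
To illustrate the intended use of blkdev_report_zone(), here is a hedged sketch of a caller that forces a refresh and then inspects the zone condition under the zone lock; the helper name and the exact writability test are assumptions, not part of this patch.

/* Hypothetical check: can the zone containing @sector accept writes? */
static bool example_zone_writable(struct block_device *bdev, sector_t sector)
{
	struct blk_zone *zone;
	bool writable;

	zone = blkdev_report_zone(bdev, sector, true, GFP_KERNEL);
	if (IS_ERR(zone))
		return false;

	blk_lock_zone(zone);
	writable = !blk_zone_is_readonly(zone) &&
		   !blk_zone_is_offline(zone) &&
		   !blk_zone_is_full(zone);
	blk_unlock_zone(zone);

	return writable;
}
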
+
+/*
+ * Execute a zone action (open, close, reset or finish).
+ */
+static int blkdev_issue_zone_action(struct block_device *bdev,
+ sector_t sector, unsigned int op,
+ gfp_t gfp_mask)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+ struct blk_zone *zone;
+ sector_t nr_sects;
+ int ret;
+
+ if (!blk_queue_zoned(q))
+ return -EOPNOTSUPP;
+
+ if (sector == ~0ULL) {
+ /* All zones */
+ sector = 0;
+ nr_sects = 0;
+ } else {
+ /* This zone */
+ zone = blk_lookup_zone(q, sector);
+ if (!zone)
+ return -ENXIO;
+ sector = zone->start;
+ nr_sects = zone->len;
+ }
+
+ ret = blkdev_issue_zone_operation(bdev, op, sector,
+ nr_sects, gfp_mask);
+ if (ret == 0 && !nr_sects)
+ blkdev_update_zones(bdev, gfp_mask);
+
+ return ret;
+}
+
+/**
+ * blkdev_reset_zone - Reset a zone write pointer
+ * @bdev: target block device
+ * @sector: A sector of the zone to reset, or ~0ULL for all zones.
+ * @gfp_mask: memory allocation flags (for bio_alloc)
+ *
+ * Description:
+ * Reset the write pointer of the zone containing @sector, or of all
+ * zones if @sector is ~0ULL.
+ */
+int blkdev_reset_zone(struct block_device *bdev,
+ sector_t sector, gfp_t gfp_mask)
+{
+ return blkdev_issue_zone_action(bdev, sector, REQ_OP_ZONE_RESET,
+ gfp_mask);
+}
+
+/**
+ * blkdev_open_zone - Explicitly open a zone
+ * @bdev: target block device
+ * @sector: A sector of the zone to open, or ~0ULL for all zones.
+ * @gfp_mask: memory allocation flags (for bio_alloc)
+ *
+ * Description:
+ * Open a zone or all possible zones.
+ */
+int blkdev_open_zone(struct block_device *bdev,
+ sector_t sector, gfp_t gfp_mask)
+{
+ return blkdev_issue_zone_action(bdev, sector, REQ_OP_ZONE_OPEN,
+ gfp_mask);
+}
+
+/**
+ * blkdev_close_zone - Close an open zone
+ * @bdev: target block device
+ * @sector: A sector of the zone to close, or ~0ULL for all zones.
+ * @gfp_mask: memory allocation flags (for bio_alloc)
+ *
+ * Description:
+ * Close a zone or all open zones.
+ */
+int blkdev_close_zone(struct block_device *bdev,
+ sector_t sector, gfp_t gfp_mask)
+{
+ return blkdev_issue_zone_action(bdev, sector, REQ_OP_ZONE_CLOSE,
+ gfp_mask);
+}
+
+/**
+ * blkdev_finish_zone - Finish a zone (make it full)
+ * @bdev: target block device
+ * @sector: A sector of the zone to finish, or ~0ULL for all zones.
+ * @gfp_mask: memory allocation flags (for bio_alloc)
+ *
+ * Description:
+ * Finish one zone or all possible zones.
+ */
+int blkdev_finish_zone(struct block_device *bdev,
+ sector_t sector, gfp_t gfp_mask)
+{
+ return blkdev_issue_zone_action(bdev, sector, REQ_OP_ZONE_FINISH,
+ gfp_mask);
+}
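
The four action wrappers above all funnel into blkdev_issue_zone_action(). As a usage sketch (a hypothetical helper with the error handling trimmed), a caller wiping a device might reset all zones and then explicitly open the zone it intends to write first:

/* Hypothetical helper: reset every zone, then open the first one. */
static int example_reinit_device(struct block_device *bdev,
				 sector_t first_zone_sector)
{
	int ret;

	/* ~0ULL selects all zones, as in blkdev_issue_zone_action(). */
	ret = blkdev_reset_zone(bdev, ~0ULL, GFP_KERNEL);
	if (ret)
		return ret;

	return blkdev_open_zone(bdev, first_zone_sector, GFP_KERNEL);
}
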
@@ -24,6 +24,7 @@
#include <linux/rcupdate.h>
#include <linux/percpu-refcount.h>
#include <linux/scatterlist.h>
+#include <linux/bit_spinlock.h>
struct module;
struct scsi_ioctl_command;
@@ -302,6 +303,113 @@ struct queue_limits {
unsigned char zoned;
};
+#ifdef CONFIG_BLK_DEV_ZONED
+
+enum blk_zone_type {
+ BLK_ZONE_TYPE_UNKNOWN,
+ BLK_ZONE_TYPE_CONVENTIONAL,
+ BLK_ZONE_TYPE_SEQWRITE_REQ,
+ BLK_ZONE_TYPE_SEQWRITE_PREF,
+};
+
+enum blk_zone_cond {
+ BLK_ZONE_COND_NO_WP,
+ BLK_ZONE_COND_EMPTY,
+ BLK_ZONE_COND_IMP_OPEN,
+ BLK_ZONE_COND_EXP_OPEN,
+ BLK_ZONE_COND_CLOSED,
+ BLK_ZONE_COND_READONLY = 0xd,
+ BLK_ZONE_COND_FULL,
+ BLK_ZONE_COND_OFFLINE,
+};
+
+enum blk_zone_flags {
+ BLK_ZONE_LOCKED,
+ BLK_ZONE_WRITE_LOCKED,
+ BLK_ZONE_IN_UPDATE,
+};
+
+/*
+ * Zone descriptor. On 64-bit architectures, this structure aligns on
+ * sizeof(long), i.e. 64 bits, and occupies 64 B.
+ */
+struct blk_zone {
+ struct rb_node node;
+ unsigned long flags;
+ sector_t len;
+ sector_t start;
+ sector_t wp;
+ unsigned int type : 4;
+ unsigned int cond : 4;
+ unsigned int non_seq : 1;
+ unsigned int reset : 1;
+};
+
+#define blk_zone_is_seq_req(z) ((z)->type == BLK_ZONE_TYPE_SEQWRITE_REQ)
+#define blk_zone_is_seq_pref(z) ((z)->type == BLK_ZONE_TYPE_SEQWRITE_PREF)
+#define blk_zone_is_seq(z) (blk_zone_is_seq_req(z) || blk_zone_is_seq_pref(z))
+#define blk_zone_is_conv(z) ((z)->type == BLK_ZONE_TYPE_CONVENTIONAL)
+
+#define blk_zone_is_readonly(z) ((z)->cond == BLK_ZONE_COND_READONLY)
+#define blk_zone_is_offline(z) ((z)->cond == BLK_ZONE_COND_OFFLINE)
+#define blk_zone_is_full(z) ((z)->cond == BLK_ZONE_COND_FULL)
+#define blk_zone_is_empty(z) ((z)->cond == BLK_ZONE_COND_EMPTY)
+#define blk_zone_is_open(z) ((z)->cond == BLK_ZONE_COND_EXP_OPEN)
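
Given the descriptor fields and the type/condition helpers above, users of the zone cache can derive simple quantities directly from a descriptor. As an illustrative helper (not part of this patch), the number of sectors still writable in a sequential zone follows from the cached write pointer:

/*
 * Illustrative only: remaining writable sectors of a sequential zone.
 * The caller is expected to hold the zone lock so that wp is stable.
 */
static inline sector_t example_zone_sectors_left(struct blk_zone *zone)
{
	if (!blk_zone_is_seq(zone) || blk_zone_is_full(zone))
		return 0;

	return zone->start + zone->len - zone->wp;
}
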
+
+static inline void blk_lock_zone(struct blk_zone *zone)
+{
+ bit_spin_lock(BLK_ZONE_LOCKED, &zone->flags);
+}
+
+static inline int blk_trylock_zone(struct blk_zone *zone)
+{
+ return bit_spin_trylock(BLK_ZONE_LOCKED, &zone->flags);
+}
+
+static inline void blk_unlock_zone(struct blk_zone *zone)
+{
+ bit_spin_unlock(BLK_ZONE_LOCKED, &zone->flags);
+}
+
+static inline int blk_try_write_lock_zone(struct blk_zone *zone)
+{
+ return !test_and_set_bit(BLK_ZONE_WRITE_LOCKED, &zone->flags);
+}
+
+static inline void blk_write_unlock_zone(struct blk_zone *zone)
+{
+ clear_bit_unlock(BLK_ZONE_WRITE_LOCKED, &zone->flags);
+ smp_mb__after_atomic();
+}
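
The write-lock bit is meant to serialize writes to a sequential zone, since ZBC/ZAC host-managed devices only accept writes at the write pointer. A hedged sketch of a dispatch-side check follows; the helper name and the requeue-on-failure policy are assumptions, not part of this patch.

/*
 * Hypothetical dispatch check: allow a single write in flight per
 * sequential zone. The caller requeues the request when this fails
 * and calls blk_write_unlock_zone() when the write completes.
 */
static bool example_prep_zone_write(struct blk_zone *zone)
{
	if (!blk_zone_is_seq(zone))
		return true;

	return blk_try_write_lock_zone(zone);
}
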
+
+extern void blk_init_zones(struct request_queue *);
+extern void blk_drop_zones(struct request_queue *);
+extern struct blk_zone *blk_insert_zone(struct request_queue *,
+ struct blk_zone *);
+extern struct blk_zone *blk_lookup_zone(struct request_queue *, sector_t);
+
+extern int blkdev_update_zones(struct block_device *, gfp_t);
+extern void blk_wait_for_zone_update(struct blk_zone *);
+#define blk_zone_in_update(z) test_bit(BLK_ZONE_IN_UPDATE, &(z)->flags)
+static inline void blk_clear_zone_update(struct blk_zone *zone)
+{
+ clear_bit_unlock(BLK_ZONE_IN_UPDATE, &zone->flags);
+ smp_mb__after_atomic();
+ wake_up_bit(&zone->flags, BLK_ZONE_IN_UPDATE);
+}
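
blk_clear_zone_update() is the driver-side counterpart of blk_wait_for_zone_update(): it publishes refreshed zone information and wakes any waiter. A hedged sketch of a report-zones completion path (the helper name and the fields it updates are chosen for illustration):

/* Hypothetical completion path: publish new zone state and wake waiters. */
static void example_zone_update_done(struct blk_zone *zone,
				     sector_t new_wp, unsigned int new_cond)
{
	blk_lock_zone(zone);
	zone->wp = new_wp;
	zone->cond = new_cond;
	blk_clear_zone_update(zone);
	blk_unlock_zone(zone);
}
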
+
+extern struct blk_zone *blkdev_report_zone(struct block_device *,
+ sector_t, bool, gfp_t);
+extern int blkdev_reset_zone(struct block_device *, sector_t, gfp_t);
+extern int blkdev_open_zone(struct block_device *, sector_t, gfp_t);
+extern int blkdev_close_zone(struct block_device *, sector_t, gfp_t);
+extern int blkdev_finish_zone(struct block_device *, sector_t, gfp_t);
+#else /* CONFIG_BLK_DEV_ZONED */
+static inline void blk_init_zones(struct request_queue *q) { }
+static inline void blk_drop_zones(struct request_queue *q) { }
+#endif /* CONFIG_BLK_DEV_ZONED */
+
struct request_queue {
/*
* Together with queue_head for cacheline sharing
@@ -404,6 +512,11 @@ struct request_queue {
unsigned int nr_pending;
#endif
+#ifdef CONFIG_BLK_DEV_ZONED
+ spinlock_t zones_lock;
+ struct rb_root zones;
+#endif
+
/*
* queue settings
*/