diff mbox

[V2,09/12] scsi: sd_zbc: Limit zone write locking to sequential zones

Message ID 20170907161640.30465-10-damien.lemoal@wdc.com (mailing list archive)
State New, archived
Headers show

Commit Message

Damien Le Moal Sept. 7, 2017, 4:16 p.m. UTC
Zoned block devices have no write constraints for conventional zones
so write locking of conventional zones is not necessary and can hurt
performance. To avoid this, introduce the seq_zones bitmap to indicate
if a zone is a sequential one. Use this information to allow any write
to be issued to conventional zones, locking only sequential zones.

As the zone bitmap allocation for seq_zones is identical to the zones
write lock bitmap, introduce the helper sd_zbc_alloc_zone_bitmap().
Using this helper, wait for the disk capacity and number of zones to
stabilize on the second revalidation pass to allocate and initialize
the bitmaps.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Bart Van Assche <Bart.VanAssche@wdc.com>
---
 drivers/scsi/sd.h     |   1 +
 drivers/scsi/sd_zbc.c | 108 ++++++++++++++++++++++++++++++++++++++++++++------
 drivers/scsi/sd_zbc.h |  12 ++++++
 3 files changed, 108 insertions(+), 13 deletions(-)

Comments

Johannes Thumshirn Sept. 8, 2017, 8:39 a.m. UTC | #1
On Fri, Sep 08, 2017 at 01:16:37AM +0900, Damien Le Moal wrote:
> +/*
> + * Allocate a zone bitmap (one bit per zone).
> + */
> +static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp)
> +{
> +	struct request_queue *q = sdkp->disk->queue;
> +
> +	return kzalloc_node(BITS_TO_LONGS(sdkp->nr_zones)
> +			    * sizeof(unsigned long),
> +			    GFP_KERNEL, q->node);
> +}

It's a shame we have all this overflow checking kcalloc() and
kmalloc_array() functions but none of them is taking NUMA nodes into
account.

But that has nothing to do with your patch, just a general rant,
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Christoph Hellwig Sept. 8, 2017, 9:48 a.m. UTC | #2
On Fri, Sep 08, 2017 at 10:39:25AM +0200, Johannes Thumshirn wrote:
> It's a shame we have all this overflow checking kcalloc() and
> kmalloc_array() functions but none of them is taking NUMA nodes into
> account.

Something that could be trivially fixed..
Johannes Thumshirn Sept. 8, 2017, 9:50 a.m. UTC | #3
On Fri, Sep 08, 2017 at 11:48:09AM +0200, Christoph Hellwig wrote:
> On Fri, Sep 08, 2017 at 10:39:25AM +0200, Johannes Thumshirn wrote:
> > It's a shame we have all this overflow checking kcalloc() and
> > kmalloc_array() functions but none of them is taking NUMA nodes into
> > account.
> 
> Something that could be trivially fixed..

I know I'm on it ;-)
diff mbox

Patch

diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index c9a627e7eebd..3ea82c8cecd5 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -77,6 +77,7 @@  struct scsi_disk {
 	unsigned int	zone_blocks;
 	unsigned int	zone_shift;
 	unsigned long	*zones_wlock;
+	unsigned long	*seq_zones;
 	unsigned int	zones_optimal_open;
 	unsigned int	zones_optimal_nonseq;
 	unsigned int	zones_max_open;
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 8b258254a2bb..bcece82a1d8d 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -252,7 +252,7 @@  int sd_zbc_write_lock_zone(struct scsi_cmnd *cmd)
 	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
 	sector_t sector = blk_rq_pos(rq);
 	sector_t zone_sectors = sd_zbc_zone_sectors(sdkp);
-	unsigned int zno = sd_zbc_zone_no(sdkp, sector);
+	unsigned int zno;
 
 	/*
 	 * Note: Checks of the alignment of the write command on
@@ -265,13 +265,15 @@  int sd_zbc_write_lock_zone(struct scsi_cmnd *cmd)
 		return BLKPREP_KILL;
 
 	/*
-	 * Do not issue more than one write at a time per
-	 * zone. This solves write ordering problems due to
-	 * the unlocking of the request queue in the dispatch
-	 * path in the non scsi-mq case.
+	 * There is no write constraint on conventional zones, but do not issue
+	 * more than one write at a time per sequential zone. This avoids write
+	 * ordering problems due to the unlocking of the request queue in the
+	 * dispatch path of the non scsi-mq (legacy) case.
 	 */
-	if (sdkp->zones_wlock &&
-	    test_and_set_bit(zno, sdkp->zones_wlock))
+	zno = sd_zbc_zone_no(sdkp, sector);
+	if (!test_bit(zno, sdkp->seq_zones))
+		return BLKPREP_OK;
+	if (test_and_set_bit(zno, sdkp->zones_wlock))
 		return BLKPREP_DEFER;
 
 	WARN_ON_ONCE(cmd->flags & SCMD_ZONE_WRITE_LOCK);
@@ -289,8 +291,9 @@  void sd_zbc_write_unlock_zone(struct scsi_cmnd *cmd)
 	struct request *rq = cmd->request;
 	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
 
-	if (sdkp->zones_wlock && cmd->flags & SCMD_ZONE_WRITE_LOCK) {
+	if (cmd->flags & SCMD_ZONE_WRITE_LOCK) {
 		unsigned int zno = sd_zbc_zone_no(sdkp, blk_rq_pos(rq));
+
 		WARN_ON_ONCE(!test_bit(zno, sdkp->zones_wlock));
 		cmd->flags &= ~SCMD_ZONE_WRITE_LOCK;
 		clear_bit_unlock(zno, sdkp->zones_wlock);
@@ -507,8 +510,67 @@  static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
 	return 0;
 }
 
+/*
+ * Initialize the sequential zone bitmap to allow identifying sequential zones.
+ */
+static int sd_zbc_setup_seq_zones(struct scsi_disk *sdkp)
+{
+	unsigned long *seq_zones;
+	sector_t block = 0;
+	unsigned char *buf;
+	unsigned char *rec;
+	unsigned int buf_len;
+	unsigned int list_length;
+	unsigned int n = 0;
+	int ret = -ENOMEM;
+
+	/* Allocate zone type bitmap */
+	seq_zones = sd_zbc_alloc_zone_bitmap(sdkp);
+	if (!seq_zones)
+		return -ENOMEM;
+
+	buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
+	if (!buf)
+		goto out;
+
+	while (block < sdkp->capacity) {
+
+		ret = sd_zbc_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, block);
+		if (ret)
+			goto out;
+
+		/* Parse reported zone descriptors */
+		list_length = get_unaligned_be32(&buf[0]) + 64;
+		rec = buf + 64;
+		buf_len = min(list_length, SD_ZBC_BUF_SIZE);
+		while (rec < buf + buf_len) {
+			if ((rec[0] & 0x0f) != ZBC_ZONE_TYPE_CONV)
+				set_bit(n, seq_zones);
+			block += get_unaligned_be64(&rec[8]);
+			rec += 64;
+			n++;
+		}
+
+	}
+
+	if (n != sdkp->nr_zones)
+		ret = -EIO;
+
+out:
+	kfree(buf);
+	if (ret) {
+		kfree(seq_zones);
+		return ret;
+	}
+
+	sdkp->seq_zones = seq_zones;
+
+	return 0;
+}
+
 static int sd_zbc_setup(struct scsi_disk *sdkp)
 {
+	int ret;
 
 	/* READ16/WRITE16 is mandatory for ZBC disks */
 	sdkp->device->use_16_for_rw = 1;
@@ -520,17 +582,37 @@  static int sd_zbc_setup(struct scsi_disk *sdkp)
 	sdkp->nr_zones =
 		round_up(sdkp->capacity, sdkp->zone_blocks) >> sdkp->zone_shift;
 
+	/*
+	 * Wait for the disk capacity to stabilize before
+	 * initializing zone related information.
+	 */
+	if (sdkp->first_scan)
+		return 0;
+
 	if (!sdkp->zones_wlock) {
-		sdkp->zones_wlock = kcalloc(BITS_TO_LONGS(sdkp->nr_zones),
-					    sizeof(unsigned long),
-					    GFP_KERNEL);
+		sdkp->zones_wlock = sd_zbc_alloc_zone_bitmap(sdkp);
 		if (!sdkp->zones_wlock)
 			return -ENOMEM;
 	}
 
+	if (!sdkp->seq_zones) {
+		ret = sd_zbc_setup_seq_zones(sdkp);
+		if (ret)
+			return ret;
+	}
+
 	return 0;
 }
 
+static void sd_zbc_cleanup(struct scsi_disk *sdkp)
+{
+	kfree(sdkp->seq_zones);
+	sdkp->seq_zones = NULL;
+
+	kfree(sdkp->zones_wlock);
+	sdkp->zones_wlock = NULL;
+}
+
 int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
 {
 	int ret;
@@ -582,14 +664,14 @@  int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
 
 err:
 	sdkp->capacity = 0;
+	sd_zbc_cleanup(sdkp);
 
 	return ret;
 }
 
 void sd_zbc_remove(struct scsi_disk *sdkp)
 {
-	kfree(sdkp->zones_wlock);
-	sdkp->zones_wlock = NULL;
+	sd_zbc_cleanup(sdkp);
 }
 
 void sd_zbc_print_zones(struct scsi_disk *sdkp)
diff --git a/drivers/scsi/sd_zbc.h b/drivers/scsi/sd_zbc.h
index b34002f0acbe..2b63ee352fa2 100644
--- a/drivers/scsi/sd_zbc.h
+++ b/drivers/scsi/sd_zbc.h
@@ -32,6 +32,18 @@  static inline unsigned int sd_zbc_zone_no(struct scsi_disk *sdkp,
 	return sectors_to_logical(sdkp->device, sector) >> sdkp->zone_shift;
 }
 
+/*
+ * Allocate a zone bitmap (one bit per zone).
+ */
+static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp)
+{
+	struct request_queue *q = sdkp->disk->queue;
+
+	return kzalloc_node(BITS_TO_LONGS(sdkp->nr_zones)
+			    * sizeof(unsigned long),
+			    GFP_KERNEL, q->node);
+}
+
 #else /* CONFIG_BLK_DEV_ZONED */
 
 static inline int sd_zbc_read_zones(struct scsi_disk *sdkp,