diff mbox

[v2,7/7] blk-zoned: implement ioctls

Message ID 1474888483-29762-8-git-send-email-damien.lemoal@hgst.com (mailing list archive)
State New, archived
Headers show

Commit Message

Damien Le Moal Sept. 26, 2016, 11:14 a.m. UTC
From: Shaun Tancheff <shaun.tancheff@seagate.com>

Add the new BLKREPORTZONE and BLKRESETZONE ioctls for, respectively,
obtaining the zone configuration of a zoned block device and resetting
the write pointer of sequential zones of a zoned block device.

The BLKREPORTZONE ioctl maps directly to a single call of the function
blkdev_report_zones. The zone information result is passed as an array
of struct blk_zone identical to the structure used internally for
processing the REQ_OP_ZONE_REPORT operation.  The BLKRESETZONE ioctl
maps to a single call of the blkdev_reset_zones function.

Signed-off-by: Shaun Tancheff <shaun.tancheff@seagate.com>
Signed-off-by: Damien Le Moal <damien.lemoal@hgst.com>
---
 block/blk-zoned.c             |  95 ++++++++++++++++++++++++++++
 block/ioctl.c                 |   4 ++
 include/linux/blkdev.h        |  65 +++++++------------
 include/uapi/linux/Kbuild     |   1 +
 include/uapi/linux/blkzoned.h | 143 ++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/fs.h       |   4 ++
 6 files changed, 270 insertions(+), 42 deletions(-)
 create mode 100644 include/uapi/linux/blkzoned.h

Comments

Christoph Hellwig Sept. 26, 2016, 4:37 p.m. UTC | #1
> +	zones = kzalloc(sizeof(struct blk_zone) * rep.nr_zones,
> +			GFP_KERNEL);
> +	if (!zones)
> +		return -ENOMEM;

This should use kcalloc to get us overflow checking for the user
controlled allocation size.

> +	if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) {
> +		ret = -EFAULT;
> +		goto out;
> +	}
> +
> +	if (rep.nr_zones) {
> +		if (copy_to_user(argp + sizeof(struct blk_zone_report), zones,
> +				 sizeof(struct blk_zone) * rep.nr_zones))
> +			ret = -EFAULT;
> +	}

We could actually do this with a single big copy_to_user.  Not that
it really matters, though..

> -/*
> - * Zone type.
> - */
> -enum blk_zone_type {
> -	BLK_ZONE_TYPE_UNKNOWN,
> -	BLK_ZONE_TYPE_CONVENTIONAL,
> -	BLK_ZONE_TYPE_SEQWRITE_REQ,
> -	BLK_ZONE_TYPE_SEQWRITE_PREF,
> -};

Please don't move this code around after it was added just two
patches earlier.  I'd say just split adding the new blkzoned.h
uapi header into a patch of its own and add that before the
core block code.
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Shaun Tancheff Sept. 26, 2016, 11:12 p.m. UTC | #2
On Mon, Sep 26, 2016 at 11:37 AM, Christoph Hellwig <hch@infradead.org> wrote:
>> +     zones = kzalloc(sizeof(struct blk_zone) * rep.nr_zones,
>> +                     GFP_KERNEL);
>> +     if (!zones)
>> +             return -ENOMEM;
>
> This should use kcalloc to get us underflow checking for the user
> controlled allocation size.

Ah. yes. Will fix that.

>> +     if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) {
>> +             ret = -EFAULT;
>> +             goto out;
>> +     }
>> +
>> +     if (rep.nr_zones) {
>> +             if (copy_to_user(argp + sizeof(struct blk_zone_report), zones,
>> +                              sizeof(struct blk_zone) * rep.nr_zones))
>> +                     ret = -EFAULT;
>> +     }
>
> We could actually do this with a single big copy_to_user.  Not that
> it really matters, though..

Except our source locations are disjoint (stack and kcalloc'd).

>> -/*
>> - * Zone type.
>> - */
>> -enum blk_zone_type {
>> -     BLK_ZONE_TYPE_UNKNOWN,
>> -     BLK_ZONE_TYPE_CONVENTIONAL,
>> -     BLK_ZONE_TYPE_SEQWRITE_REQ,
>> -     BLK_ZONE_TYPE_SEQWRITE_PREF,
>> -};
>
> Please don't move this code around after it was added just two
> patches earlier.  I'd say just split adding the new blkzoned.h
> uapi header into a patch of it's own and add that before the
> core block code.

Sounds good. Will reshuffle the patchset tonight.

Thanks!
Damien Le Moal Sept. 26, 2016, 11:30 p.m. UTC | #3
Christoph,

On 9/27/16 01:37, Christoph Hellwig wrote:
>> -/*
>> - * Zone type.
>> - */
>> -enum blk_zone_type {
>> -	BLK_ZONE_TYPE_UNKNOWN,
>> -	BLK_ZONE_TYPE_CONVENTIONAL,
>> -	BLK_ZONE_TYPE_SEQWRITE_REQ,
>> -	BLK_ZONE_TYPE_SEQWRITE_PREF,
>> -};
> 
> Please don't move this code around after it was added just two
> patches earlier.  I'd say just split adding the new blkzoned.h
> uapi header into a patch of it's own and add that before the
> core block code.

Or we could just simply merge patches 5 and 7... Even more simple.
Would that be OK ? Shaun, any objection ?

Best regards.
Shaun Tancheff Sept. 26, 2016, 11:58 p.m. UTC | #4
No objection here.

On Mon, Sep 26, 2016 at 6:30 PM, Damien Le Moal <damien.lemoal@hgst.com> wrote:
>
> Christoph,
>
> On 9/27/16 01:37, Christoph Hellwig wrote:
>>> -/*
>>> - * Zone type.
>>> - */
>>> -enum blk_zone_type {
>>> -    BLK_ZONE_TYPE_UNKNOWN,
>>> -    BLK_ZONE_TYPE_CONVENTIONAL,
>>> -    BLK_ZONE_TYPE_SEQWRITE_REQ,
>>> -    BLK_ZONE_TYPE_SEQWRITE_PREF,
>>> -};
>>
>> Please don't move this code around after it was added just two
>> patches earlier.  I'd say just split adding the new blkzoned.h
>> uapi header into a patch of it's own and add that before the
>> core block code.
>
> Or we could just simply merge patches 5 and 7... Even more simple.
> Would that be OK ? Shaun, any objection ?
>
> Best regards.
>
> --
> Damien Le Moal, Ph.D.
> Sr. Manager, System Software Group, HGST Research,
> HGST, a Western Digital brand
> Damien.LeMoal@hgst.com
> (+81) 0466-98-3593 (ext. 513593)
> 1 kirihara-cho, Fujisawa,
> Kanagawa, 252-0888 Japan
> www.hgst.com
> Western Digital Corporation (and its subsidiaries) E-mail Confidentiality Notice & Disclaimer:
>
> This e-mail and any files transmitted with it may contain confidential or legally privileged information of WDC and/or its affiliates, and are intended solely for the use of the individual or entity to which they are addressed. If you are not the intended recipient, any disclosure, copying, distribution or any action taken or omitted to be taken in reliance on it, is prohibited. If you have received this e-mail in error, please notify the sender immediately and delete the e-mail in its entirety from your system.
>
Christoph Hellwig Sept. 27, 2016, 6:24 p.m. UTC | #5
On Mon, Sep 26, 2016 at 06:12:24PM -0500, Shaun Tancheff wrote:
> Except our source locations are disjoint (stack and kcalloc'd).

Indeed.
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 473cb0a..8c70bd6 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -12,6 +12,7 @@ 
 #include <linux/module.h>
 #include <linux/rbtree.h>
 #include <linux/blkdev.h>
+#include <linux/blkzoned.h>
 
 static inline sector_t blk_zone_start(struct request_queue *q,
 				      sector_t sector)
@@ -238,3 +239,97 @@  int blkdev_reset_zones(struct block_device *bdev,
 
 	return 0;
 }
+
+/**
+ * BLKREPORTZONE ioctl processing: report zone information to user space.
+ * On success, the report header (nr_zones updated to the number of zones
+ * reported) and that many struct blk_zone entries are copied back to @arg.
+ * Called from blkdev_ioctl. Returns 0 on success, an error code otherwise.
+ */
+int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
+			      unsigned int cmd, unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+	struct request_queue *q;
+	struct blk_zone_report rep;
+	struct blk_zone *zones;
+	int ret;
+
+	if (!argp)
+		return -EINVAL;
+
+	q = bdev_get_queue(bdev);
+	if (!q)
+		return -ENXIO;
+
+	if (!blk_queue_is_zoned(q))
+		return -ENOTTY;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
+		return -EFAULT;
+
+	if (!rep.nr_zones)
+		return -EINVAL;
+
+	/* User-controlled count: kcalloc() rejects a size overflow. */
+	zones = kcalloc(rep.nr_zones, sizeof(struct blk_zone), GFP_KERNEL);
+	if (!zones)
+		return -ENOMEM;
+
+	ret = blkdev_report_zones(bdev, rep.sector, zones, &rep.nr_zones,
+				  GFP_KERNEL);
+	if (ret)
+		goto out;
+
+	if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	if (rep.nr_zones &&
+	    copy_to_user(argp + sizeof(struct blk_zone_report), zones,
+			 sizeof(struct blk_zone) * rep.nr_zones))
+		ret = -EFAULT;
+
+ out:
+	kfree(zones);
+
+	return ret;
+}
+
+/**
+ * BLKRESETZONE ioctl processing: reset the zones in a user-supplied range.
+ * Called from blkdev_ioctl.
+ */
+int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
+			     unsigned int cmd, unsigned long arg)
+{
+	struct blk_zone_range __user *urange = (void __user *)arg;
+	struct blk_zone_range range;
+	struct request_queue *queue;
+
+	if (!urange)
+		return -EINVAL;
+
+	queue = bdev_get_queue(bdev);
+	if (!queue)
+		return -ENXIO;
+
+	if (!blk_queue_is_zoned(queue))
+		return -ENOTTY;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+	/* The device must have been opened for writing. */
+	if (!(mode & FMODE_WRITE))
+		return -EBADF;
+
+	if (copy_from_user(&range, urange, sizeof(range)))
+		return -EFAULT;
+
+	return blkdev_reset_zones(bdev, range.sector, range.nr_sectors,
+				  GFP_KERNEL);
+}
diff --git a/block/ioctl.c b/block/ioctl.c
index ed2397f..448f78a 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -513,6 +513,10 @@  int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 				BLKDEV_DISCARD_SECURE);
 	case BLKZEROOUT:
 		return blk_ioctl_zeroout(bdev, mode, arg);
+	case BLKREPORTZONE:
+		return blkdev_report_zones_ioctl(bdev, mode, cmd, arg);
+	case BLKRESETZONE:
+		return blkdev_reset_zones_ioctl(bdev, mode, cmd, arg);
 	case HDIO_GETGEO:
 		return blkdev_getgeo(bdev, argp);
 	case BLKRAGET:
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6034f38..0a75285 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -24,6 +24,7 @@ 
 #include <linux/rcupdate.h>
 #include <linux/percpu-refcount.h>
 #include <linux/scatterlist.h>
+#include <linux/blkzoned.h>
 
 struct module;
 struct scsi_ioctl_command;
@@ -304,48 +305,6 @@  struct queue_limits {
 
 #ifdef CONFIG_BLK_DEV_ZONED
 
-/*
- * Zone type.
- */
-enum blk_zone_type {
-	BLK_ZONE_TYPE_UNKNOWN,
-	BLK_ZONE_TYPE_CONVENTIONAL,
-	BLK_ZONE_TYPE_SEQWRITE_REQ,
-	BLK_ZONE_TYPE_SEQWRITE_PREF,
-};
-
-/*
- * Zone condition.
- */
-enum blk_zone_cond {
-	BLK_ZONE_COND_NO_WP,
-	BLK_ZONE_COND_EMPTY,
-	BLK_ZONE_COND_IMP_OPEN,
-	BLK_ZONE_COND_EXP_OPEN,
-	BLK_ZONE_COND_CLOSED,
-	BLK_ZONE_COND_READONLY = 0xd,
-	BLK_ZONE_COND_FULL,
-	BLK_ZONE_COND_OFFLINE,
-};
-
-/*
- * Zone descriptor for BLKREPORTZONE.
- * start, len and wp use the regulare 512 B sector unit,
- * regardless of the device logical block size. The overall
- * structure size is 64 B to match the ZBC/ZAC defined zone descriptor
- * and allow support for future additional zone information.
- */
-struct blk_zone {
-	u64	start;		/* Zone start sector */
-	u64	len;		/* Zone length in number of sectors */
-	u64	wp;		/* Zone write pointer position */
-	u8	type;		/* Zone type */
-	u8	cond;		/* Zone condition */
-	u8	non_seq;	/* Non-sequential write resources active */
-	u8	reset;		/* Reset write pointer recommended */
-	u8	reserved[36];
-};
-
 struct blk_zone_report_hdr {
 	unsigned int	nr_zones;
 	u8		padding[60];
@@ -356,6 +315,28 @@  extern int blkdev_report_zones(struct block_device *,
 				unsigned int *, gfp_t);
 extern int blkdev_reset_zones(struct block_device *, sector_t,
 				sector_t, gfp_t);
+
+extern int blkdev_report_zones_ioctl(struct block_device *, fmode_t,
+				     unsigned int, unsigned long);
+extern int blkdev_reset_zones_ioctl(struct block_device *, fmode_t,
+				    unsigned int, unsigned long);
+
+#else /* CONFIG_BLK_DEV_ZONED */
+/* Zoned block device support not configured: zone ioctls return -ENOTTY. */
+static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
+					    fmode_t mode, unsigned int cmd,
+					    unsigned long arg)
+{
+	return -ENOTTY;
+}
+
+static inline int blkdev_reset_zones_ioctl(struct block_device *bdev,
+					   fmode_t mode, unsigned int cmd,
+					   unsigned long arg)
+{
+	return -ENOTTY;
+}
+
 #endif /* CONFIG_BLK_DEV_ZONED */
 
 struct request_queue {
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index dd60439..92466a6 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -70,6 +70,7 @@  header-y += bfs_fs.h
 header-y += binfmts.h
 header-y += blkpg.h
 header-y += blktrace_api.h
+header-y += blkzoned.h
 header-y += bpf_common.h
 header-y += bpf_perf_event.h
 header-y += bpf.h
diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
new file mode 100644
index 0000000..40d1d7b
--- /dev/null
+++ b/include/uapi/linux/blkzoned.h
@@ -0,0 +1,143 @@ 
+/*
+ * Zoned block devices handling.
+ *
+ * Copyright (C) 2015 Seagate Technology PLC
+ *
+ * Written by: Shaun Tancheff <shaun.tancheff@seagate.com>
+ *
+ * Modified by: Damien Le Moal <damien.lemoal@hgst.com>
+ * Copyright (C) 2016 Western Digital
+ *
+ * This file is licensed under  the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+#ifndef _UAPI_BLKZONED_H
+#define _UAPI_BLKZONED_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+/**
+ * enum blk_zone_type - Types of zones allowed in a zoned device.
+ *
+ * @BLK_ZONE_TYPE_CONVENTIONAL: The zone has no write pointer and can be written
+ *                              randomly. Zone reset has no effect on the zone.
+ * @BLK_ZONE_TYPE_SEQWRITE_REQ: The zone must be written sequentially.
+ * @BLK_ZONE_TYPE_SEQWRITE_PREF: The zone can be written non-sequentially.
+ *
+ * Any other value not defined is reserved and must be considered as invalid.
+ */
+enum blk_zone_type {
+	BLK_ZONE_TYPE_CONVENTIONAL	= 0x1,
+	BLK_ZONE_TYPE_SEQWRITE_REQ	= 0x2,
+	BLK_ZONE_TYPE_SEQWRITE_PREF	= 0x3,
+};
+
+/**
+ * enum blk_zone_cond - Condition [state] of a zone in a zoned device.
+ *
+ * @BLK_ZONE_COND_NOT_WP: The zone has no write pointer, it is conventional.
+ * @BLK_ZONE_COND_EMPTY: The zone is empty.
+ * @BLK_ZONE_COND_IMP_OPEN: The zone is open, but not explicitly opened.
+ * @BLK_ZONE_COND_EXP_OPEN: The zone was explicitly opened by an
+ *                          OPEN ZONE command.
+ * @BLK_ZONE_COND_CLOSED: The zone was [explicitly] closed after writing.
+ * @BLK_ZONE_COND_FULL: The zone is marked as full, possibly by a
+ *                      FINISH ZONE command.
+ * @BLK_ZONE_COND_READONLY: The zone is read-only.
+ * @BLK_ZONE_COND_OFFLINE: The zone is offline (sectors cannot be read/written).
+ *
+ * The Zone Condition state machine in the ZBC/ZAC standards maps the above
+ * definitions as:
+ *   - ZC1: Empty         | BLK_ZONE_COND_EMPTY
+ *   - ZC2: Implicit Open | BLK_ZONE_COND_IMP_OPEN
+ *   - ZC3: Explicit Open | BLK_ZONE_COND_EXP_OPEN
+ *   - ZC4: Closed        | BLK_ZONE_COND_CLOSED
+ *   - ZC5: Full          | BLK_ZONE_COND_FULL
+ *   - ZC6: Read Only     | BLK_ZONE_COND_READONLY
+ *   - ZC7: Offline       | BLK_ZONE_COND_OFFLINE
+ *
+ * Conditions 0x5 to 0xC are reserved by the current ZBC/ZAC spec and should
+ * be considered invalid.
+ */
+enum blk_zone_cond {
+	BLK_ZONE_COND_NOT_WP	= 0x0,
+	BLK_ZONE_COND_EMPTY	= 0x1,
+	BLK_ZONE_COND_IMP_OPEN	= 0x2,
+	BLK_ZONE_COND_EXP_OPEN	= 0x3,
+	BLK_ZONE_COND_CLOSED	= 0x4,
+	BLK_ZONE_COND_READONLY	= 0xD,
+	BLK_ZONE_COND_FULL	= 0xE,
+	BLK_ZONE_COND_OFFLINE	= 0xF,
+};
+
+/**
+ * struct blk_zone - Zone descriptor for BLKREPORTZONE ioctl.
+ *
+ * @start: Zone start in 512 B sector units
+ * @len: Zone length in 512 B sector units
+ * @wp: Zone write pointer location in 512 B sector units
+ * @type: see enum blk_zone_type for possible values
+ * @cond: see enum blk_zone_cond for possible values
+ * @non_seq: Flag indicating that non-sequential write resources are active
+ *           (for host-aware zoned block devices only).
+ * @reset: Flag indicating that a reset of the zone write pointer is recommended.
+ * @reserved: Padding to 64 B to match the ZBC/ZAC defined zone descriptor size.
+ *
+ * start, len and wp use the regular 512 B sector unit, regardless of the
+ * device logical block size. The overall structure size is 64 B to match the
+ * ZBC/ZAC defined zone descriptor and allow support for future additional
+ * zone information.
+ */
+struct blk_zone {
+	__u64	start;		/* Zone start sector */
+	__u64	len;		/* Zone length in number of sectors */
+	__u64	wp;		/* Zone write pointer position */
+	__u8	type;		/* Zone type */
+	__u8	cond;		/* Zone condition */
+	__u8	non_seq;	/* Non-sequential write resources active */
+	__u8	reset;		/* Reset write pointer recommended */
+	__u8	reserved[36];
+};
+
+/**
+ * struct blk_zone_report - BLKREPORTZONE ioctl request/reply
+ *
+ * @sector: starting sector of report
+ * @nr_zones: IN maximum number of zones to report / OUT actual number reported
+ * @reserved: padding to 16 byte alignment
+ * @zones: Space to hold @nr_zones @zones entries on reply.
+ *
+ * The array of at most @nr_zones must follow this structure in memory.
+ */
+struct blk_zone_report {
+	__u64		sector;
+	__u32		nr_zones;
+	__u8		reserved[4];
+	struct blk_zone zones[0];
+} __attribute__((packed));	/* uapi header: no kernel-only __packed */
+
+/**
+ * struct blk_zone_range - BLKRESETZONE ioctl request
+ * @sector: Starting sector of the first zone whose write pointer is reset
+ * @nr_sectors: Total number of sectors of the one or more zones to reset
+ */
+struct blk_zone_range {
+	__u64		sector;
+	__u64		nr_sectors;
+};
+
+/**
+ * Zoned block device ioctls:
+ *
+ * @BLKREPORTZONE: Get zone information. Takes a zone report as argument.
+ *                 The zone report will start from the zone containing the
+ *                 sector specified in the report request structure.
+ * @BLKRESETZONE: Reset the write pointer of the zones in the specified
+ *                sector range. The sector range must be zone aligned.
+ */
+#define BLKREPORTZONE	_IOWR(0x12, 130, struct blk_zone_report)
+#define BLKRESETZONE	_IOW(0x12, 131, struct blk_zone_range)
+
+#endif /* _UAPI_BLKZONED_H */
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 3b00f7c..e0fc7f0 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -222,6 +222,10 @@  struct fsxattr {
 #define BLKSECDISCARD _IO(0x12,125)
 #define BLKROTATIONAL _IO(0x12,126)
 #define BLKZEROOUT _IO(0x12,127)
+/*
+ * A jump here: 130-131 are reserved for zoned block devices
+ * (see uapi/linux/blkzoned.h)
+ */
 
 #define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
 #define FIBMAP	   _IO(0x00,1)	/* bmap access */