Message ID | 20190621130711.21986-2-mb@lightnvm.io (mailing list archive) |
---|---|
State | Not Applicable, archived |
Delegated to: | Mike Snitzer |
Headers | show |
Series | open, close, finish zone support | expand |
On 19-06-21 15:07:08, Matias Bjørling wrote: > @@ -226,6 +228,9 @@ int blkdev_reset_zones(struct block_device *bdev, > if (!blk_queue_is_zoned(q)) > return -EOPNOTSUPP; > > + if (!op_is_zone_mgmt_op(op)) > + return -EOPNOTSUPP; > + nitpick: -EINVAL looks better to return as Damien pointed out. Otherwise it looks good to me: Reviewed-by: Minwoo Im <minwoo.im.dev@gmail.com> -- dm-devel mailing list dm-devel@redhat.com https://www.redhat.com/mailman/listinfo/dm-devel
On 6/22/19 2:51 AM, Damien Le Moal wrote: > Matias, > > Some comments inline below. > > On 2019/06/21 22:07, Matias Bjørling wrote: >> From: Ajay Joshi <ajay.joshi@wdc.com> >> >> Zoned block devices allows one to control zone transitions by using >> explicit commands. The available transitions are: >> >> * Open zone: Transition a zone to open state. >> * Close zone: Transition a zone to closed state. >> * Finish zone: Transition a zone to full state. >> >> Allow kernel to issue these transitions by introducing >> blkdev_zones_mgmt_op() and add three new request opcodes: >> >> * REQ_IO_ZONE_OPEN, REQ_IO_ZONE_CLOSE, and REQ_OP_ZONE_FINISH >> >> Allow user-space to issue the transitions through the following ioctls: >> >> * BLKOPENZONE, BLKCLOSEZONE, and BLKFINISHZONE. >> >> Signed-off-by: Ajay Joshi <ajay.joshi@wdc.com> >> Signed-off-by: Aravind Ramesh <aravind.ramesh@wdc.com> >> Signed-off-by: Matias Bjørling <matias.bjorling@wdc.com> >> --- >> block/blk-core.c | 3 ++ >> block/blk-zoned.c | 51 ++++++++++++++++++++++--------- >> block/ioctl.c | 5 ++- >> include/linux/blk_types.h | 27 +++++++++++++++-- >> include/linux/blkdev.h | 57 ++++++++++++++++++++++++++++++----- >> include/uapi/linux/blkzoned.h | 17 +++++++++-- >> 6 files changed, 133 insertions(+), 27 deletions(-) >> >> diff --git a/block/blk-core.c b/block/blk-core.c >> index 8340f69670d8..c0f0dbad548d 100644 >> --- a/block/blk-core.c >> +++ b/block/blk-core.c >> @@ -897,6 +897,9 @@ generic_make_request_checks(struct bio *bio) >> goto not_supported; >> break; >> case REQ_OP_ZONE_RESET: >> + case REQ_OP_ZONE_OPEN: >> + case REQ_OP_ZONE_CLOSE: >> + case REQ_OP_ZONE_FINISH: >> if (!blk_queue_is_zoned(q)) >> goto not_supported; >> break; >> diff --git a/block/blk-zoned.c b/block/blk-zoned.c >> index ae7e91bd0618..d0c933593b93 100644 >> --- a/block/blk-zoned.c >> +++ b/block/blk-zoned.c >> @@ -201,20 +201,22 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector, >> EXPORT_SYMBOL_GPL(blkdev_report_zones); >> >> /** >> - * blkdev_reset_zones - Reset zones write pointer >> + * blkdev_zones_mgmt_op - Perform the specified operation on the zone(s) >> * @bdev: Target block device >> - * @sector: Start sector of the first zone to reset >> - * @nr_sectors: Number of sectors, at least the length of one zone >> + * @op: Operation to be performed on the zone(s) >> + * @sector: Start sector of the first zone to operate on >> + * @nr_sectors: Number of sectors, at least the length of one zone and >> + * must be zone size aligned. >> * @gfp_mask: Memory allocation flags (for bio_alloc) >> * >> * Description: >> - * Reset the write pointer of the zones contained in the range >> + * Perform the specified operation contained in the range > Perform the specified operation over the sector range >> * @sector..@sector+@nr_sectors. Specifying the entire disk sector range >> * is valid, but the specified range should not contain conventional zones. >> */ >> -int blkdev_reset_zones(struct block_device *bdev, >> - sector_t sector, sector_t nr_sectors, >> - gfp_t gfp_mask) >> +int blkdev_zones_mgmt_op(struct block_device *bdev, enum req_opf op, >> + sector_t sector, sector_t nr_sectors, >> + gfp_t gfp_mask) >> { >> struct request_queue *q = bdev_get_queue(bdev); >> sector_t zone_sectors; >> @@ -226,6 +228,9 @@ int blkdev_reset_zones(struct block_device *bdev, >> if (!blk_queue_is_zoned(q)) >> return -EOPNOTSUPP; >> >> + if (!op_is_zone_mgmt_op(op)) >> + return -EOPNOTSUPP; > > EINVAL may be better here. > >> + >> if (bdev_read_only(bdev)) >> return -EPERM; >> >> @@ -248,7 +253,7 @@ int blkdev_reset_zones(struct block_device *bdev, >> bio = blk_next_bio(bio, 0, gfp_mask); >> bio->bi_iter.bi_sector = sector; >> bio_set_dev(bio, bdev); >> - bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0); >> + bio_set_op_attrs(bio, op, 0); >> >> sector += zone_sectors; >> >> @@ -264,7 +269,7 @@ int blkdev_reset_zones(struct block_device *bdev, >> >> return ret; >> } >> -EXPORT_SYMBOL_GPL(blkdev_reset_zones); >> +EXPORT_SYMBOL_GPL(blkdev_zones_mgmt_op); >> >> /* >> * BLKREPORTZONE ioctl processing. >> @@ -329,15 +334,16 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, >> } >> >> /* >> - * BLKRESETZONE ioctl processing. >> + * Zone operation (open, close, finish or reset) ioctl processing. >> * Called from blkdev_ioctl. >> */ >> -int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode, >> - unsigned int cmd, unsigned long arg) >> +int blkdev_zones_mgmt_op_ioctl(struct block_device *bdev, fmode_t mode, >> + unsigned int cmd, unsigned long arg) >> { >> void __user *argp = (void __user *)arg; >> struct request_queue *q; >> struct blk_zone_range zrange; >> + enum req_opf op; >> >> if (!argp) >> return -EINVAL; >> @@ -358,8 +364,25 @@ int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode, >> if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range))) >> return -EFAULT; >> >> - return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors, >> - GFP_KERNEL); >> + switch (cmd) { >> + case BLKRESETZONE: >> + op = REQ_OP_ZONE_RESET; >> + break; >> + case BLKOPENZONE: >> + op = REQ_OP_ZONE_OPEN; >> + break; >> + case BLKCLOSEZONE: >> + op = REQ_OP_ZONE_CLOSE; >> + break; >> + case BLKFINISHZONE: >> + op = REQ_OP_ZONE_FINISH; >> + break; >> + default: >> + return -ENOTTY; >> + } >> + >> + return blkdev_zones_mgmt_op(bdev, op, zrange.sector, zrange.nr_sectors, >> + GFP_KERNEL); >> } >> >> static inline unsigned long *blk_alloc_zone_bitmap(int node, >> diff --git a/block/ioctl.c b/block/ioctl.c >> index 15a0eb80ada9..df7fe54db158 100644 >> --- a/block/ioctl.c >> +++ b/block/ioctl.c >> @@ -532,7 +532,10 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, >> case BLKREPORTZONE: >> return blkdev_report_zones_ioctl(bdev, mode, cmd, arg); >> case BLKRESETZONE: >> - return blkdev_reset_zones_ioctl(bdev, mode, cmd, arg); >> + case BLKOPENZONE: >> + case BLKCLOSEZONE: >> + case BLKFINISHZONE: >> + return blkdev_zones_mgmt_op_ioctl(bdev, mode, cmd, arg); >> case BLKGETZONESZ: >> return put_uint(arg, bdev_zone_sectors(bdev)); >> case BLKGETNRZONES: >> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h >> index 95202f80676c..067ef9242275 100644 >> --- a/include/linux/blk_types.h >> +++ b/include/linux/blk_types.h >> @@ -284,13 +284,20 @@ enum req_opf { >> REQ_OP_DISCARD = 3, >> /* securely erase sectors */ >> REQ_OP_SECURE_ERASE = 5, >> - /* reset a zone write pointer */ >> - REQ_OP_ZONE_RESET = 6, >> /* write the same sector many times */ >> REQ_OP_WRITE_SAME = 7, >> /* write the zero filled sector many times */ >> REQ_OP_WRITE_ZEROES = 9, >> >> + /* reset a zone write pointer */ >> + REQ_OP_ZONE_RESET = 16, >> + /* Open zone(s) */ >> + REQ_OP_ZONE_OPEN = 17, >> + /* Close zone(s) */ >> + REQ_OP_ZONE_CLOSE = 18, >> + /* Finish zone(s) */ >> + REQ_OP_ZONE_FINISH = 19, >> + >> /* SCSI passthrough using struct scsi_request */ >> REQ_OP_SCSI_IN = 32, >> REQ_OP_SCSI_OUT = 33, >> @@ -375,6 +382,22 @@ static inline void bio_set_op_attrs(struct bio *bio, unsigned op, >> bio->bi_opf = op | op_flags; >> } >> >> +/* >> + * Check if the op is zoned operation. > Check if op is a zone management operation. >> + */ >> +static inline bool op_is_zone_mgmt_op(enum req_opf op) > > Similarly to "op_is_write" pattern, "op_is_zone_mgmt" may be a better name. > >> +{ >> + switch (op) { >> + case REQ_OP_ZONE_RESET: >> + case REQ_OP_ZONE_OPEN: >> + case REQ_OP_ZONE_CLOSE: >> + case REQ_OP_ZONE_FINISH: >> + return true; >> + default: >> + return false; >> + } >> +} >> + >> static inline bool op_is_write(unsigned int op) >> { >> return (op & 1); >> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h >> index 592669bcc536..943084f9dc9c 100644 >> --- a/include/linux/blkdev.h >> +++ b/include/linux/blkdev.h >> @@ -348,14 +348,15 @@ extern unsigned int blkdev_nr_zones(struct block_device *bdev); >> extern int blkdev_report_zones(struct block_device *bdev, >> sector_t sector, struct blk_zone *zones, >> unsigned int *nr_zones, gfp_t gfp_mask); >> -extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors, >> - sector_t nr_sectors, gfp_t gfp_mask); >> extern int blk_revalidate_disk_zones(struct gendisk *disk); >> >> extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, >> unsigned int cmd, unsigned long arg); >> -extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode, >> - unsigned int cmd, unsigned long arg); >> +extern int blkdev_zones_mgmt_op_ioctl(struct block_device *bdev, fmode_t mode, >> + unsigned int cmd, unsigned long arg); >> +extern int blkdev_zones_mgmt_op(struct block_device *bdev, enum req_opf op, >> + sector_t sector, sector_t nr_sectors, >> + gfp_t gfp_mask); > > To keep the grouping of declarations, may be declare this one where > blkdev_reset_zones() was ? > >> >> #else /* CONFIG_BLK_DEV_ZONED */ >> >> @@ -376,15 +377,57 @@ static inline int blkdev_report_zones_ioctl(struct block_device *bdev, >> return -ENOTTY; >> } >> >> -static inline int blkdev_reset_zones_ioctl(struct block_device *bdev, >> - fmode_t mode, unsigned int cmd, >> - unsigned long arg) >> +static inline int blkdev_zones_mgmt_op_ioctl(struct block_device *bdev, >> + fmode_t mode, unsigned int cmd, >> + unsigned long arg) >> +{ >> + return -ENOTTY; >> +} >> + >> +static inline int blkdev_zones_mgmt_op(struct block_device *bdev, >> + enum req_opf op, >> + sector_t sector, sector_t nr_sectors, >> + gfp_t gfp_mask) >> { >> return -ENOTTY; > > That should be -ENOTSUPP. This is not an ioctl. The ioctl call is above this one. > >> } >> >> #endif /* CONFIG_BLK_DEV_ZONED */ >> >> +static inline int blkdev_reset_zones(struct block_device *bdev, >> + sector_t sector, sector_t nr_sectors, >> + gfp_t gfp_mask) >> +{ >> + return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_RESET, >> + sector, nr_sectors, gfp_mask); >> +} >> + >> +static inline int blkdev_open_zones(struct block_device *bdev, >> + sector_t sector, sector_t nr_sectors, >> + gfp_t gfp_mask) >> +{ >> + return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_OPEN, >> + sector, nr_sectors, gfp_mask); >> +} >> + >> +static inline int blkdev_close_zones(struct block_device *bdev, >> + sector_t sector, sector_t nr_sectors, >> + gfp_t gfp_mask) >> +{ >> + return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_CLOSE, >> + sector, nr_sectors, >> + gfp_mask); >> +} >> + >> +static inline int blkdev_finish_zones(struct block_device *bdev, >> + sector_t sector, sector_t nr_sectors, >> + gfp_t gfp_mask) >> +{ >> + return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_FINISH, >> + sector, nr_sectors, >> + gfp_mask); >> +} >> + >> struct request_queue { >> /* >> * Together with queue_head for cacheline sharing >> diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h >> index 498eec813494..701e0692b8d3 100644 >> --- a/include/uapi/linux/blkzoned.h >> +++ b/include/uapi/linux/blkzoned.h >> @@ -120,9 +120,11 @@ struct blk_zone_report { >> }; >> >> /** >> - * struct blk_zone_range - BLKRESETZONE ioctl request >> - * @sector: starting sector of the first zone to issue reset write pointer >> - * @nr_sectors: Total number of sectors of 1 or more zones to reset >> + * struct blk_zone_range - BLKRESETZONE/BLKOPENZONE/ >> + * BLKCLOSEZONE/BLKFINISHZONE ioctl >> + * request >> + * @sector: starting sector of the first zone to operate on >> + * @nr_sectors: Total number of sectors of all zones to operate on >> */ >> struct blk_zone_range { >> __u64 sector; >> @@ -139,10 +141,19 @@ struct blk_zone_range { >> * sector range. The sector range must be zone aligned. >> * @BLKGETZONESZ: Get the device zone size in number of 512 B sectors. >> * @BLKGETNRZONES: Get the total number of zones of the device. >> + * @BLKOPENZONE: Open the zones in the specified sector range. The >> + * sector range must be zone aligned. >> + * @BLKCLOSEZONE: Close the zones in the specified sector range. The >> + * sector range must be zone aligned. >> + * @BLKFINISHZONE: Finish the zones in the specified sector range. The >> + * sector range must be zone aligned. >> */ >> #define BLKREPORTZONE _IOWR(0x12, 130, struct blk_zone_report) >> #define BLKRESETZONE _IOW(0x12, 131, struct blk_zone_range) >> #define BLKGETZONESZ _IOR(0x12, 132, __u32) >> #define BLKGETNRZONES _IOR(0x12, 133, __u32) >> +#define BLKOPENZONE _IOW(0x12, 134, struct blk_zone_range) >> +#define BLKCLOSEZONE _IOW(0x12, 135, struct blk_zone_range) >> +#define BLKFINISHZONE _IOW(0x12, 136, struct blk_zone_range) >> >> #endif /* _UAPI_BLKZONED_H */ >> > > Thanks Damien. I was wondering if we should, instead of introducing three new ioctls, make a v2 of the zone mgmt API? Something like the following data structure being passed from user-space: struct blk_zone_mgmt { __u8 opcode; __u8 resv[3]; __u32 flags; __u64 sector; __u64 nr_sectors; __u64 resv1; /* to make room if we where do pass a data data pointer through this API */ }; -Matias -- dm-devel mailing list dm-devel@redhat.com https://www.redhat.com/mailman/listinfo/dm-devel
On 6/21/19 6:07 AM, Matias Bjørling wrote: > diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h > index 95202f80676c..067ef9242275 100644 > --- a/include/linux/blk_types.h > +++ b/include/linux/blk_types.h > @@ -284,13 +284,20 @@ enum req_opf { > REQ_OP_DISCARD = 3, > /* securely erase sectors */ > REQ_OP_SECURE_ERASE = 5, > - /* reset a zone write pointer */ > - REQ_OP_ZONE_RESET = 6, > /* write the same sector many times */ > REQ_OP_WRITE_SAME = 7, > /* write the zero filled sector many times */ > REQ_OP_WRITE_ZEROES = 9, > > + /* reset a zone write pointer */ > + REQ_OP_ZONE_RESET = 16, > + /* Open zone(s) */ > + REQ_OP_ZONE_OPEN = 17, > + /* Close zone(s) */ > + REQ_OP_ZONE_CLOSE = 18, > + /* Finish zone(s) */ > + REQ_OP_ZONE_FINISH = 19, > + > /* SCSI passthrough using struct scsi_request */ > REQ_OP_SCSI_IN = 32, > REQ_OP_SCSI_OUT = 33, > @@ -375,6 +382,22 @@ static inline void bio_set_op_attrs(struct bio *bio, unsigned op, > bio->bi_opf = op | op_flags; > } Are the new operation types ever passed to op_is_write()? The definition of that function is as follows: static inline bool op_is_write(unsigned int op) { return (op & 1); } -- dm-devel mailing list dm-devel@redhat.com https://www.redhat.com/mailman/listinfo/dm-devel
On 6/24/19 12:43 PM, Bart Van Assche wrote: > On 6/21/19 6:07 AM, Matias Bjørling wrote: >> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h >> index 95202f80676c..067ef9242275 100644 >> --- a/include/linux/blk_types.h >> +++ b/include/linux/blk_types.h >> @@ -284,13 +284,20 @@ enum req_opf { >> REQ_OP_DISCARD = 3, >> /* securely erase sectors */ >> REQ_OP_SECURE_ERASE = 5, >> - /* reset a zone write pointer */ >> - REQ_OP_ZONE_RESET = 6, >> /* write the same sector many times */ >> REQ_OP_WRITE_SAME = 7, >> /* write the zero filled sector many times */ >> REQ_OP_WRITE_ZEROES = 9, >> >> + /* reset a zone write pointer */ >> + REQ_OP_ZONE_RESET = 16, >> + /* Open zone(s) */ >> + REQ_OP_ZONE_OPEN = 17, >> + /* Close zone(s) */ >> + REQ_OP_ZONE_CLOSE = 18, >> + /* Finish zone(s) */ >> + REQ_OP_ZONE_FINISH = 19, >> + >> /* SCSI passthrough using struct scsi_request */ >> REQ_OP_SCSI_IN = 32, >> REQ_OP_SCSI_OUT = 33, >> @@ -375,6 +382,22 @@ static inline void bio_set_op_attrs(struct bio *bio, unsigned op, >> bio->bi_opf = op | op_flags; >> } > > Are the new operation types ever passed to op_is_write()? The definition > of that function is as follows: > May be we should change numbering since blktrace also relies on the having on_is_write() without looking at the rq_ops(). 197 * Data direction bit lookup 198 */ 199 static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), 200 BLK_TC_ACT(BLK_TC_WRITE) }; <---- 201 202 #define BLK_TC_RAHEAD BLK_TC_AHEAD 203 #define BLK_TC_PREFLUSH BLK_TC_FLUSH 204 205 /* The ilog2() calls fall out because they're constant */ 206 #define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \ 207 (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ## __name)) 208 209 /* 210 * The worker for the various blk_add_trace*() types. Fills out a 211 * blk_io_trace structure and places it in a per-cpu subbuffer. 212 */ 213 static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, 214 int op, int op_flags, u32 what, int error, int pdu_len, 215 void *pdu_data, union kernfs_node_id *cgid) 216 { 217 struct task_struct *tsk = current; 218 struct ring_buffer_event *event = NULL; 219 struct ring_buffer *buffer = NULL; 220 struct blk_io_trace *t; 221 unsigned long flags = 0; 222 unsigned long *sequence; 223 pid_t pid; 224 int cpu, pc = 0; 225 bool blk_tracer = blk_tracer_enabled; 226 ssize_t cgid_len = cgid ? sizeof(*cgid) : 0; 227 228 if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer)) 229 return; 230 231 what |= ddir_act[op_is_write(op) ? WRITE : READ]; <--- > static inline bool op_is_write(unsigned int op) > { > return (op & 1); > } > -- dm-devel mailing list dm-devel@redhat.com https://www.redhat.com/mailman/listinfo/dm-devel
On 6/25/19 12:27 AM, Chaitanya Kulkarni wrote: > On 6/24/19 12:43 PM, Bart Van Assche wrote: >> On 6/21/19 6:07 AM, Matias Bjørling wrote: >>> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h >>> index 95202f80676c..067ef9242275 100644 >>> --- a/include/linux/blk_types.h >>> +++ b/include/linux/blk_types.h >>> @@ -284,13 +284,20 @@ enum req_opf { >>> REQ_OP_DISCARD = 3, >>> /* securely erase sectors */ >>> REQ_OP_SECURE_ERASE = 5, >>> - /* reset a zone write pointer */ >>> - REQ_OP_ZONE_RESET = 6, >>> /* write the same sector many times */ >>> REQ_OP_WRITE_SAME = 7, >>> /* write the zero filled sector many times */ >>> REQ_OP_WRITE_ZEROES = 9, >>> >>> + /* reset a zone write pointer */ >>> + REQ_OP_ZONE_RESET = 16, >>> + /* Open zone(s) */ >>> + REQ_OP_ZONE_OPEN = 17, >>> + /* Close zone(s) */ >>> + REQ_OP_ZONE_CLOSE = 18, >>> + /* Finish zone(s) */ >>> + REQ_OP_ZONE_FINISH = 19, >>> + >>> /* SCSI passthrough using struct scsi_request */ >>> REQ_OP_SCSI_IN = 32, >>> REQ_OP_SCSI_OUT = 33, >>> @@ -375,6 +382,22 @@ static inline void bio_set_op_attrs(struct bio *bio, unsigned op, >>> bio->bi_opf = op | op_flags; >>> } >> >> Are the new operation types ever passed to op_is_write()? The definition >> of that function is as follows: >> > May be we should change numbering since blktrace also relies on the > having on_is_write() without looking at the rq_ops(). > > 197 * Data direction bit lookup > 198 */ > 199 static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), > 200 BLK_TC_ACT(BLK_TC_WRITE) }; <---- > 201 > 202 #define BLK_TC_RAHEAD BLK_TC_AHEAD > 203 #define BLK_TC_PREFLUSH BLK_TC_FLUSH > 204 > 205 /* The ilog2() calls fall out because they're constant */ > 206 #define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \ > 207 (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ## > __name)) > 208 > 209 /* > 210 * The worker for the various blk_add_trace*() types. Fills out a > 211 * blk_io_trace structure and places it in a per-cpu subbuffer. > 212 */ > 213 static void __blk_add_trace(struct blk_trace *bt, sector_t sector, > int bytes, > 214 int op, int op_flags, u32 what, int error, > int pdu_len, > 215 void *pdu_data, union kernfs_node_id *cgid) > 216 { > 217 struct task_struct *tsk = current; > 218 struct ring_buffer_event *event = NULL; > 219 struct ring_buffer *buffer = NULL; > 220 struct blk_io_trace *t; > 221 unsigned long flags = 0; > 222 unsigned long *sequence; > 223 pid_t pid; > 224 int cpu, pc = 0; > 225 bool blk_tracer = blk_tracer_enabled; > 226 ssize_t cgid_len = cgid ? sizeof(*cgid) : 0; > 227 > 228 if (unlikely(bt->trace_state != Blktrace_running && > !blk_tracer)) > 229 return; > 230 > 231 what |= ddir_act[op_is_write(op) ? WRITE : READ]; <--- > > >> static inline bool op_is_write(unsigned int op) >> { >> return (op & 1); >> } >> > The zone mgmt commands are neither write nor reads commands. I guess, one could characterize them as write commands, but they don't write any data, they update a state of a zone on a drive. One should keep it as is? and make sure the zone mgmt commands don't get categorized as either read/write. -- dm-devel mailing list dm-devel@redhat.com https://www.redhat.com/mailman/listinfo/dm-devel
On 6/25/19 3:35 AM, Matias Bjørling wrote: > On 6/25/19 12:27 AM, Chaitanya Kulkarni wrote: >> On 6/24/19 12:43 PM, Bart Van Assche wrote: >>> static inline bool op_is_write(unsigned int op) >>> { >>> return (op & 1); >>> } >>> >> > > The zone mgmt commands are neither write nor reads commands. I guess, > one could characterize them as write commands, but they don't write any > data, they update a state of a zone on a drive. One should keep it as > is? and make sure the zone mgmt commands don't get categorized as either > read/write. Since the open, close and finish operations support modifying zone data I propose to characterize these as write commands. How about the following additional changes: - Make bio_check_ro() refuse open/close/flush/reset zone operations for read-only partitions (see also commit a32e236eb93e ("Partially revert "block: fail op_is_write() requests to read-only partitions"") # v4.18). - In submit_bio(), change op_is_write(bio_op(bio)) ? "WRITE" : "READ" into something that uses blk_op_str(). - Add open/close/flush zone support be added in blk_partition_remap(). Thanks, Bart. -- dm-devel mailing list dm-devel@redhat.com https://www.redhat.com/mailman/listinfo/dm-devel
> On 25 Jun 2019, at 17.55, Bart Van Assche <bvanassche@acm.org> wrote: > > On 6/25/19 3:35 AM, Matias Bjørling wrote: >> On 6/25/19 12:27 AM, Chaitanya Kulkarni wrote: >>> On 6/24/19 12:43 PM, Bart Van Assche wrote: >>>> static inline bool op_is_write(unsigned int op) >>>> { >>>> return (op & 1); >>>> } >> The zone mgmt commands are neither write nor reads commands. I guess, one could characterize them as write commands, but they don't write any data, they update a state of a zone on a drive. One should keep it as is? and make sure the zone mgmt commands don't get categorized as either read/write. > > Since the open, close and finish operations support modifying zone data I propose to characterize these as write commands. How about the following additional changes: > - Make bio_check_ro() refuse open/close/flush/reset zone operations for read-only partitions (see also commit a32e236eb93e ("Partially revert "block: fail op_is_write() requests to read-only partitions"") # v4.18). > - In submit_bio(), change op_is_write(bio_op(bio)) ? "WRITE" : "READ" into something that uses blk_op_str(). > - Add open/close/flush zone support be added in blk_partition_remap(). > > Thanks, > > Bart. Agreed. This is also what we do with REQ_OP_DISCARD and it makes things easier in the driver. Apart from the return comment from Damien and Minwoo, the patch looks good to me. Reviewed-by: Javier González <javier@javigon.com> -- dm-devel mailing list dm-devel@redhat.com https://www.redhat.com/mailman/listinfo/dm-devel
On 06/25/2019 08:56 AM, Bart Van Assche wrote: > On 6/25/19 3:35 AM, Matias Bjørling wrote: >> On 6/25/19 12:27 AM, Chaitanya Kulkarni wrote: >>> On 6/24/19 12:43 PM, Bart Van Assche wrote: >>>> static inline bool op_is_write(unsigned int op) >>>> { >>>> return (op & 1); >>>> } >>>> >>> >> >> The zone mgmt commands are neither write nor reads commands. I guess, >> one could characterize them as write commands, but they don't write any >> data, they update a state of a zone on a drive. One should keep it as >> is? and make sure the zone mgmt commands don't get categorized as either >> read/write. > > Since the open, close and finish operations support modifying zone data > I propose to characterize these as write commands. How about the > following additional changes: > - Make bio_check_ro() refuse open/close/flush/reset zone operations for ^ Since finish also listed above which supports modifying data do we need to add finish here with flush in above line ? > read-only partitions (see also commit a32e236eb93e ("Partially revert > "block: fail op_is_write() requests to read-only partitions"") # v4.18). > - In submit_bio(), change op_is_write(bio_op(bio)) ? "WRITE" : "READ" > into something that uses blk_op_str(). Good idea, I've a patch for blk_op_str() and debugfs just waiting for this to merge. Does it make sense to add that patch in this series ? > - Add open/close/flush zone support be added in blk_partition_remap(). same here for finish ? > > Thanks, > > Bart. > -- dm-devel mailing list dm-devel@redhat.com https://www.redhat.com/mailman/listinfo/dm-devel
On Tue, Jun 25, 2019 at 6:51 PM Chaitanya Kulkarni <Chaitanya.Kulkarni@wdc.com> wrote: > > On 06/25/2019 08:56 AM, Bart Van Assche wrote: > > On 6/25/19 3:35 AM, Matias Bjørling wrote: > >> On 6/25/19 12:27 AM, Chaitanya Kulkarni wrote: > >>> On 6/24/19 12:43 PM, Bart Van Assche wrote: > >>>> static inline bool op_is_write(unsigned int op) > >>>> { > >>>> return (op & 1); > >>>> } > >>>> > >>> > >> > >> The zone mgmt commands are neither write nor reads commands. I guess, > >> one could characterize them as write commands, but they don't write any > >> data, they update a state of a zone on a drive. One should keep it as > >> is? and make sure the zone mgmt commands don't get categorized as either > >> read/write. > > > > Since the open, close and finish operations support modifying zone data > > I propose to characterize these as write commands. How about the > > following additional changes: > > - Make bio_check_ro() refuse open/close/flush/reset zone operations for > ^ > Since finish also listed above which supports modifying data do we need > to add finish here with flush in above line ? > > > read-only partitions (see also commit a32e236eb93e ("Partially revert > > "block: fail op_is_write() requests to read-only partitions"") # v4.18). > > - In submit_bio(), change op_is_write(bio_op(bio)) ? "WRITE" : "READ" > > into something that uses blk_op_str(). > Good idea, I've a patch for blk_op_str() and debugfs just waiting for > this to merge. Does it make sense to add that patch in this series ? Ship it off separately. Your patches can go in first. > > - Add open/close/flush zone support be added in blk_partition_remap(). > same here for finish ? > > > > Thanks, > > > > Bart. > > > -- dm-devel mailing list dm-devel@redhat.com https://www.redhat.com/mailman/listinfo/dm-devel
On 2019/06/26 0:56, Bart Van Assche wrote: > On 6/25/19 3:35 AM, Matias Bjørling wrote: >> On 6/25/19 12:27 AM, Chaitanya Kulkarni wrote: >>> On 6/24/19 12:43 PM, Bart Van Assche wrote: >>>> static inline bool op_is_write(unsigned int op) >>>> { >>>> return (op & 1); >>>> } >>>> >>> >> >> The zone mgmt commands are neither write nor reads commands. I guess, >> one could characterize them as write commands, but they don't write any >> data, they update a state of a zone on a drive. One should keep it as >> is? and make sure the zone mgmt commands don't get categorized as either >> read/write. > > Since the open, close and finish operations support modifying zone data > I propose to characterize these as write commands. How about the > following additional changes: > - Make bio_check_ro() refuse open/close/flush/reset zone operations for > read-only partitions (see also commit a32e236eb93e ("Partially revert > "block: fail op_is_write() requests to read-only partitions"") # v4.18). > - In submit_bio(), change op_is_write(bio_op(bio)) ? "WRITE" : "READ" > into something that uses blk_op_str(). > - Add open/close/flush zone support be added in blk_partition_remap(). Sounds good to me. And indeed, all operations should be failed for RO devices/partitions. Of note though is that only reset and finish operations have an effect on the zone data. Open and close have no effect at all on the zone data and only change the zone condition/state. But since they do change something on the drive, we can still consider them as "write" operations. Best regards.
On 2019/06/26 1:51, Chaitanya Kulkarni wrote: > On 06/25/2019 08:56 AM, Bart Van Assche wrote: >> On 6/25/19 3:35 AM, Matias Bjørling wrote: >>> On 6/25/19 12:27 AM, Chaitanya Kulkarni wrote: >>>> On 6/24/19 12:43 PM, Bart Van Assche wrote: >>>>> static inline bool op_is_write(unsigned int op) >>>>> { >>>>> return (op & 1); >>>>> } >>>>> >>>> >>> >>> The zone mgmt commands are neither write nor reads commands. I guess, >>> one could characterize them as write commands, but they don't write any >>> data, they update a state of a zone on a drive. One should keep it as >>> is? and make sure the zone mgmt commands don't get categorized as either >>> read/write. >> >> Since the open, close and finish operations support modifying zone data >> I propose to characterize these as write commands. How about the >> following additional changes: >> - Make bio_check_ro() refuse open/close/flush/reset zone operations for > ^ > Since finish also listed above which supports modifying data do we need > to add finish here with flush in above line ? There is no "zone flush" operation. I guess Bart made a typo here and meant "finish". "flush" on a zoned disk is not different from regular disks. > >> read-only partitions (see also commit a32e236eb93e ("Partially revert >> "block: fail op_is_write() requests to read-only partitions"") # v4.18). >> - In submit_bio(), change op_is_write(bio_op(bio)) ? "WRITE" : "READ" >> into something that uses blk_op_str(). > Good idea, I've a patch for blk_op_str() and debugfs just waiting for > this to merge. Does it make sense to add that patch in this series ? >> - Add open/close/flush zone support be added in blk_partition_remap(). > same here for finish ? >> >> Thanks, >> >> Bart. >> > >
diff --git a/block/blk-core.c b/block/blk-core.c index 8340f69670d8..c0f0dbad548d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -897,6 +897,9 @@ generic_make_request_checks(struct bio *bio) goto not_supported; break; case REQ_OP_ZONE_RESET: + case REQ_OP_ZONE_OPEN: + case REQ_OP_ZONE_CLOSE: + case REQ_OP_ZONE_FINISH: if (!blk_queue_is_zoned(q)) goto not_supported; break; diff --git a/block/blk-zoned.c b/block/blk-zoned.c index ae7e91bd0618..d0c933593b93 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -201,20 +201,22 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector, EXPORT_SYMBOL_GPL(blkdev_report_zones); /** - * blkdev_reset_zones - Reset zones write pointer + * blkdev_zones_mgmt_op - Perform the specified operation on the zone(s) * @bdev: Target block device - * @sector: Start sector of the first zone to reset - * @nr_sectors: Number of sectors, at least the length of one zone + * @op: Operation to be performed on the zone(s) + * @sector: Start sector of the first zone to operate on + * @nr_sectors: Number of sectors, at least the length of one zone and + * must be zone size aligned. * @gfp_mask: Memory allocation flags (for bio_alloc) * * Description: - * Reset the write pointer of the zones contained in the range + * Perform the specified operation contained in the range * @sector..@sector+@nr_sectors. Specifying the entire disk sector range * is valid, but the specified range should not contain conventional zones. */ -int blkdev_reset_zones(struct block_device *bdev, - sector_t sector, sector_t nr_sectors, - gfp_t gfp_mask) +int blkdev_zones_mgmt_op(struct block_device *bdev, enum req_opf op, + sector_t sector, sector_t nr_sectors, + gfp_t gfp_mask) { struct request_queue *q = bdev_get_queue(bdev); sector_t zone_sectors; @@ -226,6 +228,9 @@ int blkdev_reset_zones(struct block_device *bdev, if (!blk_queue_is_zoned(q)) return -EOPNOTSUPP; + if (!op_is_zone_mgmt_op(op)) + return -EOPNOTSUPP; + if (bdev_read_only(bdev)) return -EPERM; @@ -248,7 +253,7 @@ int blkdev_reset_zones(struct block_device *bdev, bio = blk_next_bio(bio, 0, gfp_mask); bio->bi_iter.bi_sector = sector; bio_set_dev(bio, bdev); - bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0); + bio_set_op_attrs(bio, op, 0); sector += zone_sectors; @@ -264,7 +269,7 @@ int blkdev_reset_zones(struct block_device *bdev, return ret; } -EXPORT_SYMBOL_GPL(blkdev_reset_zones); +EXPORT_SYMBOL_GPL(blkdev_zones_mgmt_op); /* * BLKREPORTZONE ioctl processing. @@ -329,15 +334,16 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, } /* - * BLKRESETZONE ioctl processing. + * Zone operation (open, close, finish or reset) ioctl processing. * Called from blkdev_ioctl. */ -int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) +int blkdev_zones_mgmt_op_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; struct request_queue *q; struct blk_zone_range zrange; + enum req_opf op; if (!argp) return -EINVAL; @@ -358,8 +364,25 @@ int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode, if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range))) return -EFAULT; - return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors, - GFP_KERNEL); + switch (cmd) { + case BLKRESETZONE: + op = REQ_OP_ZONE_RESET; + break; + case BLKOPENZONE: + op = REQ_OP_ZONE_OPEN; + break; + case BLKCLOSEZONE: + op = REQ_OP_ZONE_CLOSE; + break; + case BLKFINISHZONE: + op = REQ_OP_ZONE_FINISH; + break; + default: + return -ENOTTY; + } + + return blkdev_zones_mgmt_op(bdev, op, zrange.sector, zrange.nr_sectors, + GFP_KERNEL); } static inline unsigned long *blk_alloc_zone_bitmap(int node, diff --git a/block/ioctl.c b/block/ioctl.c index 15a0eb80ada9..df7fe54db158 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -532,7 +532,10 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, case BLKREPORTZONE: return blkdev_report_zones_ioctl(bdev, mode, cmd, arg); case BLKRESETZONE: - return blkdev_reset_zones_ioctl(bdev, mode, cmd, arg); + case BLKOPENZONE: + case BLKCLOSEZONE: + case BLKFINISHZONE: + return blkdev_zones_mgmt_op_ioctl(bdev, mode, cmd, arg); case BLKGETZONESZ: return put_uint(arg, bdev_zone_sectors(bdev)); case BLKGETNRZONES: diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 95202f80676c..067ef9242275 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -284,13 +284,20 @@ enum req_opf { REQ_OP_DISCARD = 3, /* securely erase sectors */ REQ_OP_SECURE_ERASE = 5, - /* reset a zone write pointer */ - REQ_OP_ZONE_RESET = 6, /* write the same sector many times */ REQ_OP_WRITE_SAME = 7, /* write the zero filled sector many times */ REQ_OP_WRITE_ZEROES = 9, + /* reset a zone write pointer */ + REQ_OP_ZONE_RESET = 16, + /* Open zone(s) */ + REQ_OP_ZONE_OPEN = 17, + /* Close zone(s) */ + REQ_OP_ZONE_CLOSE = 18, + /* Finish zone(s) */ + REQ_OP_ZONE_FINISH = 19, + /* SCSI passthrough using struct scsi_request */ REQ_OP_SCSI_IN = 32, REQ_OP_SCSI_OUT = 33, @@ -375,6 +382,22 @@ static inline void bio_set_op_attrs(struct bio *bio, unsigned op, bio->bi_opf = op | op_flags; } +/* + * Check if the op is zoned operation. + */ +static inline bool op_is_zone_mgmt_op(enum req_opf op) +{ + switch (op) { + case REQ_OP_ZONE_RESET: + case REQ_OP_ZONE_OPEN: + case REQ_OP_ZONE_CLOSE: + case REQ_OP_ZONE_FINISH: + return true; + default: + return false; + } +} + static inline bool op_is_write(unsigned int op) { return (op & 1); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 592669bcc536..943084f9dc9c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -348,14 +348,15 @@ extern unsigned int blkdev_nr_zones(struct block_device *bdev); extern int blkdev_report_zones(struct block_device *bdev, sector_t sector, struct blk_zone *zones, unsigned int *nr_zones, gfp_t gfp_mask); -extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors, - sector_t nr_sectors, gfp_t gfp_mask); extern int blk_revalidate_disk_zones(struct gendisk *disk); extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg); -extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg); +extern int blkdev_zones_mgmt_op_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg); +extern int blkdev_zones_mgmt_op(struct block_device *bdev, enum req_opf op, + sector_t sector, sector_t nr_sectors, + gfp_t gfp_mask); #else /* CONFIG_BLK_DEV_ZONED */ @@ -376,15 +377,57 @@ static inline int blkdev_report_zones_ioctl(struct block_device *bdev, return -ENOTTY; } -static inline int blkdev_reset_zones_ioctl(struct block_device *bdev, - fmode_t mode, unsigned int cmd, - unsigned long arg) +static inline int blkdev_zones_mgmt_op_ioctl(struct block_device *bdev, + fmode_t mode, unsigned int cmd, + unsigned long arg) +{ + return -ENOTTY; +} + +static inline int blkdev_zones_mgmt_op(struct block_device *bdev, + enum req_opf op, + sector_t sector, sector_t nr_sectors, + gfp_t gfp_mask) { return -ENOTTY; } #endif /* CONFIG_BLK_DEV_ZONED */ +static inline int blkdev_reset_zones(struct block_device *bdev, + sector_t sector, sector_t nr_sectors, + gfp_t gfp_mask) +{ + return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_RESET, + sector, nr_sectors, gfp_mask); +} + +static inline int blkdev_open_zones(struct block_device *bdev, + sector_t sector, sector_t nr_sectors, + gfp_t gfp_mask) +{ + return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_OPEN, + sector, nr_sectors, gfp_mask); +} + +static inline int blkdev_close_zones(struct block_device *bdev, + sector_t sector, sector_t nr_sectors, + gfp_t gfp_mask) +{ + return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_CLOSE, + sector, nr_sectors, + gfp_mask); +} + +static inline int blkdev_finish_zones(struct block_device *bdev, + sector_t sector, sector_t nr_sectors, + gfp_t gfp_mask) +{ + return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_FINISH, + sector, nr_sectors, + gfp_mask); +} + struct request_queue { /* * Together with queue_head for cacheline sharing diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h index 498eec813494..701e0692b8d3 100644 --- a/include/uapi/linux/blkzoned.h +++ b/include/uapi/linux/blkzoned.h @@ -120,9 +120,11 @@ struct blk_zone_report { }; /** - * struct blk_zone_range - BLKRESETZONE ioctl request - * @sector: starting sector of the first zone to issue reset write pointer - * @nr_sectors: Total number of sectors of 1 or more zones to reset + * struct blk_zone_range - BLKRESETZONE/BLKOPENZONE/ + * BLKCLOSEZONE/BLKFINISHZONE ioctl + * request + * @sector: starting sector of the first zone to operate on + * @nr_sectors: Total number of sectors of all zones to operate on */ struct blk_zone_range { __u64 sector; @@ -139,10 +141,19 @@ struct blk_zone_range { * sector range. The sector range must be zone aligned. * @BLKGETZONESZ: Get the device zone size in number of 512 B sectors. * @BLKGETNRZONES: Get the total number of zones of the device. + * @BLKOPENZONE: Open the zones in the specified sector range. The + * sector range must be zone aligned. + * @BLKCLOSEZONE: Close the zones in the specified sector range. The + * sector range must be zone aligned. + * @BLKFINISHZONE: Finish the zones in the specified sector range. The + * sector range must be zone aligned. */ #define BLKREPORTZONE _IOWR(0x12, 130, struct blk_zone_report) #define BLKRESETZONE _IOW(0x12, 131, struct blk_zone_range) #define BLKGETZONESZ _IOR(0x12, 132, __u32) #define BLKGETNRZONES _IOR(0x12, 133, __u32) +#define BLKOPENZONE _IOW(0x12, 134, struct blk_zone_range) +#define BLKCLOSEZONE _IOW(0x12, 135, struct blk_zone_range) +#define BLKFINISHZONE _IOW(0x12, 136, struct blk_zone_range) #endif /* _UAPI_BLKZONED_H */