Message ID | 20220613040947.16266-1-faithilikerun@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [RFC,v1] Adding block layer APIs resembling Linux ZoneBlockDevice ioctls. | expand |
On 6/13/22 13:09, Sam Li wrote: > By adding zone management operations in BlockDriver, storage > controller emulation can use the new block layer APIs including > zone_report, zone_reset, zone_open, zone_close, and zone_finish. Like a real review, commenting inline below. > > Signed-off-by: Sam Li <faithilikerun@gmail.com> > --- > block/block-backend.c | 20 ++++ > block/coroutines.h | 6 ++ > block/file-posix.c | 174 ++++++++++++++++++++++++++++++ > block/io.c | 26 +++++ > include/block/block-common.h | 8 ++ > include/block/block-io.h | 11 ++ > include/block/block_int-common.h | 6 ++ > qemu-io-cmds.c | 66 ++++++++++++ > tests/qemu-iotests/tests/zoned.sh | 47 ++++++++ > 9 files changed, 364 insertions(+) > create mode 100644 tests/qemu-iotests/tests/zoned.sh > > diff --git a/block/block-backend.c b/block/block-backend.c > index e0e1aff4b1..4695e5f9fe 100644 > --- a/block/block-backend.c > +++ b/block/block-backend.c > @@ -104,6 +104,8 @@ typedef struct BlockBackendAIOCB { > int ret; > } BlockBackendAIOCB; > > + > + No need for these extra blank line. > static const AIOCBInfo block_backend_aiocb_info = { > .get_aio_context = blk_aiocb_get_aio_context, > .aiocb_size = sizeof(BlockBackendAIOCB), > @@ -1810,6 +1812,23 @@ int blk_flush(BlockBackend *blk) > return ret; > } > > +int blk_co_zone_report(BlockBackend *blk) > +{ > + int ret; > + ret = bdrv_co_zone_report(blk->root->bs, NULL, 0, 0, 0, 0); > + > + return ret; Simplify: return bdrv_co_zone_report(blk->root->bs, NULL, 0, 0, 0, 0); But I can see that this code is temporary anyway :) > +} > + > +int blk_co_zone_mgmt(BlockBackend *blk, enum zone_op op) > +{ > + int ret; > + ret = bdrv_co_zone_mgmt(blk->root->bs, op, 0, 100); > + > + return ret; > +} > + > + > void blk_drain(BlockBackend *blk) > { > BlockDriverState *bs = blk_bs(blk); > @@ -2634,3 +2653,4 @@ int blk_make_empty(BlockBackend *blk, Error **errp) > > return bdrv_make_empty(blk->root, errp); > } > + Please do not randomly add blank lines. > diff --git a/block/coroutines.h b/block/coroutines.h > index 830ecaa733..e29222a68a 100644 > --- a/block/coroutines.h > +++ b/block/coroutines.h > @@ -81,6 +81,10 @@ blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes); > > int coroutine_fn blk_co_do_flush(BlockBackend *blk); > > +int coroutine_fn blk_co_zone_report(BlockBackend *blk); > + > +int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, enum zone_op op); > + > > /* > * "I/O or GS" API functions. These functions can run without > @@ -129,4 +133,6 @@ blk_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes); > > int generated_co_wrapper blk_do_flush(BlockBackend *blk); > > + > + No need for these blank lines. > #endif /* BLOCK_COROUTINES_H */ > diff --git a/block/file-posix.c b/block/file-posix.c > index 48cd096624..42646acc4e 100644 > --- a/block/file-posix.c > +++ b/block/file-posix.c > @@ -73,6 +73,7 @@ > #include <linux/hdreg.h> > #include <linux/magic.h> > #include <scsi/sg.h> > +#include <linux/blkzoned.h> > #ifdef __s390__ > #include <asm/dasd.h> > #endif > @@ -178,6 +179,167 @@ typedef struct BDRVRawReopenState { > bool check_cache_dropped; > } BDRVRawReopenState; > > + Another extra blank line not needed. > +enum zone_type { > + BLK_ZT_CONV = BLK_ZONE_TYPE_CONVENTIONAL, > + BLK_ZT_SWR = BLK_ZONE_TYPE_SEQWRITE_REQ, > + BLK_ZT_SWP = BLK_ZONE_TYPE_SEQWRITE_PREF, > +}; > + > +enum zone_cond { > + BLK_ZS_NOT_WP = BLK_ZONE_COND_NOT_WP, > + BLK_ZS_EMPTY = BLK_ZONE_COND_EMPTY, > + BLK_ZS_IOPEN = BLK_ZONE_COND_IMP_OPEN, > + BLK_ZS_EOPEN = BLK_ZONE_COND_EXP_OPEN, > + BLK_ZS_CLOSED = BLK_ZONE_COND_CLOSED, > + BLK_ZS_RDONLY = BLK_ZONE_COND_READONLY, > + BLK_ZS_FULL = BLK_ZONE_COND_FULL, > + BLK_ZS_OFFLINE = BLK_ZONE_COND_OFFLINE, > +}; > + > +/* > + * Zone descriptor data structure. > + * Provide information on a zone with all position and size values in bytes. > + */ > +struct BlockZoneDescriptor { > + uint64_t start; > + uint32_t length; > + uint32_t cap; > + uint64_t wp; > + enum zone_type type; > + enum zone_cond cond; > +}; > + > +enum zone_model { > + ZBD_HOST_MANAGED, > + ZBD_HOST_AWARE, Please stay consistent with the macro prefixes. Use BLK_Zxxx here too. Likely BLK_ZM_HOST_MANAGED and BLK_ZM_HOST_AWARE. All of the above definitions should not be in block/file-posix.c because they must be common to the entire block layer backend (regardless of the driver implementing the zone operations). So these need to go into some common .h header file so that both backend drivers (e.g. file-posix) and front end (virtio-blk) can both see the same definitions. > +}; > + > +/** > + * Zone device information data structure. > + * Provide information on a device. > + */ > +typedef struct zbd_dev { > + enum zone_model model; > + uint32_t block_size; > + uint32_t write_granularity; > + uint32_t nr_zones; > + struct BlockZoneDescriptor *zones; /* array of zones */ > + uint32_t max_nr_open_zones; /* maximum number of explicitly open zones */ > + uint32_t max_nr_active_zones; > +} zbd_dev; > + > +/** > + * zone report - Get a zone block device's information in the > + * form of an array of zone descriptors. > + * > + * @param bs: passing zone block device file descriptor > + * @param zones: Space to hold zone information on reply > + * @param offset: the location in the zone block device > + * @param len: the length of reported zone information* > + * @param partial: if partial has zero value, it is the number > + * of zones that can be reported; else, set the nr_zones to the > + * number of fully transferred zone descriptors in the data buffer. > + * @return 0 on success, -1 on failure > + */ > +static int raw_co_zone_report(BlockDriverState *bs, > + struct BlockZoneDescriptor *zones, uint32_t *nr_zones, > + int64_t offset, int64_t len, uint8_t partial) { Remove the partial argument from this interface. We have decided to drop it from the virtio specs and linux ioctl do not need it. Also, it is generally good practice to have the first arguments of a function be the "inputs" and the last ones the outputs. So: static int raw_co_zone_report(BlockDriverState *bs, int64_t offset, int64_t len, uint32_t *nr_zones, struct BlockZoneDescriptor *zones) would be better, I think. In any case, this function interface needs rethinking. E.g., what are the inputs and the outputs ? * Option (1) Input: (a) the offset where to start the report from, (b) the array of zones to fill up and (c) the maximum number of zones that the caller wants (which may be just 1). Ouput: (a) the array of zones filled up and (b) the number of zone decriptors that were filled in the array of zones, which can be LESS than what the caller requested. The interface then is: static int raw_co_zone_report(BlockDriverState *bs, int64_t offset, uint32_t *nr_zones, struct BlockZoneDescriptor *zones); * Option (2) Input: (a) the offset where to start the report from, (b) the maximum number of zones that the caller wants (which may be just 1). Ouput: (a) an allocated and filled array of zones and (b) the number of zone decriptors that were filled in the allocated array of zones, which can be LESS than what the caller requested. The interface then is: static int raw_co_zone_report(BlockDriverState *bs, int64_t offset, uint32_t *nr_zones, struct BlockZoneDescriptor **zones); Note the "**" since you will return an array of zones (that is, a pointer). For option (1), the caller will be responsible to manage the array of zones and free it when it is not needed anymore. For (2), the caller will still need to free the array of zones when not needed anymore BUT since the caller has no idea how that array was allocated, (it may be an anonymous mmap region !), you would need a "free array of zones" function too. So option (2) is more complicated. So I strongly suggest you go for solution (1): the virtio-blk driver (or qemu-io for your test) will allocate the array of zones and pass it along as an argument for this function to use it and fill the array. > + printf("%s\n", "start to report zone"); > + BDRVRawState *s = bs->opaque; > + > + struct blk_zone_report *rep; > + uint32_t rep_nr_zones = 0; > + int ret = 0, i = 0, start_sector = 0; Not sure about qemu code style rules, but generally: 1) Mixing code and declarations is a BAD idea. It makes code reading harder. So move that printf after the declarations. 2) No blank lines in the middle of the declarations. > + > + if (ioctl(s->fd, BLKGETNRZONES, &rep_nr_zones)) { > + /* Done */ > + error_report("number of zones not available"); > + } You do not need this. You have the nr_zones input argument and if that is too large, then the kernel will simply return less zones than what the caller asked for. You do need to check that nr_zones is not 0. > + > + fprintf(stdout, "Got %u zone descriptors\n", rep_nr_zones); > + rep = malloc(sizeof(struct blk_zone_report) + > + rep_nr_zones * sizeof(struct blk_zone)); > + if (!rep) { > + return -1; > + } > + > + rep->nr_zones = rep_nr_zones; > + rep->sector = start_sector; > + > + while (i < rep_nr_zones) { > + ret = ioctl(s->fd, BLKREPORTZONE, rep); > + if (ret != 0) { > + error_report("can't report zone"); > + } > + > + fprintf(stdout, "start: 0x%llx, len 0x%llx, cap 0x%llx, " > + "wptr 0x%llx reset: %u non-seq:%u, zcond:%u, [type: %u]\n", > + rep->zones[i].start, rep->zones[i].len, > + rep->zones[i].capacity, rep->zones[i].wp, > + rep->zones[i].reset, rep->zones[i].non_seq, > + rep->zones[i].cond, rep->zones[i].type); > + ++i; The ioctl in the loop may have returned a lot more than 1 zone. The actual number that is returned is indicated by rep->nr_zones after the ioctl call. So instead of i++, you need "i += rep->nr_zones". And you also need to stop (break) if rep->nr_zones is 0. > + } > + > + free(rep); You are not updating the nr_zones argument "*nr_zones = i;" Overall: last week, we decided that you should not worry about the ioctl for now and simply fill directly some value for the first zone in the "struct BlockZoneDescriptor *zones" array and return only that one zone. And the printf should be in your qemu-io test so that you can check that you are seeing the zone descriptors being propagated from the driver to the caller. > + > + return ret; > +} > + > +/** > + * zone management operations - Execute an operation on a zone > + */ > +static int raw_co_zone_mgmt(BlockDriverState *bs, enum zone_op op, > + int64_t offset, int64_t len) { > + BDRVRawState *s = bs->opaque; > + struct blk_zone_range range; > + const char *ioctl_name; > + uint64_t ioctl_op; > + int ret; > + > + switch (op) { > + case zone_open: > + ioctl_name = "BLKOPENZONE"; > + ioctl_op = BLKOPENZONE; > + break; > + case zone_close: > + ioctl_name = "BLKCLOSEZONE"; > + ioctl_op = BLKCLOSEZONE; > + break; > + case zone_finish: > + ioctl_name = "BLKFINISHZONE"; > + ioctl_op = BLKFINISHZONE; > + break; > + case zone_reset: > + ioctl_name = "BLKRESETZONE"; > + ioctl_op = BLKRESETZONE; > + break; > + default: > + error_report("Invalid zone operation 0x%x", op); > + errno = -EINVAL; > + return -1; > + } > + > + /* Execute the operation */ > + range.sector = 0; This should be "= offset". > + range.nr_sectors = 100; And this should be "= len". You have the arguments, use them ! > + ret = ioctl(s->fd, ioctl_op, &range); > + if (ret != 0) { > + if (errno == ENOTTY) { > + error_report("ioctl %s is not supported", ioctl_name); > + errno = ENOTSUP; This is most likely not necessary since you will have checked already that the device is a zoned device, so you know it accepts these ioctls. > + } else { > + error_report("ioctl %s failed %d", > + ioctl_name, errno); > + } > + return -1; > + } > + > + return 0; > +} > + > + > static int fd_open(BlockDriverState *bs) > { > BDRVRawState *s = bs->opaque; > @@ -3324,6 +3486,9 @@ BlockDriver bdrv_file = { > .bdrv_abort_perm_update = raw_abort_perm_update, > .create_opts = &raw_create_opts, > .mutable_opts = mutable_opts, > + > + .bdrv_co_zone_report = raw_co_zone_report, > + .bdrv_co_zone_mgmt = raw_co_zone_mgmt, > }; > > /***********************************************/ > @@ -3697,6 +3862,9 @@ static BlockDriver bdrv_host_device = { > .bdrv_probe_blocksizes = hdev_probe_blocksizes, > .bdrv_probe_geometry = hdev_probe_geometry, > > + .bdrv_co_zone_report = raw_co_zone_report, > + .bdrv_co_zone_mgmt = raw_co_zone_mgmt, These probably need to be in a #ifdef __linux__ / #endif since these functions use linux only ioctl. However, you are unconditionally adding the method to bdrv_host_device, which is also going to be used for regular disks. So not good. You may need to define a: "BlockDriver bdrv_zoned_host_device" which is the same as bdrv_host_device + adding the zone operation. When the driver is initialized, you choose how it is registered aftewr testing if the host device is zoned or not. > + > /* generic scsi device */ > #ifdef __linux__ > .bdrv_co_ioctl = hdev_co_ioctl, > @@ -3809,6 +3977,9 @@ static BlockDriver bdrv_host_cdrom = { > .bdrv_io_unplug = raw_aio_unplug, > .bdrv_attach_aio_context = raw_aio_attach_aio_context, > > + .bdrv_co_zone_report = raw_co_zone_report, > + .bdrv_co_zone_mgmt = raw_co_zone_mgmt, CROM do not have zones :) > + > .bdrv_co_truncate = raw_co_truncate, > .bdrv_getlength = raw_getlength, > .has_variable_length = true, > @@ -3939,6 +4110,9 @@ static BlockDriver bdrv_host_cdrom = { > .bdrv_io_unplug = raw_aio_unplug, > .bdrv_attach_aio_context = raw_aio_attach_aio_context, > > + .bdrv_co_zone_report = raw_co_zone_report, > + .bdrv_co_zone_mgmt = raw_co_zone_mgmt, > + > .bdrv_co_truncate = raw_co_truncate, > .bdrv_getlength = raw_getlength, > .has_variable_length = true, > diff --git a/block/io.c b/block/io.c > index 789e6373d5..04f108c128 100644 > --- a/block/io.c > +++ b/block/io.c > @@ -3258,6 +3258,32 @@ out: > return co.ret; > } > > +int bdrv_co_zone_report(BlockDriverState *bs, > + struct BlockZoneDescriptor *zones, uint32_t *nr_zones, > + int64_t offset, int64_t len, uint8_t partial) > +{ > + int ret; > + ret = bs->drv->bdrv_co_zone_report(bs, zones, nr_zones, > + offset, len, partial); > + if (!ret) { > + return -ENOTSUP; Nope. ret needs to be returned as is here. But you need to guard against invalid calls with something like: if (!bs->drv->bdrv_co_zone_report) return -ENOTSUP; Before actully calling bs->drv->bdrv_co_zone_report. > + } > + > + return ret; > +} > + > +int bdrv_co_zone_mgmt(BlockDriverState *bs, enum zone_op op, > + int64_t offset, int64_t len) > +{ > + int ret; > + ret = bs->drv->bdrv_co_zone_mgmt(bs, op, offset, len); > + if (!ret) { > + return -ENOTSUP; Same comment as above. > + } > + > + return ret; > +} > + > void *qemu_blockalign(BlockDriverState *bs, size_t size) > { > IO_CODE(); > diff --git a/include/block/block-common.h b/include/block/block-common.h > index fdb7306e78..62eeeba8da 100644 > --- a/include/block/block-common.h > +++ b/include/block/block-common.h > @@ -48,6 +48,14 @@ > typedef struct BlockDriver BlockDriver; > typedef struct BdrvChild BdrvChild; > typedef struct BdrvChildClass BdrvChildClass; > +/* zone descriptor data structure */ > +typedef struct BlockZoneDescriptor BlockZoneDescriptor; > +enum zone_op { > + zone_open, > + zone_close, > + zone_finish, > + zone_reset, > +}; > > typedef struct BlockDriverInfo { > /* in bytes, 0 if irrelevant */ > diff --git a/include/block/block-io.h b/include/block/block-io.h > index 62c84f0519..a915b554ad 100644 > --- a/include/block/block-io.h > +++ b/include/block/block-io.h > @@ -80,6 +80,14 @@ int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf); > /* Ensure contents are flushed to disk. */ > int coroutine_fn bdrv_co_flush(BlockDriverState *bs); > > +/* Report zone information of zone block device. */ > +int coroutine_fn bdrv_co_zone_report(BlockDriverState *bs, > + struct BlockZoneDescriptor *zones, > + uint32_t *nr_zones, int64_t offset, > + int64_t len, uint8_t partial); > +int coroutine_fn bdrv_co_zone_mgmt(BlockDriverState *bs, enum zone_op op, > + int64_t offset, int64_t len); > + > int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes); > bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs); > int bdrv_block_status(BlockDriverState *bs, int64_t offset, > @@ -290,6 +298,9 @@ bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); > int generated_co_wrapper > bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); > > +int generated_co_wrapper blk_zone_report(BlockBackend *blk); > +int generated_co_wrapper blk_zone_mgmt(BlockBackend *blk, enum zone_op op); > + > /** > * bdrv_parent_drained_begin_single: > * > diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h > index 8947abab76..520a733481 100644 > --- a/include/block/block_int-common.h > +++ b/include/block/block_int-common.h > @@ -691,6 +691,12 @@ struct BlockDriver { > QEMUIOVector *qiov, > int64_t pos); > > + int coroutine_fn (*bdrv_co_zone_report)(BlockDriverState *bs, > + struct BlockZoneDescriptor *zones, uint32_t *nr_zones, > + int64_t offset, int64_t len, uint8_t partial); > + int coroutine_fn (*bdrv_co_zone_mgmt)(BlockDriverState *bs, enum zone_op op, > + int64_t offset, int64_t len); > + > /* removable device specific */ > bool (*bdrv_is_inserted)(BlockDriverState *bs); > void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag); > diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c > index 2f0d8ac25a..ca02b4047e 100644 > --- a/qemu-io-cmds.c > +++ b/qemu-io-cmds.c > @@ -1706,6 +1706,67 @@ static const cmdinfo_t flush_cmd = { > .oneline = "flush all in-core file state to disk", > }; > > +static int zone_report_f(BlockBackend *blk, int argc, char **argv) > +{ > + return blk_zone_report(blk); > +} > + > +static const cmdinfo_t zone_report_cmd = { > + .name = "zone_report", > + .altname = "f", > + .cfunc = zone_report_f, > + .oneline = "report zone information in zone block device", > +}; > + > +static int zone_open_f(BlockBackend *blk, int argc, char **argv) > +{ > + return blk_zone_mgmt(blk, zone_open); > +} > + > +static const cmdinfo_t zone_open_cmd = { > + .name = "zone_open", > + .altname = "f", > + .cfunc = zone_open_f, > + .oneline = "explicit open a range of zones in zone block device", > +}; > + > +static int zone_close_f(BlockBackend *blk, int argc, char **argv) > +{ > + return blk_zone_mgmt(blk, zone_close); > +} > + > +static const cmdinfo_t zone_close_cmd = { > + .name = "zone_close", > + .altname = "f", > + .cfunc = zone_close_f, > + .oneline = "close a range of zones in zone block device", > +}; > + > +static int zone_finish_f(BlockBackend *blk, int argc, char **argv) > +{ > + return blk_zone_mgmt(blk, zone_finish); > +} > + > +static const cmdinfo_t zone_finish_cmd = { > + .name = "zone_finish", > + .altname = "f", > + .cfunc = zone_finish_f, > + .oneline = "finish a range of zones in zone block device", > +}; > + > +static int zone_reset_f(BlockBackend *blk, int argc, char **argv) > +{ > + return blk_zone_mgmt(blk, zone_reset); > +} > + > +static const cmdinfo_t zone_reset_cmd = { > + .name = "zone_reset", > + .altname = "f", > + .cfunc = zone_reset_f, > + .oneline = "reset a zone write pointer in zone block device", > +}; > + > + > static int truncate_f(BlockBackend *blk, int argc, char **argv); > static const cmdinfo_t truncate_cmd = { > .name = "truncate", > @@ -2498,6 +2559,11 @@ static void __attribute((constructor)) init_qemuio_commands(void) > qemuio_add_command(&aio_write_cmd); > qemuio_add_command(&aio_flush_cmd); > qemuio_add_command(&flush_cmd); > + qemuio_add_command(&zone_report_cmd); > + qemuio_add_command(&zone_open_cmd); > + qemuio_add_command(&zone_close_cmd); > + qemuio_add_command(&zone_finish_cmd); > + qemuio_add_command(&zone_reset_cmd); > qemuio_add_command(&truncate_cmd); > qemuio_add_command(&length_cmd); > qemuio_add_command(&info_cmd); > diff --git a/tests/qemu-iotests/tests/zoned.sh b/tests/qemu-iotests/tests/zoned.sh > new file mode 100644 > index 0000000000..a1596e8340 > --- /dev/null > +++ b/tests/qemu-iotests/tests/zoned.sh > @@ -0,0 +1,47 @@ > +#!/usr/bin/env bash > +# > +# Test zone management operations. > +# > + > + > + > +seq=`basename $0` > +echo "QA output created by $seq" > + > +status=1 # failure is the default! > + > +_cleanup() > +{ > + _cleanup_test_img > +} > +trap "_cleanup; exit \$status" 0 1 2 3 15 > + > +# get standard environment, filters and checks > +. ./common.rc > +. ./common.filter > +. ./common.pattern > + > +# much of this could be generic for any format supporting compression. > +_supported_fmt qcow qcow2 > +_supported_proto file > +_supported_os Linux > + > +TEST_OFFSETS="0 1000 4000" > +TEST_LENS="1000" > +TEST_OPS="zone_report zone_open zone_close zone_finish zone_reset zone_append" > + > + > +echo "Testing a null_blk device" > +echo > + > +for offset in $TEST_OFFSETS; do > + echo "At offset $offset:" > + io_test "write -b" $offset $CLUSTER_SIZE 8 > + io_test "read -b" $offset $CLUSTER_SIZE 8 > + _check_test_img > +done > + > +# success, all done > +echo "*** done" > +rm -f $seq.full > +status=0
Damien Le Moal <damien.lemoal@opensource.wdc.com> 于2022年6月13日周一 14:24写道: > > On 6/13/22 13:09, Sam Li wrote: > > By adding zone management operations in BlockDriver, storage > > controller emulation can use the new block layer APIs including > > zone_report, zone_reset, zone_open, zone_close, and zone_finish. > > Like a real review, commenting inline below. > > > > > Signed-off-by: Sam Li <faithilikerun@gmail.com> > > --- > > block/block-backend.c | 20 ++++ > > block/coroutines.h | 6 ++ > > block/file-posix.c | 174 ++++++++++++++++++++++++++++++ > > block/io.c | 26 +++++ > > include/block/block-common.h | 8 ++ > > include/block/block-io.h | 11 ++ > > include/block/block_int-common.h | 6 ++ > > qemu-io-cmds.c | 66 ++++++++++++ > > tests/qemu-iotests/tests/zoned.sh | 47 ++++++++ > > 9 files changed, 364 insertions(+) > > create mode 100644 tests/qemu-iotests/tests/zoned.sh > > > > diff --git a/block/block-backend.c b/block/block-backend.c > > index e0e1aff4b1..4695e5f9fe 100644 > > --- a/block/block-backend.c > > +++ b/block/block-backend.c > > @@ -104,6 +104,8 @@ typedef struct BlockBackendAIOCB { > > int ret; > > } BlockBackendAIOCB; > > > > + > > + > > No need for these extra blank line. > > > static const AIOCBInfo block_backend_aiocb_info = { > > .get_aio_context = blk_aiocb_get_aio_context, > > .aiocb_size = sizeof(BlockBackendAIOCB), > > @@ -1810,6 +1812,23 @@ int blk_flush(BlockBackend *blk) > > return ret; > > } > > > > +int blk_co_zone_report(BlockBackend *blk) > > +{ > > + int ret; > > + ret = bdrv_co_zone_report(blk->root->bs, NULL, 0, 0, 0, 0); > > + > > + return ret; > > Simplify: > > return bdrv_co_zone_report(blk->root->bs, NULL, 0, 0, 0, 0); > > But I can see that this code is temporary anyway :) > > > +} > > + > > +int blk_co_zone_mgmt(BlockBackend *blk, enum zone_op op) > > +{ > > + int ret; > > + ret = bdrv_co_zone_mgmt(blk->root->bs, op, 0, 100); > > + > > + return ret; > > +} > > + > > + > > void blk_drain(BlockBackend *blk) > > { > > BlockDriverState *bs = blk_bs(blk); > > @@ -2634,3 +2653,4 @@ int blk_make_empty(BlockBackend *blk, Error **errp) > > > > return bdrv_make_empty(blk->root, errp); > > } > > + > > Please do not randomly add blank lines. > > > diff --git a/block/coroutines.h b/block/coroutines.h > > index 830ecaa733..e29222a68a 100644 > > --- a/block/coroutines.h > > +++ b/block/coroutines.h > > @@ -81,6 +81,10 @@ blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes); > > > > int coroutine_fn blk_co_do_flush(BlockBackend *blk); > > > > +int coroutine_fn blk_co_zone_report(BlockBackend *blk); > > + > > +int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, enum zone_op op); > > + > > > > /* > > * "I/O or GS" API functions. These functions can run without > > @@ -129,4 +133,6 @@ blk_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes); > > > > int generated_co_wrapper blk_do_flush(BlockBackend *blk); > > > > + > > + > > No need for these blank lines. > > > #endif /* BLOCK_COROUTINES_H */ > > diff --git a/block/file-posix.c b/block/file-posix.c > > index 48cd096624..42646acc4e 100644 > > --- a/block/file-posix.c > > +++ b/block/file-posix.c > > @@ -73,6 +73,7 @@ > > #include <linux/hdreg.h> > > #include <linux/magic.h> > > #include <scsi/sg.h> > > +#include <linux/blkzoned.h> > > #ifdef __s390__ > > #include <asm/dasd.h> > > #endif > > @@ -178,6 +179,167 @@ typedef struct BDRVRawReopenState { > > bool check_cache_dropped; > > } BDRVRawReopenState; > > > > + > > Another extra blank line not needed. > > > +enum zone_type { > > + BLK_ZT_CONV = BLK_ZONE_TYPE_CONVENTIONAL, > > + BLK_ZT_SWR = BLK_ZONE_TYPE_SEQWRITE_REQ, > > + BLK_ZT_SWP = BLK_ZONE_TYPE_SEQWRITE_PREF, > > +}; > > + > > +enum zone_cond { > > + BLK_ZS_NOT_WP = BLK_ZONE_COND_NOT_WP, > > + BLK_ZS_EMPTY = BLK_ZONE_COND_EMPTY, > > + BLK_ZS_IOPEN = BLK_ZONE_COND_IMP_OPEN, > > + BLK_ZS_EOPEN = BLK_ZONE_COND_EXP_OPEN, > > + BLK_ZS_CLOSED = BLK_ZONE_COND_CLOSED, > > + BLK_ZS_RDONLY = BLK_ZONE_COND_READONLY, > > + BLK_ZS_FULL = BLK_ZONE_COND_FULL, > > + BLK_ZS_OFFLINE = BLK_ZONE_COND_OFFLINE, > > +}; > > + > > +/* > > + * Zone descriptor data structure. > > + * Provide information on a zone with all position and size values in bytes. > > + */ > > +struct BlockZoneDescriptor { > > + uint64_t start; > > + uint32_t length; > > + uint32_t cap; > > + uint64_t wp; > > + enum zone_type type; > > + enum zone_cond cond; > > +}; > > + > > +enum zone_model { > > + ZBD_HOST_MANAGED, > > + ZBD_HOST_AWARE, > > Please stay consistent with the macro prefixes. Use BLK_Zxxx here too. > Likely BLK_ZM_HOST_MANAGED and BLK_ZM_HOST_AWARE. > > All of the above definitions should not be in block/file-posix.c because > they must be common to the entire block layer backend (regardless of the > driver implementing the zone operations). So these need to go into some > common .h header file so that both backend drivers (e.g. file-posix) and > front end (virtio-blk) can both see the same definitions. > > > +}; > > + > > +/** > > + * Zone device information data structure. > > + * Provide information on a device. > > + */ > > +typedef struct zbd_dev { > > + enum zone_model model; > > + uint32_t block_size; > > + uint32_t write_granularity; > > + uint32_t nr_zones; > > + struct BlockZoneDescriptor *zones; /* array of zones */ > > + uint32_t max_nr_open_zones; /* maximum number of explicitly open zones */ > > + uint32_t max_nr_active_zones; > > +} zbd_dev; > > + > > +/** > > + * zone report - Get a zone block device's information in the > > + * form of an array of zone descriptors. > > + * > > + * @param bs: passing zone block device file descriptor > > + * @param zones: Space to hold zone information on reply > > + * @param offset: the location in the zone block device > > + * @param len: the length of reported zone information* > > + * @param partial: if partial has zero value, it is the number > > + * of zones that can be reported; else, set the nr_zones to the > > + * number of fully transferred zone descriptors in the data buffer. > > + * @return 0 on success, -1 on failure > > + */ > > +static int raw_co_zone_report(BlockDriverState *bs, > > + struct BlockZoneDescriptor *zones, uint32_t *nr_zones, > > + int64_t offset, int64_t len, uint8_t partial) { > > Remove the partial argument from this interface. We have decided to drop > it from the virtio specs and linux ioctl do not need it. Also, it is > generally good practice to have the first arguments of a function be the > "inputs" and the last ones the outputs. So: > > static int raw_co_zone_report(BlockDriverState *bs, > int64_t offset, int64_t len, > uint32_t *nr_zones, > struct BlockZoneDescriptor *zones) > > would be better, I think. > In any case, this function interface needs rethinking. E.g., what are the > inputs and the outputs ? > > * Option (1) > > Input: (a) the offset where to start the report from, (b) the array of > zones to fill up and (c) the maximum number of zones that the caller wants > (which may be just 1). > Ouput: (a) the array of zones filled up and (b) the number of zone > decriptors that were filled in the array of zones, which can be LESS than > what the caller requested. > > The interface then is: > > static int raw_co_zone_report(BlockDriverState *bs, > int64_t offset, uint32_t *nr_zones, > struct BlockZoneDescriptor *zones); > > * Option (2) > > Input: (a) the offset where to start the report from, (b) the maximum > number of zones that the caller wants (which may be just 1). > Ouput: (a) an allocated and filled array of zones and (b) the number of > zone decriptors that were filled in the allocated array of zones, which > can be LESS than what the caller requested. > > The interface then is: > > static int raw_co_zone_report(BlockDriverState *bs, > int64_t offset, uint32_t *nr_zones, > struct BlockZoneDescriptor **zones); > > Note the "**" since you will return an array of zones (that is, a pointer). > > For option (1), the caller will be responsible to manage the array of > zones and free it when it is not needed anymore. For (2), the caller will > still need to free the array of zones when not needed anymore BUT since > the caller has no idea how that array was allocated, (it may be an > anonymous mmap region !), you would need a "free array of zones" function too. > > So option (2) is more complicated. So I strongly suggest you go for > solution (1): the virtio-blk driver (or qemu-io for your test) will > allocate the array of zones and pass it along as an argument for this > function to use it and fill the array. It's kind of like the delete_file before. I should finish solution 1 before moving to improvements. Thanks for reviewing. The advice is very helpful and I'll fix these problems as soon as possible. Best regards, Sam > > > + printf("%s\n", "start to report zone"); > > + BDRVRawState *s = bs->opaque; > > + > > + struct blk_zone_report *rep; > > + uint32_t rep_nr_zones = 0; > > + int ret = 0, i = 0, start_sector = 0; > > Not sure about qemu code style rules, but generally: > 1) Mixing code and declarations is a BAD idea. It makes code reading > harder. So move that printf after the declarations. > 2) No blank lines in the middle of the declarations. > > > + > > + if (ioctl(s->fd, BLKGETNRZONES, &rep_nr_zones)) { > > + /* Done */ > > + error_report("number of zones not available"); > > + } > > You do not need this. You have the nr_zones input argument and if that is > too large, then the kernel will simply return less zones than what the > caller asked for. You do need to check that nr_zones is not 0. > > > + > > + fprintf(stdout, "Got %u zone descriptors\n", rep_nr_zones); > > + rep = malloc(sizeof(struct blk_zone_report) + > > + rep_nr_zones * sizeof(struct blk_zone)); > > + if (!rep) { > > + return -1; > > + } > > + > > + rep->nr_zones = rep_nr_zones; > > + rep->sector = start_sector; > > + > > + while (i < rep_nr_zones) { > > + ret = ioctl(s->fd, BLKREPORTZONE, rep); > > + if (ret != 0) { > > + error_report("can't report zone"); > > + } > > + > > + fprintf(stdout, "start: 0x%llx, len 0x%llx, cap 0x%llx, " > > + "wptr 0x%llx reset: %u non-seq:%u, zcond:%u, [type: %u]\n", > > + rep->zones[i].start, rep->zones[i].len, > > + rep->zones[i].capacity, rep->zones[i].wp, > > + rep->zones[i].reset, rep->zones[i].non_seq, > > + rep->zones[i].cond, rep->zones[i].type); > > + ++i; > > The ioctl in the loop may have returned a lot more than 1 zone. The actual > number that is returned is indicated by rep->nr_zones after the ioctl > call. So instead of i++, you need "i += rep->nr_zones". And you also need > to stop (break) if rep->nr_zones is 0. > > > + } > > + > > + free(rep); > > You are not updating the nr_zones argument "*nr_zones = i;" > > Overall: last week, we decided that you should not worry about the ioctl > for now and simply fill directly some value for the first zone in the > "struct BlockZoneDescriptor *zones" array and return only that one zone. > And the printf should be in your qemu-io test so that you can check that > you are seeing the zone descriptors being propagated from the driver to > the caller. > > > + > > + return ret; > > +} > > + > > +/** > > + * zone management operations - Execute an operation on a zone > > + */ > > +static int raw_co_zone_mgmt(BlockDriverState *bs, enum zone_op op, > > + int64_t offset, int64_t len) { > > + BDRVRawState *s = bs->opaque; > > + struct blk_zone_range range; > > + const char *ioctl_name; > > + uint64_t ioctl_op; > > + int ret; > > + > > + switch (op) { > > + case zone_open: > > + ioctl_name = "BLKOPENZONE"; > > + ioctl_op = BLKOPENZONE; > > + break; > > + case zone_close: > > + ioctl_name = "BLKCLOSEZONE"; > > + ioctl_op = BLKCLOSEZONE; > > + break; > > + case zone_finish: > > + ioctl_name = "BLKFINISHZONE"; > > + ioctl_op = BLKFINISHZONE; > > + break; > > + case zone_reset: > > + ioctl_name = "BLKRESETZONE"; > > + ioctl_op = BLKRESETZONE; > > + break; > > + default: > > + error_report("Invalid zone operation 0x%x", op); > > + errno = -EINVAL; > > + return -1; > > + } > > + > > + /* Execute the operation */ > > + range.sector = 0; > > This should be "= offset". > > > + range.nr_sectors = 100; > > And this should be "= len". > > You have the arguments, use them ! > > > + ret = ioctl(s->fd, ioctl_op, &range); > > + if (ret != 0) { > > + if (errno == ENOTTY) { > > + error_report("ioctl %s is not supported", ioctl_name); > > + errno = ENOTSUP; > > This is most likely not necessary since you will have checked already that > the device is a zoned device, so you know it accepts these ioctls. > > > + } else { > > + error_report("ioctl %s failed %d", > > + ioctl_name, errno); > > + } > > + return -1; > > + } > > + > > + return 0; > > +} > > + > > + > > static int fd_open(BlockDriverState *bs) > > { > > BDRVRawState *s = bs->opaque; > > @@ -3324,6 +3486,9 @@ BlockDriver bdrv_file = { > > .bdrv_abort_perm_update = raw_abort_perm_update, > > .create_opts = &raw_create_opts, > > .mutable_opts = mutable_opts, > > + > > + .bdrv_co_zone_report = raw_co_zone_report, > > + .bdrv_co_zone_mgmt = raw_co_zone_mgmt, > > }; > > > > /***********************************************/ > > @@ -3697,6 +3862,9 @@ static BlockDriver bdrv_host_device = { > > .bdrv_probe_blocksizes = hdev_probe_blocksizes, > > .bdrv_probe_geometry = hdev_probe_geometry, > > > > + .bdrv_co_zone_report = raw_co_zone_report, > > + .bdrv_co_zone_mgmt = raw_co_zone_mgmt, > > These probably need to be in a #ifdef __linux__ / #endif since these > functions use linux only ioctl. However, you are unconditionally adding > the method to bdrv_host_device, which is also going to be used for regular > disks. So not good. You may need to define a: > "BlockDriver bdrv_zoned_host_device" which is the same as bdrv_host_device > + adding the zone operation. When the driver is initialized, you choose > how it is registered aftewr testing if the host device is zoned or not. > > > + > > /* generic scsi device */ > > #ifdef __linux__ > > .bdrv_co_ioctl = hdev_co_ioctl, > > @@ -3809,6 +3977,9 @@ static BlockDriver bdrv_host_cdrom = { > > .bdrv_io_unplug = raw_aio_unplug, > > .bdrv_attach_aio_context = raw_aio_attach_aio_context, > > > > + .bdrv_co_zone_report = raw_co_zone_report, > > + .bdrv_co_zone_mgmt = raw_co_zone_mgmt, > > CROM do not have zones :) > > > + > > .bdrv_co_truncate = raw_co_truncate, > > .bdrv_getlength = raw_getlength, > > .has_variable_length = true, > > @@ -3939,6 +4110,9 @@ static BlockDriver bdrv_host_cdrom = { > > .bdrv_io_unplug = raw_aio_unplug, > > .bdrv_attach_aio_context = raw_aio_attach_aio_context, > > > > + .bdrv_co_zone_report = raw_co_zone_report, > > + .bdrv_co_zone_mgmt = raw_co_zone_mgmt, > > + > > .bdrv_co_truncate = raw_co_truncate, > > .bdrv_getlength = raw_getlength, > > .has_variable_length = true, > > diff --git a/block/io.c b/block/io.c > > index 789e6373d5..04f108c128 100644 > > --- a/block/io.c > > +++ b/block/io.c > > @@ -3258,6 +3258,32 @@ out: > > return co.ret; > > } > > > > +int bdrv_co_zone_report(BlockDriverState *bs, > > + struct BlockZoneDescriptor *zones, uint32_t *nr_zones, > > + int64_t offset, int64_t len, uint8_t partial) > > +{ > > + int ret; > > + ret = bs->drv->bdrv_co_zone_report(bs, zones, nr_zones, > > + offset, len, partial); > > + if (!ret) { > > + return -ENOTSUP; > > Nope. ret needs to be returned as is here. But you need to guard against > invalid calls with something like: > > if (!bs->drv->bdrv_co_zone_report) > return -ENOTSUP; > > Before actully calling bs->drv->bdrv_co_zone_report. > > > + } > > + > > + return ret; > > +} > > + > > +int bdrv_co_zone_mgmt(BlockDriverState *bs, enum zone_op op, > > + int64_t offset, int64_t len) > > +{ > > + int ret; > > + ret = bs->drv->bdrv_co_zone_mgmt(bs, op, offset, len); > > + if (!ret) { > > + return -ENOTSUP; > > Same comment as above. > > > + } > > + > > + return ret; > > +} > > + > > void *qemu_blockalign(BlockDriverState *bs, size_t size) > > { > > IO_CODE(); > > diff --git a/include/block/block-common.h b/include/block/block-common.h > > index fdb7306e78..62eeeba8da 100644 > > --- a/include/block/block-common.h > > +++ b/include/block/block-common.h > > @@ -48,6 +48,14 @@ > > typedef struct BlockDriver BlockDriver; > > typedef struct BdrvChild BdrvChild; > > typedef struct BdrvChildClass BdrvChildClass; > > +/* zone descriptor data structure */ > > +typedef struct BlockZoneDescriptor BlockZoneDescriptor; > > +enum zone_op { > > + zone_open, > > + zone_close, > > + zone_finish, > > + zone_reset, > > +}; > > > > typedef struct BlockDriverInfo { > > /* in bytes, 0 if irrelevant */ > > diff --git a/include/block/block-io.h b/include/block/block-io.h > > index 62c84f0519..a915b554ad 100644 > > --- a/include/block/block-io.h > > +++ b/include/block/block-io.h > > @@ -80,6 +80,14 @@ int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf); > > /* Ensure contents are flushed to disk. */ > > int coroutine_fn bdrv_co_flush(BlockDriverState *bs); > > > > +/* Report zone information of zone block device. */ > > +int coroutine_fn bdrv_co_zone_report(BlockDriverState *bs, > > + struct BlockZoneDescriptor *zones, > > + uint32_t *nr_zones, int64_t offset, > > + int64_t len, uint8_t partial); > > +int coroutine_fn bdrv_co_zone_mgmt(BlockDriverState *bs, enum zone_op op, > > + int64_t offset, int64_t len); > > + > > int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes); > > bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs); > > int bdrv_block_status(BlockDriverState *bs, int64_t offset, > > @@ -290,6 +298,9 @@ bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); > > int generated_co_wrapper > > bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); > > > > +int generated_co_wrapper blk_zone_report(BlockBackend *blk); > > +int generated_co_wrapper blk_zone_mgmt(BlockBackend *blk, enum zone_op op); > > + > > /** > > * bdrv_parent_drained_begin_single: > > * > > diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h > > index 8947abab76..520a733481 100644 > > --- a/include/block/block_int-common.h > > +++ b/include/block/block_int-common.h > > @@ -691,6 +691,12 @@ struct BlockDriver { > > QEMUIOVector *qiov, > > int64_t pos); > > > > + int coroutine_fn (*bdrv_co_zone_report)(BlockDriverState *bs, > > + struct BlockZoneDescriptor *zones, uint32_t *nr_zones, > > + int64_t offset, int64_t len, uint8_t partial); > > + int coroutine_fn (*bdrv_co_zone_mgmt)(BlockDriverState *bs, enum zone_op op, > > + int64_t offset, int64_t len); > > + > > /* removable device specific */ > > bool (*bdrv_is_inserted)(BlockDriverState *bs); > > void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag); > > diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c > > index 2f0d8ac25a..ca02b4047e 100644 > > --- a/qemu-io-cmds.c > > +++ b/qemu-io-cmds.c > > @@ -1706,6 +1706,67 @@ static const cmdinfo_t flush_cmd = { > > .oneline = "flush all in-core file state to disk", > > }; > > > > +static int zone_report_f(BlockBackend *blk, int argc, char **argv) > > +{ > > + return blk_zone_report(blk); > > +} > > + > > +static const cmdinfo_t zone_report_cmd = { > > + .name = "zone_report", > > + .altname = "f", > > + .cfunc = zone_report_f, > > + .oneline = "report zone information in zone block device", > > +}; > > + > > +static int zone_open_f(BlockBackend *blk, int argc, char **argv) > > +{ > > + return blk_zone_mgmt(blk, zone_open); > > +} > > + > > +static const cmdinfo_t zone_open_cmd = { > > + .name = "zone_open", > > + .altname = "f", > > + .cfunc = zone_open_f, > > + .oneline = "explicit open a range of zones in zone block device", > > +}; > > + > > +static int zone_close_f(BlockBackend *blk, int argc, char **argv) > > +{ > > + return blk_zone_mgmt(blk, zone_close); > > +} > > + > > +static const cmdinfo_t zone_close_cmd = { > > + .name = "zone_close", > > + .altname = "f", > > + .cfunc = zone_close_f, > > + .oneline = "close a range of zones in zone block device", > > +}; > > + > > +static int zone_finish_f(BlockBackend *blk, int argc, char **argv) > > +{ > > + return blk_zone_mgmt(blk, zone_finish); > > +} > > + > > +static const cmdinfo_t zone_finish_cmd = { > > + .name = "zone_finish", > > + .altname = "f", > > + .cfunc = zone_finish_f, > > + .oneline = "finish a range of zones in zone block device", > > +}; > > + > > +static int zone_reset_f(BlockBackend *blk, int argc, char **argv) > > +{ > > + return blk_zone_mgmt(blk, zone_reset); > > +} > > + > > +static const cmdinfo_t zone_reset_cmd = { > > + .name = "zone_reset", > > + .altname = "f", > > + .cfunc = zone_reset_f, > > + .oneline = "reset a zone write pointer in zone block device", > > +}; > > + > > + > > static int truncate_f(BlockBackend *blk, int argc, char **argv); > > static const cmdinfo_t truncate_cmd = { > > .name = "truncate", > > @@ -2498,6 +2559,11 @@ static void __attribute((constructor)) init_qemuio_commands(void) > > qemuio_add_command(&aio_write_cmd); > > qemuio_add_command(&aio_flush_cmd); > > qemuio_add_command(&flush_cmd); > > + qemuio_add_command(&zone_report_cmd); > > + qemuio_add_command(&zone_open_cmd); > > + qemuio_add_command(&zone_close_cmd); > > + qemuio_add_command(&zone_finish_cmd); > > + qemuio_add_command(&zone_reset_cmd); > > qemuio_add_command(&truncate_cmd); > > qemuio_add_command(&length_cmd); > > qemuio_add_command(&info_cmd); > > diff --git a/tests/qemu-iotests/tests/zoned.sh b/tests/qemu-iotests/tests/zoned.sh > > new file mode 100644 > > index 0000000000..a1596e8340 > > --- /dev/null > > +++ b/tests/qemu-iotests/tests/zoned.sh > > @@ -0,0 +1,47 @@ > > +#!/usr/bin/env bash > > +# > > +# Test zone management operations. > > +# > > + > > + > > + > > +seq=`basename $0` > > +echo "QA output created by $seq" > > + > > +status=1 # failure is the default! > > + > > +_cleanup() > > +{ > > + _cleanup_test_img > > +} > > +trap "_cleanup; exit \$status" 0 1 2 3 15 > > + > > +# get standard environment, filters and checks > > +. ./common.rc > > +. ./common.filter > > +. ./common.pattern > > + > > +# much of this could be generic for any format supporting compression. > > +_supported_fmt qcow qcow2 > > +_supported_proto file > > +_supported_os Linux > > + > > +TEST_OFFSETS="0 1000 4000" > > +TEST_LENS="1000" > > +TEST_OPS="zone_report zone_open zone_close zone_finish zone_reset zone_append" > > + > > + > > +echo "Testing a null_blk device" > > +echo > > + > > +for offset in $TEST_OFFSETS; do > > + echo "At offset $offset:" > > + io_test "write -b" $offset $CLUSTER_SIZE 8 > > + io_test "read -b" $offset $CLUSTER_SIZE 8 > > + _check_test_img > > +done > > + > > +# success, all done > > +echo "*** done" > > +rm -f $seq.full > > +status=0 > > > -- > Damien Le Moal > Western Digital Research
On 6/13/22 18:12, Sam Li wrote: [...] >>> +}; >>> + >>> +/** >>> + * Zone device information data structure. >>> + * Provide information on a device. >>> + */ >>> +typedef struct zbd_dev { >>> + enum zone_model model; >>> + uint32_t block_size; >>> + uint32_t write_granularity; >>> + uint32_t nr_zones; >>> + struct BlockZoneDescriptor *zones; /* array of zones */ >>> + uint32_t max_nr_open_zones; /* maximum number of explicitly open zones */ >>> + uint32_t max_nr_active_zones; >>> +} zbd_dev; >>> + >>> +/** >>> + * zone report - Get a zone block device's information in the >>> + * form of an array of zone descriptors. >>> + * >>> + * @param bs: passing zone block device file descriptor >>> + * @param zones: Space to hold zone information on reply >>> + * @param offset: the location in the zone block device >>> + * @param len: the length of reported zone information* >>> + * @param partial: if partial has zero value, it is the number >>> + * of zones that can be reported; else, set the nr_zones to the >>> + * number of fully transferred zone descriptors in the data buffer. >>> + * @return 0 on success, -1 on failure >>> + */ >>> +static int raw_co_zone_report(BlockDriverState *bs, >>> + struct BlockZoneDescriptor *zones, uint32_t *nr_zones, >>> + int64_t offset, int64_t len, uint8_t partial) { >> >> Remove the partial argument from this interface. We have decided to drop >> it from the virtio specs and linux ioctl do not need it. Also, it is >> generally good practice to have the first arguments of a function be the >> "inputs" and the last ones the outputs. So: >> >> static int raw_co_zone_report(BlockDriverState *bs, >> int64_t offset, int64_t len, >> uint32_t *nr_zones, >> struct BlockZoneDescriptor *zones) >> >> would be better, I think. >> In any case, this function interface needs rethinking. E.g., what are the >> inputs and the outputs ? >> >> * Option (1) >> >> Input: (a) the offset where to start the report from, (b) the array of >> zones to fill up and (c) the maximum number of zones that the caller wants >> (which may be just 1). >> Ouput: (a) the array of zones filled up and (b) the number of zone >> decriptors that were filled in the array of zones, which can be LESS than >> what the caller requested. >> >> The interface then is: >> >> static int raw_co_zone_report(BlockDriverState *bs, >> int64_t offset, uint32_t *nr_zones, >> struct BlockZoneDescriptor *zones); >> >> * Option (2) >> >> Input: (a) the offset where to start the report from, (b) the maximum >> number of zones that the caller wants (which may be just 1). >> Ouput: (a) an allocated and filled array of zones and (b) the number of >> zone decriptors that were filled in the allocated array of zones, which >> can be LESS than what the caller requested. >> >> The interface then is: >> >> static int raw_co_zone_report(BlockDriverState *bs, >> int64_t offset, uint32_t *nr_zones, >> struct BlockZoneDescriptor **zones); >> >> Note the "**" since you will return an array of zones (that is, a pointer). >> >> For option (1), the caller will be responsible to manage the array of >> zones and free it when it is not needed anymore. For (2), the caller will >> still need to free the array of zones when not needed anymore BUT since >> the caller has no idea how that array was allocated, (it may be an >> anonymous mmap region !), you would need a "free array of zones" function too. >> >> So option (2) is more complicated. So I strongly suggest you go for >> solution (1): the virtio-blk driver (or qemu-io for your test) will >> allocate the array of zones and pass it along as an argument for this >> function to use it and fill the array. > > It's kind of like the delete_file before. I should finish solution 1 > before moving to improvements. > > Thanks for reviewing. The advice is very helpful and I'll fix these > problems as soon as possible. As we discussed last week during the call, first do not worry about the ioctl for report zones. Get the interface working so that you can call it from qemu-io exactly like the virtio-blk driver would (well almost the same), that is, with all the arguments. So for instance, pass along the report start offset and use that to generate a single zone report with a zone descriptor that has a start offset *equal* to the offset specified in qemu-io. And set the other zone fields (length, capacity, wp, etc) to some value. With that, when you run your zone report test with qemu-io, you can print the zone descriptor you get and check that the zone fields match. That would mean you are passing along the data correctly between the layers. With that in place, you can then implement the actual ioctl() call to get the real zone info from the drive. > > Best regards, > Sam > >> >>> + printf("%s\n", "start to report zone"); >>> + BDRVRawState *s = bs->opaque; >>> + >>> + struct blk_zone_report *rep; >>> + uint32_t rep_nr_zones = 0; >>> + int ret = 0, i = 0, start_sector = 0; >> >> Not sure about qemu code style rules, but generally: >> 1) Mixing code and declarations is a BAD idea. It makes code reading >> harder. So move that printf after the declarations. >> 2) No blank lines in the middle of the declarations. >> >>> + >>> + if (ioctl(s->fd, BLKGETNRZONES, &rep_nr_zones)) { >>> + /* Done */ >>> + error_report("number of zones not available"); >>> + } >> >> You do not need this. You have the nr_zones input argument and if that is >> too large, then the kernel will simply return less zones than what the >> caller asked for. You do need to check that nr_zones is not 0. >> >>> + >>> + fprintf(stdout, "Got %u zone descriptors\n", rep_nr_zones); >>> + rep = malloc(sizeof(struct blk_zone_report) + >>> + rep_nr_zones * sizeof(struct blk_zone)); >>> + if (!rep) { >>> + return -1; >>> + } >>> + >>> + rep->nr_zones = rep_nr_zones; >>> + rep->sector = start_sector; >>> + >>> + while (i < rep_nr_zones) { >>> + ret = ioctl(s->fd, BLKREPORTZONE, rep); >>> + if (ret != 0) { >>> + error_report("can't report zone"); >>> + } >>> + >>> + fprintf(stdout, "start: 0x%llx, len 0x%llx, cap 0x%llx, " >>> + "wptr 0x%llx reset: %u non-seq:%u, zcond:%u, [type: %u]\n", >>> + rep->zones[i].start, rep->zones[i].len, >>> + rep->zones[i].capacity, rep->zones[i].wp, >>> + rep->zones[i].reset, rep->zones[i].non_seq, >>> + rep->zones[i].cond, rep->zones[i].type); >>> + ++i; >> >> The ioctl in the loop may have returned a lot more than 1 zone. The actual >> number that is returned is indicated by rep->nr_zones after the ioctl >> call. So instead of i++, you need "i += rep->nr_zones". And you also need >> to stop (break) if rep->nr_zones is 0. >> >>> + } >>> + >>> + free(rep); >> >> You are not updating the nr_zones argument "*nr_zones = i;" >> >> Overall: last week, we decided that you should not worry about the ioctl >> for now and simply fill directly some value for the first zone in the >> "struct BlockZoneDescriptor *zones" array and return only that one zone. >> And the printf should be in your qemu-io test so that you can check that >> you are seeing the zone descriptors being propagated from the driver to >> the caller. >> >>> + >>> + return ret; >>> +} >>> + >>> +/** >>> + * zone management operations - Execute an operation on a zone >>> + */ >>> +static int raw_co_zone_mgmt(BlockDriverState *bs, enum zone_op op, >>> + int64_t offset, int64_t len) { >>> + BDRVRawState *s = bs->opaque; >>> + struct blk_zone_range range; >>> + const char *ioctl_name; >>> + uint64_t ioctl_op; >>> + int ret; >>> + >>> + switch (op) { >>> + case zone_open: >>> + ioctl_name = "BLKOPENZONE"; >>> + ioctl_op = BLKOPENZONE; >>> + break; >>> + case zone_close: >>> + ioctl_name = "BLKCLOSEZONE"; >>> + ioctl_op = BLKCLOSEZONE; >>> + break; >>> + case zone_finish: >>> + ioctl_name = "BLKFINISHZONE"; >>> + ioctl_op = BLKFINISHZONE; >>> + break; >>> + case zone_reset: >>> + ioctl_name = "BLKRESETZONE"; >>> + ioctl_op = BLKRESETZONE; >>> + break; >>> + default: >>> + error_report("Invalid zone operation 0x%x", op); >>> + errno = -EINVAL; >>> + return -1; >>> + } >>> + >>> + /* Execute the operation */ >>> + range.sector = 0; >> >> This should be "= offset". >> >>> + range.nr_sectors = 100; >> >> And this should be "= len". >> >> You have the arguments, use them ! >> >>> + ret = ioctl(s->fd, ioctl_op, &range); >>> + if (ret != 0) { >>> + if (errno == ENOTTY) { >>> + error_report("ioctl %s is not supported", ioctl_name); >>> + errno = ENOTSUP; >> >> This is most likely not necessary since you will have checked already that >> the device is a zoned device, so you know it accepts these ioctls. >> >>> + } else { >>> + error_report("ioctl %s failed %d", >>> + ioctl_name, errno); >>> + } >>> + return -1; >>> + } >>> + >>> + return 0; >>> +} >>> + >>> + >>> static int fd_open(BlockDriverState *bs) >>> { >>> BDRVRawState *s = bs->opaque; >>> @@ -3324,6 +3486,9 @@ BlockDriver bdrv_file = { >>> .bdrv_abort_perm_update = raw_abort_perm_update, >>> .create_opts = &raw_create_opts, >>> .mutable_opts = mutable_opts, >>> + >>> + .bdrv_co_zone_report = raw_co_zone_report, >>> + .bdrv_co_zone_mgmt = raw_co_zone_mgmt, >>> }; >>> >>> /***********************************************/ >>> @@ -3697,6 +3862,9 @@ static BlockDriver bdrv_host_device = { >>> .bdrv_probe_blocksizes = hdev_probe_blocksizes, >>> .bdrv_probe_geometry = hdev_probe_geometry, >>> >>> + .bdrv_co_zone_report = raw_co_zone_report, >>> + .bdrv_co_zone_mgmt = raw_co_zone_mgmt, >> >> These probably need to be in a #ifdef __linux__ / #endif since these >> functions use linux only ioctl. However, you are unconditionally adding >> the method to bdrv_host_device, which is also going to be used for regular >> disks. So not good. You may need to define a: >> "BlockDriver bdrv_zoned_host_device" which is the same as bdrv_host_device >> + adding the zone operation. When the driver is initialized, you choose >> how it is registered aftewr testing if the host device is zoned or not. >> >>> + >>> /* generic scsi device */ >>> #ifdef __linux__ >>> .bdrv_co_ioctl = hdev_co_ioctl, >>> @@ -3809,6 +3977,9 @@ static BlockDriver bdrv_host_cdrom = { >>> .bdrv_io_unplug = raw_aio_unplug, >>> .bdrv_attach_aio_context = raw_aio_attach_aio_context, >>> >>> + .bdrv_co_zone_report = raw_co_zone_report, >>> + .bdrv_co_zone_mgmt = raw_co_zone_mgmt, >> >> CROM do not have zones :) >> >>> + >>> .bdrv_co_truncate = raw_co_truncate, >>> .bdrv_getlength = raw_getlength, >>> .has_variable_length = true, >>> @@ -3939,6 +4110,9 @@ static BlockDriver bdrv_host_cdrom = { >>> .bdrv_io_unplug = raw_aio_unplug, >>> .bdrv_attach_aio_context = raw_aio_attach_aio_context, >>> >>> + .bdrv_co_zone_report = raw_co_zone_report, >>> + .bdrv_co_zone_mgmt = raw_co_zone_mgmt, >>> + >>> .bdrv_co_truncate = raw_co_truncate, >>> .bdrv_getlength = raw_getlength, >>> .has_variable_length = true, >>> diff --git a/block/io.c b/block/io.c >>> index 789e6373d5..04f108c128 100644 >>> --- a/block/io.c >>> +++ b/block/io.c >>> @@ -3258,6 +3258,32 @@ out: >>> return co.ret; >>> } >>> >>> +int bdrv_co_zone_report(BlockDriverState *bs, >>> + struct BlockZoneDescriptor *zones, uint32_t *nr_zones, >>> + int64_t offset, int64_t len, uint8_t partial) >>> +{ >>> + int ret; >>> + ret = bs->drv->bdrv_co_zone_report(bs, zones, nr_zones, >>> + offset, len, partial); >>> + if (!ret) { >>> + return -ENOTSUP; >> >> Nope. ret needs to be returned as is here. But you need to guard against >> invalid calls with something like: >> >> if (!bs->drv->bdrv_co_zone_report) >> return -ENOTSUP; >> >> Before actully calling bs->drv->bdrv_co_zone_report. >> >>> + } >>> + >>> + return ret; >>> +} >>> + >>> +int bdrv_co_zone_mgmt(BlockDriverState *bs, enum zone_op op, >>> + int64_t offset, int64_t len) >>> +{ >>> + int ret; >>> + ret = bs->drv->bdrv_co_zone_mgmt(bs, op, offset, len); >>> + if (!ret) { >>> + return -ENOTSUP; >> >> Same comment as above. >> >>> + } >>> + >>> + return ret; >>> +} >>> + >>> void *qemu_blockalign(BlockDriverState *bs, size_t size) >>> { >>> IO_CODE(); >>> diff --git a/include/block/block-common.h b/include/block/block-common.h >>> index fdb7306e78..62eeeba8da 100644 >>> --- a/include/block/block-common.h >>> +++ b/include/block/block-common.h >>> @@ -48,6 +48,14 @@ >>> typedef struct BlockDriver BlockDriver; >>> typedef struct BdrvChild BdrvChild; >>> typedef struct BdrvChildClass BdrvChildClass; >>> +/* zone descriptor data structure */ >>> +typedef struct BlockZoneDescriptor BlockZoneDescriptor; >>> +enum zone_op { >>> + zone_open, >>> + zone_close, >>> + zone_finish, >>> + zone_reset, >>> +}; >>> >>> typedef struct BlockDriverInfo { >>> /* in bytes, 0 if irrelevant */ >>> diff --git a/include/block/block-io.h b/include/block/block-io.h >>> index 62c84f0519..a915b554ad 100644 >>> --- a/include/block/block-io.h >>> +++ b/include/block/block-io.h >>> @@ -80,6 +80,14 @@ int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf); >>> /* Ensure contents are flushed to disk. */ >>> int coroutine_fn bdrv_co_flush(BlockDriverState *bs); >>> >>> +/* Report zone information of zone block device. */ >>> +int coroutine_fn bdrv_co_zone_report(BlockDriverState *bs, >>> + struct BlockZoneDescriptor *zones, >>> + uint32_t *nr_zones, int64_t offset, >>> + int64_t len, uint8_t partial); >>> +int coroutine_fn bdrv_co_zone_mgmt(BlockDriverState *bs, enum zone_op op, >>> + int64_t offset, int64_t len); >>> + >>> int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes); >>> bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs); >>> int bdrv_block_status(BlockDriverState *bs, int64_t offset, >>> @@ -290,6 +298,9 @@ bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); >>> int generated_co_wrapper >>> bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); >>> >>> +int generated_co_wrapper blk_zone_report(BlockBackend *blk); >>> +int generated_co_wrapper blk_zone_mgmt(BlockBackend *blk, enum zone_op op); >>> + >>> /** >>> * bdrv_parent_drained_begin_single: >>> * >>> diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h >>> index 8947abab76..520a733481 100644 >>> --- a/include/block/block_int-common.h >>> +++ b/include/block/block_int-common.h >>> @@ -691,6 +691,12 @@ struct BlockDriver { >>> QEMUIOVector *qiov, >>> int64_t pos); >>> >>> + int coroutine_fn (*bdrv_co_zone_report)(BlockDriverState *bs, >>> + struct BlockZoneDescriptor *zones, uint32_t *nr_zones, >>> + int64_t offset, int64_t len, uint8_t partial); >>> + int coroutine_fn (*bdrv_co_zone_mgmt)(BlockDriverState *bs, enum zone_op op, >>> + int64_t offset, int64_t len); >>> + >>> /* removable device specific */ >>> bool (*bdrv_is_inserted)(BlockDriverState *bs); >>> void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag); >>> diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c >>> index 2f0d8ac25a..ca02b4047e 100644 >>> --- a/qemu-io-cmds.c >>> +++ b/qemu-io-cmds.c >>> @@ -1706,6 +1706,67 @@ static const cmdinfo_t flush_cmd = { >>> .oneline = "flush all in-core file state to disk", >>> }; >>> >>> +static int zone_report_f(BlockBackend *blk, int argc, char **argv) >>> +{ >>> + return blk_zone_report(blk); >>> +} >>> + >>> +static const cmdinfo_t zone_report_cmd = { >>> + .name = "zone_report", >>> + .altname = "f", >>> + .cfunc = zone_report_f, >>> + .oneline = "report zone information in zone block device", >>> +}; >>> + >>> +static int zone_open_f(BlockBackend *blk, int argc, char **argv) >>> +{ >>> + return blk_zone_mgmt(blk, zone_open); >>> +} >>> + >>> +static const cmdinfo_t zone_open_cmd = { >>> + .name = "zone_open", >>> + .altname = "f", >>> + .cfunc = zone_open_f, >>> + .oneline = "explicit open a range of zones in zone block device", >>> +}; >>> + >>> +static int zone_close_f(BlockBackend *blk, int argc, char **argv) >>> +{ >>> + return blk_zone_mgmt(blk, zone_close); >>> +} >>> + >>> +static const cmdinfo_t zone_close_cmd = { >>> + .name = "zone_close", >>> + .altname = "f", >>> + .cfunc = zone_close_f, >>> + .oneline = "close a range of zones in zone block device", >>> +}; >>> + >>> +static int zone_finish_f(BlockBackend *blk, int argc, char **argv) >>> +{ >>> + return blk_zone_mgmt(blk, zone_finish); >>> +} >>> + >>> +static const cmdinfo_t zone_finish_cmd = { >>> + .name = "zone_finish", >>> + .altname = "f", >>> + .cfunc = zone_finish_f, >>> + .oneline = "finish a range of zones in zone block device", >>> +}; >>> + >>> +static int zone_reset_f(BlockBackend *blk, int argc, char **argv) >>> +{ >>> + return blk_zone_mgmt(blk, zone_reset); >>> +} >>> + >>> +static const cmdinfo_t zone_reset_cmd = { >>> + .name = "zone_reset", >>> + .altname = "f", >>> + .cfunc = zone_reset_f, >>> + .oneline = "reset a zone write pointer in zone block device", >>> +}; >>> + >>> + >>> static int truncate_f(BlockBackend *blk, int argc, char **argv); >>> static const cmdinfo_t truncate_cmd = { >>> .name = "truncate", >>> @@ -2498,6 +2559,11 @@ static void __attribute((constructor)) init_qemuio_commands(void) >>> qemuio_add_command(&aio_write_cmd); >>> qemuio_add_command(&aio_flush_cmd); >>> qemuio_add_command(&flush_cmd); >>> + qemuio_add_command(&zone_report_cmd); >>> + qemuio_add_command(&zone_open_cmd); >>> + qemuio_add_command(&zone_close_cmd); >>> + qemuio_add_command(&zone_finish_cmd); >>> + qemuio_add_command(&zone_reset_cmd); >>> qemuio_add_command(&truncate_cmd); >>> qemuio_add_command(&length_cmd); >>> qemuio_add_command(&info_cmd); >>> diff --git a/tests/qemu-iotests/tests/zoned.sh b/tests/qemu-iotests/tests/zoned.sh >>> new file mode 100644 >>> index 0000000000..a1596e8340 >>> --- /dev/null >>> +++ b/tests/qemu-iotests/tests/zoned.sh >>> @@ -0,0 +1,47 @@ >>> +#!/usr/bin/env bash >>> +# >>> +# Test zone management operations. >>> +# >>> + >>> + >>> + >>> +seq=`basename $0` >>> +echo "QA output created by $seq" >>> + >>> +status=1 # failure is the default! >>> + >>> +_cleanup() >>> +{ >>> + _cleanup_test_img >>> +} >>> +trap "_cleanup; exit \$status" 0 1 2 3 15 >>> + >>> +# get standard environment, filters and checks >>> +. ./common.rc >>> +. ./common.filter >>> +. ./common.pattern >>> + >>> +# much of this could be generic for any format supporting compression. >>> +_supported_fmt qcow qcow2 >>> +_supported_proto file >>> +_supported_os Linux >>> + >>> +TEST_OFFSETS="0 1000 4000" >>> +TEST_LENS="1000" >>> +TEST_OPS="zone_report zone_open zone_close zone_finish zone_reset zone_append" >>> + >>> + >>> +echo "Testing a null_blk device" >>> +echo >>> + >>> +for offset in $TEST_OFFSETS; do >>> + echo "At offset $offset:" >>> + io_test "write -b" $offset $CLUSTER_SIZE 8 >>> + io_test "read -b" $offset $CLUSTER_SIZE 8 >>> + _check_test_img >>> +done >>> + >>> +# success, all done >>> +echo "*** done" >>> +rm -f $seq.full >>> +status=0 >> >> >> -- >> Damien Le Moal >> Western Digital Research >
diff --git a/block/block-backend.c b/block/block-backend.c index e0e1aff4b1..4695e5f9fe 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -104,6 +104,8 @@ typedef struct BlockBackendAIOCB { int ret; } BlockBackendAIOCB; + + static const AIOCBInfo block_backend_aiocb_info = { .get_aio_context = blk_aiocb_get_aio_context, .aiocb_size = sizeof(BlockBackendAIOCB), @@ -1810,6 +1812,23 @@ int blk_flush(BlockBackend *blk) return ret; } +int blk_co_zone_report(BlockBackend *blk) +{ + int ret; + ret = bdrv_co_zone_report(blk->root->bs, NULL, 0, 0, 0, 0); + + return ret; +} + +int blk_co_zone_mgmt(BlockBackend *blk, enum zone_op op) +{ + int ret; + ret = bdrv_co_zone_mgmt(blk->root->bs, op, 0, 100); + + return ret; +} + + void blk_drain(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); @@ -2634,3 +2653,4 @@ int blk_make_empty(BlockBackend *blk, Error **errp) return bdrv_make_empty(blk->root, errp); } + diff --git a/block/coroutines.h b/block/coroutines.h index 830ecaa733..e29222a68a 100644 --- a/block/coroutines.h +++ b/block/coroutines.h @@ -81,6 +81,10 @@ blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes); int coroutine_fn blk_co_do_flush(BlockBackend *blk); +int coroutine_fn blk_co_zone_report(BlockBackend *blk); + +int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, enum zone_op op); + /* * "I/O or GS" API functions. These functions can run without @@ -129,4 +133,6 @@ blk_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes); int generated_co_wrapper blk_do_flush(BlockBackend *blk); + + #endif /* BLOCK_COROUTINES_H */ diff --git a/block/file-posix.c b/block/file-posix.c index 48cd096624..42646acc4e 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -73,6 +73,7 @@ #include <linux/hdreg.h> #include <linux/magic.h> #include <scsi/sg.h> +#include <linux/blkzoned.h> #ifdef __s390__ #include <asm/dasd.h> #endif @@ -178,6 +179,167 @@ typedef struct BDRVRawReopenState { bool check_cache_dropped; } BDRVRawReopenState; + +enum zone_type { + BLK_ZT_CONV = BLK_ZONE_TYPE_CONVENTIONAL, + BLK_ZT_SWR = BLK_ZONE_TYPE_SEQWRITE_REQ, + BLK_ZT_SWP = BLK_ZONE_TYPE_SEQWRITE_PREF, +}; + +enum zone_cond { + BLK_ZS_NOT_WP = BLK_ZONE_COND_NOT_WP, + BLK_ZS_EMPTY = BLK_ZONE_COND_EMPTY, + BLK_ZS_IOPEN = BLK_ZONE_COND_IMP_OPEN, + BLK_ZS_EOPEN = BLK_ZONE_COND_EXP_OPEN, + BLK_ZS_CLOSED = BLK_ZONE_COND_CLOSED, + BLK_ZS_RDONLY = BLK_ZONE_COND_READONLY, + BLK_ZS_FULL = BLK_ZONE_COND_FULL, + BLK_ZS_OFFLINE = BLK_ZONE_COND_OFFLINE, +}; + +/* + * Zone descriptor data structure. + * Provide information on a zone with all position and size values in bytes. + */ +struct BlockZoneDescriptor { + uint64_t start; + uint32_t length; + uint32_t cap; + uint64_t wp; + enum zone_type type; + enum zone_cond cond; +}; + +enum zone_model { + ZBD_HOST_MANAGED, + ZBD_HOST_AWARE, +}; + +/** + * Zone device information data structure. + * Provide information on a device. + */ +typedef struct zbd_dev { + enum zone_model model; + uint32_t block_size; + uint32_t write_granularity; + uint32_t nr_zones; + struct BlockZoneDescriptor *zones; /* array of zones */ + uint32_t max_nr_open_zones; /* maximum number of explicitly open zones */ + uint32_t max_nr_active_zones; +} zbd_dev; + +/** + * zone report - Get a zone block device's information in the + * form of an array of zone descriptors. + * + * @param bs: passing zone block device file descriptor + * @param zones: Space to hold zone information on reply + * @param offset: the location in the zone block device + * @param len: the length of reported zone information* + * @param partial: if partial has zero value, it is the number + * of zones that can be reported; else, set the nr_zones to the + * number of fully transferred zone descriptors in the data buffer. + * @return 0 on success, -1 on failure + */ +static int raw_co_zone_report(BlockDriverState *bs, + struct BlockZoneDescriptor *zones, uint32_t *nr_zones, + int64_t offset, int64_t len, uint8_t partial) { + printf("%s\n", "start to report zone"); + BDRVRawState *s = bs->opaque; + + struct blk_zone_report *rep; + uint32_t rep_nr_zones = 0; + int ret = 0, i = 0, start_sector = 0; + + if (ioctl(s->fd, BLKGETNRZONES, &rep_nr_zones)) { + /* Done */ + error_report("number of zones not available"); + } + + fprintf(stdout, "Got %u zone descriptors\n", rep_nr_zones); + rep = malloc(sizeof(struct blk_zone_report) + + rep_nr_zones * sizeof(struct blk_zone)); + if (!rep) { + return -1; + } + + rep->nr_zones = rep_nr_zones; + rep->sector = start_sector; + + while (i < rep_nr_zones) { + ret = ioctl(s->fd, BLKREPORTZONE, rep); + if (ret != 0) { + error_report("can't report zone"); + } + + fprintf(stdout, "start: 0x%llx, len 0x%llx, cap 0x%llx, " + "wptr 0x%llx reset: %u non-seq:%u, zcond:%u, [type: %u]\n", + rep->zones[i].start, rep->zones[i].len, + rep->zones[i].capacity, rep->zones[i].wp, + rep->zones[i].reset, rep->zones[i].non_seq, + rep->zones[i].cond, rep->zones[i].type); + ++i; + } + + free(rep); + + return ret; +} + +/** + * zone management operations - Execute an operation on a zone + */ +static int raw_co_zone_mgmt(BlockDriverState *bs, enum zone_op op, + int64_t offset, int64_t len) { + BDRVRawState *s = bs->opaque; + struct blk_zone_range range; + const char *ioctl_name; + uint64_t ioctl_op; + int ret; + + switch (op) { + case zone_open: + ioctl_name = "BLKOPENZONE"; + ioctl_op = BLKOPENZONE; + break; + case zone_close: + ioctl_name = "BLKCLOSEZONE"; + ioctl_op = BLKCLOSEZONE; + break; + case zone_finish: + ioctl_name = "BLKFINISHZONE"; + ioctl_op = BLKFINISHZONE; + break; + case zone_reset: + ioctl_name = "BLKRESETZONE"; + ioctl_op = BLKRESETZONE; + break; + default: + error_report("Invalid zone operation 0x%x", op); + errno = -EINVAL; + return -1; + } + + /* Execute the operation */ + range.sector = 0; + range.nr_sectors = 100; + ret = ioctl(s->fd, ioctl_op, &range); + if (ret != 0) { + if (errno == ENOTTY) { + error_report("ioctl %s is not supported", ioctl_name); + errno = ENOTSUP; + } else { + error_report("ioctl %s failed %d", + ioctl_name, errno); + } + return -1; + } + + return 0; +} + + static int fd_open(BlockDriverState *bs) { BDRVRawState *s = bs->opaque; @@ -3324,6 +3486,9 @@ BlockDriver bdrv_file = { .bdrv_abort_perm_update = raw_abort_perm_update, .create_opts = &raw_create_opts, .mutable_opts = mutable_opts, + + .bdrv_co_zone_report = raw_co_zone_report, + .bdrv_co_zone_mgmt = raw_co_zone_mgmt, }; /***********************************************/ @@ -3697,6 +3862,9 @@ static BlockDriver bdrv_host_device = { .bdrv_probe_blocksizes = hdev_probe_blocksizes, .bdrv_probe_geometry = hdev_probe_geometry, + .bdrv_co_zone_report = raw_co_zone_report, + .bdrv_co_zone_mgmt = raw_co_zone_mgmt, + /* generic scsi device */ #ifdef __linux__ .bdrv_co_ioctl = hdev_co_ioctl, @@ -3809,6 +3977,9 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_io_unplug = raw_aio_unplug, .bdrv_attach_aio_context = raw_aio_attach_aio_context, + .bdrv_co_zone_report = raw_co_zone_report, + .bdrv_co_zone_mgmt = raw_co_zone_mgmt, + .bdrv_co_truncate = raw_co_truncate, .bdrv_getlength = raw_getlength, .has_variable_length = true, @@ -3939,6 +4110,9 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_io_unplug = raw_aio_unplug, .bdrv_attach_aio_context = raw_aio_attach_aio_context, + .bdrv_co_zone_report = raw_co_zone_report, + .bdrv_co_zone_mgmt = raw_co_zone_mgmt, + .bdrv_co_truncate = raw_co_truncate, .bdrv_getlength = raw_getlength, .has_variable_length = true, diff --git a/block/io.c b/block/io.c index 789e6373d5..04f108c128 100644 --- a/block/io.c +++ b/block/io.c @@ -3258,6 +3258,32 @@ out: return co.ret; } +int bdrv_co_zone_report(BlockDriverState *bs, + struct BlockZoneDescriptor *zones, uint32_t *nr_zones, + int64_t offset, int64_t len, uint8_t partial) +{ + int ret; + ret = bs->drv->bdrv_co_zone_report(bs, zones, nr_zones, + offset, len, partial); + if (!ret) { + return -ENOTSUP; + } + + return ret; +} + +int bdrv_co_zone_mgmt(BlockDriverState *bs, enum zone_op op, + int64_t offset, int64_t len) +{ + int ret; + ret = bs->drv->bdrv_co_zone_mgmt(bs, op, offset, len); + if (!ret) { + return -ENOTSUP; + } + + return ret; +} + void *qemu_blockalign(BlockDriverState *bs, size_t size) { IO_CODE(); diff --git a/include/block/block-common.h b/include/block/block-common.h index fdb7306e78..62eeeba8da 100644 --- a/include/block/block-common.h +++ b/include/block/block-common.h @@ -48,6 +48,14 @@ typedef struct BlockDriver BlockDriver; typedef struct BdrvChild BdrvChild; typedef struct BdrvChildClass BdrvChildClass; +/* zone descriptor data structure */ +typedef struct BlockZoneDescriptor BlockZoneDescriptor; +enum zone_op { + zone_open, + zone_close, + zone_finish, + zone_reset, +}; typedef struct BlockDriverInfo { /* in bytes, 0 if irrelevant */ diff --git a/include/block/block-io.h b/include/block/block-io.h index 62c84f0519..a915b554ad 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -80,6 +80,14 @@ int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf); /* Ensure contents are flushed to disk. */ int coroutine_fn bdrv_co_flush(BlockDriverState *bs); +/* Report zone information of zone block device. */ +int coroutine_fn bdrv_co_zone_report(BlockDriverState *bs, + struct BlockZoneDescriptor *zones, + uint32_t *nr_zones, int64_t offset, + int64_t len, uint8_t partial); +int coroutine_fn bdrv_co_zone_mgmt(BlockDriverState *bs, enum zone_op op, + int64_t offset, int64_t len); + int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes); bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs); int bdrv_block_status(BlockDriverState *bs, int64_t offset, @@ -290,6 +298,9 @@ bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); int generated_co_wrapper bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); +int generated_co_wrapper blk_zone_report(BlockBackend *blk); +int generated_co_wrapper blk_zone_mgmt(BlockBackend *blk, enum zone_op op); + /** * bdrv_parent_drained_begin_single: * diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 8947abab76..520a733481 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -691,6 +691,12 @@ struct BlockDriver { QEMUIOVector *qiov, int64_t pos); + int coroutine_fn (*bdrv_co_zone_report)(BlockDriverState *bs, + struct BlockZoneDescriptor *zones, uint32_t *nr_zones, + int64_t offset, int64_t len, uint8_t partial); + int coroutine_fn (*bdrv_co_zone_mgmt)(BlockDriverState *bs, enum zone_op op, + int64_t offset, int64_t len); + /* removable device specific */ bool (*bdrv_is_inserted)(BlockDriverState *bs); void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag); diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 2f0d8ac25a..ca02b4047e 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -1706,6 +1706,67 @@ static const cmdinfo_t flush_cmd = { .oneline = "flush all in-core file state to disk", }; +static int zone_report_f(BlockBackend *blk, int argc, char **argv) +{ + return blk_zone_report(blk); +} + +static const cmdinfo_t zone_report_cmd = { + .name = "zone_report", + .altname = "f", + .cfunc = zone_report_f, + .oneline = "report zone information in zone block device", +}; + +static int zone_open_f(BlockBackend *blk, int argc, char **argv) +{ + return blk_zone_mgmt(blk, zone_open); +} + +static const cmdinfo_t zone_open_cmd = { + .name = "zone_open", + .altname = "f", + .cfunc = zone_open_f, + .oneline = "explicit open a range of zones in zone block device", +}; + +static int zone_close_f(BlockBackend *blk, int argc, char **argv) +{ + return blk_zone_mgmt(blk, zone_close); +} + +static const cmdinfo_t zone_close_cmd = { + .name = "zone_close", + .altname = "f", + .cfunc = zone_close_f, + .oneline = "close a range of zones in zone block device", +}; + +static int zone_finish_f(BlockBackend *blk, int argc, char **argv) +{ + return blk_zone_mgmt(blk, zone_finish); +} + +static const cmdinfo_t zone_finish_cmd = { + .name = "zone_finish", + .altname = "f", + .cfunc = zone_finish_f, + .oneline = "finish a range of zones in zone block device", +}; + +static int zone_reset_f(BlockBackend *blk, int argc, char **argv) +{ + return blk_zone_mgmt(blk, zone_reset); +} + +static const cmdinfo_t zone_reset_cmd = { + .name = "zone_reset", + .altname = "f", + .cfunc = zone_reset_f, + .oneline = "reset a zone write pointer in zone block device", +}; + + static int truncate_f(BlockBackend *blk, int argc, char **argv); static const cmdinfo_t truncate_cmd = { .name = "truncate", @@ -2498,6 +2559,11 @@ static void __attribute((constructor)) init_qemuio_commands(void) qemuio_add_command(&aio_write_cmd); qemuio_add_command(&aio_flush_cmd); qemuio_add_command(&flush_cmd); + qemuio_add_command(&zone_report_cmd); + qemuio_add_command(&zone_open_cmd); + qemuio_add_command(&zone_close_cmd); + qemuio_add_command(&zone_finish_cmd); + qemuio_add_command(&zone_reset_cmd); qemuio_add_command(&truncate_cmd); qemuio_add_command(&length_cmd); qemuio_add_command(&info_cmd); diff --git a/tests/qemu-iotests/tests/zoned.sh b/tests/qemu-iotests/tests/zoned.sh new file mode 100644 index 0000000000..a1596e8340 --- /dev/null +++ b/tests/qemu-iotests/tests/zoned.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +# +# Test zone management operations. +# + + + +seq=`basename $0` +echo "QA output created by $seq" + +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter +. ./common.pattern + +# much of this could be generic for any format supporting compression. +_supported_fmt qcow qcow2 +_supported_proto file +_supported_os Linux + +TEST_OFFSETS="0 1000 4000" +TEST_LENS="1000" +TEST_OPS="zone_report zone_open zone_close zone_finish zone_reset zone_append" + + +echo "Testing a null_blk device" +echo + +for offset in $TEST_OFFSETS; do + echo "At offset $offset:" + io_test "write -b" $offset $CLUSTER_SIZE 8 + io_test "read -b" $offset $CLUSTER_SIZE 8 + _check_test_img +done + +# success, all done +echo "*** done" +rm -f $seq.full +status=0
By adding zone management operations in BlockDriver, storage controller emulation can use the new block layer APIs including zone_report, zone_reset, zone_open, zone_close, and zone_finish. Signed-off-by: Sam Li <faithilikerun@gmail.com> --- block/block-backend.c | 20 ++++ block/coroutines.h | 6 ++ block/file-posix.c | 174 ++++++++++++++++++++++++++++++ block/io.c | 26 +++++ include/block/block-common.h | 8 ++ include/block/block-io.h | 11 ++ include/block/block_int-common.h | 6 ++ qemu-io-cmds.c | 66 ++++++++++++ tests/qemu-iotests/tests/zoned.sh | 47 ++++++++ 9 files changed, 364 insertions(+) create mode 100644 tests/qemu-iotests/tests/zoned.sh