Message ID | 20191218163228.1613099-9-stefanha@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | io_uring: add Linux io_uring AIO engine | expand |
On Wed, Dec 18, 2019 at 04:32:21PM +0000, Stefan Hajnoczi wrote: > From: Aarushi Mehta <mehta.aaru20@gmail.com> > > Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com> > Reviewed-by: Maxim Levitsky <maximlevitsky@gmail.com> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> > --- > block/file-posix.c | 95 ++++++++++++++++++++++++++++++++++++---------- > 1 file changed, 75 insertions(+), 20 deletions(-) > > diff --git a/block/file-posix.c b/block/file-posix.c > index 1b805bd938..a42a90e59d 100644 > --- a/block/file-posix.c > +++ b/block/file-posix.c > @@ -156,6 +156,7 @@ typedef struct BDRVRawState { > bool has_write_zeroes:1; > bool discard_zeroes:1; > bool use_linux_aio:1; > + bool use_linux_io_uring:1; > bool page_cache_inconsistent:1; > bool has_fallocate; > bool needs_alignment; > @@ -444,7 +445,7 @@ static QemuOptsList raw_runtime_opts = { > { > .name = "aio", > .type = QEMU_OPT_STRING, > - .help = "host AIO implementation (threads, native)", > + .help = "host AIO implementation (threads, native, io_uring)", > }, > { > .name = "locking", > @@ -503,9 +504,11 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, > goto fail; > } > > - aio_default = (bdrv_flags & BDRV_O_NATIVE_AIO) > - ? BLOCKDEV_AIO_OPTIONS_NATIVE > - : BLOCKDEV_AIO_OPTIONS_THREADS; > + if (bdrv_flags & BDRV_O_NATIVE_AIO) { > + aio_default = BLOCKDEV_AIO_OPTIONS_NATIVE; > + } else { > + aio_default = BLOCKDEV_AIO_OPTIONS_THREADS; > + } This is only a cosmetic change? > aio = qapi_enum_parse(&BlockdevAioOptions_lookup, > qemu_opt_get(opts, "aio"), > aio_default, &local_err); > @@ -514,7 +517,11 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, > ret = -EINVAL; > goto fail; > } > + > s->use_linux_aio = (aio == BLOCKDEV_AIO_OPTIONS_NATIVE); > +#ifdef CONFIG_LINUX_IO_URING > + s->use_linux_io_uring = (aio == BLOCKDEV_AIO_OPTIONS_IO_URING); > +#endif > > locking = qapi_enum_parse(&OnOffAuto_lookup, > qemu_opt_get(opts, "locking"), > @@ -578,7 +585,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, > s->shared_perm = BLK_PERM_ALL; > > #ifdef CONFIG_LINUX_AIO > - /* Currently Linux does AIO only for files opened with O_DIRECT */ > + /* Currently Linux does AIO only for files opened with O_DIRECT */ Also this is a not related fix, if you respin maybe we should split in a new patch or say something in the commit message. > if (s->use_linux_aio) { > if (!(s->open_flags & O_DIRECT)) { > error_setg(errp, "aio=native was specified, but it requires " > @@ -600,6 +607,22 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, > } > #endif /* !defined(CONFIG_LINUX_AIO) */ > > +#ifdef CONFIG_LINUX_IO_URING > + if (s->use_linux_io_uring) { > + if (!aio_setup_linux_io_uring(bdrv_get_aio_context(bs), errp)) { > + error_prepend(errp, "Unable to use io_uring: "); > + goto fail; > + } > + } > +#else > + if (s->use_linux_io_uring) { > + error_setg(errp, "aio=io_uring was specified, but is not supported " > + "in this build."); > + ret = -EINVAL; > + goto fail; > + } > +#endif /* !defined(CONFIG_LINUX_IO_URING) */ > + > s->has_discard = true; > s->has_write_zeroes = true; > if ((bs->open_flags & BDRV_O_NOCACHE) != 0) { > @@ -1877,21 +1900,25 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, > return -EIO; > > /* > - * Check if the underlying device requires requests to be aligned, > - * and if the request we are trying to submit is aligned or not. > - * If this is the case tell the low-level driver that it needs > - * to copy the buffer. > + * When using O_DIRECT, the request must be aligned to be able to use > + * either libaio or io_uring interface. If not fail back to regular thread > + * pool read/write code which emulates this for us if we > + * set QEMU_AIO_MISALIGNED. > */ > - if (s->needs_alignment) { > - if (!bdrv_qiov_is_aligned(bs, qiov)) { > - type |= QEMU_AIO_MISALIGNED; > + if (s->needs_alignment && !bdrv_qiov_is_aligned(bs, qiov)) { > + type |= QEMU_AIO_MISALIGNED; > +#ifdef CONFIG_LINUX_IO_URING > + } else if (s->use_linux_io_uring) { > + LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs)); > + assert(qiov->size == bytes); > + return luring_co_submit(bs, aio, s->fd, offset, qiov, type); > +#endif > #ifdef CONFIG_LINUX_AIO > - } else if (s->use_linux_aio) { > - LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); > - assert(qiov->size == bytes); > - return laio_co_submit(bs, aio, s->fd, offset, qiov, type); > + } else if (s->use_linux_aio) { This code block was executed if "s->needs_alignment" was true, now we don't check it. Could this be a problem? > + LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); > + assert(qiov->size == bytes); > + return laio_co_submit(bs, aio, s->fd, offset, qiov, type); > #endif > - } > } > > acb = (RawPosixAIOData) { > @@ -1927,24 +1954,36 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, > > static void raw_aio_plug(BlockDriverState *bs) > { > + BDRVRawState __attribute__((unused)) *s = bs->opaque; > #ifdef CONFIG_LINUX_AIO > - BDRVRawState *s = bs->opaque; > if (s->use_linux_aio) { > LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); > laio_io_plug(bs, aio); > } > #endif > +#ifdef CONFIG_LINUX_IO_URING > + if (s->use_linux_io_uring) { > + LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs)); > + luring_io_plug(bs, aio); > + } > +#endif > } > > static void raw_aio_unplug(BlockDriverState *bs) > { > + BDRVRawState __attribute__((unused)) *s = bs->opaque; > #ifdef CONFIG_LINUX_AIO > - BDRVRawState *s = bs->opaque; > if (s->use_linux_aio) { > LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); > laio_io_unplug(bs, aio); > } > #endif > +#ifdef CONFIG_LINUX_IO_URING > + if (s->use_linux_io_uring) { > + LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs)); > + luring_io_unplug(bs, aio); > + } > +#endif > } > > static int raw_co_flush_to_disk(BlockDriverState *bs) > @@ -1964,14 +2003,20 @@ static int raw_co_flush_to_disk(BlockDriverState *bs) > .aio_type = QEMU_AIO_FLUSH, > }; > > +#ifdef CONFIG_LINUX_IO_URING > + if (s->use_linux_io_uring) { > + LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs)); > + return luring_co_submit(bs, aio, s->fd, 0, NULL, QEMU_AIO_FLUSH); > + } > +#endif > return raw_thread_pool_submit(bs, handle_aiocb_flush, &acb); > } > > static void raw_aio_attach_aio_context(BlockDriverState *bs, > AioContext *new_context) > { > + BDRVRawState __attribute__((unused)) *s = bs->opaque; > #ifdef CONFIG_LINUX_AIO > - BDRVRawState *s = bs->opaque; > if (s->use_linux_aio) { > Error *local_err = NULL; > if (!aio_setup_linux_aio(new_context, &local_err)) { > @@ -1981,6 +2026,16 @@ static void raw_aio_attach_aio_context(BlockDriverState *bs, > } > } > #endif > +#ifdef CONFIG_LINUX_IO_URING > + if (s->use_linux_io_uring) { > + Error *local_err; > + if (!aio_setup_linux_io_uring(new_context, &local_err)) { > + error_reportf_err(local_err, "Unable to use linux io_uring, " > + "falling back to thread pool: "); > + s->use_linux_io_uring = false; > + } > + } > +#endif > } > > static void raw_close(BlockDriverState *bs) > -- > 2.23.0 > >
On Mon, Jan 13, 2020 at 12:49:27PM +0100, Stefano Garzarella wrote: > On Wed, Dec 18, 2019 at 04:32:21PM +0000, Stefan Hajnoczi wrote: > > @@ -503,9 +504,11 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, > > goto fail; > > } > > > > - aio_default = (bdrv_flags & BDRV_O_NATIVE_AIO) > > - ? BLOCKDEV_AIO_OPTIONS_NATIVE > > - : BLOCKDEV_AIO_OPTIONS_THREADS; > > + if (bdrv_flags & BDRV_O_NATIVE_AIO) { > > + aio_default = BLOCKDEV_AIO_OPTIONS_NATIVE; > > + } else { > > + aio_default = BLOCKDEV_AIO_OPTIONS_THREADS; > > + } > > This is only a cosmetic change? ... > > @@ -578,7 +585,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, > > s->shared_perm = BLK_PERM_ALL; > > > > #ifdef CONFIG_LINUX_AIO > > - /* Currently Linux does AIO only for files opened with O_DIRECT */ > > + /* Currently Linux does AIO only for files opened with O_DIRECT */ > > Also this is a not related fix, if you respin maybe we should split in a > new patch or say something in the commit message. Thanks, I'll remove whitespace changes and unrelated reformatting from this patch. > > @@ -1877,21 +1900,25 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, > > return -EIO; > > > > /* > > - * Check if the underlying device requires requests to be aligned, > > - * and if the request we are trying to submit is aligned or not. > > - * If this is the case tell the low-level driver that it needs > > - * to copy the buffer. > > + * When using O_DIRECT, the request must be aligned to be able to use > > + * either libaio or io_uring interface. If not fail back to regular thread > > + * pool read/write code which emulates this for us if we > > + * set QEMU_AIO_MISALIGNED. > > */ > > - if (s->needs_alignment) { > > - if (!bdrv_qiov_is_aligned(bs, qiov)) { > > - type |= QEMU_AIO_MISALIGNED; > > + if (s->needs_alignment && !bdrv_qiov_is_aligned(bs, qiov)) { > > + type |= QEMU_AIO_MISALIGNED; > > +#ifdef CONFIG_LINUX_IO_URING > > + } else if (s->use_linux_io_uring) { > > + LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs)); > > + assert(qiov->size == bytes); > > + return luring_co_submit(bs, aio, s->fd, offset, qiov, type); > > +#endif > > #ifdef CONFIG_LINUX_AIO > > - } else if (s->use_linux_aio) { > > - LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); > > - assert(qiov->size == bytes); > > - return laio_co_submit(bs, aio, s->fd, offset, qiov, type); > > + } else if (s->use_linux_aio) { > > This code block was executed if "s->needs_alignment" was true, now we don't > check it. Could this be a problem? From raw_open_common(): /* Currently Linux does AIO only for files opened with O_DIRECT */ if (s->use_linux_aio) { if (!(s->open_flags & O_DIRECT)) { error_setg(errp, "aio=native was specified, but it requires " "cache.direct=on, which was not specified."); ret = -EINVAL; goto fail; There is no change in behavior since use_linux_aio is only true when needs_alignment is set.
diff --git a/block/file-posix.c b/block/file-posix.c index 1b805bd938..a42a90e59d 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -156,6 +156,7 @@ typedef struct BDRVRawState { bool has_write_zeroes:1; bool discard_zeroes:1; bool use_linux_aio:1; + bool use_linux_io_uring:1; bool page_cache_inconsistent:1; bool has_fallocate; bool needs_alignment; @@ -444,7 +445,7 @@ static QemuOptsList raw_runtime_opts = { { .name = "aio", .type = QEMU_OPT_STRING, - .help = "host AIO implementation (threads, native)", + .help = "host AIO implementation (threads, native, io_uring)", }, { .name = "locking", @@ -503,9 +504,11 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, goto fail; } - aio_default = (bdrv_flags & BDRV_O_NATIVE_AIO) - ? BLOCKDEV_AIO_OPTIONS_NATIVE - : BLOCKDEV_AIO_OPTIONS_THREADS; + if (bdrv_flags & BDRV_O_NATIVE_AIO) { + aio_default = BLOCKDEV_AIO_OPTIONS_NATIVE; + } else { + aio_default = BLOCKDEV_AIO_OPTIONS_THREADS; + } aio = qapi_enum_parse(&BlockdevAioOptions_lookup, qemu_opt_get(opts, "aio"), aio_default, &local_err); @@ -514,7 +517,11 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, ret = -EINVAL; goto fail; } + s->use_linux_aio = (aio == BLOCKDEV_AIO_OPTIONS_NATIVE); +#ifdef CONFIG_LINUX_IO_URING + s->use_linux_io_uring = (aio == BLOCKDEV_AIO_OPTIONS_IO_URING); +#endif locking = qapi_enum_parse(&OnOffAuto_lookup, qemu_opt_get(opts, "locking"), @@ -578,7 +585,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, s->shared_perm = BLK_PERM_ALL; #ifdef CONFIG_LINUX_AIO - /* Currently Linux does AIO only for files opened with O_DIRECT */ + /* Currently Linux does AIO only for files opened with O_DIRECT */ if (s->use_linux_aio) { if (!(s->open_flags & O_DIRECT)) { error_setg(errp, "aio=native was specified, but it requires " @@ -600,6 +607,22 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, } #endif /* !defined(CONFIG_LINUX_AIO) */ +#ifdef CONFIG_LINUX_IO_URING + if (s->use_linux_io_uring) { + if (!aio_setup_linux_io_uring(bdrv_get_aio_context(bs), errp)) { + error_prepend(errp, "Unable to use io_uring: "); + goto fail; + } + } +#else + if (s->use_linux_io_uring) { + error_setg(errp, "aio=io_uring was specified, but is not supported " + "in this build."); + ret = -EINVAL; + goto fail; + } +#endif /* !defined(CONFIG_LINUX_IO_URING) */ + s->has_discard = true; s->has_write_zeroes = true; if ((bs->open_flags & BDRV_O_NOCACHE) != 0) { @@ -1877,21 +1900,25 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, return -EIO; /* - * Check if the underlying device requires requests to be aligned, - * and if the request we are trying to submit is aligned or not. - * If this is the case tell the low-level driver that it needs - * to copy the buffer. + * When using O_DIRECT, the request must be aligned to be able to use + * either libaio or io_uring interface. If not fail back to regular thread + * pool read/write code which emulates this for us if we + * set QEMU_AIO_MISALIGNED. */ - if (s->needs_alignment) { - if (!bdrv_qiov_is_aligned(bs, qiov)) { - type |= QEMU_AIO_MISALIGNED; + if (s->needs_alignment && !bdrv_qiov_is_aligned(bs, qiov)) { + type |= QEMU_AIO_MISALIGNED; +#ifdef CONFIG_LINUX_IO_URING + } else if (s->use_linux_io_uring) { + LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs)); + assert(qiov->size == bytes); + return luring_co_submit(bs, aio, s->fd, offset, qiov, type); +#endif #ifdef CONFIG_LINUX_AIO - } else if (s->use_linux_aio) { - LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); - assert(qiov->size == bytes); - return laio_co_submit(bs, aio, s->fd, offset, qiov, type); + } else if (s->use_linux_aio) { + LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); + assert(qiov->size == bytes); + return laio_co_submit(bs, aio, s->fd, offset, qiov, type); #endif - } } acb = (RawPosixAIOData) { @@ -1927,24 +1954,36 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, static void raw_aio_plug(BlockDriverState *bs) { + BDRVRawState __attribute__((unused)) *s = bs->opaque; #ifdef CONFIG_LINUX_AIO - BDRVRawState *s = bs->opaque; if (s->use_linux_aio) { LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); laio_io_plug(bs, aio); } #endif +#ifdef CONFIG_LINUX_IO_URING + if (s->use_linux_io_uring) { + LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs)); + luring_io_plug(bs, aio); + } +#endif } static void raw_aio_unplug(BlockDriverState *bs) { + BDRVRawState __attribute__((unused)) *s = bs->opaque; #ifdef CONFIG_LINUX_AIO - BDRVRawState *s = bs->opaque; if (s->use_linux_aio) { LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); laio_io_unplug(bs, aio); } #endif +#ifdef CONFIG_LINUX_IO_URING + if (s->use_linux_io_uring) { + LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs)); + luring_io_unplug(bs, aio); + } +#endif } static int raw_co_flush_to_disk(BlockDriverState *bs) @@ -1964,14 +2003,20 @@ static int raw_co_flush_to_disk(BlockDriverState *bs) .aio_type = QEMU_AIO_FLUSH, }; +#ifdef CONFIG_LINUX_IO_URING + if (s->use_linux_io_uring) { + LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs)); + return luring_co_submit(bs, aio, s->fd, 0, NULL, QEMU_AIO_FLUSH); + } +#endif return raw_thread_pool_submit(bs, handle_aiocb_flush, &acb); } static void raw_aio_attach_aio_context(BlockDriverState *bs, AioContext *new_context) { + BDRVRawState __attribute__((unused)) *s = bs->opaque; #ifdef CONFIG_LINUX_AIO - BDRVRawState *s = bs->opaque; if (s->use_linux_aio) { Error *local_err = NULL; if (!aio_setup_linux_aio(new_context, &local_err)) { @@ -1981,6 +2026,16 @@ static void raw_aio_attach_aio_context(BlockDriverState *bs, } } #endif +#ifdef CONFIG_LINUX_IO_URING + if (s->use_linux_io_uring) { + Error *local_err; + if (!aio_setup_linux_io_uring(new_context, &local_err)) { + error_reportf_err(local_err, "Unable to use linux io_uring, " + "falling back to thread pool: "); + s->use_linux_io_uring = false; + } + } +#endif } static void raw_close(BlockDriverState *bs)