@@ -13,6 +13,7 @@
#include "qemu/queue.h"
#include "block/raw-aio.h"
#include "qemu/event_notifier.h"
+#include "qemu/coroutine.h"
#include <libaio.h>
@@ -30,6 +31,7 @@
struct qemu_laiocb {
BlockAIOCB common;
+ Coroutine *co;
LinuxAioState *ctx;
struct iocb iocb;
ssize_t ret;
@@ -88,9 +90,14 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
}
}
}
- laiocb->common.cb(laiocb->common.opaque, ret);
- qemu_aio_unref(laiocb);
+ laiocb->ret = ret;
+ if (laiocb->co) {
+ qemu_coroutine_enter(laiocb->co, NULL);
+ } else {
+ laiocb->common.cb(laiocb->common.opaque, ret);
+ qemu_aio_unref(laiocb);
+ }
}
/* The completion BH fetches completed I/O requests and invokes their
@@ -232,22 +239,12 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s)
}
}
-BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque, int type)
+static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
+ int type)
{
- struct qemu_laiocb *laiocb;
- struct iocb *iocbs;
- off_t offset = sector_num * 512;
-
- laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
- laiocb->nbytes = nb_sectors * 512;
- laiocb->ctx = s;
- laiocb->ret = -EINPROGRESS;
- laiocb->is_read = (type == QEMU_AIO_READ);
- laiocb->qiov = qiov;
-
- iocbs = &laiocb->iocb;
+ LinuxAioState *s = laiocb->ctx;
+ struct iocb *iocbs = &laiocb->iocb;
+ QEMUIOVector *qiov = laiocb->qiov;
switch (type) {
case QEMU_AIO_WRITE:
@@ -260,7 +257,7 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
default:
fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
__func__, type);
- goto out_free_aiocb;
+ return -EIO;
}
io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
@@ -270,11 +267,55 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
(!s->io_q.plugged || s->io_q.n >= MAX_QUEUED_IO)) {
ioq_submit(s);
}
- return &laiocb->common;
-out_free_aiocb:
- qemu_aio_unref(laiocb);
- return NULL;
+ return 0;
+}
+
+int laio_submit_co(BlockDriverState *bs, LinuxAioState *s, int fd,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, int type)
+{
+ off_t offset = sector_num * 512;
+ int ret;
+
+ struct qemu_laiocb laiocb = {
+ .co = qemu_coroutine_self(),
+ .nbytes = nb_sectors * 512,
+ .ctx = s,
+ .is_read = (type == QEMU_AIO_READ),
+ .qiov = qiov,
+ };
+
+ ret = laio_do_submit(fd, &laiocb, offset, type);
+ if (ret < 0) {
+ return ret;
+ }
+
+ qemu_coroutine_yield();
+ return laiocb.ret;
+}
+
+BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockCompletionFunc *cb, void *opaque, int type)
+{
+ struct qemu_laiocb *laiocb;
+ off_t offset = sector_num * 512;
+ int ret;
+
+ laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
+ laiocb->nbytes = nb_sectors * 512;
+ laiocb->ctx = s;
+ laiocb->ret = -EINPROGRESS;
+ laiocb->is_read = (type == QEMU_AIO_READ);
+ laiocb->qiov = qiov;
+
+ ret = laio_do_submit(fd, laiocb, offset, type);
+ if (ret < 0) {
+ qemu_aio_unref(laiocb);
+ return NULL;
+ }
+
+ return &laiocb->common;
}
void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
@@ -38,6 +38,8 @@
typedef struct LinuxAioState LinuxAioState;
LinuxAioState *laio_init(void);
void laio_cleanup(LinuxAioState *s);
+int laio_submit_co(BlockDriverState *bs, LinuxAioState *s, int fd,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, int type);
BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque, int type);
@@ -1325,14 +1325,13 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
}
-static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque, int type)
+static int coroutine_fn raw_co_rw(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov, int type)
{
BDRVRawState *s = bs->opaque;
if (fd_open(bs) < 0)
- return NULL;
+ return -EIO;
/*
* Check if the underlying device requires requests to be aligned,
@@ -1345,14 +1344,26 @@ static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
type |= QEMU_AIO_MISALIGNED;
#ifdef CONFIG_LINUX_AIO
} else if (s->use_aio) {
- return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
- nb_sectors, cb, opaque, type);
+ return laio_submit_co(bs, s->aio_ctx, s->fd, sector_num, qiov,
+ nb_sectors, type);
#endif
}
}
- return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors,
- cb, opaque, type);
+ return paio_submit_co(bs, s->fd, sector_num * BDRV_SECTOR_SIZE, qiov,
+ nb_sectors * BDRV_SECTOR_SIZE, type);
+}
+
+static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
+{
+ return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_READ);
+}
+
+static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
+{
+ return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_WRITE);
}
static void raw_aio_plug(BlockDriverState *bs)
@@ -1375,22 +1386,6 @@ static void raw_aio_unplug(BlockDriverState *bs)
#endif
}
-static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
- cb, opaque, QEMU_AIO_READ);
-}
-
-static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
- cb, opaque, QEMU_AIO_WRITE);
-}
-
static BlockAIOCB *raw_aio_flush(BlockDriverState *bs,
BlockCompletionFunc *cb, void *opaque)
{
@@ -1957,8 +1952,8 @@ BlockDriver bdrv_file = {
.bdrv_co_get_block_status = raw_co_get_block_status,
.bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes,
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
+ .bdrv_co_readv = raw_co_readv,
+ .bdrv_co_writev = raw_co_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = raw_aio_discard,
.bdrv_refresh_limits = raw_refresh_limits,
@@ -2405,8 +2400,8 @@ static BlockDriver bdrv_host_device = {
.create_opts = &raw_create_opts,
.bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes,
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
+ .bdrv_co_readv = raw_co_readv,
+ .bdrv_co_writev = raw_co_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = hdev_aio_discard,
.bdrv_refresh_limits = raw_refresh_limits,
@@ -2535,8 +2530,8 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_create = hdev_create,
.create_opts = &raw_create_opts,
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
+ .bdrv_co_readv = raw_co_readv,
+ .bdrv_co_writev = raw_co_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
@@ -2670,8 +2665,8 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_create = hdev_create,
.create_opts = &raw_create_opts,
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
+ .bdrv_co_readv = raw_co_readv,
+ .bdrv_co_writev = raw_co_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
In order to use the modern byte-based .bdrv_co_preadv/pwritev() interface, this patch switches raw-posix to coroutine-based interfaces as a first step. In terms of semantics and performance, it doesn't make a difference with the existing code whether we go from a coroutine to a callback-based interface already in block/io.c or only in linux-aio.c As there have been concerns in the past that this change may be a step in the wrong direction with respect to a possible AIO fast path, the old callback-based interface for linux-aio is left around and can be reactivated when a fast path (e.g. directly from virtio-blk dataplane, bypassing the whole block layer) is implemented. Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block/linux-aio.c | 85 +++++++++++++++++++++++++++++++++++++++++-------------- block/raw-aio.h | 2 ++ block/raw-posix.c | 59 ++++++++++++++++++-------------------- 3 files changed, 92 insertions(+), 54 deletions(-)