@@ -28,6 +28,7 @@
#include "block/thread-pool.h"
#include "qemu/main-loop.h"
#include "qemu/atomic.h"
+#include "block/raw-aio.h"
/***********************************************************/
/* bottom halves (can be seen as timers which expire ASAP) */
@@ -241,6 +242,14 @@ aio_ctx_finalize(GSource *source)
qemu_bh_delete(ctx->notify_dummy_bh);
thread_pool_free(ctx->thread_pool);
+#ifdef CONFIG_LINUX_AIO
+ if (ctx->linux_aio) {
+ laio_detach_aio_context(ctx->linux_aio, ctx);
+ laio_cleanup(ctx->linux_aio);
+ ctx->linux_aio = NULL;
+ }
+#endif
+
qemu_mutex_lock(&ctx->bh_lock);
while (ctx->first_bh) {
QEMUBH *next = ctx->first_bh->next;
@@ -281,6 +290,17 @@ ThreadPool *aio_get_thread_pool(AioContext *ctx)
return ctx->thread_pool;
}
+#ifdef CONFIG_LINUX_AIO
+LinuxAioState *aio_get_linux_aio(AioContext *ctx)
+{
+ if (!ctx->linux_aio) {
+ ctx->linux_aio = laio_init();
+ laio_attach_aio_context(ctx->linux_aio, ctx);
+ }
+ return ctx->linux_aio;
+}
+#endif
+
void aio_notify(AioContext *ctx)
{
/* Write e.g. bh->scheduled before reading ctx->notify_me. Pairs
@@ -344,6 +364,9 @@ AioContext *aio_context_new(Error **errp)
false,
(EventNotifierHandler *)
event_notifier_dummy_cb);
+#ifdef CONFIG_LINUX_AIO
+ ctx->linux_aio = NULL;
+#endif
ctx->thread_pool = NULL;
qemu_mutex_init(&ctx->bh_lock);
rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx);
@@ -47,6 +47,8 @@ typedef struct {
} LaioQueue;
struct LinuxAioState {
+ AioContext *aio_context;
+
io_context_t ctx;
EventNotifier e;
@@ -283,6 +285,7 @@ void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
{
+ s->aio_context = new_context;
s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
aio_set_event_notifier(new_context, &s->e, false,
qemu_laio_completion_cb);
@@ -31,7 +31,7 @@
#include "trace.h"
#include "block/thread-pool.h"
#include "qemu/iov.h"
-#include "raw-aio.h"
+#include "block/raw-aio.h"
#include "qapi/util.h"
#include "qapi/qmp/qstring.h"
@@ -135,10 +135,6 @@ typedef struct BDRVRawState {
int open_flags;
size_t buf_align;
-#ifdef CONFIG_LINUX_AIO
- int use_aio;
- LinuxAioState *aio_ctx;
-#endif
#ifdef CONFIG_XFS
bool is_xfs:1;
#endif
@@ -152,9 +148,6 @@ typedef struct BDRVRawState {
typedef struct BDRVRawReopenState {
int fd;
int open_flags;
-#ifdef CONFIG_LINUX_AIO
- int use_aio;
-#endif
} BDRVRawReopenState;
static int fd_open(BlockDriverState *bs);
@@ -372,58 +365,15 @@ static void raw_parse_flags(int bdrv_flags, int *open_flags)
}
}
-static void raw_detach_aio_context(BlockDriverState *bs)
-{
#ifdef CONFIG_LINUX_AIO
- BDRVRawState *s = bs->opaque;
-
- if (s->use_aio) {
- laio_detach_aio_context(s->aio_ctx, bdrv_get_aio_context(bs));
- }
-#endif
-}
-
-static void raw_attach_aio_context(BlockDriverState *bs,
- AioContext *new_context)
+static bool raw_use_aio(int bdrv_flags)
{
-#ifdef CONFIG_LINUX_AIO
- BDRVRawState *s = bs->opaque;
-
- if (s->use_aio) {
- laio_attach_aio_context(s->aio_ctx, new_context);
- }
-#endif
-}
-
-#ifdef CONFIG_LINUX_AIO
-static int raw_set_aio(LinuxAioState **aio_ctx, int *use_aio, int bdrv_flags)
-{
- int ret = -1;
- assert(aio_ctx != NULL);
- assert(use_aio != NULL);
/*
* Currently Linux do AIO only for files opened with O_DIRECT
* specified so check NOCACHE flag too
*/
- if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
- (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
-
- /* if non-NULL, laio_init() has already been run */
- if (*aio_ctx == NULL) {
- *aio_ctx = laio_init();
- if (!*aio_ctx) {
- goto error;
- }
- }
- *use_aio = 1;
- } else {
- *use_aio = 0;
- }
-
- ret = 0;
-
-error:
- return ret;
+ return (bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
+ (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO);
}
#endif
@@ -492,13 +442,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
s->fd = fd;
#ifdef CONFIG_LINUX_AIO
- if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
- qemu_close(fd);
- ret = -errno;
- error_setg_errno(errp, -ret, "Could not set AIO state");
- goto fail;
- }
- if (!s->use_aio && (bdrv_flags & BDRV_O_NATIVE_AIO)) {
+ if (!raw_use_aio(bdrv_flags) && (bdrv_flags & BDRV_O_NATIVE_AIO)) {
error_setg(errp, "aio=native was specified, but it requires "
"cache.direct=on, which was not specified.");
ret = -EINVAL;
@@ -564,8 +508,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
}
#endif
- raw_attach_aio_context(bs, bdrv_get_aio_context(bs));
-
ret = 0;
fail:
if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
@@ -606,18 +548,6 @@ static int raw_reopen_prepare(BDRVReopenState *state,
state->opaque = g_new0(BDRVRawReopenState, 1);
raw_s = state->opaque;
-#ifdef CONFIG_LINUX_AIO
- raw_s->use_aio = s->use_aio;
-
- /* we can use s->aio_ctx instead of a copy, because the use_aio flag is
- * valid in the 'false' condition even if aio_ctx is set, and raw_set_aio()
- * won't override aio_ctx if aio_ctx is non-NULL */
- if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) {
- error_setg(errp, "Could not set AIO state");
- return -1;
- }
-#endif
-
if (s->type == FTYPE_CD) {
raw_s->open_flags |= O_NONBLOCK;
}
@@ -700,9 +630,6 @@ static void raw_reopen_commit(BDRVReopenState *state)
qemu_close(s->fd);
s->fd = raw_s->fd;
-#ifdef CONFIG_LINUX_AIO
- s->use_aio = raw_s->use_aio;
-#endif
g_free(state->opaque);
state->opaque = NULL;
@@ -1317,8 +1244,9 @@ static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
if (!bdrv_qiov_is_aligned(bs, qiov)) {
type |= QEMU_AIO_MISALIGNED;
#ifdef CONFIG_LINUX_AIO
- } else if (s->use_aio) {
- return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
+ } else if (bs->open_flags & BDRV_O_NATIVE_AIO) {
+ LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
+ return laio_submit(bs, aio, s->fd, sector_num, qiov,
nb_sectors, cb, opaque, type);
#endif
}
@@ -1331,9 +1259,9 @@ static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
static void raw_aio_plug(BlockDriverState *bs)
{
#ifdef CONFIG_LINUX_AIO
- BDRVRawState *s = bs->opaque;
- if (s->use_aio) {
- laio_io_plug(bs, s->aio_ctx);
+ if (bs->open_flags & BDRV_O_NATIVE_AIO) {
+ LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
+ laio_io_plug(bs, aio);
}
#endif
}
@@ -1341,9 +1269,9 @@ static void raw_aio_plug(BlockDriverState *bs)
static void raw_aio_unplug(BlockDriverState *bs)
{
#ifdef CONFIG_LINUX_AIO
- BDRVRawState *s = bs->opaque;
- if (s->use_aio) {
- laio_io_unplug(bs, s->aio_ctx);
+ if (bs->open_flags & BDRV_O_NATIVE_AIO) {
+ LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
+ laio_io_unplug(bs, aio);
}
#endif
}
@@ -1379,13 +1307,6 @@ static void raw_close(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
- raw_detach_aio_context(bs);
-
-#ifdef CONFIG_LINUX_AIO
- if (s->use_aio) {
- laio_cleanup(s->aio_ctx);
- }
-#endif
if (s->fd >= 0) {
qemu_close(s->fd);
s->fd = -1;
@@ -1944,9 +1865,6 @@ BlockDriver bdrv_file = {
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
- .bdrv_detach_aio_context = raw_detach_aio_context,
- .bdrv_attach_aio_context = raw_attach_aio_context,
-
.create_opts = &raw_create_opts,
};
@@ -2308,9 +2226,6 @@ static BlockDriver bdrv_host_device = {
.bdrv_probe_blocksizes = hdev_probe_blocksizes,
.bdrv_probe_geometry = hdev_probe_geometry,
- .bdrv_detach_aio_context = raw_detach_aio_context,
- .bdrv_attach_aio_context = raw_attach_aio_context,
-
/* generic scsi device */
#ifdef __linux__
.bdrv_aio_ioctl = hdev_aio_ioctl,
@@ -2435,9 +2350,6 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
- .bdrv_detach_aio_context = raw_detach_aio_context,
- .bdrv_attach_aio_context = raw_attach_aio_context,
-
/* removable device support */
.bdrv_is_inserted = cdrom_is_inserted,
.bdrv_eject = cdrom_eject,
@@ -2570,9 +2482,6 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
- .bdrv_detach_aio_context = raw_detach_aio_context,
- .bdrv_attach_aio_context = raw_attach_aio_context,
-
/* removable device support */
.bdrv_is_inserted = cdrom_is_inserted,
.bdrv_eject = cdrom_eject,
@@ -26,7 +26,7 @@
#include "qemu/timer.h"
#include "block/block_int.h"
#include "qemu/module.h"
-#include "raw-aio.h"
+#include "block/raw-aio.h"
#include "trace.h"
#include "block/thread-pool.h"
#include "qemu/iov.h"
@@ -48,6 +48,9 @@ typedef struct AioHandler AioHandler;
typedef void QEMUBHFunc(void *opaque);
typedef void IOHandler(void *opaque);
+struct ThreadPool;
+struct LinuxAioState;
+
struct AioContext {
GSource source;
@@ -120,6 +123,13 @@ struct AioContext {
/* Thread pool for performing work and receiving completion callbacks */
struct ThreadPool *thread_pool;
+#ifdef CONFIG_LINUX_AIO
+ /* State for native Linux AIO. Uses aio_context_acquire/release for
+ * locking.
+ */
+ struct LinuxAioState *linux_aio;
+#endif
+
/* TimerLists for calling timers - one per clock type */
QEMUTimerListGroup tlg;
@@ -336,6 +346,9 @@ GSource *aio_get_g_source(AioContext *ctx);
/* Return the ThreadPool bound to this AioContext */
struct ThreadPool *aio_get_thread_pool(AioContext *ctx);
+/* Return the LinuxAioState bound to this AioContext */
+struct LinuxAioState *aio_get_linux_aio(AioContext *ctx);
+
/**
* aio_timer_new:
* @ctx: the aio context
This has better performance because it executes fewer system calls and does not use a bottom half per disk. Originally proposed by Ming Lei. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- async.c | 23 +++++++ block/linux-aio.c | 3 + block/raw-posix.c | 119 +++++-------------------------------- block/raw-win32.c | 2 +- include/block/aio.h | 13 ++++ {block => include/block}/raw-aio.h | 0 6 files changed, 54 insertions(+), 106 deletions(-) rename {block => include/block}/raw-aio.h (100%) diff --git a/block/raw-aio.h b/include/block/raw-aio.h similarity index 100% rename from block/raw-aio.h rename to include/block/raw-aio.h