diff mbox

[for,2.7,resend] linux-aio: share one LinuxAioState within an AioContext

Message ID 1467650000-51385-1-git-send-email-pbonzini@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Paolo Bonzini July 4, 2016, 4:33 p.m. UTC
Sharing one LinuxAioState among all disks in an AioContext has better
performance because it executes fewer system calls and does not use a
bottom half per disk.

Originally proposed by Ming Lei.

Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 async.c                            |  23 +++++++
 block/linux-aio.c                  |  10 ++--
 block/raw-posix.c                  | 119 +++++--------------------------------
 block/raw-win32.c                  |   2 +-
 include/block/aio.h                |  13 ++++
 {block => include/block}/raw-aio.h |   0
 6 files changed, 57 insertions(+), 110 deletions(-)
 rename {block => include/block}/raw-aio.h (100%)

Comments

Paolo Bonzini July 13, 2016, 1:25 p.m. UTC | #1
Ping.

On 04/07/2016 18:33, Paolo Bonzini wrote:
> This has better performance because it executes fewer system calls
> and does not use a bottom half per disk.
> 
> Originally proposed by Ming Lei.
> 
> Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  async.c                            |  23 +++++++
>  block/linux-aio.c                  |  10 ++--
>  block/raw-posix.c                  | 119 +++++--------------------------------
>  block/raw-win32.c                  |   2 +-
>  include/block/aio.h                |  13 ++++
>  {block => include/block}/raw-aio.h |   0
>  6 files changed, 57 insertions(+), 110 deletions(-)
>  rename {block => include/block}/raw-aio.h (100%)
> 
> diff --git a/async.c b/async.c
> index b4bf205..6caa98c 100644
> --- a/async.c
> +++ b/async.c
> @@ -29,6 +29,7 @@
>  #include "block/thread-pool.h"
>  #include "qemu/main-loop.h"
>  #include "qemu/atomic.h"
> +#include "block/raw-aio.h"
>  
>  /***********************************************************/
>  /* bottom halves (can be seen as timers which expire ASAP) */
> @@ -242,6 +243,14 @@ aio_ctx_finalize(GSource     *source)
>      qemu_bh_delete(ctx->notify_dummy_bh);
>      thread_pool_free(ctx->thread_pool);
>  
> +#ifdef CONFIG_LINUX_AIO
> +    if (ctx->linux_aio) {
> +        laio_detach_aio_context(ctx->linux_aio, ctx);
> +        laio_cleanup(ctx->linux_aio);
> +        ctx->linux_aio = NULL;
> +    }
> +#endif
> +
>      qemu_mutex_lock(&ctx->bh_lock);
>      while (ctx->first_bh) {
>          QEMUBH *next = ctx->first_bh->next;
> @@ -282,6 +291,17 @@ ThreadPool *aio_get_thread_pool(AioContext *ctx)
>      return ctx->thread_pool;
>  }
>  
> +#ifdef CONFIG_LINUX_AIO
> +LinuxAioState *aio_get_linux_aio(AioContext *ctx)
> +{
> +    if (!ctx->linux_aio) {
> +        ctx->linux_aio = laio_init();
> +        laio_attach_aio_context(ctx->linux_aio, ctx);
> +    }
> +    return ctx->linux_aio;
> +}
> +#endif
> +
>  void aio_notify(AioContext *ctx)
>  {
>      /* Write e.g. bh->scheduled before reading ctx->notify_me.  Pairs
> @@ -345,6 +365,9 @@ AioContext *aio_context_new(Error **errp)
>                             false,
>                             (EventNotifierHandler *)
>                             event_notifier_dummy_cb);
> +#ifdef CONFIG_LINUX_AIO
> +    ctx->linux_aio = NULL;
> +#endif
>      ctx->thread_pool = NULL;
>      qemu_mutex_init(&ctx->bh_lock);
>      rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx);
> diff --git a/block/linux-aio.c b/block/linux-aio.c
> index e468960..3eb0a0e 100644
> --- a/block/linux-aio.c
> +++ b/block/linux-aio.c
> @@ -50,6 +50,8 @@ typedef struct {
>  } LaioQueue;
>  
>  struct LinuxAioState {
> +    AioContext *aio_context;
> +
>      io_context_t ctx;
>      EventNotifier e;
>  
> @@ -227,15 +229,14 @@ static void ioq_submit(LinuxAioState *s)
>  
>  void laio_io_plug(BlockDriverState *bs, LinuxAioState *s)
>  {
> -    assert(!s->io_q.plugged);
> -    s->io_q.plugged = 1;
> +    s->io_q.plugged++;
>  }
>  
>  void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s)
>  {
>      assert(s->io_q.plugged);
> -    s->io_q.plugged = 0;
> -    if (!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
> +    if (--s->io_q.plugged == 0 &&
> +        !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
>          ioq_submit(s);
>      }
>  }
> @@ -325,6 +326,7 @@ void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
>  
>  void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
>  {
> +    s->aio_context = new_context;
>      s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
>      aio_set_event_notifier(new_context, &s->e, false,
>                             qemu_laio_completion_cb);
> diff --git a/block/raw-posix.c b/block/raw-posix.c
> index bef7a67..aedf575 100644
> --- a/block/raw-posix.c
> +++ b/block/raw-posix.c
> @@ -32,7 +32,7 @@
>  #include "trace.h"
>  #include "block/thread-pool.h"
>  #include "qemu/iov.h"
> -#include "raw-aio.h"
> +#include "block/raw-aio.h"
>  #include "qapi/util.h"
>  #include "qapi/qmp/qstring.h"
>  
> @@ -137,10 +137,6 @@ typedef struct BDRVRawState {
>      int open_flags;
>      size_t buf_align;
>  
> -#ifdef CONFIG_LINUX_AIO
> -    int use_aio;
> -    LinuxAioState *aio_ctx;
> -#endif
>  #ifdef CONFIG_XFS
>      bool is_xfs:1;
>  #endif
> @@ -154,9 +150,6 @@ typedef struct BDRVRawState {
>  typedef struct BDRVRawReopenState {
>      int fd;
>      int open_flags;
> -#ifdef CONFIG_LINUX_AIO
> -    int use_aio;
> -#endif
>  } BDRVRawReopenState;
>  
>  static int fd_open(BlockDriverState *bs);
> @@ -374,58 +367,15 @@ static void raw_parse_flags(int bdrv_flags, int *open_flags)
>      }
>  }
>  
> -static void raw_detach_aio_context(BlockDriverState *bs)
> -{
>  #ifdef CONFIG_LINUX_AIO
> -    BDRVRawState *s = bs->opaque;
> -
> -    if (s->use_aio) {
> -        laio_detach_aio_context(s->aio_ctx, bdrv_get_aio_context(bs));
> -    }
> -#endif
> -}
> -
> -static void raw_attach_aio_context(BlockDriverState *bs,
> -                                   AioContext *new_context)
> +static bool raw_use_aio(int bdrv_flags)
>  {
> -#ifdef CONFIG_LINUX_AIO
> -    BDRVRawState *s = bs->opaque;
> -
> -    if (s->use_aio) {
> -        laio_attach_aio_context(s->aio_ctx, new_context);
> -    }
> -#endif
> -}
> -
> -#ifdef CONFIG_LINUX_AIO
> -static int raw_set_aio(LinuxAioState **aio_ctx, int *use_aio, int bdrv_flags)
> -{
> -    int ret = -1;
> -    assert(aio_ctx != NULL);
> -    assert(use_aio != NULL);
>      /*
>       * Currently Linux do AIO only for files opened with O_DIRECT
>       * specified so check NOCACHE flag too
>       */
> -    if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
> -                      (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
> -
> -        /* if non-NULL, laio_init() has already been run */
> -        if (*aio_ctx == NULL) {
> -            *aio_ctx = laio_init();
> -            if (!*aio_ctx) {
> -                goto error;
> -            }
> -        }
> -        *use_aio = 1;
> -    } else {
> -        *use_aio = 0;
> -    }
> -
> -    ret = 0;
> -
> -error:
> -    return ret;
> +    return (bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
> +                         (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO);
>  }
>  #endif
>  
> @@ -494,13 +444,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
>      s->fd = fd;
>  
>  #ifdef CONFIG_LINUX_AIO
> -    if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
> -        qemu_close(fd);
> -        ret = -errno;
> -        error_setg_errno(errp, -ret, "Could not set AIO state");
> -        goto fail;
> -    }
> -    if (!s->use_aio && (bdrv_flags & BDRV_O_NATIVE_AIO)) {
> +    if (!raw_use_aio(bdrv_flags) && (bdrv_flags & BDRV_O_NATIVE_AIO)) {
>          error_setg(errp, "aio=native was specified, but it requires "
>                           "cache.direct=on, which was not specified.");
>          ret = -EINVAL;
> @@ -567,8 +511,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
>      }
>  #endif
>  
> -    raw_attach_aio_context(bs, bdrv_get_aio_context(bs));
> -
>      ret = 0;
>  fail:
>      if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
> @@ -603,18 +545,6 @@ static int raw_reopen_prepare(BDRVReopenState *state,
>      state->opaque = g_new0(BDRVRawReopenState, 1);
>      raw_s = state->opaque;
>  
> -#ifdef CONFIG_LINUX_AIO
> -    raw_s->use_aio = s->use_aio;
> -
> -    /* we can use s->aio_ctx instead of a copy, because the use_aio flag is
> -     * valid in the 'false' condition even if aio_ctx is set, and raw_set_aio()
> -     * won't override aio_ctx if aio_ctx is non-NULL */
> -    if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) {
> -        error_setg(errp, "Could not set AIO state");
> -        return -1;
> -    }
> -#endif
> -
>      if (s->type == FTYPE_CD) {
>          raw_s->open_flags |= O_NONBLOCK;
>      }
> @@ -697,9 +627,6 @@ static void raw_reopen_commit(BDRVReopenState *state)
>  
>      qemu_close(s->fd);
>      s->fd = raw_s->fd;
> -#ifdef CONFIG_LINUX_AIO
> -    s->use_aio = raw_s->use_aio;
> -#endif
>  
>      g_free(state->opaque);
>      state->opaque = NULL;
> @@ -1337,9 +1264,10 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
>          if (!bdrv_qiov_is_aligned(bs, qiov)) {
>              type |= QEMU_AIO_MISALIGNED;
>  #ifdef CONFIG_LINUX_AIO
> -        } else if (s->use_aio) {
> +        } else if (bs->open_flags & BDRV_O_NATIVE_AIO) {
> +            LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
>              assert(qiov->size == bytes);
> -            return laio_co_submit(bs, s->aio_ctx, s->fd, offset, qiov, type);
> +            return laio_co_submit(bs, aio, s->fd, offset, qiov, type);
>  #endif
>          }
>      }
> @@ -1365,9 +1293,9 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
>  static void raw_aio_plug(BlockDriverState *bs)
>  {
>  #ifdef CONFIG_LINUX_AIO
> -    BDRVRawState *s = bs->opaque;
> -    if (s->use_aio) {
> -        laio_io_plug(bs, s->aio_ctx);
> +    if (bs->open_flags & BDRV_O_NATIVE_AIO) {
> +        LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
> +        laio_io_plug(bs, aio);
>      }
>  #endif
>  }
> @@ -1375,9 +1303,9 @@ static void raw_aio_plug(BlockDriverState *bs)
>  static void raw_aio_unplug(BlockDriverState *bs)
>  {
>  #ifdef CONFIG_LINUX_AIO
> -    BDRVRawState *s = bs->opaque;
> -    if (s->use_aio) {
> -        laio_io_unplug(bs, s->aio_ctx);
> +    if (bs->open_flags & BDRV_O_NATIVE_AIO) {
> +        LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
> +        laio_io_unplug(bs, aio);
>      }
>  #endif
>  }
> @@ -1397,13 +1325,6 @@ static void raw_close(BlockDriverState *bs)
>  {
>      BDRVRawState *s = bs->opaque;
>  
> -    raw_detach_aio_context(bs);
> -
> -#ifdef CONFIG_LINUX_AIO
> -    if (s->use_aio) {
> -        laio_cleanup(s->aio_ctx);
> -    }
> -#endif
>      if (s->fd >= 0) {
>          qemu_close(s->fd);
>          s->fd = -1;
> @@ -1962,9 +1883,6 @@ BlockDriver bdrv_file = {
>      .bdrv_get_allocated_file_size
>                          = raw_get_allocated_file_size,
>  
> -    .bdrv_detach_aio_context = raw_detach_aio_context,
> -    .bdrv_attach_aio_context = raw_attach_aio_context,
> -
>      .create_opts = &raw_create_opts,
>  };
>  
> @@ -2410,9 +2328,6 @@ static BlockDriver bdrv_host_device = {
>      .bdrv_probe_blocksizes = hdev_probe_blocksizes,
>      .bdrv_probe_geometry = hdev_probe_geometry,
>  
> -    .bdrv_detach_aio_context = raw_detach_aio_context,
> -    .bdrv_attach_aio_context = raw_attach_aio_context,
> -
>      /* generic scsi device */
>  #ifdef __linux__
>      .bdrv_aio_ioctl     = hdev_aio_ioctl,
> @@ -2532,9 +2447,6 @@ static BlockDriver bdrv_host_cdrom = {
>      .bdrv_get_allocated_file_size
>                          = raw_get_allocated_file_size,
>  
> -    .bdrv_detach_aio_context = raw_detach_aio_context,
> -    .bdrv_attach_aio_context = raw_attach_aio_context,
> -
>      /* removable device support */
>      .bdrv_is_inserted   = cdrom_is_inserted,
>      .bdrv_eject         = cdrom_eject,
> @@ -2665,9 +2577,6 @@ static BlockDriver bdrv_host_cdrom = {
>      .bdrv_get_allocated_file_size
>                          = raw_get_allocated_file_size,
>  
> -    .bdrv_detach_aio_context = raw_detach_aio_context,
> -    .bdrv_attach_aio_context = raw_attach_aio_context,
> -
>      /* removable device support */
>      .bdrv_is_inserted   = cdrom_is_inserted,
>      .bdrv_eject         = cdrom_eject,
> diff --git a/block/raw-win32.c b/block/raw-win32.c
> index fd23891..ce77432 100644
> --- a/block/raw-win32.c
> +++ b/block/raw-win32.c
> @@ -27,7 +27,7 @@
>  #include "qemu/timer.h"
>  #include "block/block_int.h"
>  #include "qemu/module.h"
> -#include "raw-aio.h"
> +#include "block/raw-aio.h"
>  #include "trace.h"
>  #include "block/thread-pool.h"
>  #include "qemu/iov.h"
> diff --git a/include/block/aio.h b/include/block/aio.h
> index 88a64ee..afd72a7 100644
> --- a/include/block/aio.h
> +++ b/include/block/aio.h
> @@ -47,6 +47,9 @@ typedef struct AioHandler AioHandler;
>  typedef void QEMUBHFunc(void *opaque);
>  typedef void IOHandler(void *opaque);
>  
> +struct ThreadPool;
> +struct LinuxAioState;
> +
>  struct AioContext {
>      GSource source;
>  
> @@ -119,6 +122,13 @@ struct AioContext {
>      /* Thread pool for performing work and receiving completion callbacks */
>      struct ThreadPool *thread_pool;
>  
> +#ifdef CONFIG_LINUX_AIO
> +    /* State for native Linux AIO.  Uses aio_context_acquire/release for
> +     * locking.
> +     */
> +    struct LinuxAioState *linux_aio;
> +#endif
> +
>      /* TimerLists for calling timers - one per clock type */
>      QEMUTimerListGroup tlg;
>  
> @@ -335,6 +345,9 @@ GSource *aio_get_g_source(AioContext *ctx);
>  /* Return the ThreadPool bound to this AioContext */
>  struct ThreadPool *aio_get_thread_pool(AioContext *ctx);
>  
> +/* Return the LinuxAioState bound to this AioContext */
> +struct LinuxAioState *aio_get_linux_aio(AioContext *ctx);
> +
>  /**
>   * aio_timer_new:
>   * @ctx: the aio context
> diff --git a/block/raw-aio.h b/include/block/raw-aio.h
> similarity index 100%
> rename from block/raw-aio.h
> rename to include/block/raw-aio.h
>

Stefan Hajnoczi July 15, 2016, 10:10 a.m. UTC | #2
On Mon, Jul 04, 2016 at 06:33:20PM +0200, Paolo Bonzini wrote:
> This has better performance because it executes fewer system calls
> and does not use a bottom half per disk.
> 
> Originally proposed by Ming Lei.
> 
> Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  async.c                            |  23 +++++++
>  block/linux-aio.c                  |  10 ++--
>  block/raw-posix.c                  | 119 +++++--------------------------------
>  block/raw-win32.c                  |   2 +-
>  include/block/aio.h                |  13 ++++
>  {block => include/block}/raw-aio.h |   0
>  6 files changed, 57 insertions(+), 110 deletions(-)
>  rename {block => include/block}/raw-aio.h (100%)

Thanks, applied to my block tree:
https://github.com/stefanha/qemu/commits/block

Stefan
diff mbox

Patch

diff --git a/async.c b/async.c
index b4bf205..6caa98c 100644
--- a/async.c
+++ b/async.c
@@ -29,6 +29,7 @@ 
 #include "block/thread-pool.h"
 #include "qemu/main-loop.h"
 #include "qemu/atomic.h"
+#include "block/raw-aio.h"
 
 /***********************************************************/
 /* bottom halves (can be seen as timers which expire ASAP) */
@@ -242,6 +243,14 @@  aio_ctx_finalize(GSource     *source)
     qemu_bh_delete(ctx->notify_dummy_bh);
     thread_pool_free(ctx->thread_pool);
 
+#ifdef CONFIG_LINUX_AIO
+    if (ctx->linux_aio) {
+        laio_detach_aio_context(ctx->linux_aio, ctx);
+        laio_cleanup(ctx->linux_aio);
+        ctx->linux_aio = NULL;
+    }
+#endif
+
     qemu_mutex_lock(&ctx->bh_lock);
     while (ctx->first_bh) {
         QEMUBH *next = ctx->first_bh->next;
@@ -282,6 +291,17 @@  ThreadPool *aio_get_thread_pool(AioContext *ctx)
     return ctx->thread_pool;
 }
 
+#ifdef CONFIG_LINUX_AIO
+LinuxAioState *aio_get_linux_aio(AioContext *ctx)
+{
+    if (!ctx->linux_aio) {
+        ctx->linux_aio = laio_init();
+        laio_attach_aio_context(ctx->linux_aio, ctx);
+    }
+    return ctx->linux_aio;
+}
+#endif
+
 void aio_notify(AioContext *ctx)
 {
     /* Write e.g. bh->scheduled before reading ctx->notify_me.  Pairs
@@ -345,6 +365,9 @@  AioContext *aio_context_new(Error **errp)
                            false,
                            (EventNotifierHandler *)
                            event_notifier_dummy_cb);
+#ifdef CONFIG_LINUX_AIO
+    ctx->linux_aio = NULL;
+#endif
     ctx->thread_pool = NULL;
     qemu_mutex_init(&ctx->bh_lock);
     rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx);
diff --git a/block/linux-aio.c b/block/linux-aio.c
index e468960..3eb0a0e 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -50,6 +50,8 @@  typedef struct {
 } LaioQueue;
 
 struct LinuxAioState {
+    AioContext *aio_context;
+
     io_context_t ctx;
     EventNotifier e;
 
@@ -227,15 +229,14 @@  static void ioq_submit(LinuxAioState *s)
 
 void laio_io_plug(BlockDriverState *bs, LinuxAioState *s)
 {
-    assert(!s->io_q.plugged);
-    s->io_q.plugged = 1;
+    s->io_q.plugged++;
 }
 
 void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s)
 {
     assert(s->io_q.plugged);
-    s->io_q.plugged = 0;
-    if (!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
+    if (--s->io_q.plugged == 0 &&
+        !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
         ioq_submit(s);
     }
 }
@@ -325,6 +326,7 @@  void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
 
 void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
 {
+    s->aio_context = new_context;
     s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
     aio_set_event_notifier(new_context, &s->e, false,
                            qemu_laio_completion_cb);
diff --git a/block/raw-posix.c b/block/raw-posix.c
index bef7a67..aedf575 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -32,7 +32,7 @@ 
 #include "trace.h"
 #include "block/thread-pool.h"
 #include "qemu/iov.h"
-#include "raw-aio.h"
+#include "block/raw-aio.h"
 #include "qapi/util.h"
 #include "qapi/qmp/qstring.h"
 
@@ -137,10 +137,6 @@  typedef struct BDRVRawState {
     int open_flags;
     size_t buf_align;
 
-#ifdef CONFIG_LINUX_AIO
-    int use_aio;
-    LinuxAioState *aio_ctx;
-#endif
 #ifdef CONFIG_XFS
     bool is_xfs:1;
 #endif
@@ -154,9 +150,6 @@  typedef struct BDRVRawState {
 typedef struct BDRVRawReopenState {
     int fd;
     int open_flags;
-#ifdef CONFIG_LINUX_AIO
-    int use_aio;
-#endif
 } BDRVRawReopenState;
 
 static int fd_open(BlockDriverState *bs);
@@ -374,58 +367,15 @@  static void raw_parse_flags(int bdrv_flags, int *open_flags)
     }
 }
 
-static void raw_detach_aio_context(BlockDriverState *bs)
-{
 #ifdef CONFIG_LINUX_AIO
-    BDRVRawState *s = bs->opaque;
-
-    if (s->use_aio) {
-        laio_detach_aio_context(s->aio_ctx, bdrv_get_aio_context(bs));
-    }
-#endif
-}
-
-static void raw_attach_aio_context(BlockDriverState *bs,
-                                   AioContext *new_context)
+static bool raw_use_aio(int bdrv_flags)
 {
-#ifdef CONFIG_LINUX_AIO
-    BDRVRawState *s = bs->opaque;
-
-    if (s->use_aio) {
-        laio_attach_aio_context(s->aio_ctx, new_context);
-    }
-#endif
-}
-
-#ifdef CONFIG_LINUX_AIO
-static int raw_set_aio(LinuxAioState **aio_ctx, int *use_aio, int bdrv_flags)
-{
-    int ret = -1;
-    assert(aio_ctx != NULL);
-    assert(use_aio != NULL);
     /*
      * Currently Linux do AIO only for files opened with O_DIRECT
      * specified so check NOCACHE flag too
      */
-    if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
-                      (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
-
-        /* if non-NULL, laio_init() has already been run */
-        if (*aio_ctx == NULL) {
-            *aio_ctx = laio_init();
-            if (!*aio_ctx) {
-                goto error;
-            }
-        }
-        *use_aio = 1;
-    } else {
-        *use_aio = 0;
-    }
-
-    ret = 0;
-
-error:
-    return ret;
+    return (bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
+                         (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO);
 }
 #endif
 
@@ -494,13 +444,7 @@  static int raw_open_common(BlockDriverState *bs, QDict *options,
     s->fd = fd;
 
 #ifdef CONFIG_LINUX_AIO
-    if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
-        qemu_close(fd);
-        ret = -errno;
-        error_setg_errno(errp, -ret, "Could not set AIO state");
-        goto fail;
-    }
-    if (!s->use_aio && (bdrv_flags & BDRV_O_NATIVE_AIO)) {
+    if (!raw_use_aio(bdrv_flags) && (bdrv_flags & BDRV_O_NATIVE_AIO)) {
         error_setg(errp, "aio=native was specified, but it requires "
                          "cache.direct=on, which was not specified.");
         ret = -EINVAL;
@@ -567,8 +511,6 @@  static int raw_open_common(BlockDriverState *bs, QDict *options,
     }
 #endif
 
-    raw_attach_aio_context(bs, bdrv_get_aio_context(bs));
-
     ret = 0;
 fail:
     if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
@@ -603,18 +545,6 @@  static int raw_reopen_prepare(BDRVReopenState *state,
     state->opaque = g_new0(BDRVRawReopenState, 1);
     raw_s = state->opaque;
 
-#ifdef CONFIG_LINUX_AIO
-    raw_s->use_aio = s->use_aio;
-
-    /* we can use s->aio_ctx instead of a copy, because the use_aio flag is
-     * valid in the 'false' condition even if aio_ctx is set, and raw_set_aio()
-     * won't override aio_ctx if aio_ctx is non-NULL */
-    if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) {
-        error_setg(errp, "Could not set AIO state");
-        return -1;
-    }
-#endif
-
     if (s->type == FTYPE_CD) {
         raw_s->open_flags |= O_NONBLOCK;
     }
@@ -697,9 +627,6 @@  static void raw_reopen_commit(BDRVReopenState *state)
 
     qemu_close(s->fd);
     s->fd = raw_s->fd;
-#ifdef CONFIG_LINUX_AIO
-    s->use_aio = raw_s->use_aio;
-#endif
 
     g_free(state->opaque);
     state->opaque = NULL;
@@ -1337,9 +1264,10 @@  static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
         if (!bdrv_qiov_is_aligned(bs, qiov)) {
             type |= QEMU_AIO_MISALIGNED;
 #ifdef CONFIG_LINUX_AIO
-        } else if (s->use_aio) {
+        } else if (bs->open_flags & BDRV_O_NATIVE_AIO) {
+            LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
             assert(qiov->size == bytes);
-            return laio_co_submit(bs, s->aio_ctx, s->fd, offset, qiov, type);
+            return laio_co_submit(bs, aio, s->fd, offset, qiov, type);
 #endif
         }
     }
@@ -1365,9 +1293,9 @@  static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
 static void raw_aio_plug(BlockDriverState *bs)
 {
 #ifdef CONFIG_LINUX_AIO
-    BDRVRawState *s = bs->opaque;
-    if (s->use_aio) {
-        laio_io_plug(bs, s->aio_ctx);
+    if (bs->open_flags & BDRV_O_NATIVE_AIO) {
+        LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
+        laio_io_plug(bs, aio);
     }
 #endif
 }
@@ -1375,9 +1303,9 @@  static void raw_aio_plug(BlockDriverState *bs)
 static void raw_aio_unplug(BlockDriverState *bs)
 {
 #ifdef CONFIG_LINUX_AIO
-    BDRVRawState *s = bs->opaque;
-    if (s->use_aio) {
-        laio_io_unplug(bs, s->aio_ctx);
+    if (bs->open_flags & BDRV_O_NATIVE_AIO) {
+        LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
+        laio_io_unplug(bs, aio);
     }
 #endif
 }
@@ -1397,13 +1325,6 @@  static void raw_close(BlockDriverState *bs)
 {
     BDRVRawState *s = bs->opaque;
 
-    raw_detach_aio_context(bs);
-
-#ifdef CONFIG_LINUX_AIO
-    if (s->use_aio) {
-        laio_cleanup(s->aio_ctx);
-    }
-#endif
     if (s->fd >= 0) {
         qemu_close(s->fd);
         s->fd = -1;
@@ -1962,9 +1883,6 @@  BlockDriver bdrv_file = {
     .bdrv_get_allocated_file_size
                         = raw_get_allocated_file_size,
 
-    .bdrv_detach_aio_context = raw_detach_aio_context,
-    .bdrv_attach_aio_context = raw_attach_aio_context,
-
     .create_opts = &raw_create_opts,
 };
 
@@ -2410,9 +2328,6 @@  static BlockDriver bdrv_host_device = {
     .bdrv_probe_blocksizes = hdev_probe_blocksizes,
     .bdrv_probe_geometry = hdev_probe_geometry,
 
-    .bdrv_detach_aio_context = raw_detach_aio_context,
-    .bdrv_attach_aio_context = raw_attach_aio_context,
-
     /* generic scsi device */
 #ifdef __linux__
     .bdrv_aio_ioctl     = hdev_aio_ioctl,
@@ -2532,9 +2447,6 @@  static BlockDriver bdrv_host_cdrom = {
     .bdrv_get_allocated_file_size
                         = raw_get_allocated_file_size,
 
-    .bdrv_detach_aio_context = raw_detach_aio_context,
-    .bdrv_attach_aio_context = raw_attach_aio_context,
-
     /* removable device support */
     .bdrv_is_inserted   = cdrom_is_inserted,
     .bdrv_eject         = cdrom_eject,
@@ -2665,9 +2577,6 @@  static BlockDriver bdrv_host_cdrom = {
     .bdrv_get_allocated_file_size
                         = raw_get_allocated_file_size,
 
-    .bdrv_detach_aio_context = raw_detach_aio_context,
-    .bdrv_attach_aio_context = raw_attach_aio_context,
-
     /* removable device support */
     .bdrv_is_inserted   = cdrom_is_inserted,
     .bdrv_eject         = cdrom_eject,
diff --git a/block/raw-win32.c b/block/raw-win32.c
index fd23891..ce77432 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -27,7 +27,7 @@ 
 #include "qemu/timer.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
-#include "raw-aio.h"
+#include "block/raw-aio.h"
 #include "trace.h"
 #include "block/thread-pool.h"
 #include "qemu/iov.h"
diff --git a/include/block/aio.h b/include/block/aio.h
index 88a64ee..afd72a7 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -47,6 +47,9 @@  typedef struct AioHandler AioHandler;
 typedef void QEMUBHFunc(void *opaque);
 typedef void IOHandler(void *opaque);
 
+struct ThreadPool;
+struct LinuxAioState;
+
 struct AioContext {
     GSource source;
 
@@ -119,6 +122,13 @@  struct AioContext {
     /* Thread pool for performing work and receiving completion callbacks */
     struct ThreadPool *thread_pool;
 
+#ifdef CONFIG_LINUX_AIO
+    /* State for native Linux AIO.  Uses aio_context_acquire/release for
+     * locking.
+     */
+    struct LinuxAioState *linux_aio;
+#endif
+
     /* TimerLists for calling timers - one per clock type */
     QEMUTimerListGroup tlg;
 
@@ -335,6 +345,9 @@  GSource *aio_get_g_source(AioContext *ctx);
 /* Return the ThreadPool bound to this AioContext */
 struct ThreadPool *aio_get_thread_pool(AioContext *ctx);
 
+/* Return the LinuxAioState bound to this AioContext */
+struct LinuxAioState *aio_get_linux_aio(AioContext *ctx);
+
 /**
  * aio_timer_new:
  * @ctx: the aio context
diff --git a/block/raw-aio.h b/include/block/raw-aio.h
similarity index 100%
rename from block/raw-aio.h
rename to include/block/raw-aio.h