diff mbox series

[v2,3/3] aio-posix: keep aio_notify_me disabled during polling

Message ID 20200805100051.361547-4-stefanha@redhat.com (mailing list archive)
State New, archived
Headers show
Series aio-posix: keep aio_notify_me disabled during polling | expand

Commit Message

Stefan Hajnoczi Aug. 5, 2020, 10 a.m. UTC
Polling only monitors the ctx->notified field and does not need the
ctx->notifier EventNotifier to be signalled. Keep ctx->notify_me
disabled while polling to avoid unnecessary EventNotifier syscalls.

This optimization improves virtio-blk 4KB random read performance by
18%. The following results are with an IOThread and the null-co block
driver:

Test         IOPS   Error
Before  244518.62 ± 1.20%
After   290706.11 ± 0.44%

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 util/aio-posix.c | 59 +++++++++++++++++++++++++-----------------------
 1 file changed, 31 insertions(+), 28 deletions(-)

Comments

Paolo Bonzini Aug. 5, 2020, 4:37 p.m. UTC | #1
On 05/08/20 12:00, Stefan Hajnoczi wrote:
> +
> +        /*
> +         * aio_notify can avoid the expensive event_notifier_set if
> +         * everything (file descriptors, bottom halves, timers) will
> +         * be re-evaluated before the next blocking poll().  This is
> +         * already true when aio_poll is called with blocking == false;
> +         * if blocking == true, it is only true after poll() returns,
> +         * so disable the optimization now.
> +         */
> +        if (use_notify_me) {
> +            atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
> +            /*
> +             * Write ctx->notify_me before reading ctx->notified.  Pairs with
> +             * smp_mb in aio_notify().
> +             */
> +            smp_mb();
> +
> +            /* Don't block if aio_notify() was called */
> +            if (atomic_read(&ctx->notified)) {
> +                timeout = 0;
> +            }

Aha, this is the trick: "timeout = 0" also applies if a timer was moved 
early.  In this case you uselessly keep notify_me set for a bit, but 
it's okay. Nice!

The code can be simplified a bit more, since the use_notify_me variable 
is just "timeout":

    use_notify_me = (timeout != 0);
    if (use_notify_me) {
         /*
          * aio_notify can avoid the expensive event_notifier_set if
          * everything (file descriptors, bottom halves, timers) will
          * be re-evaluated before the next blocking poll().  This is
          * already true when aio_poll is called with blocking == false;
          * if blocking == true, it is only true after poll() returns,
          * so disable the optimization now.
          */
         atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
         /*
          * Write ctx->notify_me before reading ctx->notified.  Pairs with
          * smp_mb in aio_notify().
          */
         smp_mb();
 
         /* Don't block if aio_notify() was called */
         if (atomic_read(&ctx->notified)) {
             timeout = 0;
         }
     }
     if (timeout || ctx->fdmon_ops->need_wait(ctx)) {
         ret = ctx->fdmon_ops->wait(ctx, &ready_list, timeout);
     }
     if (use_notify_me) {
         /* Finish the poll before clearing the flag.  */
         atomic_store_release(&ctx->notify_me,
                              atomic_read(&ctx->notify_me) - 2);
     }

Paolo
Stefan Hajnoczi Aug. 6, 2020, 10:52 a.m. UTC | #2
On Wed, Aug 05, 2020 at 06:37:45PM +0200, Paolo Bonzini wrote:
> On 05/08/20 12:00, Stefan Hajnoczi wrote:
> > +
> > +        /*
> > +         * aio_notify can avoid the expensive event_notifier_set if
> > +         * everything (file descriptors, bottom halves, timers) will
> > +         * be re-evaluated before the next blocking poll().  This is
> > +         * already true when aio_poll is called with blocking == false;
> > +         * if blocking == true, it is only true after poll() returns,
> > +         * so disable the optimization now.
> > +         */
> > +        if (use_notify_me) {
> > +            atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
> > +            /*
> > +             * Write ctx->notify_me before reading ctx->notified.  Pairs with
> > +             * smp_mb in aio_notify().
> > +             */
> > +            smp_mb();
> > +
> > +            /* Don't block if aio_notify() was called */
> > +            if (atomic_read(&ctx->notified)) {
> > +                timeout = 0;
> > +            }
> 
> Aha, this is the trick: "timeout = 0" also applies if a timer was moved 
> early.  In this case you uselessly keep notify_me set for a bit, but 
> it's okay. Nice!
> 
> The code can be simplified a bit more, since the use_notify_me variable 
> is just "timeout":

Good point. I'll send another revision.

Stefan
diff mbox series

Patch

diff --git a/util/aio-posix.c b/util/aio-posix.c
index 1b2a3af65b..8d10910bcf 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -464,9 +464,6 @@  static bool remove_idle_poll_handlers(AioContext *ctx, int64_t now)
  *
  * Polls for a given time.
  *
- * Note that ctx->notify_me must be non-zero so this function can detect
- * aio_notify().
- *
  * Note that the caller must have incremented ctx->list_lock.
  *
  * Returns: true if progress was made, false otherwise
@@ -476,7 +473,6 @@  static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
     bool progress;
     int64_t start_time, elapsed_time;
 
-    assert(ctx->notify_me);
     assert(qemu_lockcnt_count(&ctx->list_lock) > 0);
 
     trace_run_poll_handlers_begin(ctx, max_ns, *timeout);
@@ -520,8 +516,6 @@  static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
  * @timeout: timeout for blocking wait, computed by the caller and updated if
  *    polling succeeds.
  *
- * ctx->notify_me must be non-zero so this function can detect aio_notify().
- *
  * Note that the caller must have incremented ctx->list_lock.
  *
  * Returns: true if progress was made, false otherwise
@@ -566,23 +560,6 @@  bool aio_poll(AioContext *ctx, bool blocking)
      */
     assert(in_aio_context_home_thread(ctx));
 
-    /* aio_notify can avoid the expensive event_notifier_set if
-     * everything (file descriptors, bottom halves, timers) will
-     * be re-evaluated before the next blocking poll().  This is
-     * already true when aio_poll is called with blocking == false;
-     * if blocking == true, it is only true after poll() returns,
-     * so disable the optimization now.
-     */
-    if (blocking) {
-        atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
-        /*
-         * Write ctx->notify_me before computing the timeout
-         * (reading bottom half flags, etc.).  Pairs with
-         * smp_mb in aio_notify().
-         */
-        smp_mb();
-    }
-
     qemu_lockcnt_inc(&ctx->list_lock);
 
     if (ctx->poll_max_ns) {
@@ -597,15 +574,41 @@  bool aio_poll(AioContext *ctx, bool blocking)
      * system call---a single round of run_poll_handlers_once suffices.
      */
     if (timeout || ctx->fdmon_ops->need_wait(ctx)) {
+        bool use_notify_me = timeout != 0;
+
+        /*
+         * aio_notify can avoid the expensive event_notifier_set if
+         * everything (file descriptors, bottom halves, timers) will
+         * be re-evaluated before the next blocking poll().  This is
+         * already true when aio_poll is called with blocking == false;
+         * if blocking == true, it is only true after poll() returns,
+         * so disable the optimization now.
+         */
+        if (use_notify_me) {
+            atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
+            /*
+             * Write ctx->notify_me before reading ctx->notified.  Pairs with
+             * smp_mb in aio_notify().
+             */
+            smp_mb();
+
+            /* Don't block if aio_notify() was called */
+            if (atomic_read(&ctx->notified)) {
+                timeout = 0;
+            }
+        }
+
         ret = ctx->fdmon_ops->wait(ctx, &ready_list, timeout);
-    }
 
-    if (blocking) {
-        /* Finish the poll before clearing the flag.  */
-        atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
-        aio_notify_accept(ctx);
+        if (use_notify_me) {
+            /* Finish the poll before clearing the flag.  */
+            atomic_store_release(&ctx->notify_me,
+                                 atomic_read(&ctx->notify_me) - 2);
+        }
     }
 
+    aio_notify_accept(ctx);
+
     /* Adjust polling time */
     if (ctx->poll_max_ns) {
         int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;