diff mbox

[v3,for-2.11,2/4] coroutine: abort if we try to schedule or enter a pending coroutine

Message ID ccb6b6368f2ba5198f786559acc92c8432c7a936.1511278397.git.jcody@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jeff Cody Nov. 21, 2017, 3:38 p.m. UTC
The previous patch fixed a race condition, in which there were
coroutines being executing doubly, or after coroutine deletion.

We can detect common scenarios when this happens, and print an error
message and abort before we corrupt memory / data, or segfault.

This patch will abort if an attempt to enter a coroutine is made while
it is currently pending execution, either in a specific AioContext bh,
or pending execution via a timer.  It will also abort if a coroutine
is scheduled, before a prior scheduled run has occurred.

We cannot rely on the existing co->caller check for recursive re-entry
to catch this, as the coroutine may run and exit with
COROUTINE_TERMINATE before the scheduled coroutine executes.

(This is the scenario that was occurring and fixed in the previous
patch).

Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 include/qemu/coroutine_int.h |  6 ++++++
 util/async.c                 | 13 +++++++++++++
 util/qemu-coroutine-sleep.c  | 12 ++++++++++++
 util/qemu-coroutine.c        | 13 +++++++++++++
 4 files changed, 44 insertions(+)

Comments

Stefan Hajnoczi Nov. 21, 2017, 4:39 p.m. UTC | #1
On Tue, Nov 21, 2017 at 10:38:51AM -0500, Jeff Cody wrote:
> diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
> index d6095c1..9e4ed43 100644
> --- a/util/qemu-coroutine.c
> +++ b/util/qemu-coroutine.c
> @@ -107,8 +107,21 @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
>      Coroutine *self = qemu_coroutine_self();
>      CoroutineAction ret;
>  
> +    /* Prior read of co protected by callers - e.g., aio_co_wake */
> +    const char *scheduled = atomic_read(&co->scheduled);

Consider: user code -> qemu_coroutine_enter() ->
qemu_aio_coroutine_enter().  Unlike the
aio_co_wake() code path, there is no read barrier here.

I think atomic_mb_read() is needed to guarantee the thread sees the
latest value.
diff mbox

Patch

diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h
index cb98892..56e4c48 100644
--- a/include/qemu/coroutine_int.h
+++ b/include/qemu/coroutine_int.h
@@ -53,6 +53,12 @@  struct Coroutine {
 
     /* Only used when the coroutine has yielded.  */
     AioContext *ctx;
+
+    /* Used to catch and abort on illegal co-routine entry.
+     * Will contain the name of the function that had first
+     * scheduled the coroutine. */
+    const char *scheduled;
+
     QSIMPLEQ_ENTRY(Coroutine) co_queue_next;
     QSLIST_ENTRY(Coroutine) co_scheduled_next;
 };
diff --git a/util/async.c b/util/async.c
index 0e1bd87..4dd9d95 100644
--- a/util/async.c
+++ b/util/async.c
@@ -388,6 +388,9 @@  static void co_schedule_bh_cb(void *opaque)
         QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
         trace_aio_co_schedule_bh_cb(ctx, co);
         aio_context_acquire(ctx);
+
+        /* Protected by write barrier in qemu_aio_coroutine_enter */
+        atomic_set(&co->scheduled, NULL);
         qemu_coroutine_enter(co);
         aio_context_release(ctx);
     }
@@ -438,6 +441,16 @@  fail:
 void aio_co_schedule(AioContext *ctx, Coroutine *co)
 {
     trace_aio_co_schedule(ctx, co);
+    const char *scheduled = atomic_cmpxchg(&co->scheduled, NULL,
+                                           __func__);
+
+    if (scheduled) {
+        fprintf(stderr,
+                "%s: Co-routine was already scheduled in '%s'\n",
+                __func__, scheduled);
+        abort();
+    }
+
     QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
                               co, co_scheduled_next);
     qemu_bh_schedule(ctx->co_schedule_bh);
diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c
index 9c56550..254349c 100644
--- a/util/qemu-coroutine-sleep.c
+++ b/util/qemu-coroutine-sleep.c
@@ -13,6 +13,7 @@ 
 
 #include "qemu/osdep.h"
 #include "qemu/coroutine.h"
+#include "qemu/coroutine_int.h"
 #include "qemu/timer.h"
 #include "block/aio.h"
 
@@ -25,6 +26,8 @@  static void co_sleep_cb(void *opaque)
 {
     CoSleepCB *sleep_cb = opaque;
 
+    /* Write of schedule protected by barrier write in aio_co_schedule */
+    atomic_set(&sleep_cb->co->scheduled, NULL);
     aio_co_wake(sleep_cb->co);
 }
 
@@ -34,6 +37,15 @@  void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
     CoSleepCB sleep_cb = {
         .co = qemu_coroutine_self(),
     };
+
+    const char *scheduled = atomic_cmpxchg(&sleep_cb.co->scheduled, NULL,
+                                           __func__);
+    if (scheduled) {
+        fprintf(stderr,
+                "%s: Co-routine was already scheduled in '%s'\n",
+                __func__, scheduled);
+        abort();
+    }
     sleep_cb.ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &sleep_cb);
     timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns);
     qemu_coroutine_yield();
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index d6095c1..9e4ed43 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -107,8 +107,21 @@  void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
     Coroutine *self = qemu_coroutine_self();
     CoroutineAction ret;
 
+    /* Prior read of co protected by callers - e.g., aio_co_wake */
+    const char *scheduled = atomic_read(&co->scheduled);
+
     trace_qemu_aio_coroutine_enter(ctx, self, co, co->entry_arg);
 
+    /* if the Coroutine has already been scheduled, entering it again will
+     * cause us to enter it twice, potentially even after the coroutine has
+     * been deleted */
+    if (scheduled) {
+        fprintf(stderr,
+                "%s: Co-routine was already scheduled in '%s'\n",
+                __func__, scheduled);
+        abort();
+    }
+
     if (co->caller) {
         fprintf(stderr, "Co-routine re-entered recursively\n");
         abort();