diff mbox

[2/5] coroutine: abort if we try to enter coroutine scheduled for another ctx

Message ID 5bec37564f83e39e216d2ebcf7a380b0c7704e0e.1511145863.git.jcody@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jeff Cody Nov. 20, 2017, 2:46 a.m. UTC
The previous patch fixed a race condition, in which there were
coroutines being executing doubly, or after coroutine deletion.

We can detect common scenarios when this happens, and print an error
and abort before we corrupt memory / data, or segfault.

This patch will abort if an attempt to enter a coroutine is made while
it is currently pending execution in a different AioContext.

We cannot rely on the existing co->caller check for recursive re-entry
to catch this, as the coroutine may run and exit with
COROUTINE_TERMINATE before the AioContext scheduled event happens.

(This is the scenario that was occuring and fixed in the previous
patch).

Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 include/qemu/coroutine_int.h | 3 +++
 util/async.c                 | 7 +++++++
 util/qemu-coroutine.c        | 9 +++++++++
 3 files changed, 19 insertions(+)

Comments

Stefan Hajnoczi Nov. 20, 2017, 11:28 a.m. UTC | #1
On Sun, Nov 19, 2017 at 09:46:43PM -0500, Jeff Cody wrote:
> diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h
> index cb98892..931cdc9 100644
> --- a/include/qemu/coroutine_int.h
> +++ b/include/qemu/coroutine_int.h
> @@ -53,6 +53,9 @@ struct Coroutine {
>  
>      /* Only used when the coroutine has yielded.  */
>      AioContext *ctx;
> +
> +    int scheduled;

s/int/bool/

> diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
> index d6095c1..2edab63 100644
> --- a/util/qemu-coroutine.c
> +++ b/util/qemu-coroutine.c
> @@ -109,6 +109,15 @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
>  
>      trace_qemu_aio_coroutine_enter(ctx, self, co, co->entry_arg);
>  
> +    /* if the Coroutine has already been scheduled, entering it again will
> +     * cause us to enter it twice, potentially even after the coroutine has
> +     * been deleted */
> +    if (co->scheduled == 1) {
> +        fprintf(stderr, "Cannot enter a co-routine that has already "
> +                        "been scheduled to run in a different AioContext\n");

This error message is too specific, the AioContext doesn't have to be
different from the current one:

block/blkdebug.c:        aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());

If something calls qemu_aio_coroutine_enter() on the coroutine it might
be from the same AioContext - but still an error condition worth failing
loudly on.

I suggest simplifying the error message:

  fprintf(stderr, "Cannot enter a co-routine that has already "
                  "been scheduled\n");
Jeff Cody Nov. 20, 2017, 1:42 p.m. UTC | #2
On Mon, Nov 20, 2017 at 11:28:26AM +0000, Stefan Hajnoczi wrote:
> On Sun, Nov 19, 2017 at 09:46:43PM -0500, Jeff Cody wrote:
> > diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h
> > index cb98892..931cdc9 100644
> > --- a/include/qemu/coroutine_int.h
> > +++ b/include/qemu/coroutine_int.h
> > @@ -53,6 +53,9 @@ struct Coroutine {
> >  
> >      /* Only used when the coroutine has yielded.  */
> >      AioContext *ctx;
> > +
> > +    int scheduled;
> 
> s/int/bool/
> 

OK.

> > diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
> > index d6095c1..2edab63 100644
> > --- a/util/qemu-coroutine.c
> > +++ b/util/qemu-coroutine.c
> > @@ -109,6 +109,15 @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
> >  
> >      trace_qemu_aio_coroutine_enter(ctx, self, co, co->entry_arg);
> >  
> > +    /* if the Coroutine has already been scheduled, entering it again will
> > +     * cause us to enter it twice, potentially even after the coroutine has
> > +     * been deleted */
> > +    if (co->scheduled == 1) {
> > +        fprintf(stderr, "Cannot enter a co-routine that has already "
> > +                        "been scheduled to run in a different AioContext\n");
> 
> This error message is too specific, the AioContext doesn't have to be
> different from the current one:
> 
> block/blkdebug.c:        aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
> 
> If something calls qemu_aio_coroutine_enter() on the coroutine it might
> be from the same AioContext - but still an error condition worth failing
> loudly on.
> 

Good point.


> I suggest simplifying the error message:
> 
>   fprintf(stderr, "Cannot enter a co-routine that has already "
>                   "been scheduled\n");

OK. I'll also change the wording here to what you have above, as well:

  void aio_co_schedule(AioContext *ctx, Coroutine *co)
  {
       trace_aio_co_schedule(ctx, co);
  +    if (co->scheduled == 1) {
  +        fprintf(stderr,
  +                "Cannot schedule a co-routine that is already scheduled\n");
  +        abort();
  +    }
  +    co->scheduled = 1;
       QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
                                 co, co_scheduled_next);


Jeff
diff mbox

Patch

diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h
index cb98892..931cdc9 100644
--- a/include/qemu/coroutine_int.h
+++ b/include/qemu/coroutine_int.h
@@ -53,6 +53,9 @@  struct Coroutine {
 
     /* Only used when the coroutine has yielded.  */
     AioContext *ctx;
+
+    int scheduled;
+
     QSIMPLEQ_ENTRY(Coroutine) co_queue_next;
     QSLIST_ENTRY(Coroutine) co_scheduled_next;
 };
diff --git a/util/async.c b/util/async.c
index 0e1bd87..d459684 100644
--- a/util/async.c
+++ b/util/async.c
@@ -388,6 +388,7 @@  static void co_schedule_bh_cb(void *opaque)
         QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
         trace_aio_co_schedule_bh_cb(ctx, co);
         aio_context_acquire(ctx);
+        co->scheduled = 0;
         qemu_coroutine_enter(co);
         aio_context_release(ctx);
     }
@@ -438,6 +439,12 @@  fail:
 void aio_co_schedule(AioContext *ctx, Coroutine *co)
 {
     trace_aio_co_schedule(ctx, co);
+    if (co->scheduled == 1) {
+        fprintf(stderr,
+                "Cannot schedule a co-routine that is already scheduled\n");
+        abort();
+    }
+    co->scheduled = 1;
     QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
                               co, co_scheduled_next);
     qemu_bh_schedule(ctx->co_schedule_bh);
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index d6095c1..2edab63 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -109,6 +109,15 @@  void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
 
     trace_qemu_aio_coroutine_enter(ctx, self, co, co->entry_arg);
 
+    /* if the Coroutine has already been scheduled, entering it again will
+     * cause us to enter it twice, potentially even after the coroutine has
+     * been deleted */
+    if (co->scheduled == 1) {
+        fprintf(stderr, "Cannot enter a co-routine that has already "
+                        "been scheduled to run in a different AioContext\n");
+        abort();
+    }
+
     if (co->caller) {
         fprintf(stderr, "Co-routine re-entered recursively\n");
         abort();