diff mbox series

[v5,18/20] jobs: protect job.aio_context with BQL and job_mutex

Message ID 20220208143513.1077229-19-eesposit@redhat.com (mailing list archive)
State New, archived
Headers show
Series job: replace AioContext lock with job_mutex | expand

Commit Message

Emanuele Giuseppe Esposito Feb. 8, 2022, 2:35 p.m. UTC
In order to make job.aio_context thread safe, implement a "fake rwlock",
where reads are allowed with either the BQL *or* job_mutex held, but
writes are allowed only with both the BQL *and* job_mutex held.

The only write we have is in child_job_set_aio_ctx, which always
happens under drain (so the job is paused).
For this reason, introduce job_set_aio_context and make sure that
the context is set under BQL, job_mutex and drain.
Also make sure all other places where the aiocontext is read
are protected.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
 block/replication.c |  2 +-
 blockjob.c          |  3 ++-
 include/qemu/job.h  | 19 ++++++++++++++++++-
 job.c               | 12 ++++++++++++
 4 files changed, 33 insertions(+), 3 deletions(-)

Comments

Stefan Hajnoczi March 8, 2022, 1:41 p.m. UTC | #1
On Tue, Feb 08, 2022 at 09:35:11AM -0500, Emanuele Giuseppe Esposito wrote:
>  static AioContext *child_job_get_parent_aio_context(BdrvChild *c)
>  {
>      BlockJob *job = c->opaque;
> +    assert(qemu_in_main_thread());
>  
>      return job->job.aio_context;
>  }

It's not clear to me that .get_parent_aio_context() should only be
called from the main thread. The API is read-only so someone might try
to call from I/O code in the future expecting it to work like other
read-only graph APIs that are available from I/O code.

Currently the assertion is true because the only user is
bdrv_attach_child_*() but please document this invariant for
bdrv_child_get_parent_aio_context() and the callback. Maybe move the
assertion into a higher-level function like
bdrv_child_get_parent_aio_context() (if that still covers all cases).

> diff --git a/include/qemu/job.h b/include/qemu/job.h
> index dfbf2ea501..ca46e46f5b 100644
> --- a/include/qemu/job.h
> +++ b/include/qemu/job.h
> @@ -75,7 +75,12 @@ typedef struct Job {
>      ProgressMeter progress;
>  
>  
> -    /** AioContext to run the job coroutine in */
> +    /**
> +     * AioContext to run the job coroutine in.
> +     * This field can be read when holding either the BQL (so we are in
> +     * the main loop) or the job_mutex.
> +     * Instead, it can be only written when we hold *both* BQL and job_mutex.

s/Instead,//

(It sounds weird because "instead" means "replacement" or "substitution".
We're comparing "read" and "write" here, not substituting them.
Something like "on the other hand" or "conversely" works.)

> +     */
>      AioContext *aio_context;
>  
>      /** Reference count of the block job */
> @@ -706,4 +711,16 @@ void job_dismiss_locked(Job **job, Error **errp);
>  int job_finish_sync_locked(Job *job, void (*finish)(Job *, Error **errp),
>                             Error **errp);
>  
> +/**
> + * Sets the @job->aio_context.
> + * Called with job_mutex *not* held.
> + *
> + * This function must run in the main thread to protect against
> + * concurrent read in job_finish_sync_locked(),
> + * takes the job_mutex lock to protect against the read in
> + * job_do_yield_locked(), and must be called when the coroutine
> + * is quiescent.
> + */
> +void job_set_aio_context(Job *job, AioContext *ctx);
> +
>  #endif
> diff --git a/job.c b/job.c
> index f05850a337..7a07d25ec3 100644
> --- a/job.c
> +++ b/job.c
> @@ -354,6 +354,17 @@ Job *job_get_locked(const char *id)
>      return NULL;
>  }
>  
> +void job_set_aio_context(Job *job, AioContext *ctx)
> +{
> +    /* protect against read in job_finish_sync_locked and job_start */
> +    assert(qemu_in_main_thread());
> +    /* protect against read in job_do_yield_locked */
> +    JOB_LOCK_GUARD();
> +    /* ensure the coroutine is quiescent while the AioContext is changed */
> +    assert(job->pause_count > 0);
> +    job->aio_context = ctx;
> +}
> +
>  /* Called with job_mutex *not* held. */
>  static void job_sleep_timer_cb(void *opaque)
>  {
> @@ -1256,6 +1267,7 @@ int job_finish_sync_locked(Job *job, void (*finish)(Job *, Error **errp),
>  {
>      Error *local_err = NULL;
>      int ret;
> +    assert(qemu_in_main_thread());
>  
>      job_ref_locked(job);
>  
> -- 
> 2.31.1
>
Emanuele Giuseppe Esposito March 10, 2022, 10:09 a.m. UTC | #2
Am 08/03/2022 um 14:41 schrieb Stefan Hajnoczi:
> It's not clear to me that .get_parent_aio_context() should only be
> called from the main thread. The API is read-only so someone might try
> to call from I/O code in the future expecting it to work like other
> read-only graph APIs that are available from I/O code.
> 
> Currently the assertion is true because the only user is
> bdrv_attach_child_*() but please document this invariant for
> bdrv_child_get_parent_aio_context() and the callback. Maybe move the
> assertion into a higher-level function like
> bdrv_child_get_parent_aio_context() (if that still covers all cases).

We classified .get_parent_aio_context as a GS (global state) callback, and
indeed bdrv_child_get_parent_aio_context has the macro GLOBAL_STATE_CODE(),
so we should be fine.

Emanuele
diff mbox series

Patch

diff --git a/block/replication.c b/block/replication.c
index 50ea778937..68018948b9 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -148,8 +148,8 @@  static void replication_close(BlockDriverState *bs)
     }
     if (s->stage == BLOCK_REPLICATION_FAILOVER) {
         commit_job = &s->commit_job->job;
-        assert(commit_job->aio_context == qemu_get_current_aio_context());
         WITH_JOB_LOCK_GUARD() {
+            assert(commit_job->aio_context == qemu_get_current_aio_context());
             job_cancel_sync_locked(commit_job, false);
         }
     }
diff --git a/blockjob.c b/blockjob.c
index 04d868f020..afb0e9ad5e 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -154,12 +154,13 @@  static void child_job_set_aio_ctx(BdrvChild *c, AioContext *ctx,
         bdrv_set_aio_context_ignore(sibling->bs, ctx, ignore);
     }
 
-    job->job.aio_context = ctx;
+    job_set_aio_context(&job->job, ctx);
 }
 
 static AioContext *child_job_get_parent_aio_context(BdrvChild *c)
 {
     BlockJob *job = c->opaque;
+    assert(qemu_in_main_thread());
 
     return job->job.aio_context;
 }
diff --git a/include/qemu/job.h b/include/qemu/job.h
index dfbf2ea501..ca46e46f5b 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -75,7 +75,12 @@  typedef struct Job {
     ProgressMeter progress;
 
 
-    /** AioContext to run the job coroutine in */
+    /**
+     * AioContext to run the job coroutine in.
+     * This field can be read when holding either the BQL (so we are in
+     * the main loop) or the job_mutex.
+     * Instead, it can be only written when we hold *both* BQL and job_mutex.
+     */
     AioContext *aio_context;
 
     /** Reference count of the block job */
@@ -706,4 +711,16 @@  void job_dismiss_locked(Job **job, Error **errp);
 int job_finish_sync_locked(Job *job, void (*finish)(Job *, Error **errp),
                            Error **errp);
 
+/**
+ * Sets the @job->aio_context.
+ * Called with job_mutex *not* held.
+ *
+ * This function must run in the main thread to protect against
+ * concurrent read in job_finish_sync_locked(),
+ * takes the job_mutex lock to protect against the read in
+ * job_do_yield_locked(), and must be called when the coroutine
+ * is quiescent.
+ */
+void job_set_aio_context(Job *job, AioContext *ctx);
+
 #endif
diff --git a/job.c b/job.c
index f05850a337..7a07d25ec3 100644
--- a/job.c
+++ b/job.c
@@ -354,6 +354,17 @@  Job *job_get_locked(const char *id)
     return NULL;
 }
 
+void job_set_aio_context(Job *job, AioContext *ctx)
+{
+    /* protect against read in job_finish_sync_locked and job_start */
+    assert(qemu_in_main_thread());
+    /* protect against read in job_do_yield_locked */
+    JOB_LOCK_GUARD();
+    /* ensure the coroutine is quiescent while the AioContext is changed */
+    assert(job->pause_count > 0);
+    job->aio_context = ctx;
+}
+
 /* Called with job_mutex *not* held. */
 static void job_sleep_timer_cb(void *opaque)
 {
@@ -1256,6 +1267,7 @@  int job_finish_sync_locked(Job *job, void (*finish)(Job *, Error **errp),
 {
     Error *local_err = NULL;
     int ret;
+    assert(qemu_in_main_thread());
 
     job_ref_locked(job);