diff mbox series

[PULL,38/42] job: Avoid deadlocks in job_completed_txn_abort()

Message ID 20180925151541.18932-39-mreitz@redhat.com (mailing list archive)
State New, archived
Headers show
Series [PULL,01/42] block/commit: add block job creation flags | expand

Commit Message

Max Reitz Sept. 25, 2018, 3:15 p.m. UTC
From: Kevin Wolf <kwolf@redhat.com>

Amongst others, job_finalize_single() calls the .prepare/.commit/.abort
callbacks of the individual job driver. Recently, their use was adapted
for all block jobs so that they involve code calling AIO_WAIT_WHILE()
now. Such code must be called under the AioContext lock for the
respective job, but without holding any other AioContext lock.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 job.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)
diff mbox series

Patch

diff --git a/job.c b/job.c
index 518f603314..93aea79a7b 100644
--- a/job.c
+++ b/job.c
@@ -718,6 +718,7 @@  static void job_cancel_async(Job *job, bool force)
 
 static void job_completed_txn_abort(Job *job)
 {
+    AioContext *outer_ctx = job->aio_context;
     AioContext *ctx;
     JobTxn *txn = job->txn;
     Job *other_job;
@@ -731,23 +732,26 @@  static void job_completed_txn_abort(Job *job)
     txn->aborting = true;
     job_txn_ref(txn);
 
-    /* We are the first failed job. Cancel other jobs. */
-    QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
-        ctx = other_job->aio_context;
-        aio_context_acquire(ctx);
-    }
+    /* We can only hold the single job's AioContext lock while calling
+     * job_finalize_single() because the finalization callbacks can involve
+     * calls of AIO_WAIT_WHILE(), which could deadlock otherwise. */
+    aio_context_release(outer_ctx);
 
     /* Other jobs are effectively cancelled by us, set the status for
      * them; this job, however, may or may not be cancelled, depending
      * on the caller, so leave it. */
     QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
         if (other_job != job) {
+            ctx = other_job->aio_context;
+            aio_context_acquire(ctx);
             job_cancel_async(other_job, false);
+            aio_context_release(ctx);
         }
     }
     while (!QLIST_EMPTY(&txn->jobs)) {
         other_job = QLIST_FIRST(&txn->jobs);
         ctx = other_job->aio_context;
+        aio_context_acquire(ctx);
         if (!job_is_completed(other_job)) {
             assert(job_is_cancelled(other_job));
             job_finish_sync(other_job, NULL, NULL);
@@ -756,6 +760,8 @@  static void job_completed_txn_abort(Job *job)
         aio_context_release(ctx);
     }
 
+    aio_context_acquire(outer_ctx);
+
     job_txn_unref(txn);
 }