
[v8,13/20] jobs: group together API calls under the same job lock

Message ID 20220629141538.3400679-14-eesposit@redhat.com (mailing list archive)
State New, archived
Series: job: replace AioContext lock with job_mutex

Commit Message

Emanuele Giuseppe Esposito June 29, 2022, 2:15 p.m. UTC
Now that the API also offers _locked() functions, take advantage of
them and give the caller control to take the lock and call the
_locked() functions.

This is especially useful in for loops, where it makes no sense to
have:

for (job = job_next(); ...)

where each job_next() call takes the lock internally.
Instead, we want:

JOB_LOCK_GUARD();
for (job = job_next_locked(); ...)

Note: at this stage, job_{lock/unlock} and the job lock guard macros
are still *nops*.
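
For illustration, a minimal sketch of how the pattern reads with the
guard in place (the guard and iterator names are the ones introduced
by this series; the loop body is a placeholder):

    JOB_LOCK_GUARD();  /* takes job_mutex for the rest of the scope */

    for (job = job_next_locked(NULL); job;
         job = job_next_locked(job)) {
        /* inspect or modify the job under job_mutex */
    }
    /* job_mutex is released automatically when the scope ends */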

Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
 block.c            | 20 +++++++++++-------
 blockdev.c         | 12 ++++++++---
 blockjob.c         | 52 +++++++++++++++++++++++++++++++---------------
 job-qmp.c          |  4 +++-
 job.c              | 13 +++++++-----
 monitor/qmp-cmds.c |  7 +++++--
 qemu-img.c         | 41 +++++++++++++++++++++---------------
 7 files changed, 97 insertions(+), 52 deletions(-)

Comments

Stefan Hajnoczi July 5, 2022, 8:14 a.m. UTC | #1
On Wed, Jun 29, 2022 at 10:15:31AM -0400, Emanuele Giuseppe Esposito wrote:
> diff --git a/blockdev.c b/blockdev.c
> index 71f793c4ab..5b79093155 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -150,12 +150,15 @@ void blockdev_mark_auto_del(BlockBackend *blk)
>          return;
>      }
>  
> -    for (job = block_job_next(NULL); job; job = block_job_next(job)) {
> +    JOB_LOCK_GUARD();
> +
> +    for (job = block_job_next_locked(NULL); job;
> +         job = block_job_next_locked(job)) {
>          if (block_job_has_bdrv(job, blk_bs(blk))) {
>              AioContext *aio_context = job->job.aio_context;
>              aio_context_acquire(aio_context);

Is there a lock ordering rule for job_mutex and the AioContext lock? I
haven't audited the code, but there might be ABBA lock ordering issues.

> diff --git a/qemu-img.c b/qemu-img.c
> index 4cf4d2423d..289d88a156 100644
> --- a/qemu-img.c
> +++ b/qemu-img.c
> @@ -912,25 +912,30 @@ static void run_block_job(BlockJob *job, Error **errp)
>      int ret = 0;
>  
>      aio_context_acquire(aio_context);
> -    job_ref(&job->job);
> -    do {
> -        float progress = 0.0f;
> -        aio_poll(aio_context, true);
> +    WITH_JOB_LOCK_GUARD() {

Here the lock order is the opposite of above.
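
A sketch of the ABBA hazard being raised (illustrative only; this can
deadlock only once job_lock() is a real mutex rather than a nop):

    Thread A                            Thread B
    --------                            --------
    job_lock();                         aio_context_acquire(ctx);
    aio_context_acquire(ctx);           job_lock();
      /* waits for B to drop ctx */       /* waits for A's job_mutex */

Each thread holds the lock the other one needs, so neither can make
progress.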
Emanuele Giuseppe Esposito July 5, 2022, 8:17 a.m. UTC | #2
On 05/07/2022 10:14, Stefan Hajnoczi wrote:
> On Wed, Jun 29, 2022 at 10:15:31AM -0400, Emanuele Giuseppe Esposito wrote:
>> diff --git a/blockdev.c b/blockdev.c
>> index 71f793c4ab..5b79093155 100644
>> --- a/blockdev.c
>> +++ b/blockdev.c
>> @@ -150,12 +150,15 @@ void blockdev_mark_auto_del(BlockBackend *blk)
>>          return;
>>      }
>>  
>> -    for (job = block_job_next(NULL); job; job = block_job_next(job)) {
>> +    JOB_LOCK_GUARD();
>> +
>> +    for (job = block_job_next_locked(NULL); job;
>> +         job = block_job_next_locked(job)) {
>>          if (block_job_has_bdrv(job, blk_bs(blk))) {
>>              AioContext *aio_context = job->job.aio_context;
>>              aio_context_acquire(aio_context);
> 
> Is there a lock ordering rule for job_mutex and the AioContext lock? I
> haven't audited the code, but there might be ABBA lock ordering issues.

It doesn't really matter here, as the lock is a nop. To be honest I
forgot which one should go first; probably job_lock, because the
aiocontext lock can be taken and released in callbacks.

Should I resend with the ordering fixed? Just to keep the logic
consistent.

> 
>> diff --git a/qemu-img.c b/qemu-img.c
>> index 4cf4d2423d..289d88a156 100644
>> --- a/qemu-img.c
>> +++ b/qemu-img.c
>> @@ -912,25 +912,30 @@ static void run_block_job(BlockJob *job, Error **errp)
>>      int ret = 0;
>>  
>>      aio_context_acquire(aio_context);
>> -    job_ref(&job->job);
>> -    do {
>> -        float progress = 0.0f;
>> -        aio_poll(aio_context, true);
>> +    WITH_JOB_LOCK_GUARD() {
> 
> Here the lock order is the opposite of above.
>
Emanuele Giuseppe Esposito July 5, 2022, 1:01 p.m. UTC | #3
On 05/07/2022 10:17, Emanuele Giuseppe Esposito wrote:
> 
> 
> On 05/07/2022 10:14, Stefan Hajnoczi wrote:
>> On Wed, Jun 29, 2022 at 10:15:31AM -0400, Emanuele Giuseppe Esposito wrote:
>>> diff --git a/blockdev.c b/blockdev.c
>>> index 71f793c4ab..5b79093155 100644
>>> --- a/blockdev.c
>>> +++ b/blockdev.c
>>> @@ -150,12 +150,15 @@ void blockdev_mark_auto_del(BlockBackend *blk)
>>>          return;
>>>      }
>>>  
>>> -    for (job = block_job_next(NULL); job; job = block_job_next(job)) {
>>> +    JOB_LOCK_GUARD();
>>> +
>>> +    for (job = block_job_next_locked(NULL); job;
>>> +         job = block_job_next_locked(job)) {
>>>          if (block_job_has_bdrv(job, blk_bs(blk))) {
>>>              AioContext *aio_context = job->job.aio_context;
>>>              aio_context_acquire(aio_context);
>>
>> Is there a lock ordering rule for job_mutex and the AioContext lock? I
>> haven't audited the code, but there might be ABBA lock ordering issues.
> 
> It doesn't really matter here, as the lock is a nop. To be honest I
> forgot which one should go first; probably job_lock, because the
> aiocontext lock can be taken and released in callbacks.
> 
> Should I resend with the ordering fixed? Just to keep the logic
> consistent.

Well, actually, how do I fix that? I would just add useless additional
changes to the diff, because for example in the case below I am not
even sure what exactly the aiocontext is protecting.

So I guess I'll leave it as it is. I will just update the commit message
to make it clear that the lock is a nop and the ordering is mixed.

Thank you,
Emanuele
> 
>>
>>> diff --git a/qemu-img.c b/qemu-img.c
>>> index 4cf4d2423d..289d88a156 100644
>>> --- a/qemu-img.c
>>> +++ b/qemu-img.c
>>> @@ -912,25 +912,30 @@ static void run_block_job(BlockJob *job, Error **errp)
>>>      int ret = 0;
>>>  
>>>      aio_context_acquire(aio_context);
>>> -    job_ref(&job->job);
>>> -    do {
>>> -        float progress = 0.0f;
>>> -        aio_poll(aio_context, true);
>>> +    WITH_JOB_LOCK_GUARD() {
>>
>> Here the lock order is the opposite of above.
>>
Vladimir Sementsov-Ogievskiy July 5, 2022, 1:22 p.m. UTC | #4
On 7/5/22 16:01, Emanuele Giuseppe Esposito wrote:
> 
> 
> On 05/07/2022 10:17, Emanuele Giuseppe Esposito wrote:
>>
>>
>> On 05/07/2022 10:14, Stefan Hajnoczi wrote:
>>> On Wed, Jun 29, 2022 at 10:15:31AM -0400, Emanuele Giuseppe Esposito wrote:
>>>> diff --git a/blockdev.c b/blockdev.c
>>>> index 71f793c4ab..5b79093155 100644
>>>> --- a/blockdev.c
>>>> +++ b/blockdev.c
>>>> @@ -150,12 +150,15 @@ void blockdev_mark_auto_del(BlockBackend *blk)
>>>>           return;
>>>>       }
>>>>   
>>>> -    for (job = block_job_next(NULL); job; job = block_job_next(job)) {
>>>> +    JOB_LOCK_GUARD();
>>>> +
>>>> +    for (job = block_job_next_locked(NULL); job;
>>>> +         job = block_job_next_locked(job)) {
>>>>           if (block_job_has_bdrv(job, blk_bs(blk))) {
>>>>               AioContext *aio_context = job->job.aio_context;
>>>>               aio_context_acquire(aio_context);
>>>
>>> Is there a lock ordering rule for job_mutex and the AioContext lock? I
>>> haven't audited the code, but there might be ABBA lock ordering issues.
>>
>> It doesn't really matter here, as the lock is a nop. To be honest I
>> forgot which one should go first; probably job_lock, because the
>> aiocontext lock can be taken and released in callbacks.
>>
>> Should I resend with the ordering fixed? Just to keep the logic
>> consistent.
> 
> Well, actually, how do I fix that? I would just add useless additional
> changes to the diff, because for example in the case below I am not
> even sure what exactly the aiocontext is protecting.
> 
> So I guess I'll leave it as it is. I will just update the commit message
> to make it clear that the lock is a nop and the ordering is mixed.
> 

Yes, I think it's OK.

As far as I understand, our final ordering rule is that job_mutex can be taken under aio context lock but not vice versa.

Still, there are some aio-context-lock critical sections inside job_mutex critical sections during the series, just because we don't know a way to avoid that short of merging almost the whole series into one patch. That's why job_mutex is a noop during the series and should become a real mutex at the same time as the aio-context-lock critical sections that break the ordering rule are removed.
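
A sketch of that rule, using the guard this series introduces
(illustrative only):

    /* Allowed: job_mutex nested inside the AioContext lock. */
    aio_context_acquire(ctx);
    WITH_JOB_LOCK_GUARD() {
        /* ... access job state ... */
    }
    aio_context_release(ctx);

    /* Forbidden once job_mutex is a real lock: taking the
     * AioContext lock under job_mutex inverts the ordering. */
    WITH_JOB_LOCK_GUARD() {
        aio_context_acquire(ctx);
        aio_context_release(ctx);
    }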
Vladimir Sementsov-Ogievskiy July 5, 2022, 2:55 p.m. UTC | #5
On 6/29/22 17:15, Emanuele Giuseppe Esposito wrote:
> --- a/job.c
> +++ b/job.c
> @@ -1045,11 +1045,14 @@ static void job_completed_txn_abort_locked(Job *job)
>   /* Called with job_mutex held, but releases it temporarily */
>   static int job_prepare_locked(Job *job)
>   {
> +    int ret;
> +
>       GLOBAL_STATE_CODE();
>       if (job->ret == 0 && job->driver->prepare) {
>           job_unlock();
> -        job->ret = job->driver->prepare(job);
> +        ret = job->driver->prepare(job);
>           job_lock();
> +        job->ret = ret;
>           job_update_rc_locked(job);
>       }
>       return job->ret;
> @@ -1235,10 +1238,10 @@ void job_cancel_locked(Job *job, bool force)
>            * job_cancel_async() ignores soft-cancel requests for jobs
>            * that are already done (i.e. deferred to the main loop).  We
>            * have to check again whether the job is really cancelled.
> -         * (job_cancel_requested() and job_is_cancelled() are equivalent
> -         * here, because job_cancel_async() will make soft-cancel
> -         * requests no-ops when deferred_to_main_loop is true.  We
> -         * choose to call job_is_cancelled() to show that we invoke
> +         * (job_cancel_requested_locked() and job_is_cancelled_locked()
> +         * are equivalent here, because job_cancel_async() will
> +         * make soft-cancel requests no-ops when deferred_to_main_loop is true.
> +         * We choose to call job_is_cancelled_locked() to show that we invoke
>            * job_completed_txn_abort() only for force-cancelled jobs.)
>            */
>           if (job_is_cancelled_locked(job)) {

That's definitely part of commit 05.
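
For context, the first hunk above follows a "stage, then publish"
pattern that matters once the lock is real: the driver callback runs
with job_mutex dropped, so its return value is kept in a local and
only written to job->ret after the lock is retaken. A condensed
sketch (assuming job_mutex is held on entry):

    int ret;

    job_unlock();                      /* callback runs unlocked  */
    ret = job->driver->prepare(job);
    job_lock();
    job->ret = ret;                    /* publish under job_mutex */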
Stefan Hajnoczi July 6, 2022, 10:13 a.m. UTC | #6
On Tue, Jul 05, 2022 at 04:22:41PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> On 7/5/22 16:01, Emanuele Giuseppe Esposito wrote:
> > 
> > 
> > On 05/07/2022 10:17, Emanuele Giuseppe Esposito wrote:
> > > 
> > > 
> > > On 05/07/2022 10:14, Stefan Hajnoczi wrote:
> > > > On Wed, Jun 29, 2022 at 10:15:31AM -0400, Emanuele Giuseppe Esposito wrote:
> > > > > diff --git a/blockdev.c b/blockdev.c
> > > > > index 71f793c4ab..5b79093155 100644
> > > > > --- a/blockdev.c
> > > > > +++ b/blockdev.c
> > > > > @@ -150,12 +150,15 @@ void blockdev_mark_auto_del(BlockBackend *blk)
> > > > >           return;
> > > > >       }
> > > > > -    for (job = block_job_next(NULL); job; job = block_job_next(job)) {
> > > > > +    JOB_LOCK_GUARD();
> > > > > +
> > > > > +    for (job = block_job_next_locked(NULL); job;
> > > > > +         job = block_job_next_locked(job)) {
> > > > >           if (block_job_has_bdrv(job, blk_bs(blk))) {
> > > > >               AioContext *aio_context = job->job.aio_context;
> > > > >               aio_context_acquire(aio_context);
> > > > 
> > > > Is there a lock ordering rule for job_mutex and the AioContext lock? I
> > > > haven't audited the code, but there might be ABBA lock ordering issues.
> > > 
> > > It doesn't really matter here, as the lock is a nop. To be honest I
> > > forgot which one should go first; probably job_lock, because the
> > > aiocontext lock can be taken and released in callbacks.
> > > 
> > > Should I resend with the ordering fixed? Just to keep the logic
> > > consistent.
> > 
> > Well, actually, how do I fix that? I would just add useless additional
> > changes to the diff, because for example in the case below I am not
> > even sure what exactly the aiocontext is protecting.
> > 
> > So I guess I'll leave it as it is. I will just update the commit message
> > to make it clear that the lock is a nop and the ordering is mixed.
> > 
> 
> Yes, I think it's OK.
> 
> As far as I understand, our final ordering rule is that job_mutex can be taken under aio context lock but not vice versa.

I'm also fine with resolving the ordering in a later patch.

Stefan

Patch

diff --git a/block.c b/block.c
index 2c00dddd80..d0db104d71 100644
--- a/block.c
+++ b/block.c
@@ -4978,9 +4978,12 @@  static void bdrv_close(BlockDriverState *bs)
 
 void bdrv_close_all(void)
 {
-    assert(job_next(NULL) == NULL);
     GLOBAL_STATE_CODE();
 
+    WITH_JOB_LOCK_GUARD() {
+        assert(job_next_locked(NULL) == NULL);
+    }
+
     /* Drop references from requests still in flight, such as canceled block
      * jobs whose AIO context has not been polled yet */
     bdrv_drain_all();
@@ -6165,13 +6168,16 @@  XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp)
         }
     }
 
-    for (job = block_job_next(NULL); job; job = block_job_next(job)) {
-        GSList *el;
+    WITH_JOB_LOCK_GUARD() {
+        for (job = block_job_next_locked(NULL); job;
+             job = block_job_next_locked(job)) {
+            GSList *el;
 
-        xdbg_graph_add_node(gr, job, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_JOB,
-                           job->job.id);
-        for (el = job->nodes; el; el = el->next) {
-            xdbg_graph_add_edge(gr, job, (BdrvChild *)el->data);
+            xdbg_graph_add_node(gr, job, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_JOB,
+                                job->job.id);
+            for (el = job->nodes; el; el = el->next) {
+                xdbg_graph_add_edge(gr, job, (BdrvChild *)el->data);
+            }
         }
     }
 
diff --git a/blockdev.c b/blockdev.c
index 71f793c4ab..5b79093155 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -150,12 +150,15 @@  void blockdev_mark_auto_del(BlockBackend *blk)
         return;
     }
 
-    for (job = block_job_next(NULL); job; job = block_job_next(job)) {
+    JOB_LOCK_GUARD();
+
+    for (job = block_job_next_locked(NULL); job;
+         job = block_job_next_locked(job)) {
         if (block_job_has_bdrv(job, blk_bs(blk))) {
             AioContext *aio_context = job->job.aio_context;
             aio_context_acquire(aio_context);
 
-            job_cancel(&job->job, false);
+            job_cancel_locked(&job->job, false);
 
             aio_context_release(aio_context);
         }
@@ -3745,7 +3748,10 @@  BlockJobInfoList *qmp_query_block_jobs(Error **errp)
     BlockJobInfoList *head = NULL, **tail = &head;
     BlockJob *job;
 
-    for (job = block_job_next(NULL); job; job = block_job_next(job)) {
+    JOB_LOCK_GUARD();
+
+    for (job = block_job_next_locked(NULL); job;
+         job = block_job_next_locked(job)) {
         BlockJobInfo *value;
         AioContext *aio_context;
 
diff --git a/blockjob.c b/blockjob.c
index 70952879d8..1075def475 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -99,7 +99,9 @@  static char *child_job_get_parent_desc(BdrvChild *c)
 static void child_job_drained_begin(BdrvChild *c)
 {
     BlockJob *job = c->opaque;
-    job_pause(&job->job);
+    WITH_JOB_LOCK_GUARD() {
+        job_pause_locked(&job->job);
+    }
 }
 
 static bool child_job_drained_poll(BdrvChild *c)
@@ -111,8 +113,10 @@  static bool child_job_drained_poll(BdrvChild *c)
     /* An inactive or completed job doesn't have any pending requests. Jobs
      * with !job->busy are either already paused or have a pause point after
      * being reentered, so no job driver code will run before they pause. */
-    if (!job->busy || job_is_completed(job)) {
-        return false;
+    WITH_JOB_LOCK_GUARD() {
+        if (!job->busy || job_is_completed_locked(job)) {
+            return false;
+        }
     }
 
     /* Otherwise, assume that it isn't fully stopped yet, but allow the job to
@@ -127,7 +131,9 @@  static bool child_job_drained_poll(BdrvChild *c)
 static void child_job_drained_end(BdrvChild *c, int *drained_end_counter)
 {
     BlockJob *job = c->opaque;
-    job_resume(&job->job);
+    WITH_JOB_LOCK_GUARD() {
+        job_resume_locked(&job->job);
+    }
 }
 
 static bool child_job_can_set_aio_ctx(BdrvChild *c, AioContext *ctx,
@@ -480,13 +486,15 @@  void *block_job_create(const char *job_id, const BlockJobDriver *driver,
     job->ready_notifier.notify = block_job_event_ready_locked;
     job->idle_notifier.notify = block_job_on_idle_locked;
 
-    notifier_list_add(&job->job.on_finalize_cancelled,
-                      &job->finalize_cancelled_notifier);
-    notifier_list_add(&job->job.on_finalize_completed,
-                      &job->finalize_completed_notifier);
-    notifier_list_add(&job->job.on_pending, &job->pending_notifier);
-    notifier_list_add(&job->job.on_ready, &job->ready_notifier);
-    notifier_list_add(&job->job.on_idle, &job->idle_notifier);
+    WITH_JOB_LOCK_GUARD() {
+        notifier_list_add(&job->job.on_finalize_cancelled,
+                          &job->finalize_cancelled_notifier);
+        notifier_list_add(&job->job.on_finalize_completed,
+                          &job->finalize_completed_notifier);
+        notifier_list_add(&job->job.on_pending, &job->pending_notifier);
+        notifier_list_add(&job->job.on_ready, &job->ready_notifier);
+        notifier_list_add(&job->job.on_idle, &job->idle_notifier);
+    }
 
     error_setg(&job->blocker, "block device is in use by block job: %s",
                job_type_str(&job->job));
@@ -498,7 +506,10 @@  void *block_job_create(const char *job_id, const BlockJobDriver *driver,
 
     bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
 
-    if (!block_job_set_speed(job, speed, errp)) {
+    WITH_JOB_LOCK_GUARD() {
+        ret = block_job_set_speed_locked(job, speed, errp);
+    }
+    if (!ret) {
         goto fail;
     }
 
@@ -529,7 +540,9 @@  void block_job_user_resume(Job *job)
 {
     BlockJob *bjob = container_of(job, BlockJob, job);
     GLOBAL_STATE_CODE();
-    block_job_iostatus_reset(bjob);
+    WITH_JOB_LOCK_GUARD() {
+        block_job_iostatus_reset_locked(bjob);
+    }
 }
 
 BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
@@ -563,10 +576,15 @@  BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
                                         action);
     }
     if (action == BLOCK_ERROR_ACTION_STOP) {
-        if (!job->job.user_paused) {
-            job_pause(&job->job);
-            /* make the pause user visible, which will be resumed from QMP. */
-            job->job.user_paused = true;
+        WITH_JOB_LOCK_GUARD() {
+            if (!job->job.user_paused) {
+                job_pause_locked(&job->job);
+                /*
+                 * make the pause user visible, which will be
+                 * resumed from QMP.
+                 */
+                job->job.user_paused = true;
+            }
         }
         block_job_iostatus_set_err(job, error);
     }
diff --git a/job-qmp.c b/job-qmp.c
index 8ce3b7965e..6eff7016b2 100644
--- a/job-qmp.c
+++ b/job-qmp.c
@@ -192,7 +192,9 @@  JobInfoList *qmp_query_jobs(Error **errp)
     JobInfoList *head = NULL, **tail = &head;
     Job *job;
 
-    for (job = job_next(NULL); job; job = job_next(job)) {
+    JOB_LOCK_GUARD();
+
+    for (job = job_next_locked(NULL); job; job = job_next_locked(job)) {
         JobInfo *value;
         AioContext *aio_context;
 
diff --git a/job.c b/job.c
index 7a3cc93f66..19d711dc73 100644
--- a/job.c
+++ b/job.c
@@ -1045,11 +1045,14 @@  static void job_completed_txn_abort_locked(Job *job)
 /* Called with job_mutex held, but releases it temporarily */
 static int job_prepare_locked(Job *job)
 {
+    int ret;
+
     GLOBAL_STATE_CODE();
     if (job->ret == 0 && job->driver->prepare) {
         job_unlock();
-        job->ret = job->driver->prepare(job);
+        ret = job->driver->prepare(job);
         job_lock();
+        job->ret = ret;
         job_update_rc_locked(job);
     }
     return job->ret;
@@ -1235,10 +1238,10 @@  void job_cancel_locked(Job *job, bool force)
          * job_cancel_async() ignores soft-cancel requests for jobs
          * that are already done (i.e. deferred to the main loop).  We
          * have to check again whether the job is really cancelled.
-         * (job_cancel_requested() and job_is_cancelled() are equivalent
-         * here, because job_cancel_async() will make soft-cancel
-         * requests no-ops when deferred_to_main_loop is true.  We
-         * choose to call job_is_cancelled() to show that we invoke
+         * (job_cancel_requested_locked() and job_is_cancelled_locked()
+         * are equivalent here, because job_cancel_async() will
+         * make soft-cancel requests no-ops when deferred_to_main_loop is true.
+         * We choose to call job_is_cancelled_locked() to show that we invoke
          * job_completed_txn_abort() only for force-cancelled jobs.)
          */
         if (job_is_cancelled_locked(job)) {
diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c
index 1ebb89f46c..1897ed7a13 100644
--- a/monitor/qmp-cmds.c
+++ b/monitor/qmp-cmds.c
@@ -133,8 +133,11 @@  void qmp_cont(Error **errp)
         blk_iostatus_reset(blk);
     }
 
-    for (job = block_job_next(NULL); job; job = block_job_next(job)) {
-        block_job_iostatus_reset(job);
+    WITH_JOB_LOCK_GUARD() {
+        for (job = block_job_next_locked(NULL); job;
+             job = block_job_next_locked(job)) {
+            block_job_iostatus_reset_locked(job);
+        }
     }
 
     /* Continuing after completed migration. Images have been inactivated to
diff --git a/qemu-img.c b/qemu-img.c
index 4cf4d2423d..289d88a156 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -912,25 +912,30 @@  static void run_block_job(BlockJob *job, Error **errp)
     int ret = 0;
 
     aio_context_acquire(aio_context);
-    job_ref(&job->job);
-    do {
-        float progress = 0.0f;
-        aio_poll(aio_context, true);
+    WITH_JOB_LOCK_GUARD() {
+        job_ref_locked(&job->job);
+        do {
+            float progress = 0.0f;
+            job_unlock();
+            aio_poll(aio_context, true);
+
+            progress_get_snapshot(&job->job.progress, &progress_current,
+                                &progress_total);
+            if (progress_total) {
+                progress = (float)progress_current / progress_total * 100.f;
+            }
+            qemu_progress_print(progress, 0);
+            job_lock();
+        } while (!job_is_ready_locked(&job->job) &&
+                 !job_is_completed_locked(&job->job));
 
-        progress_get_snapshot(&job->job.progress, &progress_current,
-                              &progress_total);
-        if (progress_total) {
-            progress = (float)progress_current / progress_total * 100.f;
+        if (!job_is_completed_locked(&job->job)) {
+            ret = job_complete_sync_locked(&job->job, errp);
+        } else {
+            ret = job->job.ret;
         }
-        qemu_progress_print(progress, 0);
-    } while (!job_is_ready(&job->job) && !job_is_completed(&job->job));
-
-    if (!job_is_completed(&job->job)) {
-        ret = job_complete_sync(&job->job, errp);
-    } else {
-        ret = job->job.ret;
+        job_unref_locked(&job->job);
     }
-    job_unref(&job->job);
     aio_context_release(aio_context);
 
     /* publish completion progress only when success */
@@ -1083,7 +1088,9 @@  static int img_commit(int argc, char **argv)
         bdrv_ref(bs);
     }
 
-    job = block_job_get("commit");
+    WITH_JOB_LOCK_GUARD() {
+        job = block_job_get_locked("commit");
+    }
     assert(job);
     run_block_job(job, &local_err);
     if (local_err) {