diff mbox series

[for-5.0,v2,3/3] block: Fix blk->in_flight during blk_wait_while_drained()

Message ID 20200406171403.6761-4-kwolf@redhat.com (mailing list archive)
State New, archived
Headers show
Series block: Fix blk->in_flight during blk_wait_while_drained() | expand

Commit Message

Kevin Wolf April 6, 2020, 5:14 p.m. UTC
Waiting in blk_wait_while_drained() while blk->in_flight is increased
for the current request is wrong because it will cause the drain
operation to deadlock.

This patch makes sure that blk_wait_while_drained() is called with
blk->in_flight increased exactly once for the current request, and that
it temporarily decreases the counter while it waits.

Fixes: cf3129323f900ef5ddbccbe86e4fa801e88c566e
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/block-backend.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

Comments

Vladimir Sementsov-Ogievskiy April 7, 2020, 6:52 a.m. UTC | #1
06.04.2020 20:14, Kevin Wolf wrote:
> Waiting in blk_wait_while_drained() while blk->in_flight is increased
> for the current request is wrong because it will cause the drain
> operation to deadlock.
> 
> This patch makes sure that blk_wait_while_drained() is called with
> blk->in_flight increased exactly once for the current request, and that
> it temporarily decreases the counter while it waits.
> 
> Fixes: cf3129323f900ef5ddbccbe86e4fa801e88c566e
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>   block/block-backend.c | 17 +++++------------
>   1 file changed, 5 insertions(+), 12 deletions(-)
> 
> diff --git a/block/block-backend.c b/block/block-backend.c
> index d330e08b05..f621435f0b 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -1140,10 +1140,15 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
>       return 0;
>   }
>   
> +/* To be called between exactly one pair of blk_inc/dec_in_flight() */
>   static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
>   {
> +    assert(blk->in_flight > 0);

Hmm. You promise to make sure that in_flight increased exactly once. Shouldn't it be assert(blk->in_flight == 1) ?

> +
>       if (blk->quiesce_counter && !blk->disable_request_queuing) {
> +        blk_dec_in_flight(blk);
>           qemu_co_queue_wait(&blk->queued_requests, NULL);
> +        blk_inc_in_flight(blk);
>       }
>   }
>   
> @@ -1416,12 +1421,6 @@ static void blk_aio_read_entry(void *opaque)
>       BlkRwCo *rwco = &acb->rwco;
>       QEMUIOVector *qiov = rwco->iobuf;
>   
> -    if (rwco->blk->quiesce_counter) {
> -        blk_dec_in_flight(rwco->blk);
> -        blk_wait_while_drained(rwco->blk);
> -        blk_inc_in_flight(rwco->blk);
> -    }

Hm, you drop it as it's called from blk_do_preadv too. I think it worth mentioning in commit message still.

> -
>       assert(qiov->size == acb->bytes);
>       rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes,
>                                 qiov, rwco->flags);
> @@ -1434,12 +1433,6 @@ static void blk_aio_write_entry(void *opaque)
>       BlkRwCo *rwco = &acb->rwco;
>       QEMUIOVector *qiov = rwco->iobuf;
>   
> -    if (rwco->blk->quiesce_counter) {
> -        blk_dec_in_flight(rwco->blk);
> -        blk_wait_while_drained(rwco->blk);
> -        blk_inc_in_flight(rwco->blk);
> -    }
> -
>       assert(!qiov || qiov->size == acb->bytes);
>       rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes,
>                                       qiov, 0, rwco->flags);
> 

With assert(blk->in_flight == 1) and mention extra wait removing in commit message:
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>


And still, if my suggestion in previous patch works, we may fix it all without extra blk_dec/blk_inc pair, by just moving blk_inc_ after blk_wait_while_drained in my previous suggestion.. It looks more native for me (but may be I miss something?):


@@ -1154,6 +1154,7 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
      int ret;
      BlockDriverState *bs;

      blk_wait_while_drained(blk);
+    blk_inc_in_flight(blk);

      /* Call blk_bs() only after waiting, the graph may have changed */
@@ -1175,6 +1176,7 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,

      ret = bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
      bdrv_dec_in_flight(bs);
+    blk_dec_in_flight(blk);
      return ret;
  }

@@ -1337,7 +1339,6 @@ static void blk_aio_complete(BlkAioEmAIOCB *acb)
  {
      if (acb->has_returned) {
          acb->common.cb(acb->common.opaque, acb->rwco.ret);
-        blk_dec_in_flight(acb->rwco.blk);
          qemu_aio_unref(acb);
      }
  }
@@ -1357,7 +1358,6 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
      BlkAioEmAIOCB *acb;
      Coroutine *co;

-    blk_inc_in_flight(blk);
      acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
      acb->rwco = (BlkRwCo) {
          .blk    = blk,
@@ -1388,9 +1388,7 @@ static void blk_aio_read_entry(void *opaque)
      QEMUIOVector *qiov = rwco->iobuf;

      if (rwco->blk->quiesce_counter) {
-        blk_dec_in_flight(rwco->blk);
          blk_wait_while_drained(rwco->blk);
-        blk_inc_in_flight(rwco->blk);
      }

      assert(qiov->size == acb->bytes);


(and same for write, ioctl, flush, discard)
Kevin Wolf April 7, 2020, 8:59 a.m. UTC | #2
Am 07.04.2020 um 08:52 hat Vladimir Sementsov-Ogievskiy geschrieben:
> 06.04.2020 20:14, Kevin Wolf wrote:
> > Waiting in blk_wait_while_drained() while blk->in_flight is increased
> > for the current request is wrong because it will cause the drain
> > operation to deadlock.
> > 
> > This patch makes sure that blk_wait_while_drained() is called with
> > blk->in_flight increased exactly once for the current request, and that
> > it temporarily decreases the counter while it waits.
> > 
> > Fixes: cf3129323f900ef5ddbccbe86e4fa801e88c566e
> > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> > ---
> >   block/block-backend.c | 17 +++++------------
> >   1 file changed, 5 insertions(+), 12 deletions(-)
> > 
> > diff --git a/block/block-backend.c b/block/block-backend.c
> > index d330e08b05..f621435f0b 100644
> > --- a/block/block-backend.c
> > +++ b/block/block-backend.c
> > @@ -1140,10 +1140,15 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
> >       return 0;
> >   }
> > +/* To be called between exactly one pair of blk_inc/dec_in_flight() */
> >   static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
> >   {
> > +    assert(blk->in_flight > 0);
> 
> Hmm. You promise to make sure that in_flight increased exactly once.
> Shouldn't it be assert(blk->in_flight == 1) ?

Exactly once for this specific request, but if you have multiple
requests in flight, blk->in_flight will be the sum of all requests.

Just asserting > 0 should still catch potential bugs because you won't
always have multiple requests in flight.

> > +
> >       if (blk->quiesce_counter && !blk->disable_request_queuing) {
> > +        blk_dec_in_flight(blk);
> >           qemu_co_queue_wait(&blk->queued_requests, NULL);
> > +        blk_inc_in_flight(blk);
> >       }
> >   }
> > @@ -1416,12 +1421,6 @@ static void blk_aio_read_entry(void *opaque)
> >       BlkRwCo *rwco = &acb->rwco;
> >       QEMUIOVector *qiov = rwco->iobuf;
> > -    if (rwco->blk->quiesce_counter) {
> > -        blk_dec_in_flight(rwco->blk);
> > -        blk_wait_while_drained(rwco->blk);
> > -        blk_inc_in_flight(rwco->blk);
> > -    }
> 
> Hm, you drop it as it's called from blk_do_preadv too. I think it
> worth mentioning in commit message still.

Okay, I can add a sentence like "The blk_wait_while_drained() call in
blk_aio_read/write_entry is redundant with the one in blk_co_*(), so
drop it."

> > -
> >       assert(qiov->size == acb->bytes);
> >       rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes,
> >                                 qiov, rwco->flags);
> > @@ -1434,12 +1433,6 @@ static void blk_aio_write_entry(void *opaque)
> >       BlkRwCo *rwco = &acb->rwco;
> >       QEMUIOVector *qiov = rwco->iobuf;
> > -    if (rwco->blk->quiesce_counter) {
> > -        blk_dec_in_flight(rwco->blk);
> > -        blk_wait_while_drained(rwco->blk);
> > -        blk_inc_in_flight(rwco->blk);
> > -    }
> > -
> >       assert(!qiov || qiov->size == acb->bytes);
> >       rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes,
> >                                       qiov, 0, rwco->flags);
> > 
> 
> With assert(blk->in_flight == 1) and mention extra wait removing in commit message:
> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

Thanks, and I hope you agree with blk->in_flight > 0 now.

Kevin
Vladimir Sementsov-Ogievskiy April 7, 2020, 9:15 a.m. UTC | #3
07.04.2020 11:59, Kevin Wolf wrote:
> Am 07.04.2020 um 08:52 hat Vladimir Sementsov-Ogievskiy geschrieben:
>> 06.04.2020 20:14, Kevin Wolf wrote:
>>> Waiting in blk_wait_while_drained() while blk->in_flight is increased
>>> for the current request is wrong because it will cause the drain
>>> operation to deadlock.
>>>
>>> This patch makes sure that blk_wait_while_drained() is called with
>>> blk->in_flight increased exactly once for the current request, and that
>>> it temporarily decreases the counter while it waits.
>>>
>>> Fixes: cf3129323f900ef5ddbccbe86e4fa801e88c566e
>>> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
>>> ---
>>>    block/block-backend.c | 17 +++++------------
>>>    1 file changed, 5 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/block/block-backend.c b/block/block-backend.c
>>> index d330e08b05..f621435f0b 100644
>>> --- a/block/block-backend.c
>>> +++ b/block/block-backend.c
>>> @@ -1140,10 +1140,15 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
>>>        return 0;
>>>    }
>>> +/* To be called between exactly one pair of blk_inc/dec_in_flight() */
>>>    static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
>>>    {
>>> +    assert(blk->in_flight > 0);
>>
>> Hmm. You promise to make sure that in_flight increased exactly once.
>> Shouldn't it be assert(blk->in_flight == 1) ?
> 
> Exactly once for this specific request, but if you have multiple
> requests in flight, blk->in_flight will be the sum of all requests.
> 
> Just asserting > 0 should still catch potential bugs because you won't
> always have multiple requests in flight.
> 
>>> +
>>>        if (blk->quiesce_counter && !blk->disable_request_queuing) {
>>> +        blk_dec_in_flight(blk);
>>>            qemu_co_queue_wait(&blk->queued_requests, NULL);
>>> +        blk_inc_in_flight(blk);
>>>        }
>>>    }
>>> @@ -1416,12 +1421,6 @@ static void blk_aio_read_entry(void *opaque)
>>>        BlkRwCo *rwco = &acb->rwco;
>>>        QEMUIOVector *qiov = rwco->iobuf;
>>> -    if (rwco->blk->quiesce_counter) {
>>> -        blk_dec_in_flight(rwco->blk);
>>> -        blk_wait_while_drained(rwco->blk);
>>> -        blk_inc_in_flight(rwco->blk);
>>> -    }
>>
>> Hm, you drop it as it's called from blk_do_preadv too. I think it
>> worth mentioning in commit message still.
> 
> Okay, I can add a sentence like "The blk_wait_while_drained() call in
> blk_aio_read/write_entry is redundant with the one in blk_co_*(), so
> drop it."

Yes, that works
> 
>>> -
>>>        assert(qiov->size == acb->bytes);
>>>        rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes,
>>>                                  qiov, rwco->flags);
>>> @@ -1434,12 +1433,6 @@ static void blk_aio_write_entry(void *opaque)
>>>        BlkRwCo *rwco = &acb->rwco;
>>>        QEMUIOVector *qiov = rwco->iobuf;
>>> -    if (rwco->blk->quiesce_counter) {
>>> -        blk_dec_in_flight(rwco->blk);
>>> -        blk_wait_while_drained(rwco->blk);
>>> -        blk_inc_in_flight(rwco->blk);
>>> -    }
>>> -
>>>        assert(!qiov || qiov->size == acb->bytes);
>>>        rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes,
>>>                                        qiov, 0, rwco->flags);
>>>
>>
>> With assert(blk->in_flight == 1) and mention extra wait removing in commit message:
>> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> 
> Thanks, and I hope you agree with blk->in_flight > 0 now.
> 

Yes, I agree.

Hmm still interesting, could we move somehow blk_wait_while_drained out of inc/dec section? It seems there are no yields and no coroutine switches between first in_flight increment and call of blk_wait_while_drained, only initialization of acb.
Max Reitz April 7, 2020, 10:12 a.m. UTC | #4
On 06.04.20 19:14, Kevin Wolf wrote:
> Waiting in blk_wait_while_drained() while blk->in_flight is increased
> for the current request is wrong because it will cause the drain
> operation to deadlock.
> 
> This patch makes sure that blk_wait_while_drained() is called with
> blk->in_flight increased exactly once for the current request, and that
> it temporarily decreases the counter while it waits.
> 
> Fixes: cf3129323f900ef5ddbccbe86e4fa801e88c566e
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>  block/block-backend.c | 17 +++++------------
>  1 file changed, 5 insertions(+), 12 deletions(-)

Reviewed-by: Max Reitz <mreitz@redhat.com>
diff mbox series

Patch

diff --git a/block/block-backend.c b/block/block-backend.c
index d330e08b05..f621435f0b 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1140,10 +1140,15 @@  static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
     return 0;
 }
 
+/* To be called between exactly one pair of blk_inc/dec_in_flight() */
 static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
 {
+    assert(blk->in_flight > 0);
+
     if (blk->quiesce_counter && !blk->disable_request_queuing) {
+        blk_dec_in_flight(blk);
         qemu_co_queue_wait(&blk->queued_requests, NULL);
+        blk_inc_in_flight(blk);
     }
 }
 
@@ -1416,12 +1421,6 @@  static void blk_aio_read_entry(void *opaque)
     BlkRwCo *rwco = &acb->rwco;
     QEMUIOVector *qiov = rwco->iobuf;
 
-    if (rwco->blk->quiesce_counter) {
-        blk_dec_in_flight(rwco->blk);
-        blk_wait_while_drained(rwco->blk);
-        blk_inc_in_flight(rwco->blk);
-    }
-
     assert(qiov->size == acb->bytes);
     rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes,
                               qiov, rwco->flags);
@@ -1434,12 +1433,6 @@  static void blk_aio_write_entry(void *opaque)
     BlkRwCo *rwco = &acb->rwco;
     QEMUIOVector *qiov = rwco->iobuf;
 
-    if (rwco->blk->quiesce_counter) {
-        blk_dec_in_flight(rwco->blk);
-        blk_wait_while_drained(rwco->blk);
-        blk_inc_in_flight(rwco->blk);
-    }
-
     assert(!qiov || qiov->size == acb->bytes);
     rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes,
                                     qiov, 0, rwco->flags);