Message ID | 20200406171403.6761-4-kwolf@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | block: Fix blk->in_flight during blk_wait_while_drained() | expand |
06.04.2020 20:14, Kevin Wolf wrote: > Waiting in blk_wait_while_drained() while blk->in_flight is increased > for the current request is wrong because it will cause the drain > operation to deadlock. > > This patch makes sure that blk_wait_while_drained() is called with > blk->in_flight increased exactly once for the current request, and that > it temporarily decreases the counter while it waits. > > Fixes: cf3129323f900ef5ddbccbe86e4fa801e88c566e > Signed-off-by: Kevin Wolf <kwolf@redhat.com> > --- > block/block-backend.c | 17 +++++------------ > 1 file changed, 5 insertions(+), 12 deletions(-) > > diff --git a/block/block-backend.c b/block/block-backend.c > index d330e08b05..f621435f0b 100644 > --- a/block/block-backend.c > +++ b/block/block-backend.c > @@ -1140,10 +1140,15 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset, > return 0; > } > > +/* To be called between exactly one pair of blk_inc/dec_in_flight() */ > static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) > { > + assert(blk->in_flight > 0); Hmm. You promise to make sure that in_flight increased exactly once. Shouldn't it be assert(blk->in_flight == 1) ? > + > if (blk->quiesce_counter && !blk->disable_request_queuing) { > + blk_dec_in_flight(blk); > qemu_co_queue_wait(&blk->queued_requests, NULL); > + blk_inc_in_flight(blk); > } > } > > @@ -1416,12 +1421,6 @@ static void blk_aio_read_entry(void *opaque) > BlkRwCo *rwco = &acb->rwco; > QEMUIOVector *qiov = rwco->iobuf; > > - if (rwco->blk->quiesce_counter) { > - blk_dec_in_flight(rwco->blk); > - blk_wait_while_drained(rwco->blk); > - blk_inc_in_flight(rwco->blk); > - } Hm, you drop it as it's called from blk_do_preadv too. I think it worth mentioning in commit message still. > - > assert(qiov->size == acb->bytes); > rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes, > qiov, rwco->flags); > @@ -1434,12 +1433,6 @@ static void blk_aio_write_entry(void *opaque) > BlkRwCo *rwco = &acb->rwco; > QEMUIOVector *qiov = rwco->iobuf; > > - if (rwco->blk->quiesce_counter) { > - blk_dec_in_flight(rwco->blk); > - blk_wait_while_drained(rwco->blk); > - blk_inc_in_flight(rwco->blk); > - } > - > assert(!qiov || qiov->size == acb->bytes); > rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, > qiov, 0, rwco->flags); > With assert(blk->in_flight == 1) and mention extra wait removing in commit message: Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> And still, if my suggestion in previous patch works, we may fix it all without extra blk_dec/blk_inc pair, by just moving blk_inc_ after blk_wait_while_drained in my previous suggestion.. It looks more native for me (but may be I miss something?): @@ -1154,6 +1154,7 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, int ret; BlockDriverState *bs; blk_wait_while_drained(blk); + blk_inc_in_flight(blk); /* Call blk_bs() only after waiting, the graph may have changed */ @@ -1175,6 +1176,7 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, ret = bdrv_co_preadv(blk->root, offset, bytes, qiov, flags); bdrv_dec_in_flight(bs); + blk_dec_in_flight(blk); return ret; } @@ -1337,7 +1339,6 @@ static void blk_aio_complete(BlkAioEmAIOCB *acb) { if (acb->has_returned) { acb->common.cb(acb->common.opaque, acb->rwco.ret); - blk_dec_in_flight(acb->rwco.blk); qemu_aio_unref(acb); } } @@ -1357,7 +1358,6 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, BlkAioEmAIOCB *acb; Coroutine *co; - blk_inc_in_flight(blk); acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque); acb->rwco = (BlkRwCo) { .blk = blk, @@ -1388,9 +1388,7 @@ static void blk_aio_read_entry(void *opaque) QEMUIOVector *qiov = rwco->iobuf; if (rwco->blk->quiesce_counter) { - blk_dec_in_flight(rwco->blk); blk_wait_while_drained(rwco->blk); - blk_inc_in_flight(rwco->blk); } assert(qiov->size == acb->bytes); (and same for write, ioctl, flush, discard)
Am 07.04.2020 um 08:52 hat Vladimir Sementsov-Ogievskiy geschrieben: > 06.04.2020 20:14, Kevin Wolf wrote: > > Waiting in blk_wait_while_drained() while blk->in_flight is increased > > for the current request is wrong because it will cause the drain > > operation to deadlock. > > > > This patch makes sure that blk_wait_while_drained() is called with > > blk->in_flight increased exactly once for the current request, and that > > it temporarily decreases the counter while it waits. > > > > Fixes: cf3129323f900ef5ddbccbe86e4fa801e88c566e > > Signed-off-by: Kevin Wolf <kwolf@redhat.com> > > --- > > block/block-backend.c | 17 +++++------------ > > 1 file changed, 5 insertions(+), 12 deletions(-) > > > > diff --git a/block/block-backend.c b/block/block-backend.c > > index d330e08b05..f621435f0b 100644 > > --- a/block/block-backend.c > > +++ b/block/block-backend.c > > @@ -1140,10 +1140,15 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset, > > return 0; > > } > > +/* To be called between exactly one pair of blk_inc/dec_in_flight() */ > > static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) > > { > > + assert(blk->in_flight > 0); > > Hmm. You promise to make sure that in_flight increased exactly once. > Shouldn't it be assert(blk->in_flight == 1) ? Exactly once for this specific request, but if you have multiple requests in flight, blk->in_flight will be the sum of all requests. Just asserting > 0 should still catch potential bugs because you won't always have multiple requests in flight. > > + > > if (blk->quiesce_counter && !blk->disable_request_queuing) { > > + blk_dec_in_flight(blk); > > qemu_co_queue_wait(&blk->queued_requests, NULL); > > + blk_inc_in_flight(blk); > > } > > } > > @@ -1416,12 +1421,6 @@ static void blk_aio_read_entry(void *opaque) > > BlkRwCo *rwco = &acb->rwco; > > QEMUIOVector *qiov = rwco->iobuf; > > - if (rwco->blk->quiesce_counter) { > > - blk_dec_in_flight(rwco->blk); > > - blk_wait_while_drained(rwco->blk); > > - blk_inc_in_flight(rwco->blk); > > - } > > Hm, you drop it as it's called from blk_do_preadv too. I think it > worth mentioning in commit message still. Okay, I can add a sentence like "The blk_wait_while_drained() call in blk_aio_read/write_entry is redundant with the one in blk_co_*(), so drop it." > > - > > assert(qiov->size == acb->bytes); > > rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes, > > qiov, rwco->flags); > > @@ -1434,12 +1433,6 @@ static void blk_aio_write_entry(void *opaque) > > BlkRwCo *rwco = &acb->rwco; > > QEMUIOVector *qiov = rwco->iobuf; > > - if (rwco->blk->quiesce_counter) { > > - blk_dec_in_flight(rwco->blk); > > - blk_wait_while_drained(rwco->blk); > > - blk_inc_in_flight(rwco->blk); > > - } > > - > > assert(!qiov || qiov->size == acb->bytes); > > rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, > > qiov, 0, rwco->flags); > > > > With assert(blk->in_flight == 1) and mention extra wait removing in commit message: > Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Thanks, and I hope you agree with blk->in_flight > 0 now. Kevin
07.04.2020 11:59, Kevin Wolf wrote: > Am 07.04.2020 um 08:52 hat Vladimir Sementsov-Ogievskiy geschrieben: >> 06.04.2020 20:14, Kevin Wolf wrote: >>> Waiting in blk_wait_while_drained() while blk->in_flight is increased >>> for the current request is wrong because it will cause the drain >>> operation to deadlock. >>> >>> This patch makes sure that blk_wait_while_drained() is called with >>> blk->in_flight increased exactly once for the current request, and that >>> it temporarily decreases the counter while it waits. >>> >>> Fixes: cf3129323f900ef5ddbccbe86e4fa801e88c566e >>> Signed-off-by: Kevin Wolf <kwolf@redhat.com> >>> --- >>> block/block-backend.c | 17 +++++------------ >>> 1 file changed, 5 insertions(+), 12 deletions(-) >>> >>> diff --git a/block/block-backend.c b/block/block-backend.c >>> index d330e08b05..f621435f0b 100644 >>> --- a/block/block-backend.c >>> +++ b/block/block-backend.c >>> @@ -1140,10 +1140,15 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset, >>> return 0; >>> } >>> +/* To be called between exactly one pair of blk_inc/dec_in_flight() */ >>> static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) >>> { >>> + assert(blk->in_flight > 0); >> >> Hmm. You promise to make sure that in_flight increased exactly once. >> Shouldn't it be assert(blk->in_flight == 1) ? > > Exactly once for this specific request, but if you have multiple > requests in flight, blk->in_flight will be the sum of all requests. > > Just asserting > 0 should still catch potential bugs because you won't > always have multiple requests in flight. > >>> + >>> if (blk->quiesce_counter && !blk->disable_request_queuing) { >>> + blk_dec_in_flight(blk); >>> qemu_co_queue_wait(&blk->queued_requests, NULL); >>> + blk_inc_in_flight(blk); >>> } >>> } >>> @@ -1416,12 +1421,6 @@ static void blk_aio_read_entry(void *opaque) >>> BlkRwCo *rwco = &acb->rwco; >>> QEMUIOVector *qiov = rwco->iobuf; >>> - if (rwco->blk->quiesce_counter) { >>> - blk_dec_in_flight(rwco->blk); >>> - blk_wait_while_drained(rwco->blk); >>> - blk_inc_in_flight(rwco->blk); >>> - } >> >> Hm, you drop it as it's called from blk_do_preadv too. I think it >> worth mentioning in commit message still. > > Okay, I can add a sentence like "The blk_wait_while_drained() call in > blk_aio_read/write_entry is redundant with the one in blk_co_*(), so > drop it." Yes, that works > >>> - >>> assert(qiov->size == acb->bytes); >>> rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes, >>> qiov, rwco->flags); >>> @@ -1434,12 +1433,6 @@ static void blk_aio_write_entry(void *opaque) >>> BlkRwCo *rwco = &acb->rwco; >>> QEMUIOVector *qiov = rwco->iobuf; >>> - if (rwco->blk->quiesce_counter) { >>> - blk_dec_in_flight(rwco->blk); >>> - blk_wait_while_drained(rwco->blk); >>> - blk_inc_in_flight(rwco->blk); >>> - } >>> - >>> assert(!qiov || qiov->size == acb->bytes); >>> rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, >>> qiov, 0, rwco->flags); >>> >> >> With assert(blk->in_flight == 1) and mention extra wait removing in commit message: >> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> > > Thanks, and I hope you agree with blk->in_flight > 0 now. > Yes, I agree. Hmm still interesting, could we move somehow blk_wait_while_drained out of inc/dec section? It seems there are no yields and no coroutine switches between first in_flight increment and call of blk_wait_while_drained, only initialization of acb.
On 06.04.20 19:14, Kevin Wolf wrote: > Waiting in blk_wait_while_drained() while blk->in_flight is increased > for the current request is wrong because it will cause the drain > operation to deadlock. > > This patch makes sure that blk_wait_while_drained() is called with > blk->in_flight increased exactly once for the current request, and that > it temporarily decreases the counter while it waits. > > Fixes: cf3129323f900ef5ddbccbe86e4fa801e88c566e > Signed-off-by: Kevin Wolf <kwolf@redhat.com> > --- > block/block-backend.c | 17 +++++------------ > 1 file changed, 5 insertions(+), 12 deletions(-) Reviewed-by: Max Reitz <mreitz@redhat.com>
diff --git a/block/block-backend.c b/block/block-backend.c index d330e08b05..f621435f0b 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1140,10 +1140,15 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset, return 0; } +/* To be called between exactly one pair of blk_inc/dec_in_flight() */ static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) { + assert(blk->in_flight > 0); + if (blk->quiesce_counter && !blk->disable_request_queuing) { + blk_dec_in_flight(blk); qemu_co_queue_wait(&blk->queued_requests, NULL); + blk_inc_in_flight(blk); } } @@ -1416,12 +1421,6 @@ static void blk_aio_read_entry(void *opaque) BlkRwCo *rwco = &acb->rwco; QEMUIOVector *qiov = rwco->iobuf; - if (rwco->blk->quiesce_counter) { - blk_dec_in_flight(rwco->blk); - blk_wait_while_drained(rwco->blk); - blk_inc_in_flight(rwco->blk); - } - assert(qiov->size == acb->bytes); rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes, qiov, rwco->flags); @@ -1434,12 +1433,6 @@ static void blk_aio_write_entry(void *opaque) BlkRwCo *rwco = &acb->rwco; QEMUIOVector *qiov = rwco->iobuf; - if (rwco->blk->quiesce_counter) { - blk_dec_in_flight(rwco->blk); - blk_wait_while_drained(rwco->blk); - blk_inc_in_flight(rwco->blk); - } - assert(!qiov || qiov->size == acb->bytes); rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, qiov, 0, rwco->flags);
Waiting in blk_wait_while_drained() while blk->in_flight is increased for the current request is wrong because it will cause the drain operation to deadlock. This patch makes sure that blk_wait_while_drained() is called with blk->in_flight increased exactly once for the current request, and that it temporarily decreases the counter while it waits. Fixes: cf3129323f900ef5ddbccbe86e4fa801e88c566e Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block/block-backend.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-)