Message ID | 20160612065104.13856-1-famz@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 12/06/2016 08:51, Fam Zheng wrote: > From: Stefan Hajnoczi <stefanha@redhat.com> > > When dataplane is enabled or disabled the drive switches to a new > AioContext. The mirror block job must also move to the new AioContext > so that drive accesses are always made within its AioContext. > > This patch partially achieves that by draining target and source > requests to reach a quiescent point. The job is resumed in the new > AioContext after moving s->target into the new AioContext. > > The quiesce_requested flag is added to deal with yield points in > block_job_sleep_ns(), bdrv_is_allocated_above(), and > bdrv_get_block_status_above(). Previously they continue executing in > the old AioContext. The nested aio_poll in mirror_detach_aio_context > will drive the mirror coroutine up to fixed yield points, where > mirror_check_for_quiesce is called. > > Cc: Fam Zheng <famz@redhat.com> > Cc: Paolo Bonzini <pbonzini@redhat.com> > Cc: Jeff Cody <jcody@redhat.com> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> > [Drain source as well, and add s->quiesce_requested flag. -- Fam] > Signed-off-by: Fam Zheng <famz@redhat.com> As discussed on IRC, perhaps we can reuse the pausing/job->busy mechanism to detect quiescence. There's no synchronous pause function, but it can be realized with block_job_pause and aio_poll. Also while discussing this patch on IRC Fam noticed that resume currently clears the job's iostatus. I think that functionality can be moved to the QMP command. Thanks, Paolo > --- > > v2: Picked up Stefan's RFC patch and move on towards a more complete > fix. Please review! > > Jason: it would be nice if you could test this version again. It differs > from the previous version. 
> --- > block/mirror.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 44 insertions(+), 1 deletion(-) > > diff --git a/block/mirror.c b/block/mirror.c > index 80fd3c7..142199a 100644 > --- a/block/mirror.c > +++ b/block/mirror.c > @@ -63,6 +63,8 @@ typedef struct MirrorBlockJob { > int ret; > bool unmap; > bool waiting_for_io; > + bool quiesce_requested; /* temporarily detached to move AioContext, > don't do more I/O */ > int target_cluster_sectors; > int max_iov; > } MirrorBlockJob; > @@ -119,7 +121,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret) > qemu_iovec_destroy(&op->qiov); > g_free(op); > > - if (s->waiting_for_io) { > + if (s->waiting_for_io && !s->quiesce_requested) { > qemu_coroutine_enter(s->common.co, NULL); > } > } > @@ -307,6 +309,14 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s, > } > } > > +static void coroutine_fn mirror_check_for_quiesce(MirrorBlockJob *s) > +{ > + if (s->quiesce_requested) { > + s->quiesce_requested = false; > + qemu_coroutine_yield(); > + } > +} > + > static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) > { > BlockDriverState *source = blk_bs(s->common.blk); > @@ -331,6 +341,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) > mirror_wait_for_io(s); > } > > + mirror_check_for_quiesce(s); > /* Find the number of consecutive dirty chunks following the first dirty > * one, and wait for in flight requests in them. 
*/ > while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) { > @@ -442,6 +453,31 @@ static void mirror_drain(MirrorBlockJob *s) > } > } > > +static void mirror_attached_aio_context(AioContext *new_context, void *opaque) > +{ > + MirrorBlockJob *s = opaque; > + > + blk_set_aio_context(s->target, new_context); > + > + /* Resume execution */ > + assert(!s->quiesce_requested); > + if (s->waiting_for_io) { > + qemu_coroutine_enter(s->common.co, NULL); > + } > +} > + > +static void mirror_detach_aio_context(void *opaque) > +{ > + MirrorBlockJob *s = opaque; > + > + /* Complete pending write requests */ > + assert(!s->quiesce_requested); > + s->quiesce_requested = true; > + while (s->quiesce_requested || s->in_flight) { > + aio_poll(blk_get_aio_context(s->common.blk), true); > + } > +} > + > typedef struct { > int ret; > } MirrorExitData; > @@ -491,6 +527,8 @@ static void mirror_exit(BlockJob *job, void *opaque) > if (replace_aio_context) { > aio_context_release(replace_aio_context); > } > + blk_remove_aio_context_notifier(s->common.blk, mirror_attached_aio_context, > + mirror_detach_aio_context, s); > g_free(s->replaces); > bdrv_op_unblock_all(target_bs, s->common.blocker); > blk_unref(s->target); > @@ -583,6 +621,7 @@ static void coroutine_fn mirror_run(void *opaque) > block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, 0); > } > > + mirror_check_for_quiesce(s); > if (block_job_is_cancelled(&s->common)) { > goto immediate_exit; > } > @@ -612,6 +651,7 @@ static void coroutine_fn mirror_run(void *opaque) > goto immediate_exit; > } > > + mirror_check_for_quiesce(s); > cnt = bdrv_get_dirty_count(s->dirty_bitmap); > /* s->common.offset contains the number of bytes already processed so > * far, cnt is the number of dirty sectors remaining and > @@ -851,6 +891,9 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, > > bdrv_op_block_all(target, s->common.blocker); > > + blk_add_aio_context_notifier(s->common.blk, 
mirror_attached_aio_context, > + mirror_detach_aio_context, s); > + > s->common.co = qemu_coroutine_create(mirror_run); > trace_mirror_start(bs, s, s->common.co, opaque); > qemu_coroutine_enter(s->common.co, s); >
On Sun, Jun 12, 2016 at 02:51:04PM +0800, Fam Zheng wrote: > @@ -119,7 +121,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret) > qemu_iovec_destroy(&op->qiov); > g_free(op); > > - if (s->waiting_for_io) { > + if (s->waiting_for_io && !s->quiesce_requested) { > qemu_coroutine_enter(s->common.co, NULL); > } Is it necessary to interact with s->waiting_for_io? The coroutine should reach a quiescent point later on anyway so it would be simpler to leave this unchanged. > +static void mirror_detach_aio_context(void *opaque) > +{ > + MirrorBlockJob *s = opaque; > + > + /* Complete pending write requests */ > + assert(!s->quiesce_requested); > + s->quiesce_requested = true; > + while (s->quiesce_requested || s->in_flight) { > + aio_poll(blk_get_aio_context(s->common.blk), true); > + } > +} Adding synchronous aio_poll() loops will bite us in the future. For example, if a guest resets the virtio device the vcpu will be hung until I/O completes and sleeps finish. This flaw in QEMU already exists today and won't be fixed any time soon, so I guess this approach is okay... Stefan
On Sun, Jun 12, 2016 at 02:51:04PM +0800, Fam Zheng wrote: > From: Stefan Hajnoczi <stefanha@redhat.com> > > When dataplane is enabled or disabled the drive switches to a new > AioContext. The mirror block job must also move to the new AioContext > so that drive accesses are always made within its AioContext. > > This patch partially achieves that by draining target and source > requests to reach a quiescent point. The job is resumed in the new > AioContext after moving s->target into the new AioContext. > > The quiesce_requested flag is added to deal with yield points in > block_job_sleep_ns(), bdrv_is_allocated_above(), and > bdrv_get_block_status_above(). Previously they continue executing in > the old AioContext. The nested aio_poll in mirror_detach_aio_context > will drive the mirror coroutine up to fixed yield points, where > mirror_check_for_quiesce is called. > > Cc: Fam Zheng <famz@redhat.com> > Cc: Paolo Bonzini <pbonzini@redhat.com> > Cc: Jeff Cody <jcody@redhat.com> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> > [Drain source as well, and add s->quiesce_requested flag. -- Fam] > Signed-off-by: Fam Zheng <famz@redhat.com> > > --- > > v2: Picked up Stefan's RFC patch and move on towards a more complete > fix. Please review! Thanks, I'll send a v3 with my own comment addressed and fixes for other block jobs.
On 06/12/2016 02:51 AM, Fam Zheng wrote: ... > --- > > v2: Picked up Stefan's RFC patch and move on towards a more complete > fix. Please review! > > Jason: it would be nice if you could test this version again. It differs > from the previous version. No problem. I'll test v3 when it is available.
diff --git a/block/mirror.c b/block/mirror.c index 80fd3c7..142199a 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -63,6 +63,8 @@ typedef struct MirrorBlockJob { int ret; bool unmap; bool waiting_for_io; + bool quiesce_requested; /* temporarily detached to move AioContext, + don't do more I/O */ int target_cluster_sectors; int max_iov; } MirrorBlockJob; @@ -119,7 +121,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret) qemu_iovec_destroy(&op->qiov); g_free(op); - if (s->waiting_for_io) { + if (s->waiting_for_io && !s->quiesce_requested) { qemu_coroutine_enter(s->common.co, NULL); } } @@ -307,6 +309,14 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s, } } +static void coroutine_fn mirror_check_for_quiesce(MirrorBlockJob *s) +{ + if (s->quiesce_requested) { + s->quiesce_requested = false; + qemu_coroutine_yield(); + } +} + static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) { BlockDriverState *source = blk_bs(s->common.blk); @@ -331,6 +341,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) { mirror_wait_for_io(s); } + mirror_check_for_quiesce(s); /* Find the number of consecutive dirty chunks following the first dirty * one, and wait for in flight requests in them. 
*/ while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) { @@ -442,6 +453,31 @@ static void mirror_drain(MirrorBlockJob *s) } } +static void mirror_attached_aio_context(AioContext *new_context, void *opaque) +{ + MirrorBlockJob *s = opaque; + + blk_set_aio_context(s->target, new_context); + + /* Resume execution */ + assert(!s->quiesce_requested); + if (s->waiting_for_io) { + qemu_coroutine_enter(s->common.co, NULL); + } +} + +static void mirror_detach_aio_context(void *opaque) +{ + MirrorBlockJob *s = opaque; + + /* Complete pending write requests */ + assert(!s->quiesce_requested); + s->quiesce_requested = true; + while (s->quiesce_requested || s->in_flight) { + aio_poll(blk_get_aio_context(s->common.blk), true); + } +} + typedef struct { int ret; } MirrorExitData; @@ -491,6 +527,8 @@ static void mirror_exit(BlockJob *job, void *opaque) if (replace_aio_context) { aio_context_release(replace_aio_context); } + blk_remove_aio_context_notifier(s->common.blk, mirror_attached_aio_context, + mirror_detach_aio_context, s); g_free(s->replaces); bdrv_op_unblock_all(target_bs, s->common.blocker); blk_unref(s->target); @@ -583,6 +621,7 @@ static void coroutine_fn mirror_run(void *opaque) block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, 0); } + mirror_check_for_quiesce(s); if (block_job_is_cancelled(&s->common)) { goto immediate_exit; } @@ -612,6 +651,7 @@ static void coroutine_fn mirror_run(void *opaque) goto immediate_exit; } + mirror_check_for_quiesce(s); cnt = bdrv_get_dirty_count(s->dirty_bitmap); /* s->common.offset contains the number of bytes already processed so * far, cnt is the number of dirty sectors remaining and @@ -851,6 +891,9 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, bdrv_op_block_all(target, s->common.blocker); + blk_add_aio_context_notifier(s->common.blk, mirror_attached_aio_context, + mirror_detach_aio_context, s); + s->common.co = qemu_coroutine_create(mirror_run); trace_mirror_start(bs, s, 
s->common.co, opaque); qemu_coroutine_enter(s->common.co, s);