Message ID | 20161201192652.9509-13-stefanha@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 12/01/2016 08:26 PM, Stefan Hajnoczi wrote: > This patch is based on the algorithm for the kvm.ko halt_poll_ns > parameter in Linux. The initial polling time is zero. > > If the event loop is woken up within the maximum polling time it means > polling could be effective, so grow polling time. > > If the event loop is woken up beyond the maximum polling time it means > polling is not effective, so shrink polling time. > > If the event loop makes progress within the current polling time then > the sweet spot has been reached. > > This algorithm adjusts the polling time so it can adapt to variations in > workloads. The goal is to reach the sweet spot while also recognizing > when polling would hurt more than help. > > Two new trace events, poll_grow and poll_shrink, are added for observing > polling time adjustment. > > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Not sure why, but I have 4 host ramdisks with the same iothread as guest virtio-blk. Running fio in the guest on one of these disks will poll; as soon as I have 2 disks in fio I almost always see shrinks (so polling stays at 0) and almost no grows. 
> --- > include/block/aio.h | 10 +++++++-- > aio-posix.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++---- > aio-win32.c | 3 ++- > async.c | 3 +++ > iothread.c | 4 ++-- > trace-events | 2 ++ > 6 files changed, 71 insertions(+), 9 deletions(-) > > diff --git a/include/block/aio.h b/include/block/aio.h > index cc3272b..e4a4912 100644 > --- a/include/block/aio.h > +++ b/include/block/aio.h > @@ -134,8 +134,11 @@ struct AioContext { > /* Number of AioHandlers without .io_poll() */ > int poll_disable_cnt; > > - /* Maximum polling time in nanoseconds */ > - int64_t poll_max_ns; > + /* Polling mode parameters */ > + int64_t poll_ns; /* current polling time in nanoseconds */ > + int64_t poll_max_ns; /* maximum polling time in nanoseconds */ > + int64_t poll_grow; /* polling time growth factor */ > + int64_t poll_shrink; /* polling time shrink factor */ > > /* Are we in polling mode or monitoring file descriptors? */ > bool poll_started; > @@ -511,10 +514,13 @@ void aio_context_setup(AioContext *ctx); > * aio_context_set_poll_params: > * @ctx: the aio context > * @max_ns: how long to busy poll for, in nanoseconds > + * @grow: polling time growth factor > + * @shrink: polling time shrink factor > * > * Poll mode can be disabled by setting poll_max_ns to 0. 
> */ > void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, > + int64_t grow, int64_t shrink, > Error **errp); > > #endif > diff --git a/aio-posix.c b/aio-posix.c > index 5216d82..1585571 100644 > --- a/aio-posix.c > +++ b/aio-posix.c > @@ -550,7 +550,7 @@ static bool try_poll_mode(AioContext *ctx, bool blocking) > if (blocking && ctx->poll_max_ns && ctx->poll_disable_cnt == 0) { > /* See qemu_soonest_timeout() uint64_t hack */ > int64_t max_ns = MIN((uint64_t)aio_compute_timeout(ctx), > - (uint64_t)ctx->poll_max_ns); > + (uint64_t)ctx->poll_ns); > > if (max_ns) { > poll_set_started(ctx, true); > @@ -576,6 +576,7 @@ bool aio_poll(AioContext *ctx, bool blocking) > int ret = 0; > bool progress; > int64_t timeout; > + int64_t start = 0; > > aio_context_acquire(ctx); > progress = false; > @@ -593,6 +594,10 @@ bool aio_poll(AioContext *ctx, bool blocking) > > ctx->walking_handlers++; > > + if (ctx->poll_max_ns) { > + start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); > + } > + > if (try_poll_mode(ctx, blocking)) { > progress = true; > } else { > @@ -635,6 +640,47 @@ bool aio_poll(AioContext *ctx, bool blocking) > atomic_sub(&ctx->notify_me, 2); > } > > + /* Adjust polling time */ > + if (ctx->poll_max_ns) { > + int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start; > + > + if (block_ns <= ctx->poll_ns) { > + /* This is the sweet spot, no adjustment needed */ > + } else if (block_ns > ctx->poll_max_ns) { > + /* We'd have to poll for too long, poll less */ > + int64_t old = ctx->poll_ns; > + > + if (ctx->poll_shrink) { > + ctx->poll_ns /= ctx->poll_shrink; > + } else { > + ctx->poll_ns = 0; > + } > + > + trace_poll_shrink(ctx, old, ctx->poll_ns); > + } else if (ctx->poll_ns < ctx->poll_max_ns && > + block_ns < ctx->poll_max_ns) { > + /* There is room to grow, poll longer */ > + int64_t old = ctx->poll_ns; > + int64_t grow = ctx->poll_grow; > + > + if (grow == 0) { > + grow = 2; > + } > + > + if (ctx->poll_ns) { > + ctx->poll_ns *= grow; > + } else { 
> + ctx->poll_ns = 4000; /* start polling at 4 microseconds */ > + } > + > + if (ctx->poll_ns > ctx->poll_max_ns) { > + ctx->poll_ns = ctx->poll_max_ns; > + } > + > + trace_poll_grow(ctx, old, ctx->poll_ns); > + } > + } > + > aio_notify_accept(ctx); > > /* if we have any readable fds, dispatch event */ > @@ -678,12 +724,16 @@ void aio_context_setup(AioContext *ctx) > #endif > } > > -void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, Error **errp) > +void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, > + int64_t grow, int64_t shrink, Error **errp) > { > - /* No thread synchronization here, it doesn't matter if an incorrect poll > - * timeout is used once. > + /* No thread synchronization here, it doesn't matter if an incorrect value > + * is used once. > */ > ctx->poll_max_ns = max_ns; > + ctx->poll_ns = 0; > + ctx->poll_grow = grow; > + ctx->poll_shrink = shrink; > > aio_notify(ctx); > } > diff --git a/aio-win32.c b/aio-win32.c > index d0e40a8..d19dc42 100644 > --- a/aio-win32.c > +++ b/aio-win32.c > @@ -395,7 +395,8 @@ void aio_context_setup(AioContext *ctx) > { > } > > -void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, Error **errp) > +void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, > + int64_t grow, int64_t shrink, Error **errp) > { > error_setg(errp, "AioContext polling is not implemented on Windows"); > } > diff --git a/async.c b/async.c > index 29abf40..2960171 100644 > --- a/async.c > +++ b/async.c > @@ -385,7 +385,10 @@ AioContext *aio_context_new(Error **errp) > qemu_rec_mutex_init(&ctx->lock); > timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx); > > + ctx->poll_ns = 0; > ctx->poll_max_ns = 0; > + ctx->poll_grow = 0; > + ctx->poll_shrink = 0; > > return ctx; > fail: > diff --git a/iothread.c b/iothread.c > index 8dfd10d..28598b5 100644 > --- a/iothread.c > +++ b/iothread.c > @@ -98,7 +98,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp) > return; > } > > - 
aio_context_set_poll_params(iothread->ctx, iothread->poll_max_ns, > + aio_context_set_poll_params(iothread->ctx, iothread->poll_max_ns, 0, 0, > &local_error); > if (local_error) { > error_propagate(errp, local_error); > @@ -158,7 +158,7 @@ static void iothread_set_poll_max_ns(Object *obj, Visitor *v, > iothread->poll_max_ns = value; > > if (iothread->ctx) { > - aio_context_set_poll_params(iothread->ctx, value, &local_err); > + aio_context_set_poll_params(iothread->ctx, value, 0, 0, &local_err); > } > > out: > diff --git a/trace-events b/trace-events > index 7fe3a1b..1181486 100644 > --- a/trace-events > +++ b/trace-events > @@ -28,6 +28,8 @@ > # aio-posix.c > run_poll_handlers_begin(void *ctx, int64_t max_ns) "ctx %p max_ns %"PRId64 > run_poll_handlers_end(void *ctx, bool progress) "ctx %p progress %d" > +poll_shrink(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64 > +poll_grow(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64 > > # thread-pool.c > thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p" >
On Mon, Dec 05, 2016 at 09:06:17PM +0100, Christian Borntraeger wrote: > On 12/01/2016 08:26 PM, Stefan Hajnoczi wrote: > > This patch is based on the algorithm for the kvm.ko halt_poll_ns > > parameter in Linux. The initial polling time is zero. > > > > If the event loop is woken up within the maximum polling time it means > > polling could be effective, so grow polling time. > > > > If the event loop is woken up beyond the maximum polling time it means > > polling is not effective, so shrink polling time. > > > > If the event loop makes progress within the current polling time then > > the sweet spot has been reached. > > > > This algorithm adjusts the polling time so it can adapt to variations in > > workloads. The goal is to reach the sweet spot while also recognizing > > when polling would hurt more than help. > > > > Two new trace events, poll_grow and poll_shrink, are added for observing > > polling time adjustment. > > > > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> > > Not sure way, but I have 4 host ramdisks with the same iothread as guest > virtio-blk. running fio in the guest on one of these disks will poll, as > soon as I have 2 disks in fio I almost always see shrinks (so polling > stays at 0) and almost no grows. Shrinking occurs when polling + ppoll(2) time exceeds poll-max-ns. What is the value of poll-max-ns and how long is run_poll_handlers_end - run_poll_handlers_begin? I wonder if polling both disks takes longer than poll-max-ns once you have two disks. The "polling" activity includes processing the I/O requests, so I imagine the time extends significantly as more disks have I/O requests ready for processing. Maybe the block_ns timing calculation should exclude processing time to avoid false shrinking? It also strikes me that there's a blind spot to the self-tuning algorithm: imagine virtqueue kick via ppoll(2) + ioeventfd takes N nanoseconds. Detecting new virtqueue buffers via polling takes M nanoseconds. 
When M <= poll-max-ns < N the algorithm decides there is no point in polling but it would actually be faster to poll. The reason is that the algorithm only looks at block_ns, which is N, not M. This seems difficult to tackle because the algorithm has no way of predicting M unless it randomly tries to poll longer. Stefan
On 12/06/2016 10:20 AM, Stefan Hajnoczi wrote: > On Mon, Dec 05, 2016 at 09:06:17PM +0100, Christian Borntraeger wrote: >> On 12/01/2016 08:26 PM, Stefan Hajnoczi wrote: >>> This patch is based on the algorithm for the kvm.ko halt_poll_ns >>> parameter in Linux. The initial polling time is zero. >>> >>> If the event loop is woken up within the maximum polling time it means >>> polling could be effective, so grow polling time. >>> >>> If the event loop is woken up beyond the maximum polling time it means >>> polling is not effective, so shrink polling time. >>> >>> If the event loop makes progress within the current polling time then >>> the sweet spot has been reached. >>> >>> This algorithm adjusts the polling time so it can adapt to variations in >>> workloads. The goal is to reach the sweet spot while also recognizing >>> when polling would hurt more than help. >>> >>> Two new trace events, poll_grow and poll_shrink, are added for observing >>> polling time adjustment. >>> >>> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> >> >> Not sure way, but I have 4 host ramdisks with the same iothread as guest >> virtio-blk. running fio in the guest on one of these disks will poll, as >> soon as I have 2 disks in fio I almost always see shrinks (so polling >> stays at 0) and almost no grows. > > Shrinking occurs when polling + ppoll(2) time exceeds poll-max-ns. > > What is the value of poll-max-ns I used 50000ns as poll value. When using 500000ns it is polling again. > and how long is run_poll_handlers_end - run_poll_handlers_begin? Too long. I looked again and I realized that I used cache=none without io=native. After adding io=native things are better. Even with 4 disks polling still happens. So it seems that the mileage will vary depending on the settings Christian
On Tue, Dec 06, 2016 at 11:12:45AM +0100, Christian Borntraeger wrote: > On 12/06/2016 10:20 AM, Stefan Hajnoczi wrote: > > On Mon, Dec 05, 2016 at 09:06:17PM +0100, Christian Borntraeger wrote: > >> On 12/01/2016 08:26 PM, Stefan Hajnoczi wrote: > >>> This patch is based on the algorithm for the kvm.ko halt_poll_ns > >>> parameter in Linux. The initial polling time is zero. > >>> > >>> If the event loop is woken up within the maximum polling time it means > >>> polling could be effective, so grow polling time. > >>> > >>> If the event loop is woken up beyond the maximum polling time it means > >>> polling is not effective, so shrink polling time. > >>> > >>> If the event loop makes progress within the current polling time then > >>> the sweet spot has been reached. > >>> > >>> This algorithm adjusts the polling time so it can adapt to variations in > >>> workloads. The goal is to reach the sweet spot while also recognizing > >>> when polling would hurt more than help. > >>> > >>> Two new trace events, poll_grow and poll_shrink, are added for observing > >>> polling time adjustment. > >>> > >>> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> > >> > >> Not sure way, but I have 4 host ramdisks with the same iothread as guest > >> virtio-blk. running fio in the guest on one of these disks will poll, as > >> soon as I have 2 disks in fio I almost always see shrinks (so polling > >> stays at 0) and almost no grows. > > > > Shrinking occurs when polling + ppoll(2) time exceeds poll-max-ns. > > > > What is the value of poll-max-ns > > I used 50000ns as poll value. When using 500000ns it is polling again. > > > and how long is run_poll_handlers_end - run_poll_handlers_begin? > > Too long. I looked again and I realized that I used cache=none without > io=native. After adding io=native things are better. Even with 4 disks > polling still happens. So it seems that the mileage will vary depending > on the settings Okay, it could be the things I mentioned. 
The self-tuning can be too conservative and poll less than we'd hope for. On the other hand it's hard to predict the actual ideal polling amount so I guess being conservative is alright. Maybe the algorithm can be refined later. Stefan
diff --git a/include/block/aio.h b/include/block/aio.h index cc3272b..e4a4912 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -134,8 +134,11 @@ struct AioContext { /* Number of AioHandlers without .io_poll() */ int poll_disable_cnt; - /* Maximum polling time in nanoseconds */ - int64_t poll_max_ns; + /* Polling mode parameters */ + int64_t poll_ns; /* current polling time in nanoseconds */ + int64_t poll_max_ns; /* maximum polling time in nanoseconds */ + int64_t poll_grow; /* polling time growth factor */ + int64_t poll_shrink; /* polling time shrink factor */ /* Are we in polling mode or monitoring file descriptors? */ bool poll_started; @@ -511,10 +514,13 @@ void aio_context_setup(AioContext *ctx); * aio_context_set_poll_params: * @ctx: the aio context * @max_ns: how long to busy poll for, in nanoseconds + * @grow: polling time growth factor + * @shrink: polling time shrink factor * * Poll mode can be disabled by setting poll_max_ns to 0. */ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, + int64_t grow, int64_t shrink, Error **errp); #endif diff --git a/aio-posix.c b/aio-posix.c index 5216d82..1585571 100644 --- a/aio-posix.c +++ b/aio-posix.c @@ -550,7 +550,7 @@ static bool try_poll_mode(AioContext *ctx, bool blocking) if (blocking && ctx->poll_max_ns && ctx->poll_disable_cnt == 0) { /* See qemu_soonest_timeout() uint64_t hack */ int64_t max_ns = MIN((uint64_t)aio_compute_timeout(ctx), - (uint64_t)ctx->poll_max_ns); + (uint64_t)ctx->poll_ns); if (max_ns) { poll_set_started(ctx, true); @@ -576,6 +576,7 @@ bool aio_poll(AioContext *ctx, bool blocking) int ret = 0; bool progress; int64_t timeout; + int64_t start = 0; aio_context_acquire(ctx); progress = false; @@ -593,6 +594,10 @@ bool aio_poll(AioContext *ctx, bool blocking) ctx->walking_handlers++; + if (ctx->poll_max_ns) { + start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + } + if (try_poll_mode(ctx, blocking)) { progress = true; } else { @@ -635,6 +640,47 @@ bool 
aio_poll(AioContext *ctx, bool blocking) atomic_sub(&ctx->notify_me, 2); } + /* Adjust polling time */ + if (ctx->poll_max_ns) { + int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start; + + if (block_ns <= ctx->poll_ns) { + /* This is the sweet spot, no adjustment needed */ + } else if (block_ns > ctx->poll_max_ns) { + /* We'd have to poll for too long, poll less */ + int64_t old = ctx->poll_ns; + + if (ctx->poll_shrink) { + ctx->poll_ns /= ctx->poll_shrink; + } else { + ctx->poll_ns = 0; + } + + trace_poll_shrink(ctx, old, ctx->poll_ns); + } else if (ctx->poll_ns < ctx->poll_max_ns && + block_ns < ctx->poll_max_ns) { + /* There is room to grow, poll longer */ + int64_t old = ctx->poll_ns; + int64_t grow = ctx->poll_grow; + + if (grow == 0) { + grow = 2; + } + + if (ctx->poll_ns) { + ctx->poll_ns *= grow; + } else { + ctx->poll_ns = 4000; /* start polling at 4 microseconds */ + } + + if (ctx->poll_ns > ctx->poll_max_ns) { + ctx->poll_ns = ctx->poll_max_ns; + } + + trace_poll_grow(ctx, old, ctx->poll_ns); + } + } + aio_notify_accept(ctx); /* if we have any readable fds, dispatch event */ @@ -678,12 +724,16 @@ void aio_context_setup(AioContext *ctx) #endif } -void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, Error **errp) +void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, + int64_t grow, int64_t shrink, Error **errp) { - /* No thread synchronization here, it doesn't matter if an incorrect poll - * timeout is used once. + /* No thread synchronization here, it doesn't matter if an incorrect value + * is used once. 
*/ ctx->poll_max_ns = max_ns; + ctx->poll_ns = 0; + ctx->poll_grow = grow; + ctx->poll_shrink = shrink; aio_notify(ctx); } diff --git a/aio-win32.c b/aio-win32.c index d0e40a8..d19dc42 100644 --- a/aio-win32.c +++ b/aio-win32.c @@ -395,7 +395,8 @@ void aio_context_setup(AioContext *ctx) { } -void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, Error **errp) +void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, + int64_t grow, int64_t shrink, Error **errp) { error_setg(errp, "AioContext polling is not implemented on Windows"); } diff --git a/async.c b/async.c index 29abf40..2960171 100644 --- a/async.c +++ b/async.c @@ -385,7 +385,10 @@ AioContext *aio_context_new(Error **errp) qemu_rec_mutex_init(&ctx->lock); timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx); + ctx->poll_ns = 0; ctx->poll_max_ns = 0; + ctx->poll_grow = 0; + ctx->poll_shrink = 0; return ctx; fail: diff --git a/iothread.c b/iothread.c index 8dfd10d..28598b5 100644 --- a/iothread.c +++ b/iothread.c @@ -98,7 +98,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp) return; } - aio_context_set_poll_params(iothread->ctx, iothread->poll_max_ns, + aio_context_set_poll_params(iothread->ctx, iothread->poll_max_ns, 0, 0, &local_error); if (local_error) { error_propagate(errp, local_error); @@ -158,7 +158,7 @@ static void iothread_set_poll_max_ns(Object *obj, Visitor *v, iothread->poll_max_ns = value; if (iothread->ctx) { - aio_context_set_poll_params(iothread->ctx, value, &local_err); + aio_context_set_poll_params(iothread->ctx, value, 0, 0, &local_err); } out: diff --git a/trace-events b/trace-events index 7fe3a1b..1181486 100644 --- a/trace-events +++ b/trace-events @@ -28,6 +28,8 @@ # aio-posix.c run_poll_handlers_begin(void *ctx, int64_t max_ns) "ctx %p max_ns %"PRId64 run_poll_handlers_end(void *ctx, bool progress) "ctx %p progress %d" +poll_shrink(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64 +poll_grow(void *ctx, int64_t old, 
int64_t new) "ctx %p old %"PRId64" new %"PRId64 # thread-pool.c thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
This patch is based on the algorithm for the kvm.ko halt_poll_ns parameter in Linux. The initial polling time is zero. If the event loop is woken up within the maximum polling time it means polling could be effective, so grow polling time. If the event loop is woken up beyond the maximum polling time it means polling is not effective, so shrink polling time. If the event loop makes progress within the current polling time then the sweet spot has been reached. This algorithm adjusts the polling time so it can adapt to variations in workloads. The goal is to reach the sweet spot while also recognizing when polling would hurt more than help. Two new trace events, poll_grow and poll_shrink, are added for observing polling time adjustment. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> --- include/block/aio.h | 10 +++++++-- aio-posix.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++---- aio-win32.c | 3 ++- async.c | 3 +++ iothread.c | 4 ++-- trace-events | 2 ++ 6 files changed, 71 insertions(+), 9 deletions(-)