Message ID | 1468415004-31755-1-git-send-email-roman.penyaev@profitbricks.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, Jul 13, 2016 at 03:03:24PM +0200, Roman Pen wrote: > Invoking io_setup(MAX_EVENTS) we ask kernel to create ring buffer for us > with specified number of events. But kernel ring buffer allocation logic > is a bit tricky (ring buffer is page size aligned + some percpu allocation > are required) so eventually more than requested events number is allocated. > > From a userspace side we have to follow the convention and should not try > to io_submit() more than MAX_EVENTS; otherwise the logic which consumes completed > events should be changed accordingly. The pitfall is in the following sequence: > > MAX_EVENTS = 128 > io_setup(MAX_EVENTS) > > io_submit(MAX_EVENTS) > io_submit(MAX_EVENTS) > > /* now 256 events are in-flight */ > > io_getevents(MAX_EVENTS) = 128 > > /* we can handle only 128 events at once, to be sure > * that nothing is left pending the io_getevents(MAX_EVENTS) > * call must be invoked once more or a hang will happen. */ > > To prevent the hang or reiteration of io_getevents() call this patch > restricts the number of in-flight requests, which is now limited to MAX_EVENTS. > > Signed-off-by: Roman Pen <roman.penyaev@profitbricks.com> > Reviewed-by: Fam Zheng <famz@redhat.com> > Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> > Cc: Stefan Hajnoczi <stefanha@redhat.com> > Cc: qemu-devel@nongnu.org > --- > v3: > o comment tweaks. > > v2: > o comment tweaks. > o fix QEMU coding style. > > block/linux-aio.c | 26 ++++++++++++++++---------- > 1 file changed, 16 insertions(+), 10 deletions(-) Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
On Wed, Jul 13, 2016 at 03:03:24PM +0200, Roman Pen wrote: > Invoking io_setup(MAX_EVENTS) we ask kernel to create ring buffer for us > with specified number of events. But kernel ring buffer allocation logic > is a bit tricky (ring buffer is page size aligned + some percpu allocation > are required) so eventually more than requested events number is allocated. > > From a userspace side we have to follow the convention and should not try > to io_submit() more than MAX_EVENTS; otherwise the logic which consumes completed > events should be changed accordingly. The pitfall is in the following sequence: > > MAX_EVENTS = 128 > io_setup(MAX_EVENTS) > > io_submit(MAX_EVENTS) > io_submit(MAX_EVENTS) > > /* now 256 events are in-flight */ > > io_getevents(MAX_EVENTS) = 128 > > /* we can handle only 128 events at once, to be sure > * that nothing is left pending the io_getevents(MAX_EVENTS) > * call must be invoked once more or a hang will happen. */ > > To prevent the hang or reiteration of io_getevents() call this patch > restricts the number of in-flight requests, which is now limited to MAX_EVENTS. > > Signed-off-by: Roman Pen <roman.penyaev@profitbricks.com> > Reviewed-by: Fam Zheng <famz@redhat.com> > Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> > Cc: Stefan Hajnoczi <stefanha@redhat.com> > Cc: qemu-devel@nongnu.org > --- > v3: > o comment tweaks. > > v2: > o comment tweaks. > o fix QEMU coding style. > > block/linux-aio.c | 26 ++++++++++++++++---------- > 1 file changed, 16 insertions(+), 10 deletions(-) Thanks, applied to my block tree: https://github.com/stefanha/qemu/commits/block Stefan
diff --git a/block/linux-aio.c b/block/linux-aio.c index e468960..78f4524 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -28,8 +28,6 @@ */ #define MAX_EVENTS 128 -#define MAX_QUEUED_IO 128 - struct qemu_laiocb { BlockAIOCB common; Coroutine *co; @@ -44,7 +42,8 @@ struct qemu_laiocb { typedef struct { int plugged; - unsigned int n; + unsigned int in_queue; + unsigned int in_flight; bool blocked; QSIMPLEQ_HEAD(, qemu_laiocb) pending; } LaioQueue; @@ -129,6 +128,7 @@ static void qemu_laio_completion_bh(void *opaque) s->event_max = 0; return; /* no more events */ } + s->io_q.in_flight -= s->event_max; } /* Reschedule so nested event loops see currently pending completions */ @@ -190,7 +190,8 @@ static void ioq_init(LaioQueue *io_q) { QSIMPLEQ_INIT(&io_q->pending); io_q->plugged = 0; - io_q->n = 0; + io_q->in_queue = 0; + io_q->in_flight = 0; io_q->blocked = false; } @@ -198,14 +199,17 @@ static void ioq_submit(LinuxAioState *s) { int ret, len; struct qemu_laiocb *aiocb; - struct iocb *iocbs[MAX_QUEUED_IO]; + struct iocb *iocbs[MAX_EVENTS]; QSIMPLEQ_HEAD(, qemu_laiocb) completed; do { + if (s->io_q.in_flight >= MAX_EVENTS) { + break; + } len = 0; QSIMPLEQ_FOREACH(aiocb, &s->io_q.pending, next) { iocbs[len++] = &aiocb->iocb; - if (len == MAX_QUEUED_IO) { + if (s->io_q.in_flight + len >= MAX_EVENTS) { break; } } @@ -218,11 +222,12 @@ static void ioq_submit(LinuxAioState *s) abort(); } - s->io_q.n -= ret; + s->io_q.in_flight += ret; + s->io_q.in_queue -= ret; aiocb = container_of(iocbs[ret - 1], struct qemu_laiocb, iocb); QSIMPLEQ_SPLIT_AFTER(&s->io_q.pending, aiocb, next, &completed); } while (ret == len && !QSIMPLEQ_EMPTY(&s->io_q.pending)); - s->io_q.blocked = (s->io_q.n > 0); + s->io_q.blocked = (s->io_q.in_queue > 0); } void laio_io_plug(BlockDriverState *bs, LinuxAioState *s) @@ -263,9 +268,10 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e)); 
QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next); - s->io_q.n++; + s->io_q.in_queue++; if (!s->io_q.blocked && - (!s->io_q.plugged || s->io_q.n >= MAX_QUEUED_IO)) { + (!s->io_q.plugged || + s->io_q.in_flight + s->io_q.in_queue >= MAX_EVENTS)) { ioq_submit(s); }