From patchwork Mon Mar 23 15:45:24 2009 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Anthony Liguori X-Patchwork-Id: 13740 Received: from vger.kernel.org (vger.kernel.org [209.132.176.167]) by demeter.kernel.org (8.14.2/8.14.2) with ESMTP id n2NFjt2S013600 for ; Mon, 23 Mar 2009 15:45:56 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753773AbZCWPpy (ORCPT ); Mon, 23 Mar 2009 11:45:54 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1752353AbZCWPpx (ORCPT ); Mon, 23 Mar 2009 11:45:53 -0400 Received: from e9.ny.us.ibm.com ([32.97.182.139]:34097 "EHLO e9.ny.us.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753257AbZCWPpv (ORCPT ); Mon, 23 Mar 2009 11:45:51 -0400 Received: from d01relay06.pok.ibm.com (d01relay06.pok.ibm.com [9.56.227.116]) by e9.ny.us.ibm.com (8.13.1/8.13.1) with ESMTP id n2NFafYt026742 for ; Mon, 23 Mar 2009 11:36:41 -0400 Received: from d03av02.boulder.ibm.com (d03av02.boulder.ibm.com [9.17.195.168]) by d01relay06.pok.ibm.com (8.13.8/8.13.8/NCO v9.2) with ESMTP id n2NFjmiO4817054 for ; Mon, 23 Mar 2009 11:45:48 -0400 Received: from d03av02.boulder.ibm.com (loopback [127.0.0.1]) by d03av02.boulder.ibm.com (8.12.11.20060308/8.13.3) with ESMTP id n2NFjVpL017669 for ; Mon, 23 Mar 2009 09:45:44 -0600 Received: from localhost.localdomain (sig-9-65-100-185.mts.ibm.com [9.65.100.185]) by d03av02.boulder.ibm.com (8.12.11.20060308/8.12.11) with ESMTP id n2NFjOLZ017404; Mon, 23 Mar 2009 09:45:25 -0600 From: Anthony Liguori To: qemu-devel@nongnu.org Cc: kvm@vger.kernel.org Subject: [PATCH][RFC] Linux AIO support when using O_DIRECT Date: Mon, 23 Mar 2009 10:45:24 -0500 Message-Id: <1237823124-6417-1-git-send-email-aliguori@us.ibm.com> X-Mailer: git-send-email 1.6.0.6 Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org This is just a first cut. 
It needs a fair bit of cleanup before it can be committed. I also think we need to fix up the AIO abstractions a bit. I wanted to share it, though, in case anyone is interested in doing some performance comparisons. It seems to work, although I haven't exercised it very much. --- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html diff --git a/Makefile b/Makefile index 82fec80..afc6b41 100644 --- a/Makefile +++ b/Makefile @@ -61,6 +61,9 @@ else ifdef CONFIG_AIO BLOCK_OBJS += posix-aio-compat.o endif +ifdef CONFIG_LINUX_AIO +BLOCK_OBJS += linux-aio.o +endif BLOCK_OBJS += block-raw-posix.o endif diff --git a/Makefile.target b/Makefile.target index 41366ee..df2a794 100644 --- a/Makefile.target +++ b/Makefile.target @@ -514,6 +514,9 @@ else ifdef CONFIG_AIO OBJS+=posix-aio-compat.o endif +ifdef CONFIG_LINUX_AIO +OBJS+=linux-aio.o +endif OBJS+=block-raw-posix.o endif diff --git a/block-raw-posix.c b/block-raw-posix.c index 1a1a178..e355cf4 100644 --- a/block-raw-posix.c +++ b/block-raw-posix.c @@ -29,6 +29,9 @@ #ifdef CONFIG_AIO #include "posix-aio-compat.h" #endif +#ifdef CONFIG_LINUX_AIO +#include "linux-aio.h" +#endif #ifdef CONFIG_COCOA #include @@ -68,6 +71,10 @@ #include #endif +#ifdef CONFIG_LINUX_AIO +#include "linux-aio.h" +#endif + //#define DEBUG_FLOPPY //#define DEBUG_BLOCK @@ -98,6 +105,17 @@ reopen it to see if the disk has been changed */ #define FD_OPEN_TIMEOUT 1000 +typedef struct AIOOperations +{ + struct qemu_aiocb *(*get_aiocb)(void); + void (*put_aiocb)(struct qemu_aiocb *); + int (*read)(struct qemu_aiocb *); + int (*write)(struct qemu_aiocb *); + int (*error)(struct qemu_aiocb *); + ssize_t (*get_result)(struct qemu_aiocb *aiocb); + int (*cancel)(int fd, struct qemu_aiocb *aiocb); +} AIOOperations; + typedef struct BDRVRawState { int fd; int type; @@ -111,8 +129,31 @@ typedef struct BDRVRawState { int fd_media_changed; #endif 
uint8_t* aligned_buf; + AIOOperations *aio_ops; } BDRVRawState; +static AIOOperations posix_aio_ops = { + .get_aiocb = qemu_paio_get_aiocb, + .put_aiocb = qemu_paio_put_aiocb, + .read = qemu_paio_read, + .write = qemu_paio_write, + .error = qemu_paio_error, + .get_result = qemu_paio_return, + .cancel = qemu_paio_cancel, +}; + +#ifdef CONFIG_LINUX_AIO +static AIOOperations linux_aio_ops = { + .get_aiocb = qemu_laio_get_aiocb, + .put_aiocb = qemu_laio_put_aiocb, + .read = qemu_laio_read, + .write = qemu_laio_write, + .error = qemu_laio_error, + .get_result = qemu_laio_return, + .cancel = qemu_laio_cancel, +}; +#endif + static int posix_aio_init(void); static int fd_open(BlockDriverState *bs); @@ -124,6 +165,14 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) posix_aio_init(); +#ifdef CONFIG_LINUX_AIO + if ((flags & BDRV_O_NOCACHE)) { + qemu_laio_init(); + s->aio_ops = &linux_aio_ops; + } else +#endif + s->aio_ops = &posix_aio_ops; + s->lseek_err_cnt = 0; open_flags = O_BINARY; @@ -463,7 +512,7 @@ static int raw_write(BlockDriverState *bs, int64_t sector_num, typedef struct RawAIOCB { BlockDriverAIOCB common; - struct qemu_paiocb aiocb; + struct qemu_aiocb *aiocb; struct RawAIOCB *next; int ret; } RawAIOCB; @@ -496,19 +545,24 @@ static void posix_aio_read(void *opaque) for(;;) { pacb = &s->first_aio; for(;;) { + BDRVRawState *s; + acb = *pacb; if (!acb) goto the_end; - ret = qemu_paio_error(&acb->aiocb); + + s = acb->common.bs->opaque; + ret = s->aio_ops->error(acb->aiocb); if (ret == ECANCELED) { /* remove the request */ *pacb = acb->next; + s->aio_ops->put_aiocb(acb->aiocb); qemu_aio_release(acb); } else if (ret != EINPROGRESS) { /* end of aio */ if (ret == 0) { - ret = qemu_paio_return(&acb->aiocb); - if (ret == acb->aiocb.aio_nbytes) + ret = s->aio_ops->get_result(acb->aiocb); + if (ret == acb->aiocb->aio_nbytes) ret = 0; else ret = -EINVAL; @@ -519,6 +573,7 @@ static void posix_aio_read(void *opaque) *pacb = acb->next; /* call the 
callback */ acb->common.cb(acb->common.opaque, ret); + s->aio_ops->put_aiocb(acb->aiocb); qemu_aio_release(acb); break; } else { @@ -553,7 +608,6 @@ static int posix_aio_init(void) struct sigaction act; PosixAioState *s; int fds[2]; - struct qemu_paioinit ai; if (posix_aio_state) return 0; @@ -579,6 +633,8 @@ static int posix_aio_init(void) qemu_aio_set_fd_handler(s->rfd, posix_aio_read, NULL, posix_aio_flush, s); + struct qemu_paioinit ai; + memset(&ai, 0, sizeof(ai)); ai.aio_threads = 64; ai.aio_num = 64; @@ -600,16 +656,15 @@ static RawAIOCB *raw_aio_setup(BlockDriverState *bs, return NULL; acb = qemu_aio_get(bs, cb, opaque); - if (!acb) - return NULL; - acb->aiocb.aio_fildes = s->fd; - acb->aiocb.ev_signo = SIGUSR2; - acb->aiocb.aio_buf = buf; + acb->aiocb = s->aio_ops->get_aiocb(); + acb->aiocb->aio_fildes = s->fd; + acb->aiocb->ev_signo = SIGUSR2; + acb->aiocb->aio_buf = buf; if (nb_sectors < 0) - acb->aiocb.aio_nbytes = -nb_sectors; + acb->aiocb->aio_nbytes = -nb_sectors; else - acb->aiocb.aio_nbytes = nb_sectors * 512; - acb->aiocb.aio_offset = sector_num * 512; + acb->aiocb->aio_nbytes = nb_sectors * 512; + acb->aiocb->aio_offset = sector_num * 512; acb->next = posix_aio_state->first_aio; posix_aio_state->first_aio = acb; return acb; @@ -618,7 +673,9 @@ static RawAIOCB *raw_aio_setup(BlockDriverState *bs, static void raw_aio_em_cb(void* opaque) { RawAIOCB *acb = opaque; + BDRVRawState *s = acb->common.bs->opaque; acb->common.cb(acb->common.opaque, acb->ret); + s->aio_ops->put_aiocb(acb->aiocb); qemu_aio_release(acb); } @@ -633,7 +690,9 @@ static void raw_aio_remove(RawAIOCB *acb) fprintf(stderr, "raw_aio_remove: aio request not found!\n"); break; } else if (*pacb == acb) { + BDRVRawState *s = acb->common.bs->opaque; *pacb = acb->next; + s->aio_ops->put_aiocb(acb->aiocb); qemu_aio_release(acb); break; } @@ -656,6 +715,7 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs, if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) { QEMUBH 
*bh; acb = qemu_aio_get(bs, cb, opaque); + acb->aiocb = s->aio_ops->get_aiocb(); acb->ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors); bh = qemu_bh_new(raw_aio_em_cb, acb); qemu_bh_schedule(bh); @@ -665,7 +725,7 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs, acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque); if (!acb) return NULL; - if (qemu_paio_read(&acb->aiocb) < 0) { + if (s->aio_ops->read(acb->aiocb) < 0) { raw_aio_remove(acb); return NULL; } @@ -687,6 +747,7 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs, if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) { QEMUBH *bh; acb = qemu_aio_get(bs, cb, opaque); + acb->aiocb = s->aio_ops->get_aiocb(); acb->ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors); bh = qemu_bh_new(raw_aio_em_cb, acb); qemu_bh_schedule(bh); @@ -696,7 +757,7 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs, acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque); if (!acb) return NULL; - if (qemu_paio_write(&acb->aiocb) < 0) { + if (s->aio_ops->write(acb->aiocb) < 0) { raw_aio_remove(acb); return NULL; } @@ -707,12 +768,13 @@ static void raw_aio_cancel(BlockDriverAIOCB *blockacb) { int ret; RawAIOCB *acb = (RawAIOCB *)blockacb; + BDRVRawState *s = acb->common.bs->opaque; - ret = qemu_paio_cancel(acb->aiocb.aio_fildes, &acb->aiocb); + ret = s->aio_ops->cancel(acb->aiocb->aio_fildes, acb->aiocb); if (ret == QEMU_PAIO_NOTCANCELED) { /* fail safe: if the aio could not be canceled, we wait for it */ - while (qemu_paio_error(&acb->aiocb) == EINPROGRESS); + while (s->aio_ops->error(acb->aiocb) == EINPROGRESS); } raw_aio_remove(acb); @@ -938,6 +1000,14 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) posix_aio_init(); +#ifdef CONFIG_LINUX_AIO + if ((flags & BDRV_O_NOCACHE)) { + qemu_laio_init(); + s->aio_ops = &linux_aio_ops; + } else +#endif + s->aio_ops = &posix_aio_ops; + #ifdef CONFIG_COCOA if 
(strstart(filename, "/dev/cdrom", NULL)) { kern_return_t kernResult; diff --git a/configure b/configure index 5c62c59..4913a3f 100755 --- a/configure +++ b/configure @@ -180,6 +180,7 @@ build_docs="no" uname_release="" curses="yes" aio="yes" +linuxaio="yes" nptl="yes" mixemu="no" bluez="yes" @@ -463,6 +464,8 @@ for opt do ;; --disable-aio) aio="no" ;; + --disable-linux-aio) linuxaio="no" + ;; --disable-blobs) blobs="no" ;; --kerneldir=*) kerneldir="$optarg" @@ -577,6 +580,7 @@ echo " --enable-uname-release=R Return R for uname -r in usermode emulation" echo " --sparc_cpu=V Build qemu for Sparc architecture v7, v8, v8plus, v8plusa, v9" echo " --disable-vde disable support for vde network" echo " --disable-aio disable AIO support" +echo " --disable-linux-aio disable Linux AIO support" echo " --disable-blobs disable installing provided firmware blobs" echo " --kerneldir=PATH look for kernel includes in PATH" echo "" @@ -1082,6 +1086,22 @@ EOF fi ########################################## +# linux-aio probe + +if test "$linuxaio" = "yes" ; then + linuxaio=no + cat > $TMPC < +#include +int main(void) { io_setup; io_set_eventfd; eventfd; return 0; } +EOF + if $cc $ARCH_CFLAGS -o $TMPE -laio $TMPC 2> /dev/null ; then + linuxaio=yes + AIOLIBS="$AIOLIBS -laio" + fi +fi + +########################################## # iovec probe cat > $TMPC < @@ -1204,6 +1224,7 @@ echo "uname -r $uname_release" echo "NPTL support $nptl" echo "vde support $vde" echo "AIO support $aio" +echo "Linux AIO support $linuxaio" echo "Install blobs $blobs" echo "KVM support $kvm" echo "fdt support $fdt" @@ -1500,6 +1521,10 @@ if test "$aio" = "yes" ; then echo "#define CONFIG_AIO 1" >> $config_h echo "CONFIG_AIO=yes" >> $config_mak fi +if test "$linuxaio" = "yes" ; then + echo "#define CONFIG_LINUX_AIO 1" >> $config_h + echo "CONFIG_LINUX_AIO=yes" >> $config_mak +fi if test "$blobs" = "yes" ; then echo "INSTALL_BLOBS=yes" >> $config_mak fi diff --git a/linux-aio.c b/linux-aio.c new file mode 100644 
index 0000000..959407c --- /dev/null +++ b/linux-aio.c @@ -0,0 +1,207 @@ +/* QEMU linux-aio + * + * Copyright IBM, Corp. 2009 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include "qemu-common.h" +#include "linux-aio.h" +#include "sys-queue.h" +#include "osdep.h" +#include "qemu-aio.h" + +#include +#include + +#define MAX_EVENTS 64 + +struct qemu_laiocb +{ + struct qemu_aiocb common; + struct qemu_laio_state *ctx; + struct iocb iocb; + ssize_t ret; +}; + +struct qemu_laio_state +{ + int efd; + io_context_t ctx; + int count; +}; + +static struct qemu_laio_state *qemu_laio_state; + +static struct qemu_laiocb *aiocb_to_laiocb(struct qemu_aiocb *aiocb) +{ + return container_of(aiocb, struct qemu_laiocb, common); +} + +struct qemu_aiocb *qemu_laio_get_aiocb(void) +{ + struct qemu_laiocb *laiocb; + + laiocb = qemu_mallocz(sizeof(*laiocb)); + return &laiocb->common; +} + +void qemu_laio_put_aiocb(struct qemu_aiocb *aiocb) +{ + struct qemu_laiocb *laiocb = aiocb_to_laiocb(aiocb); + + qemu_free(laiocb); +} + +static void qemu_laio_completion_cb(void *opaque) +{ + struct qemu_laio_state *s = opaque; + uint64_t val; + ssize_t ret; + struct io_event events[MAX_EVENTS]; + int ev_signo = -1; + + while (1) { + struct timespec ts = { 0 }; + int nevents, i; + + do { + ret = read(s->efd, &val, sizeof(val)); + } while (ret == -1 && errno == EINTR); + + if (ret == -1 && errno == EAGAIN) + break; + + if (ret != 8) + break; + + do { + nevents = io_getevents(s->ctx, val, MAX_EVENTS, events, &ts); + } while (nevents == -1 && errno == EINTR); + + for (i = 0; i < nevents; i++) { + struct iocb *iocb = events[i].obj; + struct qemu_laiocb *laiocb = container_of(iocb, struct qemu_laiocb, iocb); + + laiocb->ret = (ssize_t)(((uint64_t)events[i].res2 << 32) | events[i].res); + s->count--; + ev_signo = laiocb->common.ev_signo; + } + } + + /* FIXME this is cheating */ + 
if (ev_signo != -1) + kill(getpid(), ev_signo); +} + +static int qemu_laio_flush_cb(void *opaque) +{ + struct qemu_laio_state *s = opaque; + + if (s->count > 0) + return 1; + + return 0; +} + +int qemu_laio_init(void) +{ + if (qemu_laio_state == NULL) { + qemu_laio_state = qemu_mallocz(sizeof(*qemu_laio_state)); + qemu_laio_state->efd = eventfd(0, 0); + if (qemu_laio_state->efd == -1) { + qemu_free(qemu_laio_state); + return -EINVAL; + } + if (io_setup(MAX_EVENTS, &qemu_laio_state->ctx) != 0) { + close(qemu_laio_state->efd); + qemu_free(qemu_laio_state); + return -EINVAL; + } + + fcntl(qemu_laio_state->efd, F_SETFL, O_NONBLOCK); + + /* FIXME we could use a separate thread to read from eventfd. */ + /* This will not generate a signal upon IO completion which means that + * the VCPU may keep spinning unless there's an IO thread. */ + qemu_aio_set_fd_handler(qemu_laio_state->efd, qemu_laio_completion_cb, + NULL, qemu_laio_flush_cb, qemu_laio_state); + } + + return 0; +} + +static int qemu_laio_submit(struct qemu_aiocb *aiocb, int is_write) +{ + struct qemu_laiocb *laiocb = aiocb_to_laiocb(aiocb); + struct iocb *iocbs = &laiocb->iocb; + + if (is_write) + io_prep_pwrite(&laiocb->iocb, aiocb->aio_fildes, aiocb->aio_buf, + aiocb->aio_nbytes, aiocb->aio_offset); + else + io_prep_pread(&laiocb->iocb, aiocb->aio_fildes, aiocb->aio_buf, + aiocb->aio_nbytes, aiocb->aio_offset); + + io_set_eventfd(&laiocb->iocb, qemu_laio_state->efd); + + laiocb->ctx = qemu_laio_state; + laiocb->ret = -EINPROGRESS; + + qemu_laio_state->count++; + + return io_submit(qemu_laio_state->ctx, 1, &iocbs); +} + +int qemu_laio_read(struct qemu_aiocb *aiocb) +{ + return qemu_laio_submit(aiocb, 0); +} + +int qemu_laio_write(struct qemu_aiocb *aiocb) +{ + return qemu_laio_submit(aiocb, 1); +} + +int qemu_laio_error(struct qemu_aiocb *aiocb) +{ + ssize_t ret = qemu_laio_return(aiocb); + + if (ret < 0) + ret = -ret; + else + ret = 0; + + return ret; +} + +ssize_t qemu_laio_return(struct qemu_aiocb *aiocb) +{ 
+ struct qemu_laiocb *laiocb = aiocb_to_laiocb(aiocb); + + return laiocb->ret; +} + +int qemu_laio_cancel(int fd, struct qemu_aiocb *aiocb) +{ + struct qemu_laiocb *laiocb = aiocb_to_laiocb(aiocb); + struct io_event event; + int ret; + + if (laiocb->ret == -EINPROGRESS) { + ret = io_cancel(laiocb->ctx->ctx, &laiocb->iocb, &event); + if (ret == 0) { + laiocb->ret = -ECANCELED; + ret = QEMU_PAIO_CANCELED; + } else + ret = QEMU_PAIO_NOTCANCELED; + } else + ret = QEMU_PAIO_ALLDONE; + + return ret; +} diff --git a/linux-aio.h b/linux-aio.h new file mode 100644 index 0000000..002270c --- /dev/null +++ b/linux-aio.h @@ -0,0 +1,28 @@ +/* QEMU linux-aio + * + * Copyright IBM, Corp. 2009 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_LINUX_AIO_H +#define QEMU_LINUX_AIO_H + +#include "posix-aio-compat.h" + +struct qemu_aiocb *qemu_laio_get_aiocb(void); +void qemu_laio_put_aiocb(struct qemu_aiocb *aiocb); + +int qemu_laio_init(void); +int qemu_laio_read(struct qemu_aiocb *aiocb); +int qemu_laio_write(struct qemu_aiocb *aiocb); +int qemu_laio_error(struct qemu_aiocb *aiocb); +ssize_t qemu_laio_return(struct qemu_aiocb *aiocb); +int qemu_laio_cancel(int fd, struct qemu_aiocb *aiocb); + +#endif diff --git a/posix-aio-compat.c b/posix-aio-compat.c index 6b547f4..752001f 100644 --- a/posix-aio-compat.c +++ b/posix-aio-compat.c @@ -18,10 +18,24 @@ #include #include #include +#include "qemu-common.h" #include "osdep.h" #include "posix-aio-compat.h" +#include "sys-queue.h" + +struct qemu_paiocb +{ + struct qemu_aiocb common; + + /* private */ + TAILQ_ENTRY(qemu_paiocb) node; + int is_write; + ssize_t ret; + int active; +}; + static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; static pthread_t thread_id; @@ -31,6 +45,11 @@ static int cur_threads = 0; static int idle_threads = 0; static 
TAILQ_HEAD(, qemu_paiocb) request_list; +static struct qemu_paiocb *aiocb_to_paiocb(struct qemu_aiocb *aiocb) +{ + return container_of(aiocb, struct qemu_paiocb, common); +} + static void die2(int err, const char *what) { fprintf(stderr, "%s failed: %s\n", what, strerror(err)); @@ -116,19 +135,19 @@ static void *aio_thread(void *unused) idle_threads--; mutex_unlock(&lock); - while (offset < aiocb->aio_nbytes) { + while (offset < aiocb->common.aio_nbytes) { ssize_t len; if (aiocb->is_write) - len = pwrite(aiocb->aio_fildes, - (const char *)aiocb->aio_buf + offset, - aiocb->aio_nbytes - offset, - aiocb->aio_offset + offset); + len = pwrite(aiocb->common.aio_fildes, + (const char *)aiocb->common.aio_buf + offset, + aiocb->common.aio_nbytes - offset, + aiocb->common.aio_offset + offset); else - len = pread(aiocb->aio_fildes, - (char *)aiocb->aio_buf + offset, - aiocb->aio_nbytes - offset, - aiocb->aio_offset + offset); + len = pread(aiocb->common.aio_fildes, + (char *)aiocb->common.aio_buf + offset, + aiocb->common.aio_nbytes - offset, + aiocb->common.aio_offset + offset); if (len == -1 && errno == EINTR) continue; @@ -146,7 +165,7 @@ static void *aio_thread(void *unused) idle_threads++; mutex_unlock(&lock); - if (kill(pid, aiocb->ev_signo)) die("kill failed"); + if (kill(pid, aiocb->common.ev_signo)) die("kill failed"); } idle_threads--; @@ -193,18 +212,21 @@ static int qemu_paio_submit(struct qemu_paiocb *aiocb, int is_write) return 0; } -int qemu_paio_read(struct qemu_paiocb *aiocb) +int qemu_paio_read(struct qemu_aiocb *cb) { + struct qemu_paiocb *aiocb = aiocb_to_paiocb(cb); return qemu_paio_submit(aiocb, 0); } -int qemu_paio_write(struct qemu_paiocb *aiocb) +int qemu_paio_write(struct qemu_aiocb *cb) { + struct qemu_paiocb *aiocb = aiocb_to_paiocb(cb); return qemu_paio_submit(aiocb, 1); } -ssize_t qemu_paio_return(struct qemu_paiocb *aiocb) +ssize_t qemu_paio_return(struct qemu_aiocb *cb) { + struct qemu_paiocb *aiocb = aiocb_to_paiocb(cb); ssize_t ret; 
mutex_lock(&lock); @@ -214,9 +236,9 @@ ssize_t qemu_paio_return(struct qemu_paiocb *aiocb) return ret; } -int qemu_paio_error(struct qemu_paiocb *aiocb) +int qemu_paio_error(struct qemu_aiocb *cb) { - ssize_t ret = qemu_paio_return(aiocb); + ssize_t ret = qemu_paio_return(cb); if (ret < 0) ret = -ret; @@ -226,8 +248,9 @@ int qemu_paio_error(struct qemu_paiocb *aiocb) return ret; } -int qemu_paio_cancel(int fd, struct qemu_paiocb *aiocb) +int qemu_paio_cancel(int fd, struct qemu_aiocb *cb) { + struct qemu_paiocb *aiocb = aiocb_to_paiocb(cb); int ret; mutex_lock(&lock); @@ -243,3 +266,18 @@ int qemu_paio_cancel(int fd, struct qemu_paiocb *aiocb) return ret; } + +struct qemu_aiocb *qemu_paio_get_aiocb(void) +{ + struct qemu_paiocb *paiocb; + + paiocb = qemu_mallocz(sizeof(*paiocb)); + return &paiocb->common; +} + +void qemu_paio_put_aiocb(struct qemu_aiocb *aiocb) +{ + struct qemu_paiocb *paiocb = aiocb_to_paiocb(aiocb); + + qemu_free(paiocb); +} diff --git a/posix-aio-compat.h b/posix-aio-compat.h index 0bc10f5..b9aa3f9 100644 --- a/posix-aio-compat.h +++ b/posix-aio-compat.h @@ -18,25 +18,17 @@ #include #include -#include "sys-queue.h" - #define QEMU_PAIO_CANCELED 0x01 #define QEMU_PAIO_NOTCANCELED 0x02 #define QEMU_PAIO_ALLDONE 0x03 -struct qemu_paiocb +struct qemu_aiocb { int aio_fildes; void *aio_buf; size_t aio_nbytes; int ev_signo; off_t aio_offset; - - /* private */ - TAILQ_ENTRY(qemu_paiocb) node; - int is_write; - ssize_t ret; - int active; }; struct qemu_paioinit @@ -46,11 +38,14 @@ struct qemu_paioinit unsigned int aio_idle_time; }; +struct qemu_aiocb *qemu_paio_get_aiocb(void); +void qemu_paio_put_aiocb(struct qemu_aiocb *aiocb); + int qemu_paio_init(struct qemu_paioinit *aioinit); -int qemu_paio_read(struct qemu_paiocb *aiocb); -int qemu_paio_write(struct qemu_paiocb *aiocb); -int qemu_paio_error(struct qemu_paiocb *aiocb); -ssize_t qemu_paio_return(struct qemu_paiocb *aiocb); -int qemu_paio_cancel(int fd, struct qemu_paiocb *aiocb); +int 
qemu_paio_read(struct qemu_aiocb *aiocb); +int qemu_paio_write(struct qemu_aiocb *aiocb); +int qemu_paio_error(struct qemu_aiocb *aiocb); +ssize_t qemu_paio_return(struct qemu_aiocb *aiocb); +int qemu_paio_cancel(int fd, struct qemu_aiocb *aiocb); #endif