diff mbox

[RFC,v9,21/27] virtio-blk: Add basic request merging

Message ID 1342624074-24650-22-git-send-email-stefanha@linux.vnet.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Stefan Hajnoczi July 18, 2012, 3:07 p.m. UTC
This commit adds an I/O scheduler that sorts requests and merges
adjacent requests if they have the same operation type (read/write).
The code is ugly and not very well factored but it does merge
successfully.
---
 hw/dataplane/ioq.h     |    3 +-
 hw/dataplane/iosched.h |   51 +++++++++++++++++---------
 hw/dataplane/vring.h   |    4 +--
 hw/virtio-blk.c        |   93 +++++++++++++++++++++++++++++++++++++++++++-----
 4 files changed, 122 insertions(+), 29 deletions(-)
diff mbox

Patch

diff --git a/hw/dataplane/ioq.h b/hw/dataplane/ioq.h
index d1545d6..72e5fd6 100644
--- a/hw/dataplane/ioq.h
+++ b/hw/dataplane/ioq.h
@@ -96,7 +96,7 @@  static int ioq_submit(IOQueue *ioq)
     int rc = io_submit(ioq->io_ctx, ioq->queue_idx, ioq->queue);
     if (unlikely(rc < 0)) {
         unsigned int i;
-        fprintf(stderr, "io_submit io_ctx=%#lx nr=%d iovecs=%p\n", (uint64_t)ioq->io_ctx, ioq->queue_idx, ioq->queue);
+        fprintf(stderr, "io_submit failed io_ctx=%#lx nr=%d iovecs=%p rc=%d\n", (uint64_t)ioq->io_ctx, ioq->queue_idx, ioq->queue, rc);
         for (i = 0; i < ioq->queue_idx; i++) {
             fprintf(stderr, "[%u] type=%#x fd=%d\n", i, ioq->queue[i]->aio_lio_opcode, ioq->queue[i]->aio_fildes);
         }
@@ -121,7 +121,6 @@  static int ioq_run_completion(IOQueue *ioq, IOQueueCompletion *completion, void
         ssize_t ret = ((uint64_t)events[i].res2 << 32) | events[i].res;
 
         completion(events[i].obj, ret, opaque);
-        ioq_put_iocb(ioq, events[i].obj);
     }
     return nevents;
 }
diff --git a/hw/dataplane/iosched.h b/hw/dataplane/iosched.h
index 12ebccc..39da73c 100644
--- a/hw/dataplane/iosched.h
+++ b/hw/dataplane/iosched.h
@@ -9,6 +9,8 @@  typedef struct {
     unsigned long sched_calls;
 } IOSched;
 
+typedef void MergeFunc(struct iocb *a, struct iocb *b);
+
 static int iocb_cmp(const void *a, const void *b)
 {
     const struct iocb *iocb_a = a;
@@ -29,10 +31,10 @@  static int iocb_cmp(const void *a, const void *b)
 
 static size_t iocb_nbytes(struct iocb *iocb)
 {
-    struct iovec *iov = iocb->u.c.buf;
+    const struct iovec *iov = iocb->u.v.vec;
     size_t nbytes = 0;
     size_t i;
-    for (i = 0; i < iocb->u.c.nbytes; i++) {
+    for (i = 0; i < iocb->u.v.nr; i++) {
         nbytes += iov->iov_len;
         iov++;
     }
@@ -44,35 +46,52 @@  static void iosched_init(IOSched *iosched)
     memset(iosched, 0, sizeof *iosched);
 }
 
-static void iosched_print_stats(IOSched *iosched)
+static __attribute__((unused)) void iosched_print_stats(IOSched *iosched)
 {
     fprintf(stderr, "iocbs = %lu merges = %lu sched_calls = %lu\n",
             iosched->iocbs, iosched->merges, iosched->sched_calls);
     memset(iosched, 0, sizeof *iosched);
 }
 
-static void iosched(IOSched *iosched, struct iocb *unsorted[], unsigned int count)
+static void iosched(IOSched *iosched, struct iocb *unsorted[], unsigned int *count, MergeFunc merge_func)
 {
-    struct iocb *sorted[count];
-    struct iocb *last;
-    unsigned int i;
+    struct iocb *sorted[*count];
+    unsigned int merges = 0;
+    unsigned int i, j;
 
+    /*
     if ((++iosched->sched_calls % 1000) == 0) {
         iosched_print_stats(iosched);
     }
+    */
+
+    if (!*count) {
+        return;
+    }
 
     memcpy(sorted, unsorted, sizeof sorted);
-    qsort(sorted, count, sizeof sorted[0], iocb_cmp);
-
-    iosched->iocbs += count;
-    last = sorted[0];
-    for (i = 1; i < count; i++) {
-        if (last->aio_lio_opcode == sorted[i]->aio_lio_opcode &&
-            last->u.c.offset + iocb_nbytes(last) == sorted[i]->u.c.offset) {
-            iosched->merges++;
+    qsort(sorted, *count, sizeof sorted[0], iocb_cmp);
+
+    unsorted[0] = sorted[0];
+    j = 1;
+    for (i = 1; i < *count; i++) {
+        struct iocb *last = sorted[i - 1];
+        struct iocb *cur = sorted[i];
+
+        if (last->aio_lio_opcode == cur->aio_lio_opcode &&
+            last->u.c.offset + iocb_nbytes(last) == cur->u.c.offset) {
+            merge_func(last, cur);
+            merges++;
+
+            unsorted[j - 1] = cur;
+        } else {
+            unsorted[j++] = cur;
         }
-        last = sorted[i];
     }
+
+    iosched->merges += merges;
+    iosched->iocbs += *count;
+    *count = j;
 }
 
 #endif /* IOSCHED_H */
diff --git a/hw/dataplane/vring.h b/hw/dataplane/vring.h
index cdd4d4a..bbf8c86 100644
--- a/hw/dataplane/vring.h
+++ b/hw/dataplane/vring.h
@@ -29,7 +29,7 @@  static inline void *phys_to_host(Vring *vring, target_phys_addr_t phys)
     if (phys >= 0x100000000) {
         phys -= 0x100000000 - 0xe0000000;
     } else if (phys >= 0xe0000000) {
-        fprintf(stderr, "phys_to_host bad physical address in PCI range %#lx\n", phys);
+        fprintf(stderr, "phys_to_host bad physical address in PCI range %#lx\n", (unsigned long)phys);
         exit(1);
     }
     return vring->phys_mem_zero_host_ptr + phys;
@@ -65,7 +65,7 @@  static void vring_setup(Vring *vring, VirtIODevice *vdev, int n)
     vring->last_used_idx = 0;
 
     fprintf(stderr, "vring physical=%#lx desc=%p avail=%p used=%p\n",
-            virtio_queue_get_ring_addr(vdev, n),
+            (unsigned long)virtio_queue_get_ring_addr(vdev, n),
             vring->vr.desc, vring->vr.avail, vring->vr.used);
 }
 
diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index 75cb0f2..9131a7a 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -33,11 +33,29 @@  enum {
                                      * VRING_MAX with indirect descriptors */
 };
 
-typedef struct {
+/** I/O request
+ *
+ * Most I/O requests need to know the vring index (head) and the completion
+ * status byte that must be filled in to tell the guest whether or not the
+ * request succeeded.
+ *
+ * The iovec array pointed to by the iocb is valid only before ioq_submit() is
+ * called.  After that, neither the kernel nor userspace needs to access the
+ * iovec anymore and the memory is no longer owned by this VirtIOBlockRequest.
+ *
+ * Requests can be merged together by the I/O scheduler.  When this happens,
+ * the next_merged field is used to link the requests and only the first
+ * request's iocb is used.  Merged requests require memory allocation for the
+ * iovec array and must be freed appropriately.
+ */
+typedef struct VirtIOBlockRequest VirtIOBlockRequest;
+struct VirtIOBlockRequest {
     struct iocb iocb;               /* Linux AIO control block */
     unsigned char *status;          /* virtio block status code */
     unsigned int head;              /* vring descriptor index */
-} VirtIOBlockRequest;
+    int len;                        /* number of I/O bytes, only used for merged reqs */
+    VirtIOBlockRequest *next_merged;/* next merged iocb or NULL */
+};
 
 typedef struct {
     VirtIODevice vdev;
@@ -93,15 +111,17 @@  static void virtio_blk_notify_guest(VirtIOBlock *s)
     event_notifier_set(virtio_queue_get_guest_notifier(s->vq));
 }
 
-static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
+static void complete_one_request(VirtIOBlockRequest *req, VirtIOBlock *s, ssize_t ret)
 {
-    VirtIOBlock *s = opaque;
-    VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
     int len;
 
     if (likely(ret >= 0)) {
         *req->status = VIRTIO_BLK_S_OK;
-        len = ret;
+
+        /* Merged requests know their part of the length, single requests can
+         * just use the return value.
+         */
+        len = unlikely(req->len) ? req->len : ret;
     } else {
         *req->status = VIRTIO_BLK_S_IOERR;
         len = 0;
@@ -114,6 +134,59 @@  static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
     vring_push(&s->vring, req->head, len + sizeof req->status);
 }
 
+static bool is_request_merged(VirtIOBlockRequest *req)
+{
+    return req->next_merged;
+}
+
+static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
+{
+    VirtIOBlock *s = opaque;
+    VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
+
+    /* Free the iovec now, it isn't needed */
+    if (unlikely(is_request_merged(req))) {
+        g_free((void*)iocb->u.v.vec);
+    }
+
+    while (req) {
+        complete_one_request(req, s, ret);
+
+        VirtIOBlockRequest *next = req->next_merged;
+        ioq_put_iocb(&s->ioqueue, &req->iocb);
+        req = next;
+    }
+}
+
+static void merge_request(struct iocb *iocb_a, struct iocb *iocb_b)
+{
+    /* Repeated merging could be made more efficient using realloc, but this
+     * approach keeps it simple. */
+
+    VirtIOBlockRequest *req_a = container_of(iocb_a, VirtIOBlockRequest, iocb);
+    VirtIOBlockRequest *req_b = container_of(iocb_b, VirtIOBlockRequest, iocb);
+    struct iovec *iovec = g_malloc((iocb_a->u.v.nr + iocb_b->u.v.nr) * sizeof iovec[0]);
+
+    memcpy(iovec, iocb_a->u.v.vec, iocb_a->u.v.nr * sizeof iovec[0]);
+    memcpy(iovec + iocb_a->u.v.nr, iocb_b->u.v.vec, iocb_b->u.v.nr * sizeof iovec[0]);
+
+    if (is_request_merged(req_a)) {
+        /* Free the old merged iovec */
+        g_free((void*)iocb_a->u.v.vec);
+    } else {
+        /* Stash the request length */
+        req_a->len = iocb_nbytes(iocb_a);
+    }
+
+    iocb_b->u.v.vec = iovec;
+    req_b->len = iocb_nbytes(iocb_b);
+    req_b->next_merged = req_a;
+    /*
+    fprintf(stderr, "merged %p (%u) and %p (%u), %u iovecs in total\n",
+            req_a, iocb_a->u.v.nr, req_b, iocb_b->u.v.nr, iocb_a->u.v.nr + iocb_b->u.v.nr);
+    */
+}
+
 static void process_request(IOQueue *ioq, struct iovec iov[], unsigned int out_num, unsigned int in_num, unsigned int head)
 {
     /* Virtio block requests look like this: */
@@ -187,6 +260,8 @@  static void process_request(IOQueue *ioq, struct iovec iov[], unsigned int out_n
     VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
     req->head = head;
     req->status = &inhdr->status;
+    req->len = 0;
+    req->next_merged = NULL;
 }
 
 static bool handle_notify(EventHandler *handler)
@@ -251,7 +326,7 @@  static bool handle_notify(EventHandler *handler)
         }
     }
 
-    iosched(&s->iosched, s->ioqueue.queue, s->ioqueue.queue_idx);
+    iosched(&s->iosched, s->ioqueue.queue, &s->ioqueue.queue_idx, merge_request);
 
     /* Submit requests, if any */
     int rc = ioq_submit(&s->ioqueue);
@@ -298,7 +373,7 @@  static void data_plane_start(VirtIOBlock *s)
 
     /* Set up guest notifier (irq) */
     if (s->vdev.binding->set_guest_notifiers(s->vdev.binding_opaque, true) != 0) {
-        fprintf(stderr, "virtio-blk failed to set guest notifier, ensure -enable-kvm is set\n");
+        fprintf(stderr, "virtio-blk failed to set guest notifier\n");
         exit(1);
     }
 
@@ -306,7 +381,7 @@  static void data_plane_start(VirtIOBlock *s)
 
     /* Set up virtqueue notify */
     if (s->vdev.binding->set_host_notifier(s->vdev.binding_opaque, 0, true) != 0) {
-        fprintf(stderr, "virtio-blk failed to set host notifier\n");
+        fprintf(stderr, "virtio-blk failed to set host notifier, ensure -enable-kvm is set\n");
         exit(1);
     }
     event_poll_add(&s->event_poll, &s->notify_handler,