@@ -96,7 +96,7 @@ static int ioq_submit(IOQueue *ioq)
int rc = io_submit(ioq->io_ctx, ioq->queue_idx, ioq->queue);
if (unlikely(rc < 0)) {
unsigned int i;
- fprintf(stderr, "io_submit io_ctx=%#lx nr=%d iovecs=%p\n", (uint64_t)ioq->io_ctx, ioq->queue_idx, ioq->queue);
+ fprintf(stderr, "io_submit failed io_ctx=%#lx nr=%d iovecs=%p rc=%d\n", (uint64_t)ioq->io_ctx, ioq->queue_idx, ioq->queue, rc);
for (i = 0; i < ioq->queue_idx; i++) {
fprintf(stderr, "[%u] type=%#x fd=%d\n", i, ioq->queue[i]->aio_lio_opcode, ioq->queue[i]->aio_fildes);
}
@@ -121,7 +121,6 @@ static int ioq_run_completion(IOQueue *ioq, IOQueueCompletion *completion, void
ssize_t ret = ((uint64_t)events[i].res2 << 32) | events[i].res;
completion(events[i].obj, ret, opaque);
- ioq_put_iocb(ioq, events[i].obj);
}
return nevents;
}
@@ -9,6 +9,8 @@ typedef struct {
unsigned long sched_calls;
} IOSched;
+typedef void MergeFunc(struct iocb *a, struct iocb *b);
+
static int iocb_cmp(const void *a, const void *b)
{
const struct iocb *iocb_a = a;
@@ -29,10 +31,10 @@ static int iocb_cmp(const void *a, const void *b)
static size_t iocb_nbytes(struct iocb *iocb)
{
- struct iovec *iov = iocb->u.c.buf;
+ const struct iovec *iov = iocb->u.v.vec;
size_t nbytes = 0;
size_t i;
- for (i = 0; i < iocb->u.c.nbytes; i++) {
+ for (i = 0; i < iocb->u.v.nr; i++) {
nbytes += iov->iov_len;
iov++;
}
@@ -44,35 +46,52 @@ static void iosched_init(IOSched *iosched)
memset(iosched, 0, sizeof *iosched);
}
-static void iosched_print_stats(IOSched *iosched)
+static __attribute__((unused)) void iosched_print_stats(IOSched *iosched)
{
fprintf(stderr, "iocbs = %lu merges = %lu sched_calls = %lu\n",
iosched->iocbs, iosched->merges, iosched->sched_calls);
memset(iosched, 0, sizeof *iosched);
}
-static void iosched(IOSched *iosched, struct iocb *unsorted[], unsigned int count)
+static void iosched(IOSched *iosched, struct iocb *unsorted[], unsigned int *count, MergeFunc merge_func)
{
- struct iocb *sorted[count];
- struct iocb *last;
- unsigned int i;
+ struct iocb *sorted[*count];
+ unsigned int merges = 0;
+ unsigned int i, j;
+ /*
if ((++iosched->sched_calls % 1000) == 0) {
iosched_print_stats(iosched);
}
+ */
+
+ if (!*count) {
+ return;
+ }
memcpy(sorted, unsorted, sizeof sorted);
- qsort(sorted, count, sizeof sorted[0], iocb_cmp);
-
- iosched->iocbs += count;
- last = sorted[0];
- for (i = 1; i < count; i++) {
- if (last->aio_lio_opcode == sorted[i]->aio_lio_opcode &&
- last->u.c.offset + iocb_nbytes(last) == sorted[i]->u.c.offset) {
- iosched->merges++;
+ qsort(sorted, *count, sizeof sorted[0], iocb_cmp);
+
+ unsorted[0] = sorted[0];
+ j = 1;
+ for (i = 1; i < *count; i++) {
+ struct iocb *last = sorted[i - 1];
+ struct iocb *cur = sorted[i];
+
+ if (last->aio_lio_opcode == cur->aio_lio_opcode &&
+ last->u.c.offset + iocb_nbytes(last) == cur->u.c.offset) {
+ merge_func(last, cur);
+ merges++;
+
+ unsorted[j - 1] = cur;
+ } else {
+ unsorted[j++] = cur;
}
- last = sorted[i];
}
+
+ iosched->merges += merges;
+ iosched->iocbs += *count;
+ *count = j;
}
#endif /* IOSCHED_H */
@@ -29,7 +29,7 @@ static inline void *phys_to_host(Vring *vring, target_phys_addr_t phys)
if (phys >= 0x100000000) {
phys -= 0x100000000 - 0xe0000000;
} else if (phys >= 0xe0000000) {
- fprintf(stderr, "phys_to_host bad physical address in PCI range %#lx\n", phys);
+ fprintf(stderr, "phys_to_host bad physical address in PCI range %#lx\n", (unsigned long)phys);
exit(1);
}
return vring->phys_mem_zero_host_ptr + phys;
@@ -65,7 +65,7 @@ static void vring_setup(Vring *vring, VirtIODevice *vdev, int n)
vring->last_used_idx = 0;
fprintf(stderr, "vring physical=%#lx desc=%p avail=%p used=%p\n",
- virtio_queue_get_ring_addr(vdev, n),
+ (unsigned long)virtio_queue_get_ring_addr(vdev, n),
vring->vr.desc, vring->vr.avail, vring->vr.used);
}
@@ -33,11 +33,29 @@ enum {
* VRING_MAX with indirect descriptors */
};
-typedef struct {
+/** I/O request
+ *
+ * Most I/O requests need to know the vring index (head) and the completion
+ * status byte that must be filled in to tell the guest whether or not the
+ * request succeeded.
+ *
+ * The iovec array pointed to by the iocb is valid only before ioq_submit() is
+ * called. After that, neither the kernel nor userspace needs to access the
+ * iovec anymore and the memory is no longer owned by this VirtIOBlockRequest.
+ *
+ * Requests can be merged together by the I/O scheduler. When this happens,
+ * the next_merged field is used to link the requests and only the first
+ * request's iocb is used. Merged requests require memory allocation for the
+ * iovec array and must be freed appropriately.
+ */
+typedef struct VirtIOBlockRequest VirtIOBlockRequest;
+struct VirtIOBlockRequest {
struct iocb iocb; /* Linux AIO control block */
unsigned char *status; /* virtio block status code */
unsigned int head; /* vring descriptor index */
-} VirtIOBlockRequest;
+ int len; /* number of I/O bytes, only used for merged reqs */
+ VirtIOBlockRequest *next_merged;/* next merged iocb or NULL */
+};
typedef struct {
VirtIODevice vdev;
@@ -93,15 +111,17 @@ static void virtio_blk_notify_guest(VirtIOBlock *s)
event_notifier_set(virtio_queue_get_guest_notifier(s->vq));
}
-static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
+static void complete_one_request(VirtIOBlockRequest *req, VirtIOBlock *s, ssize_t ret)
{
- VirtIOBlock *s = opaque;
- VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
int len;
if (likely(ret >= 0)) {
*req->status = VIRTIO_BLK_S_OK;
- len = ret;
+
+ /* Merged requests know their part of the length, single requests can
+ * just use the return value.
+ */
+ len = unlikely(req->len) ? req->len : ret;
} else {
*req->status = VIRTIO_BLK_S_IOERR;
len = 0;
@@ -114,6 +134,59 @@ static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
vring_push(&s->vring, req->head, len + sizeof req->status);
}
+static bool is_request_merged(VirtIOBlockRequest *req)
+{
+ return req->next_merged;
+}
+
+static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
+{
+ VirtIOBlock *s = opaque;
+ VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
+
+ /* Free the iovec now, it isn't needed */
+ if (unlikely(is_request_merged(req))) {
+ g_free((void*)iocb->u.v.vec);
+ }
+
+ while (req) {
+ complete_one_request(req, s, ret);
+
+ VirtIOBlockRequest *next = req->next_merged;
+ ioq_put_iocb(&s->ioqueue, &req->iocb);
+ req = next;
+ }
+}
+
+static void merge_request(struct iocb *iocb_a, struct iocb *iocb_b)
+{
+ /* Repeated merging could be made more efficient using realloc, but this
+ * approach keeps it simple. */
+
+ VirtIOBlockRequest *req_a = container_of(iocb_a, VirtIOBlockRequest, iocb);
+ VirtIOBlockRequest *req_b = container_of(iocb_b, VirtIOBlockRequest, iocb);
+ struct iovec *iovec = g_malloc((iocb_a->u.v.nr + iocb_b->u.v.nr) * sizeof iovec[0]);
+
+ memcpy(iovec, iocb_a->u.v.vec, iocb_a->u.v.nr * sizeof iovec[0]);
+ memcpy(iovec + iocb_a->u.v.nr, iocb_b->u.v.vec, iocb_b->u.v.nr * sizeof iovec[0]);
+
+ if (is_request_merged(req_a)) {
+ /* Free the old merged iovec */
+ g_free((void*)iocb_a->u.v.vec);
+ } else {
+ /* Stash the request length */
+ req_a->len = iocb_nbytes(iocb_a);
+ }
+
+ iocb_b->u.v.vec = iovec;
+ req_b->len = iocb_nbytes(iocb_b);
+ req_b->next_merged = req_a;
+ /*
+ fprintf(stderr, "merged %p (%u) and %p (%u), %u iovecs in total\n",
+ req_a, iocb_a->u.v.nr, req_b, iocb_b->u.v.nr, iocb_a->u.v.nr + iocb_b->u.v.nr);
+ */
+}
+
static void process_request(IOQueue *ioq, struct iovec iov[], unsigned int out_num, unsigned int in_num, unsigned int head)
{
/* Virtio block requests look like this: */
@@ -187,6 +260,8 @@ static void process_request(IOQueue *ioq, struct iovec iov[], unsigned int out_n
VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
req->head = head;
req->status = &inhdr->status;
+ req->len = 0;
+ req->next_merged = NULL;
}
static bool handle_notify(EventHandler *handler)
@@ -251,7 +326,7 @@ static bool handle_notify(EventHandler *handler)
}
}
- iosched(&s->iosched, s->ioqueue.queue, s->ioqueue.queue_idx);
+ iosched(&s->iosched, s->ioqueue.queue, &s->ioqueue.queue_idx, merge_request);
/* Submit requests, if any */
int rc = ioq_submit(&s->ioqueue);
@@ -298,7 +373,7 @@ static void data_plane_start(VirtIOBlock *s)
/* Set up guest notifier (irq) */
if (s->vdev.binding->set_guest_notifiers(s->vdev.binding_opaque, true) != 0) {
- fprintf(stderr, "virtio-blk failed to set guest notifier, ensure -enable-kvm is set\n");
+ fprintf(stderr, "virtio-blk failed to set guest notifier\n");
exit(1);
}
@@ -306,7 +381,7 @@ static void data_plane_start(VirtIOBlock *s)
/* Set up virtqueue notify */
if (s->vdev.binding->set_host_notifier(s->vdev.binding_opaque, 0, true) != 0) {
- fprintf(stderr, "virtio-blk failed to set host notifier\n");
+ fprintf(stderr, "virtio-blk failed to set host notifier, ensure -enable-kvm is set\n");
exit(1);
}
event_poll_add(&s->event_poll, &s->notify_handler,