diff mbox

[RFC,v9,07/27] virtio-blk: Put dataplane code into its own directory

Message ID 1342624074-24650-8-git-send-email-stefanha@linux.vnet.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Stefan Hajnoczi July 18, 2012, 3:07 p.m. UTC
Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 hw/dataplane/event-poll.h |   79 +++++++++++++++++++
 hw/dataplane/vring.h      |  191 +++++++++++++++++++++++++++++++++++++++++++++
 hw/virtio-blk.c           |  149 ++++++++---------------------------
 3 files changed, 304 insertions(+), 115 deletions(-)
 create mode 100644 hw/dataplane/event-poll.h
 create mode 100644 hw/dataplane/vring.h
diff mbox

Patch

diff --git a/hw/dataplane/event-poll.h b/hw/dataplane/event-poll.h
new file mode 100644
index 0000000..f38e969
--- /dev/null
+++ b/hw/dataplane/event-poll.h
@@ -0,0 +1,79 @@ 
+#ifndef EVENT_POLL_H
+#define EVENT_POLL_H
+
+#include <sys/epoll.h>
+#include "event_notifier.h"
+
+typedef struct EventHandler EventHandler;
+typedef void EventCallback(EventHandler *handler);
+struct EventHandler
+{
+    EventNotifier *notifier;    /* eventfd */
+    EventCallback *callback;    /* callback function */
+};
+
+typedef struct {
+    int epoll_fd;               /* epoll(2) file descriptor */
+} EventPoll;
+
+static void event_poll_init(EventPoll *poll)
+{
+    /* Create epoll file descriptor */
+    poll->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+    if (poll->epoll_fd < 0) {
+        fprintf(stderr, "epoll_create1 failed: %m\n");
+        exit(1);
+    }
+}
+
+static void event_poll_cleanup(EventPoll *poll)
+{
+    close(poll->epoll_fd);
+    poll->epoll_fd = -1;
+}
+
+/* Add an event notifier and its callback for polling */
+static void event_poll_add(EventPoll *poll, EventHandler *handler, EventNotifier *notifier, EventCallback *callback)
+{
+    struct epoll_event event = {
+        .events = EPOLLIN,
+        .data.ptr = handler,
+    };
+    handler->notifier = notifier;
+    handler->callback = callback;
+    if (epoll_ctl(poll->epoll_fd, EPOLL_CTL_ADD, event_notifier_get_fd(notifier), &event) != 0) {
+        fprintf(stderr, "failed to add event handler to epoll: %m\n");
+        exit(1);
+    }
+}
+
+/* Block until the next event and invoke its callback
+ *
+ * Signals must be masked, EINTR should never happen.  This is true for QEMU
+ * threads.
+ */
+static void event_poll(EventPoll *poll)
+{
+    EventHandler *handler;
+    struct epoll_event event;
+    int nevents;
+
+    /* Wait for the next event.  Only do one event per call to keep the
+     * function simple, this could be changed later. */
+    nevents = epoll_wait(poll->epoll_fd, &event, 1, -1);
+    if (unlikely(nevents != 1)) {
+        fprintf(stderr, "epoll_wait failed: %m\n");
+        exit(1); /* should never happen */
+    }
+
+    /* Find out which event handler has become active */
+    handler = event.data.ptr;
+
+    /* Clear the eventfd */
+    event_notifier_test_and_clear(handler->notifier);
+
+    /* Handle the event */
+    handler->callback(handler);
+}
+
+#endif /* EVENT_POLL_H */
diff --git a/hw/dataplane/vring.h b/hw/dataplane/vring.h
new file mode 100644
index 0000000..7099a99
--- /dev/null
+++ b/hw/dataplane/vring.h
@@ -0,0 +1,191 @@ 
+#ifndef VRING_H
+#define VRING_H
+
+#include <linux/virtio_ring.h>
+#include "qemu-common.h"
+
+typedef struct {
+    void *phys_mem_zero_host_ptr;   /* host pointer to guest RAM */
+    struct vring vr;                /* virtqueue vring mapped to host memory */
+    __u16 last_avail_idx;           /* last processed avail ring index */
+    __u16 last_used_idx;            /* last processed used ring index */
+} Vring;
+
+static inline unsigned int vring_get_num(Vring *vring)
+{
+    return vring->vr.num;
+}
+
+/* Map target physical address to host address
+ */
+static inline void *phys_to_host(Vring *vring, target_phys_addr_t phys)
+{
+    /* Adjust for 3.6-4 GB PCI memory range */
+    if (phys >= 0x100000000) {
+        phys -= 0x100000000 - 0xe0000000;
+    } else if (phys >= 0xe0000000) {
+        fprintf(stderr, "phys_to_host bad physical address in PCI range %#lx\n", phys);
+        exit(1);
+    }
+    return vring->phys_mem_zero_host_ptr + phys;
+}
+
+/* Setup for cheap target physical to host address conversion
+ *
+ * This is a hack for direct access to guest memory, we're not really allowed
+ * to do this.
+ */
+static void setup_phys_to_host(Vring *vring)
+{
+    target_phys_addr_t len = 4096; /* RAM is really much larger but we cheat */
+    vring->phys_mem_zero_host_ptr = cpu_physical_memory_map(0, &len, 0);
+    if (!vring->phys_mem_zero_host_ptr) {
+        fprintf(stderr, "setup_phys_to_host failed\n");
+        exit(1);
+    }
+}
+
+/* Map the guest's vring to host memory
+ *
+ * This is not allowed but we know the ring won't move.
+ */
+static void vring_setup(Vring *vring, VirtIODevice *vdev, int n)
+{
+    setup_phys_to_host(vring);
+
+    vring_init(&vring->vr, virtio_queue_get_num(vdev, n),
+               phys_to_host(vring, virtio_queue_get_ring_addr(vdev, n)), 4096);
+
+    vring->last_avail_idx = vring->vr.avail->idx;
+    vring->last_used_idx = vring->vr.used->idx;
+
+    fprintf(stderr, "vring physical=%#lx desc=%p avail=%p used=%p\n",
+            virtio_queue_get_ring_addr(vdev, n),
+            vring->vr.desc, vring->vr.avail, vring->vr.used);
+}
+
+/* This looks in the virtqueue and for the first available buffer, and converts
+ * it to an iovec for convenient access.  Since descriptors consist of some
+ * number of output then some number of input descriptors, it's actually two
+ * iovecs, but we pack them into one and note how many of each there were.
+ *
+ * This function returns the descriptor number found, or vq->num (which is
+ * never a valid descriptor number) if none was found.  A negative code is
+ * returned on error.
+ *
+ * Stolen from linux-2.6/drivers/vhost/vhost.c.
+ */
+static unsigned int vring_pop(Vring *vring,
+		      struct iovec iov[], unsigned int iov_size,
+		      unsigned int *out_num, unsigned int *in_num)
+{
+	struct vring_desc desc;
+	unsigned int i, head, found = 0, num = vring->vr.num;
+    __u16 avail_idx, last_avail_idx;
+
+	/* Check it isn't doing very strange things with descriptor numbers. */
+	last_avail_idx = vring->last_avail_idx;
+    avail_idx = vring->vr.avail->idx;
+
+	if (unlikely((__u16)(avail_idx - last_avail_idx) > num)) {
+		fprintf(stderr, "Guest moved used index from %u to %u\n",
+		        last_avail_idx, avail_idx);
+		exit(1);
+	}
+
+	/* If there's nothing new since last we looked, return invalid. */
+	if (avail_idx == last_avail_idx)
+		return num;
+
+	/* Only get avail ring entries after they have been exposed by guest. */
+	__sync_synchronize(); /* smp_rmb() */
+
+	/* Grab the next descriptor number they're advertising, and increment
+	 * the index we've seen. */
+	head = vring->vr.avail->ring[last_avail_idx % num];
+
+	/* If their number is silly, that's an error. */
+	if (unlikely(head >= num)) {
+		fprintf(stderr, "Guest says index %u > %u is available\n",
+		        head, num);
+		exit(1);
+	}
+
+	/* When we start there are none of either input nor output. */
+	*out_num = *in_num = 0;
+
+	i = head;
+	do {
+		if (unlikely(i >= num)) {
+			fprintf(stderr, "Desc index is %u > %u, head = %u\n",
+			        i, num, head);
+            exit(1);
+		}
+		if (unlikely(++found > num)) {
+			fprintf(stderr, "Loop detected: last one at %u "
+			        "vq size %u head %u\n",
+			        i, num, head);
+            exit(1);
+		}
+        desc = vring->vr.desc[i];
+		if (desc.flags & VRING_DESC_F_INDIRECT) {
+/*			ret = get_indirect(dev, vq, iov, iov_size,
+					   out_num, in_num,
+					   log, log_num, &desc);
+			if (unlikely(ret < 0)) {
+				vq_err(vq, "Failure detected "
+				       "in indirect descriptor at idx %d\n", i);
+				return ret;
+			}
+			continue; */
+            fprintf(stderr, "virtio-blk indirect vring not supported\n");
+            exit(1);
+		}
+
+        iov->iov_base = phys_to_host(vring, desc.addr);
+        iov->iov_len  = desc.len;
+        iov++;
+
+		if (desc.flags & VRING_DESC_F_WRITE) {
+			/* If this is an input descriptor,
+			 * increment that count. */
+			*in_num += 1;
+		} else {
+			/* If it's an output descriptor, they're all supposed
+			 * to come before any input descriptors. */
+			if (unlikely(*in_num)) {
+				fprintf(stderr, "Descriptor has out after in: "
+				        "idx %d\n", i);
+                exit(1);
+			}
+			*out_num += 1;
+		}
+        i = desc.next;
+	} while (desc.flags & VRING_DESC_F_NEXT);
+
+	/* On success, increment avail index. */
+	vring->last_avail_idx++;
+	return head;
+}
+
+/* After we've used one of their buffers, we tell them about it.
+ *
+ * Stolen from linux-2.6/drivers/vhost/vhost.c.
+ */
+static __attribute__((unused)) void vring_push(Vring *vring, unsigned int head, int len)
+{
+	struct vring_used_elem *used;
+
+	/* The virtqueue contains a ring of used buffers.  Get a pointer to the
+	 * next entry in that used ring. */
+	used = &vring->vr.used->ring[vring->last_used_idx % vring->vr.num];
+    used->id = head;
+    used->len = len;
+
+	/* Make sure buffer is written before we update index. */
+	__sync_synchronize(); /* smp_wmb() */
+
+    vring->vr.used->idx = ++vring->last_used_idx;
+}
+
+#endif /* VRING_H */
diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index 99654f1..2c1cce8 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -11,26 +11,21 @@ 
  *
  */
 
-#include <sys/epoll.h>
-#include <sys/eventfd.h>
 #include <libaio.h>
-#include <linux/virtio_ring.h>
 #include "qemu-common.h"
 #include "qemu-thread.h"
 #include "qemu-error.h"
 #include "blockdev.h"
 #include "virtio-blk.h"
+#include "hw/dataplane/event-poll.h"
+#include "hw/dataplane/vring.h"
+#include "kvm.h"
 
 enum {
-    SEG_MAX = 126, /* maximum number of I/O segments */
+    SEG_MAX = 126,                  /* maximum number of I/O segments */
+    VRING_MAX = SEG_MAX + 2,        /* maximum number of vring descriptors */
 };
 
-typedef struct
-{
-    EventNotifier *notifier;        /* eventfd */
-    void (*handler)(void);          /* handler function */
-} EventHandler;
-
 typedef struct VirtIOBlock
 {
     VirtIODevice vdev;
@@ -44,15 +39,13 @@  typedef struct VirtIOBlock
     bool data_plane_started;
     QemuThread data_plane_thread;
 
-    struct vring vring;
+    Vring vring;                    /* virtqueue vring */
 
-    int epoll_fd;                   /* epoll(2) file descriptor */
+    EventPoll event_poll;           /* event poller */
     io_context_t io_ctx;            /* Linux AIO context */
     EventNotifier io_notifier;      /* Linux AIO eventfd */
     EventHandler io_handler;        /* Linux AIO completion handler */
     EventHandler notify_handler;    /* virtqueue notify handler */
-
-    void *phys_mem_zero_host_ptr;   /* host pointer to guest RAM */
 } VirtIOBlock;
 
 static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
@@ -60,138 +53,64 @@  static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
     return (VirtIOBlock *)vdev;
 }
 
-/* Map target physical address to host address
- */
-static inline void *phys_to_host(VirtIOBlock *s, target_phys_addr_t phys)
+static void handle_io(EventHandler *handler)
 {
-    /* Adjust for 3.6-4 GB PCI memory range */
-    if (phys >= 0x100000000) {
-        phys -= 0x100000000 - 0xe0000000;
-    } else if (phys >= 0xe0000000) {
-        fprintf(stderr, "phys_to_host bad physical address in PCI range %#lx\n", phys);
-        exit(1);
-    }
-    return s->phys_mem_zero_host_ptr + phys;
+    fprintf(stderr, "io completion happened\n");
 }
 
-/* Setup for cheap target physical to host address conversion
- *
- * This is a hack for direct access to guest memory, we're not really allowed
- * to do this.
- */
-static void setup_phys_to_host(VirtIOBlock *s)
+static void handle_notify(EventHandler *handler)
 {
-    target_phys_addr_t len = 4096; /* RAM is really much larger but we cheat */
-    s->phys_mem_zero_host_ptr = cpu_physical_memory_map(0, &len, 0);
-    if (!s->phys_mem_zero_host_ptr) {
-        fprintf(stderr, "setup_phys_to_host failed\n");
-        exit(1);
+    VirtIOBlock *s = container_of(handler, VirtIOBlock, notify_handler);
+    struct iovec iov[VRING_MAX];
+    unsigned int out_num, in_num;
+    int head;
+
+    head = vring_pop(&s->vring, iov, ARRAY_SIZE(iov), &out_num, &in_num);
+    if (unlikely(head >= vring_get_num(&s->vring))) {
+        fprintf(stderr, "false alarm, nothing on vring\n");
+        return;
     }
-}
 
-/* Map the guest's vring to host memory
- *
- * This is not allowed but we know the ring won't move.
- */
-static void map_vring(struct vring *vring, VirtIOBlock *s, VirtIODevice *vdev, int n)
-{
-    vring->num = virtio_queue_get_num(vdev, n);
-    vring->desc = phys_to_host(s, virtio_queue_get_desc_addr(vdev, n));
-    vring->avail = phys_to_host(s, virtio_queue_get_avail_addr(vdev, n));
-    vring->used = phys_to_host(s, virtio_queue_get_used_addr(vdev, n));
-
-    fprintf(stderr, "virtio-blk vring physical=%#lx desc=%p avail=%p used=%p\n",
-            virtio_queue_get_ring_addr(vdev, n),
-            vring->desc, vring->avail, vring->used);
-}
-
-static void handle_io(void)
-{
-    fprintf(stderr, "io completion happened\n");
-}
-
-static void handle_notify(void)
-{
-    fprintf(stderr, "virtqueue notify happened\n");
+    fprintf(stderr, "head=%u out_num=%u in_num=%u\n", head, out_num, in_num);
 }
 
 static void *data_plane_thread(void *opaque)
 {
     VirtIOBlock *s = opaque;
-    struct epoll_event event;
-    int nevents;
-    EventHandler *event_handler;
-
-    /* Signals are masked, EINTR should never happen */
 
     for (;;) {
-        /* Wait for the next event.  Only do one event per call to keep the
-         * function simple, this could be changed later. */
-        nevents = epoll_wait(s->epoll_fd, &event, 1, -1);
-        if (unlikely(nevents != 1)) {
-            fprintf(stderr, "epoll_wait failed: %m\n");
-            continue; /* should never happen */
-        }
-
-        /* Find out which event handler has become active */
-        event_handler = event.data.ptr;
-
-        /* Clear the eventfd */
-        event_notifier_test_and_clear(event_handler->notifier);
-
-        /* Handle the event */
-        event_handler->handler();
+        event_poll(&s->event_poll);
     }
     return NULL;
 }
 
-static void add_event_handler(int epoll_fd, EventHandler *event_handler)
-{
-    struct epoll_event event = {
-        .events = EPOLLIN,
-        .data.ptr = event_handler,
-    };
-    if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, event_notifier_get_fd(event_handler->notifier), &event) != 0) {
-        fprintf(stderr, "virtio-blk failed to add event handler to epoll: %m\n");
-        exit(1);
-    }
-}
-
 static void data_plane_start(VirtIOBlock *s)
 {
-    setup_phys_to_host(s);
-    map_vring(&s->vring, s, &s->vdev, 0);
-
-    /* Create epoll file descriptor */
-    s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
-    if (s->epoll_fd < 0) {
-        fprintf(stderr, "epoll_create1 failed: %m\n");
-        return; /* TODO error handling */
-    }
+    vring_setup(&s->vring, &s->vdev, 0);
+
+    event_poll_init(&s->event_poll);
 
     if (s->vdev.binding->set_host_notifier(s->vdev.binding_opaque, 0, true) != 0) {
-        fprintf(stderr, "virtio-blk failed to set host notifier\n");
-        return; /* TODO error handling */
+        fprintf(stderr, "virtio-blk failed to set host notifier, ensure -enable-kvm is set\n");
+        exit(1);
     }
 
-    s->notify_handler.notifier = virtio_queue_get_host_notifier(s->vq),
-    s->notify_handler.handler = handle_notify;
-    add_event_handler(s->epoll_fd, &s->notify_handler);
+    event_poll_add(&s->event_poll, &s->notify_handler,
+                   virtio_queue_get_host_notifier(s->vq),
+                   handle_notify);
 
     /* Create aio context */
     if (io_setup(SEG_MAX, &s->io_ctx) != 0) {
         fprintf(stderr, "virtio-blk io_setup failed\n");
-        return; /* TODO error handling */
+        exit(1);
     }
 
     if (event_notifier_init(&s->io_notifier, 0) != 0) {
         fprintf(stderr, "virtio-blk io event notifier creation failed\n");
-        return; /* TODO error handling */
+        exit(1);
     }
 
-    s->io_handler.notifier = &s->io_notifier;
-    s->io_handler.handler = handle_io;
-    add_event_handler(s->epoll_fd, &s->io_handler);
+    event_poll_add(&s->event_poll, &s->io_handler, &s->io_notifier, handle_io);
 
     qemu_thread_create(&s->data_plane_thread, data_plane_thread, s, QEMU_THREAD_JOINABLE);
 
@@ -209,7 +128,7 @@  static void data_plane_stop(VirtIOBlock *s)
 
     s->vdev.binding->set_host_notifier(s->vdev.binding_opaque, 0, false);
 
-    close(s->epoll_fd);
+    event_poll_cleanup(&s->event_poll);
 }
 
 static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t val)
@@ -317,7 +236,7 @@  VirtIODevice *virtio_blk_init(DeviceState *dev, BlockConf *conf,
     s->sector_mask = (s->conf->logical_block_size / BDRV_SECTOR_SIZE) - 1;
     bdrv_guess_geometry(s->bs, &cylinders, &heads, &secs);
 
-    s->vq = virtio_add_queue(&s->vdev, SEG_MAX + 2, virtio_blk_handle_output);
+    s->vq = virtio_add_queue(&s->vdev, VRING_MAX, virtio_blk_handle_output);
     s->data_plane_started = false;
 
     s->qdev = dev;