@@ -77,10 +77,12 @@ struct virt_queue {
u16 endian;
bool use_event_idx;
bool enabled;
+ struct virtio_device *vdev;
/* vhost IRQ handling */
int gsi;
int irqfd;
+ int index;
};
/*
@@ -267,8 +269,8 @@ void virtio_vhost_set_vring(struct kvm *kvm, int vhost_fd, u32 index,
struct virt_queue *queue);
void virtio_vhost_set_vring_kick(struct kvm *kvm, int vhost_fd,
u32 index, int event_fd);
-void virtio_vhost_set_vring_call(struct kvm *kvm, int vhost_fd, u32 index,
- u32 gsi, struct virt_queue *queue);
+void virtio_vhost_set_vring_irqfd(struct kvm *kvm, u32 gsi,
+ struct virt_queue *queue);
void virtio_vhost_reset_vring(struct kvm *kvm, int vhost_fd, u32 index,
struct virt_queue *queue);
@@ -198,6 +198,7 @@ void virtio_init_device_vq(struct kvm *kvm, struct virtio_device *vdev,
vq->endian = vdev->endian;
vq->use_event_idx = (vdev->features & (1UL << VIRTIO_RING_F_EVENT_IDX));
vq->enabled = true;
+ vq->vdev = vdev;
if (addr->legacy) {
unsigned long base = (u64)addr->pfn * addr->pgsize;
@@ -674,8 +674,7 @@ static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
if (ndev->vhost_fd == 0 || is_ctrl_vq(ndev, vq))
return;
- virtio_vhost_set_vring_call(kvm, ndev->vhost_fd, vq, gsi,
- &queue->vq);
+ virtio_vhost_set_vring_irqfd(kvm, gsi, &queue->vq);
}
static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd)
@@ -119,8 +119,7 @@ static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
if (sdev->vhost_fd == 0)
return;
- virtio_vhost_set_vring_call(kvm, sdev->vhost_fd, vq, gsi,
- &sdev->vqs[vq]);
+ virtio_vhost_set_vring_irqfd(kvm, gsi, &sdev->vqs[vq]);
}
static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd)
@@ -1,5 +1,6 @@
#include "kvm/irq.h"
#include "kvm/virtio.h"
+#include "kvm/epoll.h"
#include <linux/kvm.h>
#include <linux/vhost.h>
@@ -7,12 +8,52 @@
#include <sys/eventfd.h>
+static struct kvm__epoll epoll; /* shared by all vhost queues; set up by virtio_vhost_start_poll() */
+
+/*
+ * Handler passed to epoll__init() for the vhost polling instance.
+ *
+ * @ev->data.ptr is the virt_queue whose call eventfd was registered with
+ * EPOLLIN. The eventfd is read first, then the device's signal_vq() op is
+ * invoked for that queue. A failure of either step is only logged.
+ */
+static void virtio_vhost_signal_vq(struct kvm *kvm, struct epoll_event *ev)
+{
+	struct virt_queue *vq = ev->data.ptr;
+	u64 counter;
+
+	/* Consume the pending eventfd value before notifying the guest */
+	if (read(vq->irqfd, &counter, sizeof(counter)) < 0)
+		pr_warning("%s: failed to read eventfd", __func__);
+
+	if (vq->vdev->ops->signal_vq(kvm, vq->vdev, vq->index) != 0)
+		pr_warning("%s failed to signal virtqueue", __func__);
+}
+
+/*
+ * Initialize the file-scope epoll instance used to poll vhost call eventfds,
+ * with virtio_vhost_signal_vq() as its event handler.
+ *
+ * Nothing is done when epoll.fd is already non-zero. Returns 0 on success and
+ * -1 when epoll__init() reports a failure.
+ */
+static int virtio_vhost_start_poll(struct kvm *kvm)
+{
+	/* A non-zero fd is taken to mean the instance already exists */
+	if (epoll.fd)
+		return 0;
+
+	return epoll__init(kvm, &epoll, "vhost-irq-worker",
+			   virtio_vhost_signal_vq) ? -1 : 0;
+}
+
+/*
+ * Exit hook registered with base_exit(): calls epoll__exit() on the vhost
+ * epoll instance when epoll.fd is non-zero. Always returns 0.
+ */
+static int virtio_vhost_stop_poll(struct kvm *kvm)
+{
+	if (!epoll.fd)
+		return 0;
+
+	epoll__exit(&epoll);
+	return 0;
+}
+base_exit(virtio_vhost_stop_poll);
+
void virtio_vhost_init(struct kvm *kvm, int vhost_fd)
{
struct kvm_mem_bank *bank;
struct vhost_memory *mem;
int i = 0, r;
+ r = virtio_vhost_start_poll(kvm);
+ if (r)
+ die("Unable to start vhost polling thread\n");
+
mem = calloc(1, sizeof(*mem) +
kvm->mem_slots * sizeof(struct vhost_memory_region));
if (mem == NULL)
@@ -39,6 +80,16 @@ void virtio_vhost_init(struct kvm *kvm, int vhost_fd)
free(mem);
}
+/*
+ * Return the eventfd stored in @queue->irqfd, creating it with eventfd(0, 0)
+ * on first use. A zero queue->irqfd is treated as "not created yet".
+ * Calls die_perror() if eventfd() fails.
+ */
+static int virtio_vhost_get_irqfd(struct virt_queue *queue)
+{
+	/* Already created by an earlier call: hand back the same fd */
+	if (queue->irqfd)
+		return queue->irqfd;
+
+	queue->irqfd = eventfd(0, 0);
+	if (queue->irqfd < 0)
+		die_perror("eventfd()");
+
+	return queue->irqfd;
+}
+
void virtio_vhost_set_vring(struct kvm *kvm, int vhost_fd, u32 index,
struct virt_queue *queue)
{
@@ -50,6 +101,16 @@ void virtio_vhost_set_vring(struct kvm *kvm, int vhost_fd, u32 index,
.used_user_addr = (u64)(unsigned long)queue->vring.used,
};
struct vhost_vring_state state = { .index = index };
+ struct vhost_vring_file file = {
+ .index = index,
+ .fd = virtio_vhost_get_irqfd(queue),
+ };
+ struct epoll_event event = {
+ .events = EPOLLIN,
+ .data.ptr = queue,
+ };
+
+ queue->index = index;
if (queue->endian != VIRTIO_ENDIAN_HOST)
die("VHOST requires the same endianness in guest and host");
@@ -67,6 +128,14 @@ void virtio_vhost_set_vring(struct kvm *kvm, int vhost_fd, u32 index,
r = ioctl(vhost_fd, VHOST_SET_VRING_ADDR, &addr);
if (r < 0)
die_perror("VHOST_SET_VRING_ADDR failed");
+
+ r = ioctl(vhost_fd, VHOST_SET_VRING_CALL, &file);
+ if (r < 0)
+ die_perror("VHOST_SET_VRING_CALL failed");
+
+ r = epoll_ctl(epoll.fd, EPOLL_CTL_ADD, file.fd, &event);
+ if (r < 0)
+ die_perror("EPOLL_CTL_ADD vhost call fd");
}
void virtio_vhost_set_vring_kick(struct kvm *kvm, int vhost_fd,
@@ -83,24 +152,23 @@ void virtio_vhost_set_vring_kick(struct kvm *kvm, int vhost_fd,
die_perror("VHOST_SET_VRING_KICK failed");
}
-void virtio_vhost_set_vring_call(struct kvm *kvm, int vhost_fd, u32 index,
- u32 gsi, struct virt_queue *queue)
+void virtio_vhost_set_vring_irqfd(struct kvm *kvm, u32 gsi,
+ struct virt_queue *queue)
{
int r;
- struct vhost_vring_file file = {
- .index = index,
- .fd = eventfd(0, 0),
- };
+ int fd = virtio_vhost_get_irqfd(queue);
- r = irq__add_irqfd(kvm, gsi, file.fd, -1);
+ if (queue->gsi)
+ irq__del_irqfd(kvm, queue->gsi, fd);
+ else
+ /*
+  * Disconnect user polling thread: no GSI is recorded for this
+  * queue yet, so instead of dropping an old irqfd route the fd is
+  * removed from the epoll set (virtio_vhost_set_vring adds it
+  * there). The epoll_ctl() result is not checked.
+  */
+ epoll_ctl(epoll.fd, EPOLL_CTL_DEL, fd, NULL);
+
+ /*
+  * Connect the direct IRQFD route: the queue's eventfd is handed to
+  * irq__add_irqfd() for @gsi, and @gsi is remembered in queue->gsi
+  * below so a later call (or reset) can remove the route again.
+  */
+ r = irq__add_irqfd(kvm, gsi, fd, -1);
if (r < 0)
die_perror("KVM_IRQFD failed");
- r = ioctl(vhost_fd, VHOST_SET_VRING_CALL, &file);
- if (r < 0)
- die_perror("VHOST_SET_VRING_CALL failed");
-
- queue->irqfd = file.fd;
queue->gsi = gsi;
}
@@ -108,9 +176,23 @@ void virtio_vhost_reset_vring(struct kvm *kvm, int vhost_fd, u32 index,
struct virt_queue *queue)
{
+ struct vhost_vring_file file = {
+ .index = index,
+ .fd = -1,
+ };
+
+ if (!queue->irqfd)
+ return;
+
if (queue->gsi) {
irq__del_irqfd(kvm, queue->gsi, queue->irqfd);
- close(queue->irqfd);
- queue->gsi = queue->irqfd = 0;
+ queue->gsi = 0;
}
+
+ epoll_ctl(epoll.fd, EPOLL_CTL_DEL, queue->irqfd, NULL);
+
+ if (ioctl(vhost_fd, VHOST_SET_VRING_CALL, &file))
+ perror("SET_VRING_CALL");
+ close(queue->irqfd);
+ queue->irqfd = 0;
}
@@ -151,8 +151,7 @@ static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
if (vdev->vhost_fd == -1 || is_event_vq(vq))
return;
- virtio_vhost_set_vring_call(kvm, vdev->vhost_fd, vq, gsi,
- &vdev->vqs[vq]);
+ virtio_vhost_set_vring_irqfd(kvm, gsi, &vdev->vqs[vq]);
}
static unsigned int get_vq_count(struct kvm *kvm, void *dev)
To signal a virtqueue, a kernel vhost worker writes to an eventfd registered by kvmtool with VHOST_SET_VRING_CALL. When MSIs are supported, this eventfd is connected directly to KVM IRQFD to inject the interrupt into the guest. However, direct injection does not work when MSIs are not supported. The virtio-mmio transport does not support MSIs at all, and even with virtio-pci, the guest may use INTx if the irqchip does not support MSIs (e.g. irqchip=gicv3 on arm64). In this case, injecting the interrupt requires writing an ISR register in virtio to signal that it is a virtqueue notification rather than a config change. Add a thread that polls the vhost eventfd for interrupts and notifies the guest. When the guest configures MSIs, disable polling on the eventfd and enable direct injection. Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org> --- include/kvm/virtio.h | 6 ++- virtio/core.c | 1 + virtio/net.c | 3 +- virtio/scsi.c | 3 +- virtio/vhost.c | 110 +++++++++++++++++++++++++++++++++++++------ virtio/vsock.c | 3 +- 6 files changed, 104 insertions(+), 22 deletions(-)