@@ -160,9 +160,59 @@ static const KVMCapabilityInfo kvm_required_capabilites[] = {
static NotifierList kvm_irqchip_change_notifiers =
NOTIFIER_LIST_INITIALIZER(kvm_irqchip_change_notifiers);
+/*
+ * One resample fd tracked from userspace: ties a GSI to the
+ * EventNotifier that kvm_resample_fd_notify() sets for that GSI.
+ * Entries live on kvm_resample_fd_list.
+ */
+typedef struct KVMResampleFd {
+    /* Lookup key used by the remove/notify helpers. */
+    int gsi;
+    /* Notifier to set on notify; only the pointer is stored here. */
+    EventNotifier *resample_event;
+    /* Linkage into kvm_resample_fd_list. */
+    QLIST_ENTRY(KVMResampleFd) node;
+} KVMResampleFd;
+
+/*
+ * List of resample fds that have to be kicked from userspace.
+ *
+ * Only used with split irqchip where we need to do the resample fd
+ * kick for the kernel from userspace.  Entries are added by
+ * kvm_resample_fd_insert(), dropped by kvm_resample_fd_remove() and
+ * looked up by GSI in kvm_resample_fd_notify().
+ */
+static QLIST_HEAD(, KVMResampleFd) kvm_resample_fd_list =
+ QLIST_HEAD_INITIALIZER(kvm_resample_fd_list);
+
#define kvm_slots_lock(kml) qemu_mutex_lock(&(kml)->slots_lock)
#define kvm_slots_unlock(kml) qemu_mutex_unlock(&(kml)->slots_lock)
+/*
+ * Stop tracking the resample fd registered for @gsi.
+ *
+ * Unlinks and frees the first list entry whose GSI matches; does
+ * nothing when no entry matches.  The EventNotifier the entry points
+ * to is not touched.
+ */
+static inline void kvm_resample_fd_remove(int gsi)
+{
+    KVMResampleFd *entry;
+
+    QLIST_FOREACH(entry, &kvm_resample_fd_list, node) {
+        if (entry->gsi != gsi) {
+            continue;
+        }
+        /* Stop iterating right away: @entry is gone once freed. */
+        QLIST_REMOVE(entry, node);
+        g_free(entry);
+        return;
+    }
+}
+
+/*
+ * Start tracking @event as the resample fd for @gsi.
+ *
+ * A fresh entry is allocated and linked at the head of
+ * kvm_resample_fd_list.  Only the @event pointer is stored, and no
+ * check is made for an entry that already uses @gsi.
+ */
+static inline void kvm_resample_fd_insert(int gsi, EventNotifier *event)
+{
+    KVMResampleFd *entry = g_new0(KVMResampleFd, 1);
+
+    entry->resample_event = event;
+    entry->gsi = gsi;
+    QLIST_INSERT_HEAD(&kvm_resample_fd_list, entry, node);
+}
+
+/*
+ * Kick the resample fd tracked for @gsi.
+ *
+ * Sets the event notifier of the first list entry whose GSI matches
+ * and emits the kvm_resample_fd_notify trace event.  Does nothing if
+ * no entry matches.
+ */
+void kvm_resample_fd_notify(int gsi)
+{
+    KVMResampleFd *entry;
+
+    QLIST_FOREACH(entry, &kvm_resample_fd_list, node) {
+        if (entry->gsi != gsi) {
+            continue;
+        }
+        event_notifier_set(entry->resample_event);
+        trace_kvm_resample_fd_notify(gsi);
+        break;
+    }
+}
+
int kvm_get_max_memslots(void)
{
KVMState *s = KVM_STATE(current_accel());
@@ -1676,8 +1726,33 @@ static int kvm_irqchip_assign_irqfd(KVMState *s, EventNotifier *event,
};
if (rfd != -1) {
- irqfd.flags |= KVM_IRQFD_FLAG_RESAMPLE;
- irqfd.resamplefd = rfd;
+ assert(assign);
+ if (kvm_irqchip_is_split()) {
+ /*
+ * When the slow irqchip (e.g. IOAPIC) is in the
+ * userspace, KVM kernel resamplefd will not work because
+ * the EOI of the interrupt will be delivered to userspace
+ * instead, so the KVM kernel resamplefd kick will be
+ * skipped. The userspace here mimics what the kernel
+ * provides with resamplefd, remember the resamplefd and
+ * kick it when we receive EOI of this IRQ.
+ *
+ * This is hackery because IOAPIC is mostly bypassed
+ * (except EOI broadcasts) when irqfd is used. However
+ * this can bring much performance back for split irqchip
+ * with INTx IRQs (for VFIO, this gives 93% perf of the
+ * full fast path, which is a 46% perf boost compared to
+ * the INTx slow path).
+ */
+ kvm_resample_fd_insert(virq, resample);
+ } else {
+ irqfd.flags |= KVM_IRQFD_FLAG_RESAMPLE;
+ irqfd.resamplefd = rfd;
+ }
+ } else if (!assign) {
+ if (kvm_irqchip_is_split()) {
+ kvm_resample_fd_remove(virq);
+ }
}
if (!kvm_irqfds_enabled()) {
@@ -16,4 +16,5 @@ kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_
kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d"
kvm_set_user_memory(uint32_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d"
kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32
+kvm_resample_fd_notify(int gsi) "gsi %d"
@@ -241,6 +241,25 @@ void ioapic_eoi_broadcast(int vector)
continue;
}
+#ifdef CONFIG_KVM
+ /*
+ * When IOAPIC is in the userspace while APIC is still in
+ * the kernel (i.e., split irqchip), we have a trick to
+ * kick the resamplefd logic for registered irqfds from
+ * userspace to deactivate the IRQ. When that happens, it
+ * means the irq bypassed userspace IOAPIC (so the irr and
+ * remote-irr of the table entry should be bypassed too
+ * even if an interrupt comes). Still kick the resamplefds if
+ * they're bound to the IRQ, to make sure to EOI the
+ * interrupt for the hardware correctly.
+ *
+ * Note: We still need to go through the irr & remote-irr
+ * operations below because we don't know whether there're
+ * emulated devices that are using/sharing the same IRQ.
+ */
+ kvm_resample_fd_notify(n);
+#endif
+
if (!(entry & IOAPIC_LVT_REMOTE_IRR)) {
continue;
}
@@ -554,4 +554,8 @@ int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source);
int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target);
struct ppc_radix_page_info *kvm_get_radix_page_info(void);
int kvm_get_max_memslots(void);
+
+/*
+ * Notify resamplefd for EOI of specific interrupts.
+ *
+ * Looks up the resample fd registered for @gsi and sets its event
+ * notifier; does nothing if none is registered for that GSI.
+ */
+void kvm_resample_fd_notify(int gsi);
+
#endif