[6/6] KVM: assigned dev: MSI-X mask support

Message ID 1289812532-3227-7-git-send-email-sheng@linux.intel.com (mailing list archive)
State New, archived

Commit Message

Sheng Yang Nov. 15, 2010, 9:15 a.m. UTC
Add in-kernel handling of the MSI-X table mask bit for assigned devices. Userspace registers the guest-physical address of a device's MSI-X table with the new KVM_UPDATE_MSIX_MMIO ioctl; the kernel then emulates guest accesses to the table, and a write to an entry's vector-control word masks or unmasks the host interrupt directly via disable_irq_nosync()/enable_irq(). The current mask state of an entry can be queried with KVM_GET_MSIX_ENTRY.

Accesses the kernel cannot complete, such as writes that may require MSI-X to be re-enabled, return -EOPNOTSUPP and fall back to userspace. The feature is advertised through the new KVM_CAP_MSIX_MASK capability.
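A minimal sketch of how a VMM might drive the two new ioctls (hypothetical code, not part of this patch; vm_fd, dev_id, table_gpa and nr_entries stand in for state the VMM already tracks, and error handling is trimmed):

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static int register_msix_mmio(int vm_fd, __u32 dev_id,
			      __u64 table_gpa, __u32 nr_entries)
{
	struct kvm_msix_mmio mmio;

	/* reserved[] must stay zero or the ioctl returns -EINVAL */
	memset(&mmio, 0, sizeof(mmio));
	mmio.id = dev_id;
	mmio.type = KVM_MSIX_TYPE_ASSIGNED_DEV;
	mmio.base_addr = table_gpa;
	mmio.max_entries_nr = nr_entries;
	mmio.flags = KVM_MSIX_MMIO_FLAG_REGISTER;
	return ioctl(vm_fd, KVM_UPDATE_MSIX_MMIO, &mmio);
}

/* Returns 1 if the entry is masked, 0 if unmasked, -1 on error */
static int msix_entry_masked(int vm_fd, __u32 dev_id, __u32 entry)
{
	struct kvm_msix_entry e;

	memset(&e, 0, sizeof(e));
	e.id = dev_id;
	e.type = KVM_MSIX_TYPE_ASSIGNED_DEV;
	e.entry = entry;
	e.query_flags = KVM_MSIX_FLAG_QUERY_MASKBIT;
	if (ioctl(vm_fd, KVM_GET_MSIX_ENTRY, &e) < 0)
		return -1;
	return !!(e.flags & KVM_MSIX_FLAG_MASKBIT);
}

Once the region is registered, mask-bit flips are absorbed in the kernel; any access the kernel declines still reaches the VMM as an ordinary MMIO exit.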

Patch

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fc29223..37602e2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1966,6 +1966,7 @@  int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_X86_ROBUST_SINGLESTEP:
 	case KVM_CAP_XSAVE:
 	case KVM_CAP_ASYNC_PF:
+	case KVM_CAP_MSIX_MASK:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ea2dc1a..b3e5ffe 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -541,6 +541,9 @@  struct kvm_ppc_pvinfo {
 #define KVM_CAP_PPC_GET_PVINFO 57
 #define KVM_CAP_PPC_IRQ_LEVEL 58
 #define KVM_CAP_ASYNC_PF 59
+#ifdef __KVM_HAVE_MSIX
+#define KVM_CAP_MSIX_MASK 60
+#endif
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -672,6 +675,9 @@  struct kvm_clock_data {
 #define KVM_XEN_HVM_CONFIG        _IOW(KVMIO,  0x7a, struct kvm_xen_hvm_config)
 #define KVM_SET_CLOCK             _IOW(KVMIO,  0x7b, struct kvm_clock_data)
 #define KVM_GET_CLOCK             _IOR(KVMIO,  0x7c, struct kvm_clock_data)
+/* Available with KVM_CAP_MSIX_MASK */
+#define KVM_GET_MSIX_ENTRY        _IOWR(KVMIO,  0x7d, struct kvm_msix_entry)
+#define KVM_UPDATE_MSIX_MMIO      _IOW(KVMIO,  0x7e, struct kvm_msix_mmio)
 /* Available with KVM_CAP_PIT_STATE2 */
 #define KVM_GET_PIT2              _IOR(KVMIO,  0x9f, struct kvm_pit_state2)
 #define KVM_SET_PIT2              _IOW(KVMIO,  0xa0, struct kvm_pit_state2)
@@ -795,4 +801,30 @@  struct kvm_assigned_msix_entry {
 	__u16 padding[3];
 };
 
+#define KVM_MSIX_TYPE_ASSIGNED_DEV	1
+
+#define KVM_MSIX_FLAG_MASKBIT		(1 << 0)
+#define KVM_MSIX_FLAG_QUERY_MASKBIT	(1 << 0)
+
+struct kvm_msix_entry {
+	__u32 id;
+	__u32 type;
+	__u32 entry; /* index of the entry in the MSI-X table */
+	__u32 flags;
+	__u32 query_flags;
+	__u32 reserved[5];
+};
+
+#define KVM_MSIX_MMIO_FLAG_REGISTER	(1 << 0)
+#define KVM_MSIX_MMIO_FLAG_UNREGISTER	(1 << 1)
+
+struct kvm_msix_mmio {
+	__u32 id;
+	__u32 type;
+	__u64 base_addr;
+	__u32 max_entries_nr;
+	__u32 flags;
+	__u32 reserved[6];
+};
+
 #endif /* __LINUX_KVM_H */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f09db87..57a437a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -501,6 +501,7 @@  struct kvm_guest_msix_entry {
 };
 
 #define KVM_ASSIGNED_ENABLED_IOMMU		(1 << 0)
+#define KVM_ASSIGNED_ENABLED_MSIX_MMIO		(1 << 1)
 struct kvm_assigned_dev_kernel {
 	struct kvm_irq_ack_notifier ack_notifier;
 	struct work_struct interrupt_work;
@@ -521,6 +522,10 @@  struct kvm_assigned_dev_kernel {
 	struct pci_dev *dev;
 	struct kvm *kvm;
 	spinlock_t assigned_dev_lock;
+	DECLARE_BITMAP(msix_mask_bitmap, KVM_MAX_MSIX_PER_DEV);
+	gpa_t msix_mmio_base;
+	struct kvm_io_device msix_mmio_dev;
+	int msix_max_entries_nr;
 };
 
 struct kvm_irq_mask_notifier {
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index 5c6b96d..76a1f12 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -226,12 +226,27 @@  static void kvm_free_assigned_irq(struct kvm *kvm,
 	kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
 }
 
+static void unregister_msix_mmio(struct kvm *kvm,
+				 struct kvm_assigned_dev_kernel *adev)
+{
+	if (adev->flags & KVM_ASSIGNED_ENABLED_MSIX_MMIO) {
+		mutex_lock(&kvm->slots_lock);
+		kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
+				&adev->msix_mmio_dev);
+		mutex_unlock(&kvm->slots_lock);
+		adev->flags &= ~KVM_ASSIGNED_ENABLED_MSIX_MMIO;
+	}
+}
+
 static void kvm_free_assigned_device(struct kvm *kvm,
 				     struct kvm_assigned_dev_kernel
 				     *assigned_dev)
 {
 	kvm_free_assigned_irq(kvm, assigned_dev);
 
+#ifdef __KVM_HAVE_MSIX
+	unregister_msix_mmio(kvm, assigned_dev);
+#endif
 	pci_reset_function(assigned_dev->dev);
 
 	pci_release_regions(assigned_dev->dev);
@@ -504,7 +519,7 @@  out:
 static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 				      struct kvm_assigned_pci_dev *assigned_dev)
 {
-	int r = 0, idx;
+	int r = 0, idx, i;
 	struct kvm_assigned_dev_kernel *match;
 	struct pci_dev *dev;
 
@@ -564,6 +579,10 @@  static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
+	/* After reset, all entries of the MSI-X table are masked */
+	for (i = 0; i < KVM_MAX_MSIX_PER_DEV; i++)
+		set_bit(i, match->msix_mask_bitmap);
+
 	if (assigned_dev->flags & KVM_ASSIGNED_ENABLED_IOMMU) {
 		if (!kvm->arch.iommu_domain) {
 			r = kvm_iommu_map_guest(kvm);
@@ -667,6 +686,43 @@  msix_nr_out:
 	return r;
 }
 
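+/*
+ * Reflect a guest update of an entry's mask bit on the host interrupt:
+ * masking disables the host IRQ and flushes any pending injection work,
+ * unmasking re-enables it.  No-op unless MSI-X is active on the device.
+ */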
+static void update_msix_mask(struct kvm_assigned_dev_kernel *adev,
+			     int idx, bool new_mask_flag)
+{
+	int irq;
+	bool old_mask_flag, need_flush = false;
+
+	spin_lock_irq(&adev->assigned_dev_lock);
+
+	if (!adev->dev->msix_enabled ||
+	    !(adev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX))
+		goto out;
+
+	old_mask_flag = test_bit(adev->guest_msix_entries[idx].entry,
+			adev->msix_mask_bitmap);
+	if (old_mask_flag == new_mask_flag)
+		goto out;
+
+	irq = adev->host_msix_entries[idx].vector;
+	BUG_ON(irq == 0);
+
+	if (new_mask_flag) {
+		set_bit(adev->guest_msix_entries[idx].entry,
+				adev->msix_mask_bitmap);
+		disable_irq_nosync(irq);
+		need_flush = true;
+	} else {
+		clear_bit(adev->guest_msix_entries[idx].entry,
+				adev->msix_mask_bitmap);
+		enable_irq(irq);
+	}
+out:
+	spin_unlock_irq(&adev->assigned_dev_lock);
+
+	if (need_flush)
+		flush_work(&adev->interrupt_work);
+}
+
 static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
 				       struct kvm_assigned_msix_entry *entry)
 {
@@ -701,6 +757,235 @@  msix_entry_out:
 
 	return r;
 }
+
+static int kvm_vm_ioctl_get_msix_entry(struct kvm *kvm,
+				       struct kvm_msix_entry *entry)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *adev;
+
+	if (entry->type != KVM_MSIX_TYPE_ASSIGNED_DEV)
+		return -EINVAL;
+
+	if (!entry->query_flags)
+		return -EINVAL;
+
+	mutex_lock(&kvm->lock);
+
+	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      entry->id);
+
+	if (!adev) {
+		r = -EINVAL;
+		goto out;
+	}
+
+	if (entry->entry >= adev->msix_max_entries_nr) {
+		r = -ENOSPC;
+		goto out;
+	}
+
+	if (entry->query_flags & KVM_MSIX_FLAG_QUERY_MASKBIT) {
+		if (test_bit(entry->entry, adev->msix_mask_bitmap))
+			entry->flags |= KVM_MSIX_FLAG_MASKBIT;
+		else
+			entry->flags &= ~KVM_MSIX_FLAG_MASKBIT;
+	}
+
+out:
+	mutex_unlock(&kvm->lock);
+
+	return r;
+}
+
+static bool msix_mmio_in_range(struct kvm_assigned_dev_kernel *adev,
+			      gpa_t addr, int len)
+{
+	gpa_t start, end;
+
+	BUG_ON(!(adev->flags & KVM_ASSIGNED_ENABLED_MSIX_MMIO));
+	start = adev->msix_mmio_base;
+	end = adev->msix_mmio_base + PCI_MSIX_ENTRY_SIZE *
+		adev->msix_max_entries_nr;
+	if (addr >= start && addr + len <= end)
+		return true;
+
+	return false;
+}
+
+static int msix_get_enabled_idx(struct kvm_assigned_dev_kernel *adev,
+			      gpa_t addr, int len)
+{
+	int i, index = (addr - adev->msix_mmio_base) / PCI_MSIX_ENTRY_SIZE;
+
+	for (i = 0; i < adev->entries_nr; i++)
+		if (adev->guest_msix_entries[i].entry == index)
+			return i;
+
+	return -EINVAL;
+}
+
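+/*
+ * In-kernel emulation of guest reads from the MSI-X table.  Address and
+ * data words are rebuilt from the MSI routing entry; the vector-control
+ * word reports the shadowed mask bit.  Anything else is punted to
+ * userspace via -EOPNOTSUPP.
+ */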
+static int msix_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
+			  void *val)
+{
+	struct kvm_assigned_dev_kernel *adev =
+			container_of(this, struct kvm_assigned_dev_kernel,
+				     msix_mmio_dev);
+	int idx, r = 0;
+	u32 entry[4];
+	struct kvm_kernel_irq_routing_entry e;
+
+	/* TODO: make this work on big-endian machines */
+	mutex_lock(&adev->kvm->lock);
+	if (!msix_mmio_in_range(adev, addr, len)) {
+		r = -EOPNOTSUPP;
+		goto out;
+	}
+	if ((addr & 0x3) || len != 4) {
+		r = -EOPNOTSUPP;
+		goto out;
+	}
+
+	idx = msix_get_enabled_idx(adev, addr, len);
+	if (idx < 0) {
+		idx = (addr - adev->msix_mmio_base) / PCI_MSIX_ENTRY_SIZE;
+		if ((addr % PCI_MSIX_ENTRY_SIZE) ==
+				PCI_MSIX_ENTRY_VECTOR_CTRL)
+			*(u32 *)val =
+				test_bit(idx, adev->msix_mask_bitmap) ?
+				PCI_MSIX_ENTRY_CTRL_MASKBIT : 0;
+		else
+			r = -EOPNOTSUPP;
+		goto out;
+	}
+
+	r = kvm_get_irq_routing_entry(adev->kvm,
+			adev->guest_msix_entries[idx].vector, &e);
+	if (r || e.type != KVM_IRQ_ROUTING_MSI) {
+		r = -EOPNOTSUPP;
+		goto out;
+	}
+	entry[0] = e.msi.address_lo;
+	entry[1] = e.msi.address_hi;
+	entry[2] = e.msi.data;
+	entry[3] = test_bit(adev->guest_msix_entries[idx].entry,
+			adev->msix_mask_bitmap);
+	memcpy(val, &entry[(addr % PCI_MSIX_ENTRY_SIZE) / sizeof(*entry)], len);
+
+out:
+	mutex_unlock(&adev->kvm->lock);
+	return r;
+}
+
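+/*
+ * Guest writes to the MSI-X table.  Only mask-bit updates in the
+ * vector-control word are completed here; writes to entries whose host
+ * IRQ is not set up yet update the shadow bitmap but still return
+ * -EOPNOTSUPP, so userspace can finish the access.
+ */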
+static int msix_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
+			   const void *val)
+{
+	struct kvm_assigned_dev_kernel *adev =
+			container_of(this, struct kvm_assigned_dev_kernel,
+				     msix_mmio_dev);
+	int idx, r = 0;
+	u32 new_val = *(u32 *)val;
+
+	/* TODO: make this work on big-endian machines */
+	mutex_lock(&adev->kvm->lock);
+	if (!msix_mmio_in_range(adev, addr, len)) {
+		r = -EOPNOTSUPP;
+		goto out;
+	}
+	if ((addr & 0x3) || len != 4) {
+		r = -EOPNOTSUPP;
+		goto out;
+	}
+
+	idx = msix_get_enabled_idx(adev, addr, len);
+	if (idx < 0) {
+		idx = (addr - adev->msix_mmio_base) / PCI_MSIX_ENTRY_SIZE;
+		if (((addr % PCI_MSIX_ENTRY_SIZE) ==
+				PCI_MSIX_ENTRY_VECTOR_CTRL)) {
+			if (new_val & ~PCI_MSIX_ENTRY_CTRL_MASKBIT)
+				goto out;
+			if (new_val & PCI_MSIX_ENTRY_CTRL_MASKBIT)
+				set_bit(idx, adev->msix_mask_bitmap);
+			else
+				clear_bit(idx, adev->msix_mask_bitmap);
+			/* It's possible that we need to re-enable MSI-X,
+			 * so fall back to userspace */
+		}
+		/* Userspace handles all other MMIO writes */
+		r = -EOPNOTSUPP;
+		goto out;
+	}
+	if (addr % PCI_MSIX_ENTRY_SIZE != PCI_MSIX_ENTRY_VECTOR_CTRL) {
+		r = -EOPNOTSUPP;
+		goto out;
+	}
+	if (new_val & ~PCI_MSIX_ENTRY_CTRL_MASKBIT)
+		goto out;
+	update_msix_mask(adev, idx, !!(new_val & PCI_MSIX_ENTRY_CTRL_MASKBIT));
+out:
+	mutex_unlock(&adev->kvm->lock);
+
+	return r;
+}
+
+static const struct kvm_io_device_ops msix_mmio_ops = {
+	.read     = msix_mmio_read,
+	.write    = msix_mmio_write,
+};
+
+static int kvm_vm_ioctl_update_msix_mmio(struct kvm *kvm,
+				struct kvm_msix_mmio *msix_mmio)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *adev;
+
+	if (msix_mmio->type != KVM_MSIX_TYPE_ASSIGNED_DEV)
+		return -EINVAL;
+
+	if (!msix_mmio->flags)
+		return -EINVAL;
+
+	mutex_lock(&kvm->lock);
+	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      msix_mmio->id);
+	if (!adev) {
+		r = -EINVAL;
+		goto out;
+	}
+	if (msix_mmio->base_addr == 0) {
+		r = -EINVAL;
+		goto out;
+	}
+	if (msix_mmio->max_entries_nr == 0 ||
+			msix_mmio->max_entries_nr > KVM_MAX_MSIX_PER_DEV) {
+		r = -EINVAL;
+		goto out;
+	}
+
+	if ((msix_mmio->flags & KVM_MSIX_MMIO_FLAG_REGISTER) &&
+			(msix_mmio->flags & KVM_MSIX_MMIO_FLAG_UNREGISTER)) {
+		r = -EINVAL;
+		goto out;
+	}
+
+	if (msix_mmio->flags & KVM_MSIX_MMIO_FLAG_REGISTER) {
+		if (!(adev->flags & KVM_ASSIGNED_ENABLED_MSIX_MMIO)) {
+			mutex_lock(&kvm->slots_lock);
+			kvm_iodevice_init(&adev->msix_mmio_dev,
+					&msix_mmio_ops);
+			r = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS,
+					&adev->msix_mmio_dev);
+			if (!r)
+				adev->flags |= KVM_ASSIGNED_ENABLED_MSIX_MMIO;
+			mutex_unlock(&kvm->slots_lock);
+		}
+		if (!r) {
+			adev->msix_mmio_base = msix_mmio->base_addr;
+			adev->msix_max_entries_nr = msix_mmio->max_entries_nr;
+		}
+	} else if (msix_mmio->flags & KVM_MSIX_MMIO_FLAG_UNREGISTER)
+		unregister_msix_mmio(kvm, adev);
+out:
+	mutex_unlock(&kvm->lock);
+
+	return r;
+}
 #endif
 
 long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
@@ -813,6 +1098,37 @@  long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
 			goto out;
 		break;
 	}
+	case KVM_GET_MSIX_ENTRY: {
+		struct kvm_msix_entry entry;
+		r = -EFAULT;
+		if (copy_from_user(&entry, argp, sizeof entry))
+			goto out;
+		r = kvm_vm_ioctl_get_msix_entry(kvm, &entry);
+		if (r)
+			goto out;
+		r = -EFAULT;
+		if (copy_to_user(argp, &entry, sizeof entry))
+			goto out;
+		r = 0;
+		break;
+	}
+	case KVM_UPDATE_MSIX_MMIO: {
+		struct kvm_msix_mmio msix_mmio;
+
+		r = -EFAULT;
+		if (copy_from_user(&msix_mmio, argp, sizeof(msix_mmio)))
+			goto out;
+
+		r = -EINVAL;
+		if (find_first_bit((unsigned long *)msix_mmio.reserved,
+		    sizeof(msix_mmio.reserved) * BITS_PER_BYTE) <
+		    sizeof(msix_mmio.reserved) * BITS_PER_BYTE)
+			goto out;
+
+		r = kvm_vm_ioctl_update_msix_mmio(kvm, &msix_mmio);
+		if (r)
+			goto out;
+		break;
+	}
 #endif
 	}
 out: