@@ -90,6 +90,20 @@ static inline void msleep(unsigned int msecs)
usleep(MSECS_TO_USECS(msecs));
}
+/*
+ * Find last (most significant) bit set. Same implementation as Linux:
+ * fls(0) = 0, fls(1) = 1, fls(1UL << 63) = 64
+ */
+static inline int fls_long(unsigned long x)
+{
+ return x ? sizeof(x) * 8 - __builtin_clzl(x) : 0;
+}
+
+static inline unsigned long roundup_pow_of_two(unsigned long x)
+{
+ return x ? 1UL << fls_long(x - 1) : 0;
+}
+
struct kvm;
void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size);
void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 size);
@@ -665,6 +665,10 @@ static int vfio__exit(struct kvm *kvm)
free(vfio_devices);
kvm__for_each_mem_bank(kvm, KVM_MEM_TYPE_RAM, vfio_unmap_mem_bank, NULL);
- return close(vfio_container);
+ close(vfio_container);
+
+ free(kvm->cfg.vfio_devices);
+
+ return 0;
}
dev_base_exit(vfio__exit);
@@ -150,8 +150,7 @@ static int vfio_pci_disable_msis(struct kvm *kvm, struct vfio_device *vdev,
.count = 0,
};
- if (!msi_is_enabled(msis->phys_state) ||
- msi_is_enabled(msis->virt_state))
+ if (!msi_is_enabled(msis->phys_state))
return 0;
ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
@@ -253,8 +252,17 @@ static void vfio_pci_msix_table_access(struct kvm_cpu *vcpu, u64 addr, u8 *data,
u64 offset = addr - pdev->msix_table.guest_phys_addr;
size_t vector = offset / PCI_MSIX_ENTRY_SIZE;
- /* PCI spec says that software must use aligned 4 or 8 bytes accesses */
off_t field = offset % PCI_MSIX_ENTRY_SIZE;
+
+ /*
+ * PCI spec says that software must use aligned 4- or 8-byte accesses
+ * for the MSI-X tables.
+ */
+ if ((len != 4 && len != 8) || addr & (len - 1)) {
+ dev_warn(vdev, "invalid MSI-X table access");
+ return;
+ }
+
entry = &pdev->msix.entries[vector];
mutex_lock(&pdev->msix.mutex);
@@ -266,7 +274,11 @@ static void vfio_pci_msix_table_access(struct kvm_cpu *vcpu, u64 addr, u8 *data,
memcpy((void *)&entry->config + field, data, len);
- if (field != PCI_MSIX_ENTRY_VECTOR_CTRL)
+ /*
+ * Check if access touched the vector control register, which is at the
+ * end of the MSI-X entry.
+ */
+ if (field + len <= PCI_MSIX_ENTRY_VECTOR_CTRL)
goto out_unlock;
msi_set_masked(entry->virt_state, entry->config.ctrl &
@@ -302,12 +314,11 @@ static void vfio_pci_msix_cap_write(struct kvm *kvm,
/* Read byte that contains the Enable bit */
flags = *(u8 *)(data + enable_pos - off) << 8;
- msi_set_masked(pdev->msix.virt_state, flags & PCI_MSIX_FLAGS_MASKALL);
- msi_set_enabled(pdev->msix.virt_state, flags & PCI_MSIX_FLAGS_ENABLE);
+ mutex_lock(&pdev->msix.mutex);
+ msi_set_masked(pdev->msix.virt_state, flags & PCI_MSIX_FLAGS_MASKALL);
enable = flags & PCI_MSIX_FLAGS_ENABLE;
-
- mutex_lock(&pdev->msix.mutex);
+ msi_set_enabled(pdev->msix.virt_state, enable);
if (enable && vfio_pci_enable_msis(kvm, vdev, true))
dev_err(vdev, "cannot enable MSIX");
@@ -702,8 +713,8 @@ static int vfio_pci_create_msix_table(struct kvm *kvm,
{
int ret;
size_t i;
+ size_t mmio_size;
size_t nr_entries;
- size_t table_size;
struct vfio_pci_msi_entry *entries;
struct vfio_pci_msix_pba *pba = &pdev->msix_pba;
struct vfio_pci_msix_table *table = &pdev->msix_table;
@@ -716,7 +727,8 @@ static int vfio_pci_create_msix_table(struct kvm *kvm,
* KVM needs memory regions to be multiple of and aligned on PAGE_SIZE.
*/
nr_entries = (msix->ctrl & PCI_MSIX_FLAGS_QSIZE) + 1;
- table_size = ALIGN(nr_entries * PCI_MSIX_ENTRY_SIZE, PAGE_SIZE);
+ table->size = ALIGN(nr_entries * PCI_MSIX_ENTRY_SIZE, PAGE_SIZE);
+ pba->size = ALIGN(DIV_ROUND_UP(nr_entries, 64), PAGE_SIZE);
entries = calloc(nr_entries, sizeof(struct vfio_pci_msi_entry));
if (!entries)
@@ -727,20 +739,20 @@ static int vfio_pci_create_msix_table(struct kvm *kvm,
/*
* To ease MSI-X cap configuration in case they share the same BAR,
- * collapse table and pending array. According to PCI, address spaces
- * must be power of two. Since nr_entries is a power of two, and PBA
- * size is less than table_size, reserve 2*table_size.
+ * collapse table and pending array. The size of the BAR regions must be
+ * powers of two.
*/
- table->guest_phys_addr = pci_get_io_space_block(2 * table_size);
+ mmio_size = roundup_pow_of_two(table->size + pba->size);
+ table->guest_phys_addr = pci_get_io_space_block(mmio_size);
if (!table->guest_phys_addr) {
pr_err("cannot allocate IO space");
ret = -ENOMEM;
goto out_free;
}
- pba->guest_phys_addr = table->guest_phys_addr + table_size;
+ pba->guest_phys_addr = table->guest_phys_addr + table->size;
- ret = kvm__register_mmio(kvm, table->guest_phys_addr, table_size, false,
- vfio_pci_msix_table_access, pdev);
+ ret = kvm__register_mmio(kvm, table->guest_phys_addr, table->size,
+ false, vfio_pci_msix_table_access, pdev);
if (ret < 0)
goto out_free;
@@ -752,8 +764,6 @@ static int vfio_pci_create_msix_table(struct kvm *kvm,
* between MSI-X table and PBA. For the sake of isolation, create a
* virtual PBA.
*/
- pba->size = nr_entries / 8;
-
ret = kvm__register_mmio(kvm, pba->guest_phys_addr, pba->size, false,
vfio_pci_msix_pba_access, pdev);
if (ret < 0)
@@ -761,7 +771,6 @@ static int vfio_pci_create_msix_table(struct kvm *kvm,
pdev->msix.entries = entries;
pdev->msix.nr_entries = nr_entries;
- table->size = table_size;
return 0;
@@ -801,7 +810,7 @@ static int vfio_pci_configure_bar(struct kvm *kvm, struct vfio_device *vdev,
region->vdev = vdev;
region->is_ioport = !!(bar & PCI_BASE_ADDRESS_SPACE_IO);
region->info = (struct vfio_region_info) {
- .argsz = sizeof(*region),
+ .argsz = sizeof(region->info),
.index = nr,
};