@@ -59,6 +59,29 @@ struct msix_cap {
u32 pba_offset;
};
+struct msi_cap_64 {
+ u8 cap;
+ u8 next;
+ u16 ctrl;
+ u32 address_lo;
+ u32 address_hi;
+ u16 data;
+ u16 _align;
+ u32 mask_bits;
+ u32 pend_bits;
+};
+
+struct msi_cap_32 {
+ u8 cap;
+ u8 next;
+ u16 ctrl;
+ u32 address_lo;
+ u16 data;
+ u16 _align;
+ u32 mask_bits;
+ u32 pend_bits;
+};
+
struct pci_cap_hdr {
u8 type;
u8 next;
@@ -297,6 +297,112 @@ static void vfio_pci_msix_cap_write(struct kvm *kvm,
mutex_unlock(&pdev->msi.mutex);
}
+static int vfio_pci_msi_vector_write(struct kvm *kvm, struct vfio_device *vdev,
+ u8 off, u8 *data, u32 sz)
+{
+ size_t i;
+ u32 mask = 0;
+ size_t mask_pos, start, limit;
+ struct vfio_pci_msi_entry *entry;
+ struct vfio_pci_device *pdev = &vdev->pci;
+ struct msi_cap_64 *msi_cap_64 = (void *)&pdev->hdr + pdev->msi.pos;
+
+ if (!(msi_cap_64->ctrl & PCI_MSI_FLAGS_MASKBIT))
+ return 0;
+
+ if (msi_cap_64->ctrl & PCI_MSI_FLAGS_64BIT)
+ mask_pos = PCI_MSI_MASK_64;
+ else
+ mask_pos = PCI_MSI_MASK_32;
+
+ if (off >= mask_pos + 4 || off + sz <= mask_pos)
+ return 0;
+
+ /* Set mask to current state */
+ for (i = 0; i < pdev->msi.nr_entries; i++) {
+ entry = &pdev->msi.entries[i];
+		mask |= (u32)!!msi_is_masked(entry->virt_state) << i;
+ }
+
+ /* Update mask following the intersection of access and register */
+ start = max_t(size_t, off, mask_pos);
+ limit = min_t(size_t, off + sz, mask_pos + 4);
+
+ memcpy((void *)&mask + start - mask_pos, data + start - off,
+ limit - start);
+
+ /* Update states if necessary */
+ for (i = 0; i < pdev->msi.nr_entries; i++) {
+		bool masked = mask & (1u << i);
+
+ entry = &pdev->msi.entries[i];
+ if (masked != msi_is_masked(entry->virt_state)) {
+ msi_set_masked(entry->virt_state, masked);
+ vfio_pci_update_msi_entry(kvm, vdev, entry);
+ }
+ }
+
+ return 1;
+}
+
+static void vfio_pci_msi_cap_write(struct kvm *kvm, struct vfio_device *vdev,
+ u8 off, u8 *data, u32 sz)
+{
+ u8 ctrl;
+ struct msi_msg msg;
+ size_t i, nr_vectors;
+ struct vfio_pci_msi_entry *entry;
+ struct vfio_pci_device *pdev = &vdev->pci;
+ struct msi_cap_64 *msi_cap_64 = (void *)&pdev->hdr + pdev->msi.pos;
+
+ off -= pdev->msi.pos;
+
+ /* Check if the guest is trying to update mask bits */
+ if (vfio_pci_msi_vector_write(kvm, vdev, off, data, sz))
+ return;
+
+ /* Only modify routes when guest pokes the enable bit */
+ if (off > PCI_MSI_FLAGS || off + sz <= PCI_MSI_FLAGS)
+ return;
+
+ ctrl = *(u8 *)(data + PCI_MSI_FLAGS - off);
+
+ mutex_lock(&pdev->msi.mutex);
+
+ msi_set_enabled(pdev->msi.virt_state, ctrl & PCI_MSI_FLAGS_ENABLE);
+
+ if (!msi_is_enabled(pdev->msi.virt_state)) {
+ vfio_pci_disable_msis(kvm, vdev);
+ mutex_unlock(&pdev->msi.mutex);
+ return;
+ }
+
+ /* Create routes for the requested vectors */
+	nr_vectors = min_t(size_t, 1 << ((ctrl & PCI_MSI_FLAGS_QSIZE) >> 4), pdev->msi.nr_entries);
+
+ msg.address_lo = msi_cap_64->address_lo;
+ if (msi_cap_64->ctrl & PCI_MSI_FLAGS_64BIT) {
+ msg.address_hi = msi_cap_64->address_hi;
+ msg.data = msi_cap_64->data;
+ } else {
+ struct msi_cap_32 *msi_cap_32 = (void *)msi_cap_64;
+ msg.address_hi = 0;
+ msg.data = msi_cap_32->data;
+ }
+
+ for (i = 0; i < nr_vectors; i++) {
+ entry = &pdev->msi.entries[i];
+ entry->config.msg = msg;
+ vfio_pci_update_msi_entry(kvm, vdev, entry);
+ }
+
+ /* Update the physical capability if necessary */
+ if (vfio_pci_enable_msis(kvm, vdev))
+		dev_err(vdev, "cannot enable MSIs");
+
+ mutex_unlock(&pdev->msi.mutex);
+}
+
static void vfio_pci_cfg_read(struct kvm *kvm, struct pci_device_header *pci_hdr,
u8 offset, void *data, int sz)
{
@@ -333,6 +439,9 @@ static void vfio_pci_cfg_write(struct kvm *kvm, struct pci_device_header *pci_hd
/* Handle MSI write now, since it might update the hardware capability */
switch (pdev->irq_type) {
+ case VFIO_PCI_IRQ_MSI:
+ vfio_pci_msi_cap_write(kvm, vdev, offset, data, sz);
+ break;
case VFIO_PCI_IRQ_MSIX:
vfio_pci_msix_cap_write(kvm, vdev, offset, data, sz);
break;
@@ -345,11 +454,25 @@ static void vfio_pci_cfg_write(struct kvm *kvm, struct pci_device_header *pci_hd
sz, offset);
}
+static ssize_t vfio_pci_msi_cap_size(struct msi_cap_64 *cap_hdr)
+{
+ size_t size = 10;
+
+ if (cap_hdr->ctrl & PCI_MSI_FLAGS_64BIT)
+ size += 4;
+ if (cap_hdr->ctrl & PCI_MSI_FLAGS_MASKBIT)
+ size += 10;
+
+ return size;
+}
+
static ssize_t vfio_pci_cap_size(struct pci_cap_hdr *cap_hdr)
{
switch (cap_hdr->type) {
case PCI_CAP_ID_MSIX:
return PCI_CAP_MSIX_SIZEOF;
+ case PCI_CAP_ID_MSI:
+ return vfio_pci_msi_cap_size((void *)cap_hdr);
default:
pr_err("unknown PCI capability 0x%x", cap_hdr->type);
return 0;
@@ -423,6 +546,7 @@ static int vfio_pci_parse_caps(struct vfio_device *vdev)
switch (cap.type) {
case PCI_CAP_ID_MSIX:
+ case PCI_CAP_ID_MSI:
ret = vfio_pci_add_cap(vdev, &cap, info->offset, pos);
if (ret) {
dev_warn(vdev, "failed to read capability structure %x",
@@ -431,7 +555,8 @@ static int vfio_pci_parse_caps(struct vfio_device *vdev)
}
pdev->msi.pos = pos;
- pdev->irq_type = VFIO_PCI_IRQ_MSIX;
+ pdev->irq_type = cap.type == PCI_CAP_ID_MSIX ?
+ VFIO_PCI_IRQ_MSIX : VFIO_PCI_IRQ_MSI;
break;
/* Any other capability is hidden */
@@ -646,6 +771,19 @@ out_free:
return ret;
}
+static int vfio_pci_create_msi_cap(struct kvm *kvm, struct vfio_pci_device *pdev)
+{
+ struct msi_cap_64 *cap = (void *)&pdev->hdr + pdev->msi.pos;
+
+	pdev->msi.nr_entries = 1 << ((cap->ctrl & PCI_MSI_FLAGS_QMASK) >> 1);
+ pdev->msi.entries = calloc(pdev->msi.nr_entries,
+ sizeof(struct vfio_pci_msi_entry));
+ if (!pdev->msi.entries)
+ return -ENOMEM;
+
+ return 0;
+}
+
static int vfio_pci_configure_dev_regions(struct kvm *kvm,
struct vfio_device *vdev)
{
@@ -662,6 +800,9 @@ static int vfio_pci_configure_dev_regions(struct kvm *kvm,
case VFIO_PCI_IRQ_MSIX:
ret = vfio_pci_create_msix_table(kvm, pdev);
break;
+ case VFIO_PCI_IRQ_MSI:
+ ret = vfio_pci_create_msi_cap(kvm, pdev);
+ break;
default:
break;
}
@@ -900,7 +1041,8 @@ static int vfio_pci_configure_dev_irqs(struct kvm *kvm, struct vfio_device *vdev
return -EINVAL;
}
- if (pdev->irq_type == VFIO_PCI_IRQ_MSIX) {
+ if (pdev->irq_type == VFIO_PCI_IRQ_MSIX ||
+ pdev->irq_type == VFIO_PCI_IRQ_MSI) {
if (vdev->irq_info.count != pdev->msi.nr_entries) {
dev_err(vdev, "invalid number of MSIs reported by VFIO");
return -EINVAL;
When a device has MSI capability but not MSI-X, use it. This patch is untested. Consider it broken unless proven otherwise. Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com> --- include/kvm/pci.h | 23 +++++++++ vfio/pci.c | 146 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 167 insertions(+), 2 deletions(-)