@@ -63,6 +63,7 @@ struct vfio_pci_core_device {
int irq_type;
int num_regions;
struct vfio_pci_region *region;
+ dma_addr_t *msi_iovas;
u8 msi_qmax;
u8 msix_bar;
u16 msix_size;
@@ -590,6 +590,8 @@ struct vfio_irq_set {
#define VFIO_IRQ_SET_ACTION_MASK (1 << 3) /* Mask interrupt */
#define VFIO_IRQ_SET_ACTION_UNMASK (1 << 4) /* Unmask interrupt */
#define VFIO_IRQ_SET_ACTION_TRIGGER (1 << 5) /* Trigger interrupt */
+#define VFIO_IRQ_SET_DATA_MSI_IOVA (1 << 6) /* Data is MSI IOVA (u64) */
+#define VFIO_IRQ_SET_ACTION_PREPARE (1 << 7) /* Prepare interrupt */
__u32 index;
__u32 start;
__u32 count;
@@ -599,10 +601,12 @@ struct vfio_irq_set {
#define VFIO_IRQ_SET_DATA_TYPE_MASK (VFIO_IRQ_SET_DATA_NONE | \
VFIO_IRQ_SET_DATA_BOOL | \
- VFIO_IRQ_SET_DATA_EVENTFD)
+ VFIO_IRQ_SET_DATA_EVENTFD | \
+ VFIO_IRQ_SET_DATA_MSI_IOVA)
#define VFIO_IRQ_SET_ACTION_TYPE_MASK (VFIO_IRQ_SET_ACTION_MASK | \
VFIO_IRQ_SET_ACTION_UNMASK | \
- VFIO_IRQ_SET_ACTION_TRIGGER)
+ VFIO_IRQ_SET_ACTION_TRIGGER | \
+ VFIO_IRQ_SET_ACTION_PREPARE)
/**
* VFIO_DEVICE_RESET - _IO(VFIO_TYPE, VFIO_BASE + 11)
*
@@ -383,7 +383,7 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi
/* return the number of supported vectors if we can't get all: */
cmd = vfio_pci_memory_lock_and_enable(vdev);
- ret = pci_alloc_irq_vectors(pdev, 1, nvec, flag);
+ ret = pci_alloc_irq_vectors_iovas(pdev, 1, nvec, flag, vdev->msi_iovas);
if (ret < nvec) {
if (ret > 0)
pci_free_irq_vectors(pdev);
@@ -685,6 +685,9 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
if (irq_is(vdev, index) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
vfio_msi_disable(vdev, msix);
+ /* FIXME we need a better cleanup routine */
+ kfree(vdev->msi_iovas);
+ vdev->msi_iovas = NULL;
return 0;
}
@@ -728,6 +731,39 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
 	return 0;
 }
+static int vfio_pci_set_msi_prepare(struct vfio_pci_core_device *vdev,
+				    unsigned index, unsigned start,
+				    unsigned count, uint32_t flags, void *data)
+{
+	uint64_t *iovas = data;
+	dma_addr_t *msi_iovas;
+
+	if (!(irq_is(vdev, index) || is_irq_none(vdev)))
+		return -EINVAL;
+	if (flags & VFIO_IRQ_SET_DATA_NONE) {
+		if (!count)
+			return -EINVAL;
+		/* FIXME support partial unset */
+		kfree(vdev->msi_iovas);
+		vdev->msi_iovas = NULL;
+		return 0;
+	}
+	if (!(flags & VFIO_IRQ_SET_DATA_MSI_IOVA))
+		return -EOPNOTSUPP;
+	if (!IS_ENABLED(CONFIG_IRQ_MSI_IOMMU))
+		return -EOPNOTSUPP;
+	if (start || !count)	/* FIXME: support partial sets */
+		return -EINVAL;
+	msi_iovas = kcalloc(count, sizeof(*msi_iovas), GFP_KERNEL);
+	if (!msi_iovas)
+		return -ENOMEM;
+	for (unsigned int i = 0; i < count; i++)
+		msi_iovas[i] = iovas[i];
+	kfree(vdev->msi_iovas);	/* drop any prior (possibly smaller) set */
+	vdev->msi_iovas = msi_iovas;
+	return 0;
+}
+
static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
unsigned int count, uint32_t flags,
void *data)
@@ -837,6 +873,9 @@ int vfio_pci_set_irqs_ioctl(struct vfio_pci_core_device *vdev, uint32_t flags,
case VFIO_IRQ_SET_ACTION_TRIGGER:
func = vfio_pci_set_msi_trigger;
break;
+ case VFIO_IRQ_SET_ACTION_PREPARE:
+ func = vfio_pci_set_msi_prepare;
+ break;
}
break;
case VFIO_PCI_ERR_IRQ_INDEX:
@@ -1554,6 +1554,9 @@ int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
case VFIO_IRQ_SET_DATA_EVENTFD:
size = sizeof(int32_t);
break;
+ case VFIO_IRQ_SET_DATA_MSI_IOVA:
+ size = sizeof(uint64_t);
+ break;
default:
return -EINVAL;
}
Add a new VFIO_IRQ_SET_ACTION_PREPARE to set VFIO_IRQ_SET_DATA_MSI_IOVA, giving user space an interface to forward to the kernel the stage-1 IOVA (of a 2-stage translation: IOVA->IPA->PA) for an MSI doorbell address, since the ITS hardware needs to be programmed with the top-level IOVA address, in order to work with the IOMMU on ARM64. Signed-off-by: Nicolin Chen <nicolinc@nvidia.com> --- include/linux/vfio_pci_core.h | 1 + include/uapi/linux/vfio.h | 8 ++++-- drivers/vfio/pci/vfio_pci_intrs.c | 41 ++++++++++++++++++++++++++++++- drivers/vfio/vfio_main.c | 3 +++ 4 files changed, 50 insertions(+), 3 deletions(-)