diff mbox series

[RFCv1,7/7] vfio/pci: Allow preset MSI IOVAs via VFIO_IRQ_SET_ACTION_PREPARE

Message ID 07623edc330420376e235607285a0f56b54787f2.1731130093.git.nicolinc@nvidia.com (mailing list archive)
State New
Headers show
Series vfio: Allow userspace to specify the address for each MSI vector | expand

Commit Message

Nicolin Chen Nov. 9, 2024, 5:48 a.m. UTC
Add a new VFIO_IRQ_SET_ACTION_PREPARE action that takes a new data type,
VFIO_IRQ_SET_DATA_MSI_IOVA, giving user space an interface to forward to
the kernel the stage-1 IOVA (of a 2-stage translation: IOVA->IPA->PA) of
an MSI doorbell address. This is needed because the ITS hardware must be
programmed with the top-level IOVA in order to work with the IOMMU on
ARM64.

Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
 include/linux/vfio_pci_core.h     |  1 +
 include/uapi/linux/vfio.h         |  8 ++++--
 drivers/vfio/pci/vfio_pci_intrs.c | 41 ++++++++++++++++++++++++++++++-
 drivers/vfio/vfio_main.c          |  3 +++
 4 files changed, 50 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index fbb472dd99b3..08027b8331f0 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -63,6 +63,7 @@  struct vfio_pci_core_device {
 	int			irq_type;
 	int			num_regions;
 	struct vfio_pci_region	*region;
+	dma_addr_t		*msi_iovas;
 	u8			msi_qmax;
 	u8			msix_bar;
 	u16			msix_size;
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 2b68e6cdf190..d6be351abcde 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -590,6 +590,8 @@  struct vfio_irq_set {
 #define VFIO_IRQ_SET_ACTION_MASK	(1 << 3) /* Mask interrupt */
 #define VFIO_IRQ_SET_ACTION_UNMASK	(1 << 4) /* Unmask interrupt */
 #define VFIO_IRQ_SET_ACTION_TRIGGER	(1 << 5) /* Trigger interrupt */
+#define VFIO_IRQ_SET_DATA_MSI_IOVA	(1 << 6) /* Data is MSI IOVA (u64) */
+#define VFIO_IRQ_SET_ACTION_PREPARE	(1 << 7) /* Prepare interrupt */
 	__u32	index;
 	__u32	start;
 	__u32	count;
@@ -599,10 +601,12 @@  struct vfio_irq_set {
 
 #define VFIO_IRQ_SET_DATA_TYPE_MASK	(VFIO_IRQ_SET_DATA_NONE | \
 					 VFIO_IRQ_SET_DATA_BOOL | \
-					 VFIO_IRQ_SET_DATA_EVENTFD)
+					 VFIO_IRQ_SET_DATA_EVENTFD | \
+					 VFIO_IRQ_SET_DATA_MSI_IOVA)
 #define VFIO_IRQ_SET_ACTION_TYPE_MASK	(VFIO_IRQ_SET_ACTION_MASK | \
 					 VFIO_IRQ_SET_ACTION_UNMASK | \
-					 VFIO_IRQ_SET_ACTION_TRIGGER)
+					 VFIO_IRQ_SET_ACTION_TRIGGER | \
+					 VFIO_IRQ_SET_ACTION_PREPARE)
 /**
  * VFIO_DEVICE_RESET - _IO(VFIO_TYPE, VFIO_BASE + 11)
  *
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 8382c5834335..18bcdc5b1ef5 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -383,7 +383,7 @@  static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi
 
 	/* return the number of supported vectors if we can't get all: */
 	cmd = vfio_pci_memory_lock_and_enable(vdev);
-	ret = pci_alloc_irq_vectors(pdev, 1, nvec, flag);
+	ret = pci_alloc_irq_vectors_iovas(pdev, 1, nvec, flag, vdev->msi_iovas);
 	if (ret < nvec) {
 		if (ret > 0)
 			pci_free_irq_vectors(pdev);
@@ -685,6 +685,9 @@  static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
 
 	if (irq_is(vdev, index) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
 		vfio_msi_disable(vdev, msix);
+		/* FIXME we need a better cleanup routine */
+		kfree(vdev->msi_iovas);
+		vdev->msi_iovas = NULL;
 		return 0;
 	}
 
@@ -728,6 +731,39 @@  static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
 	return 0;
 }
 
+/* Stash user-supplied per-vector MSI IOVAs; DATA_NONE drops them all. */
+static int vfio_pci_set_msi_prepare(struct vfio_pci_core_device *vdev,
+				    unsigned index, unsigned start,
+				    unsigned count, uint32_t flags, void *data)
+{
+	uint64_t *iovas = data;
+	unsigned int i;
+
+	if (!(irq_is(vdev, index) || is_irq_none(vdev)))
+		return -EINVAL;
+
+	if (flags & VFIO_IRQ_SET_DATA_NONE) {
+		if (!count)
+			return -EINVAL;
+		/* FIXME support partial unset */
+		kfree(vdev->msi_iovas);
+		vdev->msi_iovas = NULL;
+		return 0;
+	}
+
+	if (!(flags & VFIO_IRQ_SET_DATA_MSI_IOVA) ||
+	    !IS_ENABLED(CONFIG_IRQ_MSI_IOMMU))
+		return -EOPNOTSUPP;
+	/* Always reallocate: a stale array may hold fewer than count entries */
+	kfree(vdev->msi_iovas);
+	vdev->msi_iovas = kcalloc(count, sizeof(*vdev->msi_iovas), GFP_KERNEL);
+	if (!vdev->msi_iovas)
+		return -ENOMEM;
+	for (i = 0; i < count; i++)
+		vdev->msi_iovas[i] = iovas[i];
+	return 0;
+}
+
 static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
 					   unsigned int count, uint32_t flags,
 					   void *data)
@@ -837,6 +873,9 @@  int vfio_pci_set_irqs_ioctl(struct vfio_pci_core_device *vdev, uint32_t flags,
 		case VFIO_IRQ_SET_ACTION_TRIGGER:
 			func = vfio_pci_set_msi_trigger;
 			break;
+		case VFIO_IRQ_SET_ACTION_PREPARE:
+			func = vfio_pci_set_msi_prepare;
+			break;
 		}
 		break;
 	case VFIO_PCI_ERR_IRQ_INDEX:
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index a5a62d9d963f..61211c082a64 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -1554,6 +1554,9 @@  int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
 	case VFIO_IRQ_SET_DATA_EVENTFD:
 		size = sizeof(int32_t);
 		break;
+	case VFIO_IRQ_SET_DATA_MSI_IOVA:
+		size = sizeof(uint64_t);
+		break;
 	default:
 		return -EINVAL;
 	}