diff mbox series

[v4,22/32] KVM: s390: pci: routines for (dis)associating zPCI devices with a KVM

Message ID 20220314194451.58266-23-mjrosato@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series KVM: s390: enable zPCI for interpretive execution | expand

Commit Message

Matthew Rosato March 14, 2022, 7:44 p.m. UTC
These routines will be wired into a KVM ioctl, to be issued from
userspace to (dis)associate a specific zPCI device with the issuing
KVM.  This will create/delete a relationship between KVM, zPCI device
and the associated IOMMU domain for the device.

Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
---
 arch/s390/include/asm/kvm_host.h |   2 +
 arch/s390/include/asm/kvm_pci.h  |   2 +
 arch/s390/kvm/kvm-s390.c         |   5 +
 arch/s390/kvm/pci.c              | 225 +++++++++++++++++++++++++++++++
 arch/s390/kvm/pci.h              |   5 +
 5 files changed, 239 insertions(+)

Comments

Jason Gunthorpe March 14, 2022, 9:46 p.m. UTC | #1
On Mon, Mar 14, 2022 at 03:44:41PM -0400, Matthew Rosato wrote:
> +int kvm_s390_pci_zpci_start(struct kvm *kvm, struct zpci_dev *zdev)
> +{
> +	struct vfio_device *vdev;
> +	struct pci_dev *pdev;
> +	int rc;
> +
> +	rc = kvm_s390_pci_dev_open(zdev);
> +	if (rc)
> +		return rc;
> +
> +	pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
> +	if (!pdev) {
> +		rc = -ENODEV;
> +		goto exit_err;
> +	}
> +
> +	vdev = get_vdev(&pdev->dev);
> +	if (!vdev) {
> +		pci_dev_put(pdev);
> +		rc = -ENODEV;
> +		goto exit_err;
> +	}
> +
> +	zdev->kzdev->nb.notifier_call = kvm_s390_pci_group_notifier;
> +
> +	/*
> +	 * At this point, a KVM should already be associated with this device,
> +	 * so registering the notifier now should immediately trigger the
> +	 * event.  We also want to know if the KVM association is later removed
> +	 * to ensure proper cleanup happens.
> +	 */
> +	rc = register_notifier(vdev->dev, &zdev->kzdev->nb);
> +
> +	put_vdev(vdev);
> +	pci_dev_put(pdev);
> +
> +	/* Make sure the registered KVM matches the KVM issuing the ioctl */
> +	if (rc || zdev->kzdev->kvm != kvm) {
> +		rc = -ENODEV;
> +		goto exit_err;
> +	}
> +
> +	/* Must support KVM-managed IOMMU to proceed */
> +	if (IS_ENABLED(CONFIG_S390_KVM_IOMMU))
> +		rc = zpci_iommu_attach_kvm(zdev, kvm);
> +	else
> +		rc = -EINVAL;

This seems like kind of a strange API, shouldn't kvm be getting a
reference on the underlying iommu_domain and then calling into it to
get the mapping table instead of pushing KVM specific logic into the
iommu driver?

It would be nice if all the special kvm stuff could be more isolated in
kvm code.

I'm still a little unclear about why this is so complicated - can't
you get the iommu_domain from the group FD directly in KVM code as
power does?

Jason
Matthew Rosato March 15, 2022, 4:39 p.m. UTC | #2
On 3/14/22 5:46 PM, Jason Gunthorpe wrote:
> On Mon, Mar 14, 2022 at 03:44:41PM -0400, Matthew Rosato wrote:
>> +int kvm_s390_pci_zpci_start(struct kvm *kvm, struct zpci_dev *zdev)
>> +{
>> +	struct vfio_device *vdev;
>> +	struct pci_dev *pdev;
>> +	int rc;
>> +
>> +	rc = kvm_s390_pci_dev_open(zdev);
>> +	if (rc)
>> +		return rc;
>> +
>> +	pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
>> +	if (!pdev) {
>> +		rc = -ENODEV;
>> +		goto exit_err;
>> +	}
>> +
>> +	vdev = get_vdev(&pdev->dev);
>> +	if (!vdev) {
>> +		pci_dev_put(pdev);
>> +		rc = -ENODEV;
>> +		goto exit_err;
>> +	}
>> +
>> +	zdev->kzdev->nb.notifier_call = kvm_s390_pci_group_notifier;
>> +
>> +	/*
>> +	 * At this point, a KVM should already be associated with this device,
>> +	 * so registering the notifier now should immediately trigger the
>> +	 * event.  We also want to know if the KVM association is later removed
>> +	 * to ensure proper cleanup happens.
>> +	 */
>> +	rc = register_notifier(vdev->dev, &zdev->kzdev->nb);
>> +
>> +	put_vdev(vdev);
>> +	pci_dev_put(pdev);
>> +
>> +	/* Make sure the registered KVM matches the KVM issuing the ioctl */
>> +	if (rc || zdev->kzdev->kvm != kvm) {
>> +		rc = -ENODEV;
>> +		goto exit_err;
>> +	}
>> +
>> +	/* Must support KVM-managed IOMMU to proceed */
>> +	if (IS_ENABLED(CONFIG_S390_KVM_IOMMU))
>> +		rc = zpci_iommu_attach_kvm(zdev, kvm);
>> +	else
>> +		rc = -EINVAL;
> 
> This seems like kind of a strange API, shouldn't kvm be getting a
> reference on the underlying iommu_domain and then calling into it to
> get the mapping table instead of pushing KVM specific logic into the
> iommu driver?
> 
> It would be nice if all the special kvm stuff could be more isolated in
> kvm code.
> 
> I'm still a little unclear about why this is so complicated - can't
> you get the iommu_domain from the group FD directly in KVM code as
> power does?

Yeah, I think I could do something like that using the vfio group fd 
like power does.

Providing a reference to the kvm itself inside iommu was being used for 
the pin/unpin operations, which would not be necessary if we switched to 
the 1st layer iommu pinning all of guest memory.
diff mbox series

Patch

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index bf61ab05f98c..bd171abbb8ef 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -965,6 +965,8 @@  struct kvm_arch{
 	DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS);
 	struct kvm_s390_gisa_interrupt gisa_int;
 	struct kvm_s390_pv pv;
+	struct list_head kzdev_list;
+	spinlock_t kzdev_list_lock;
 };
 
 #define KVM_HVA_ERR_BAD		(-1UL)
diff --git a/arch/s390/include/asm/kvm_pci.h b/arch/s390/include/asm/kvm_pci.h
index ebc0da5d9ac1..47ce18b5bddd 100644
--- a/arch/s390/include/asm/kvm_pci.h
+++ b/arch/s390/include/asm/kvm_pci.h
@@ -21,6 +21,8 @@  struct kvm_zdev {
 	struct zpci_dev *zdev;
 	struct kvm *kvm;
 	struct iommu_domain *dom; /* Used to invoke IOMMU API for RPCIT */
+	struct notifier_block nb;
+	struct list_head entry;
 };
 
 int kvm_s390_pci_dev_open(struct zpci_dev *zdev);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d91b2547f0bf..84acaf59a7d3 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2775,6 +2775,9 @@  int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	kvm_s390_crypto_init(kvm);
 
+	if (IS_ENABLED(CONFIG_VFIO_PCI))
+		kvm_s390_pci_init_list(kvm);
+
 	mutex_init(&kvm->arch.float_int.ais_lock);
 	spin_lock_init(&kvm->arch.float_int.lock);
 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
@@ -2860,6 +2863,8 @@  void kvm_arch_destroy_vm(struct kvm *kvm)
 	if (!kvm_is_ucontrol(kvm))
 		gmap_remove(kvm->arch.gmap);
 	kvm_s390_destroy_adapters(kvm);
+	if (IS_ENABLED(CONFIG_VFIO_PCI))
+		kvm_s390_pci_clear_list(kvm);
 	kvm_s390_clear_float_irqs(kvm);
 	kvm_s390_vsie_destroy(kvm);
 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index 1c42d25de697..28fe95f13c33 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -9,6 +9,7 @@ 
 
 #include <linux/kvm_host.h>
 #include <linux/pci.h>
+#include <linux/vfio.h>
 #include <asm/kvm_pci.h>
 #include <asm/pci.h>
 #include <asm/pci_insn.h>
@@ -23,6 +24,22 @@  static inline int __set_irq_noiib(u16 ctl, u8 isc)
 	return zpci_set_irq_ctrl(ctl, isc, &iib);
 }
 
+static struct kvm_zdev *get_kzdev_by_fh(struct kvm *kvm, u32 fh)
+{
+	struct kvm_zdev *kzdev, *retval = NULL;
+
+	spin_lock(&kvm->arch.kzdev_list_lock);
+	list_for_each_entry(kzdev, &kvm->arch.kzdev_list, entry) {
+		if (kzdev->zdev->fh == fh) {
+			retval = kzdev;
+			break;
+		}
+	}
+	spin_unlock(&kvm->arch.kzdev_list_lock);
+
+	return retval;
+}
+
 /* Caller must hold the aift lock before calling this function */
 void kvm_s390_pci_aen_exit(void)
 {
@@ -153,6 +170,20 @@  int kvm_s390_pci_aen_init(u8 nisc)
 	return rc;
 }
 
+static int kvm_s390_pci_group_notifier(struct notifier_block *nb,
+				       unsigned long action, void *data)
+{
+	struct kvm_zdev *kzdev = container_of(nb, struct kvm_zdev, nb);
+
+	if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
+		if (!data || !kzdev->zdev)
+			return NOTIFY_DONE;
+		kzdev->kvm = data;
+	}
+
+	return NOTIFY_OK;
+}
+
 int kvm_s390_pci_dev_open(struct zpci_dev *zdev)
 {
 	struct kvm_zdev *kzdev;
@@ -179,6 +210,200 @@  void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
 }
 EXPORT_SYMBOL_GPL(kvm_s390_pci_dev_release);
 
+static struct vfio_device *get_vdev(struct device *dev)
+{
+	struct vfio_device *(*fn)(struct device *dev);
+	struct vfio_device *vdev;
+
+	fn = symbol_get(vfio_device_get_from_dev);
+	if (!fn)
+		return NULL;
+
+	vdev = fn(dev);
+
+	symbol_put(vfio_device_get_from_dev);
+
+	return vdev;
+}
+
+static void put_vdev(struct vfio_device *vdev)
+{
+	void (*fn)(struct vfio_device *vdev);
+
+	fn = symbol_get(vfio_device_put);
+	if (!fn)
+		return;
+
+	fn(vdev);
+
+	symbol_put(vfio_device_put);
+}
+
+static int register_notifier(struct device *dev, struct notifier_block *nb)
+{
+	int (*fn)(struct device *dev, enum vfio_notify_type type,
+		  unsigned long *events, struct notifier_block *nb);
+	unsigned long events = VFIO_GROUP_NOTIFY_SET_KVM;
+	int rc;
+
+	fn = symbol_get(vfio_register_notifier);
+	if (!fn)
+		return -EINVAL;
+
+	rc = fn(dev, VFIO_GROUP_NOTIFY, &events, nb);
+
+	symbol_put(vfio_register_notifier);
+
+	return rc;
+}
+
+static int unregister_notifier(struct device *dev, struct notifier_block *nb)
+{
+	int (*fn)(struct device *dev, enum vfio_notify_type type,
+		  struct notifier_block *nb);
+	int rc;
+
+	fn = symbol_get(vfio_unregister_notifier);
+	if (!fn)
+		return -EINVAL;
+
+	rc = fn(dev, VFIO_GROUP_NOTIFY, nb);
+
+	symbol_put(vfio_unregister_notifier);
+
+	return rc;
+}
+
+int kvm_s390_pci_zpci_start(struct kvm *kvm, struct zpci_dev *zdev)
+{
+	struct vfio_device *vdev;
+	struct pci_dev *pdev;
+	int rc;
+
+	rc = kvm_s390_pci_dev_open(zdev);
+	if (rc)
+		return rc;
+
+	pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
+	if (!pdev) {
+		rc = -ENODEV;
+		goto exit_err;
+	}
+
+	vdev = get_vdev(&pdev->dev);
+	if (!vdev) {
+		pci_dev_put(pdev);
+		rc = -ENODEV;
+		goto exit_err;
+	}
+
+	zdev->kzdev->nb.notifier_call = kvm_s390_pci_group_notifier;
+
+	/*
+	 * At this point, a KVM should already be associated with this device,
+	 * so registering the notifier now should immediately trigger the
+	 * event.  We also want to know if the KVM association is later removed
+	 * to ensure proper cleanup happens.
+	 */
+	rc = register_notifier(vdev->dev, &zdev->kzdev->nb);
+
+	put_vdev(vdev);
+	pci_dev_put(pdev);
+
+	/* Make sure the registered KVM matches the KVM issuing the ioctl */
+	if (rc || zdev->kzdev->kvm != kvm) {
+		rc = -ENODEV;
+		goto exit_err;
+	}
+
+	/* Must support KVM-managed IOMMU to proceed */
+	if (IS_ENABLED(CONFIG_S390_KVM_IOMMU))
+		rc = zpci_iommu_attach_kvm(zdev, kvm);
+	else
+		rc = -EINVAL;
+
+	if (rc)
+		goto exit_err;
+
+	spin_lock(&kvm->arch.kzdev_list_lock);
+	list_add_tail(&zdev->kzdev->entry, &kvm->arch.kzdev_list);
+	spin_unlock(&kvm->arch.kzdev_list_lock);
+	return 0;
+
+exit_err:
+	kvm_s390_pci_dev_release(zdev);
+	return rc;
+}
+
+int kvm_s390_pci_zpci_stop(struct kvm *kvm, struct zpci_dev *zdev)
+{
+	struct vfio_device *vdev;
+	struct pci_dev *pdev;
+	int rc = 0;
+
+	if (!zdev || !zdev->kzdev)
+		return -EINVAL;
+
+	pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
+	if (!pdev) {
+		rc = -ENODEV;
+		goto exit_err;
+	}
+
+	vdev = get_vdev(&pdev->dev);
+	if (!vdev) {
+		pci_dev_put(pdev);
+		rc = -ENODEV;
+		goto exit_err;
+	}
+
+	spin_lock(&kvm->arch.kzdev_list_lock);
+	list_del(&zdev->kzdev->entry);
+	spin_unlock(&kvm->arch.kzdev_list_lock);
+
+	rc = unregister_notifier(vdev->dev, &zdev->kzdev->nb);
+
+	put_vdev(vdev);
+	pci_dev_put(pdev);
+
+exit_err:
+	kvm_s390_pci_dev_release(zdev);
+	return rc;
+}
+
+void kvm_s390_pci_init_list(struct kvm *kvm)
+{
+	spin_lock_init(&kvm->arch.kzdev_list_lock);
+	INIT_LIST_HEAD(&kvm->arch.kzdev_list);
+}
+
+void kvm_s390_pci_clear_list(struct kvm *kvm)
+{
+	struct kvm_zdev *tmp, *kzdev;
+	struct vfio_device *vdev;
+	struct pci_dev *pdev;
+	LIST_HEAD(remove);
+
+	spin_lock(&kvm->arch.kzdev_list_lock);
+	list_for_each_entry_safe(kzdev, tmp, &kvm->arch.kzdev_list, entry)
+		list_move_tail(&kzdev->entry, &remove);
+	spin_unlock(&kvm->arch.kzdev_list_lock);
+
+	list_for_each_entry_safe(kzdev, tmp, &remove, entry) {
+		pdev = pci_get_slot(kzdev->zdev->zbus->bus, kzdev->zdev->devfn);
+		if (pdev) {
+			vdev = get_vdev(&pdev->dev);
+			if (vdev) {
+				unregister_notifier(vdev->dev,
+						    &kzdev->nb);
+				put_vdev(vdev);
+			}
+			pci_dev_put(pdev);
+		}
+		kvm_s390_pci_dev_release(kzdev->zdev);
+	}
+}
+
 int kvm_s390_pci_init(void)
 {
 	aift = kzalloc(sizeof(struct zpci_aift), GFP_KERNEL);
diff --git a/arch/s390/kvm/pci.h b/arch/s390/kvm/pci.h
index 25cb1c787190..a95d9fdc91be 100644
--- a/arch/s390/kvm/pci.h
+++ b/arch/s390/kvm/pci.h
@@ -47,6 +47,11 @@  static inline struct kvm *kvm_s390_pci_si_to_kvm(struct zpci_aift *aift,
 int kvm_s390_pci_aen_init(u8 nisc);
 void kvm_s390_pci_aen_exit(void);
 
+int kvm_s390_pci_zpci_start(struct kvm *kvm, struct zpci_dev *zdev);
+int kvm_s390_pci_zpci_stop(struct kvm *kvm, struct zpci_dev *zdev);
+void kvm_s390_pci_init_list(struct kvm *kvm);
+void kvm_s390_pci_clear_list(struct kvm *kvm);
+
 int kvm_s390_pci_init(void);
 
 #endif /* __KVM_S390_PCI_H */