Message ID | 1-v3-7593f297c43f+56ce-vfio_unmap_notif_jgg@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Remove the VFIO_IOMMU_NOTIFY_DMA_UNMAP notifier | expand |
On 2022.07.04 21:59:03 -0300, Jason Gunthorpe wrote: > Instead of having drivers register the notifier with explicit code just > have them provide a dma_unmap callback op in their driver ops and rely on > the core code to wire it up. > > Suggested-by: Christoph Hellwig <hch@lst.de> > Reviewed-by: Christoph Hellwig <hch@lst.de> > Reviewed-by: Kevin Tian <kevin.tian@intel.com> > Reviewed-by: Tony Krowiak <akrowiak@linux.ibm.com> > Reviewed-by: Eric Farman <farman@linux.ibm.com> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> > --- > drivers/gpu/drm/i915/gvt/gvt.h | 1 - > drivers/gpu/drm/i915/gvt/kvmgt.c | 75 ++++----------- gvt change looks fine to me. Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com> > drivers/s390/cio/vfio_ccw_ops.c | 41 ++------ > drivers/s390/cio/vfio_ccw_private.h | 2 - > drivers/s390/crypto/vfio_ap_ops.c | 53 ++--------- > drivers/s390/crypto/vfio_ap_private.h | 3 - > drivers/vfio/vfio.c | 129 +++++++++----------------- > drivers/vfio/vfio.h | 3 + > include/linux/vfio.h | 21 +---- > 9 files changed, 88 insertions(+), 240 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h > index aee1a45da74bcb..705689e6401197 100644 > --- a/drivers/gpu/drm/i915/gvt/gvt.h > +++ b/drivers/gpu/drm/i915/gvt/gvt.h > @@ -226,7 +226,6 @@ struct intel_vgpu { > unsigned long nr_cache_entries; > struct mutex cache_lock; > > - struct notifier_block iommu_notifier; > atomic_t released; > > struct kvm_page_track_notifier_node track_node; > diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c > index e2f6c56ab3420c..ecd5bb37b63a2a 100644 > --- a/drivers/gpu/drm/i915/gvt/kvmgt.c > +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c > @@ -729,34 +729,25 @@ int intel_gvt_set_edid(struct intel_vgpu *vgpu, int port_num) > return ret; > } > > -static int intel_vgpu_iommu_notifier(struct notifier_block *nb, > - unsigned long action, void *data) > +static void intel_vgpu_dma_unmap(struct vfio_device *vfio_dev, u64 iova, > + u64 length) > { > - struct intel_vgpu *vgpu = > - container_of(nb, struct intel_vgpu, iommu_notifier); > - > - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { > - struct vfio_iommu_type1_dma_unmap *unmap = data; > - struct gvt_dma *entry; > - unsigned long iov_pfn, end_iov_pfn; > + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); > + struct gvt_dma *entry; > + u64 iov_pfn = iova >> PAGE_SHIFT; > + u64 end_iov_pfn = iov_pfn + length / PAGE_SIZE; > > - iov_pfn = unmap->iova >> PAGE_SHIFT; > - end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE; > + mutex_lock(&vgpu->cache_lock); > + for (; iov_pfn < end_iov_pfn; iov_pfn++) { > + entry = __gvt_cache_find_gfn(vgpu, iov_pfn); > + if (!entry) > + continue; > > - mutex_lock(&vgpu->cache_lock); > - for (; iov_pfn < end_iov_pfn; iov_pfn++) { > - entry = __gvt_cache_find_gfn(vgpu, iov_pfn); > - if (!entry) > - continue; > - > - gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr, > - entry->size); > - __gvt_cache_remove_entry(vgpu, entry); > - } > - mutex_unlock(&vgpu->cache_lock); > + gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr, > + entry->size); > + __gvt_cache_remove_entry(vgpu, entry); > } > - > - return NOTIFY_OK; > + mutex_unlock(&vgpu->cache_lock); > } > > static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu) > @@ -783,36 +774,20 @@ static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu) > static int intel_vgpu_open_device(struct vfio_device *vfio_dev) > { > struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); > - unsigned long events; > - int ret; > - > - vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier; > > - events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; > - ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events, > - &vgpu->iommu_notifier); > - if (ret != 0) { > - gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n", > - ret); > - goto out; > - } > - > - ret = -EEXIST; > if (vgpu->attached) > - goto undo_iommu; > + return -EEXIST; > > - ret = -ESRCH; > if (!vgpu->vfio_device.kvm || > vgpu->vfio_device.kvm->mm != current->mm) { > gvt_vgpu_err("KVM is required to use Intel vGPU\n"); > - goto undo_iommu; > + return -ESRCH; > } > > kvm_get_kvm(vgpu->vfio_device.kvm); > > - ret = -EEXIST; > if (__kvmgt_vgpu_exist(vgpu)) > - goto undo_iommu; > + return -EEXIST; > > vgpu->attached = true; > > @@ -831,12 +806,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) > > atomic_set(&vgpu->released, 0); > return 0; > - > -undo_iommu: > - vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, > - &vgpu->iommu_notifier); > -out: > - return ret; > } > > static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu) > @@ -853,8 +822,6 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu) > static void intel_vgpu_close_device(struct vfio_device *vfio_dev) > { > struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); > - struct drm_i915_private *i915 = vgpu->gvt->gt->i915; > - int ret; > > if (!vgpu->attached) > return; > @@ -864,11 +831,6 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev) > > intel_gvt_release_vgpu(vgpu); > > - ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_IOMMU_NOTIFY, > - &vgpu->iommu_notifier); > - drm_WARN(&i915->drm, ret, > - "vfio_unregister_notifier for iommu failed: %d\n", ret); > - > debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs)); > > kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm, > @@ -1610,6 +1572,7 @@ static const struct vfio_device_ops intel_vgpu_dev_ops = { > .write = intel_vgpu_write, > .mmap = intel_vgpu_mmap, > .ioctl = intel_vgpu_ioctl, > + .dma_unmap = intel_vgpu_dma_unmap, > }; > > static int intel_vgpu_probe(struct mdev_device *mdev) > diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c > index b49e2e9db2dc6f..09e0ce7b72324c 100644 > --- a/drivers/s390/cio/vfio_ccw_ops.c > +++ b/drivers/s390/cio/vfio_ccw_ops.c > @@ -44,31 +44,19 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private) > return ret; > } > > -static int vfio_ccw_mdev_notifier(struct notifier_block *nb, > - unsigned long action, > - void *data) > +static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length) > { > struct vfio_ccw_private *private = > - container_of(nb, struct vfio_ccw_private, nb); > - > - /* > - * Vendor drivers MUST unpin pages in response to an > - * invalidation. > - */ > - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { > - struct vfio_iommu_type1_dma_unmap *unmap = data; > - > - if (!cp_iova_pinned(&private->cp, unmap->iova)) > - return NOTIFY_OK; > + container_of(vdev, struct vfio_ccw_private, vdev); > > - if (vfio_ccw_mdev_reset(private)) > - return NOTIFY_BAD; > + /* Drivers MUST unpin pages in response to an invalidation. */ > + if (!cp_iova_pinned(&private->cp, iova)) > + return; > > - cp_free(&private->cp); > - return NOTIFY_OK; > - } > + if (vfio_ccw_mdev_reset(private)) > + return; > > - return NOTIFY_DONE; > + cp_free(&private->cp); > } > > static ssize_t name_show(struct mdev_type *mtype, > @@ -178,19 +166,11 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev) > { > struct vfio_ccw_private *private = > container_of(vdev, struct vfio_ccw_private, vdev); > - unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; > int ret; > > - private->nb.notifier_call = vfio_ccw_mdev_notifier; > - > - ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, > - &events, &private->nb); > - if (ret) > - return ret; > - > ret = vfio_ccw_register_async_dev_regions(private); > if (ret) > - goto out_unregister; > + return ret; > > ret = vfio_ccw_register_schib_dev_regions(private); > if (ret) > @@ -204,7 +184,6 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev) > > out_unregister: > vfio_ccw_unregister_dev_regions(private); > - vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb); > return ret; > } > > @@ -222,7 +201,6 @@ static void vfio_ccw_mdev_close_device(struct vfio_device *vdev) > > cp_free(&private->cp); > vfio_ccw_unregister_dev_regions(private); > - vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb); > } > > static ssize_t vfio_ccw_mdev_read_io_region(struct vfio_ccw_private *private, > @@ -645,6 +623,7 @@ static const struct vfio_device_ops vfio_ccw_dev_ops = { > .write = vfio_ccw_mdev_write, > .ioctl = vfio_ccw_mdev_ioctl, > .request = vfio_ccw_mdev_request, > + .dma_unmap = vfio_ccw_dma_unmap, > }; > > struct mdev_driver vfio_ccw_mdev_driver = { > diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h > index b7163bac8cc75d..61418109238976 100644 > --- a/drivers/s390/cio/vfio_ccw_private.h > +++ b/drivers/s390/cio/vfio_ccw_private.h > @@ -74,7 +74,6 @@ struct vfio_ccw_crw { > * @completion: synchronization helper of the I/O completion > * @avail: available for creating a mediated device > * @mdev: pointer to the mediated device > - * @nb: notifier for vfio events > * @io_region: MMIO region to input/output I/O arguments/results > * @io_mutex: protect against concurrent update of I/O regions > * @region: additional regions for other subchannel operations > @@ -98,7 +97,6 @@ struct vfio_ccw_private { > struct completion *completion; > atomic_t avail; > struct mdev_device *mdev; > - struct notifier_block nb; > struct ccw_io_region *io_region; > struct mutex io_mutex; > struct vfio_ccw_region *region; > diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c > index a7d2a95796d360..bb1a1677c5c230 100644 > --- a/drivers/s390/crypto/vfio_ap_ops.c > +++ b/drivers/s390/crypto/vfio_ap_ops.c > @@ -1226,34 +1226,14 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev, > return 0; > } > > -/** > - * vfio_ap_mdev_iommu_notifier - IOMMU notifier callback > - * > - * @nb: The notifier block > - * @action: Action to be taken > - * @data: data associated with the request > - * > - * For an UNMAP request, unpin the guest IOVA (the NIB guest address we > - * pinned before). Other requests are ignored. > - * > - * Return: for an UNMAP request, NOFITY_OK; otherwise NOTIFY_DONE. > - */ > -static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb, > - unsigned long action, void *data) > +static void vfio_ap_mdev_dma_unmap(struct vfio_device *vdev, u64 iova, > + u64 length) > { > - struct ap_matrix_mdev *matrix_mdev; > - > - matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier); > - > - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { > - struct vfio_iommu_type1_dma_unmap *unmap = data; > - unsigned long g_pfn = unmap->iova >> PAGE_SHIFT; > - > - vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1); > - return NOTIFY_OK; > - } > + struct ap_matrix_mdev *matrix_mdev = > + container_of(vdev, struct ap_matrix_mdev, vdev); > + unsigned long g_pfn = iova >> PAGE_SHIFT; > > - return NOTIFY_DONE; > + vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1); > } > > /** > @@ -1380,27 +1360,11 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev) > { > struct ap_matrix_mdev *matrix_mdev = > container_of(vdev, struct ap_matrix_mdev, vdev); > - unsigned long events; > - int ret; > > if (!vdev->kvm) > return -EINVAL; > > - ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm); > - if (ret) > - return ret; > - > - matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier; > - events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; > - ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events, > - &matrix_mdev->iommu_notifier); > - if (ret) > - goto err_kvm; > - return 0; > - > -err_kvm: > - vfio_ap_mdev_unset_kvm(matrix_mdev); > - return ret; > + return vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm); > } > > static void vfio_ap_mdev_close_device(struct vfio_device *vdev) > @@ -1408,8 +1372,6 @@ static void vfio_ap_mdev_close_device(struct vfio_device *vdev) > struct ap_matrix_mdev *matrix_mdev = > container_of(vdev, struct ap_matrix_mdev, vdev); > > - vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, > - &matrix_mdev->iommu_notifier); > vfio_ap_mdev_unset_kvm(matrix_mdev); > } > > @@ -1461,6 +1423,7 @@ static const struct vfio_device_ops vfio_ap_matrix_dev_ops = { > .open_device = vfio_ap_mdev_open_device, > .close_device = vfio_ap_mdev_close_device, > .ioctl = vfio_ap_mdev_ioctl, > + .dma_unmap = vfio_ap_mdev_dma_unmap, > }; > > static struct mdev_driver vfio_ap_matrix_driver = { > diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h > index a26efd804d0df3..abb59d59f81b20 100644 > --- a/drivers/s390/crypto/vfio_ap_private.h > +++ b/drivers/s390/crypto/vfio_ap_private.h > @@ -81,8 +81,6 @@ struct ap_matrix { > * @node: allows the ap_matrix_mdev struct to be added to a list > * @matrix: the adapters, usage domains and control domains assigned to the > * mediated matrix device. > - * @iommu_notifier: notifier block used for specifying callback function for > - * handling the VFIO_IOMMU_NOTIFY_DMA_UNMAP even > * @kvm: the struct holding guest's state > * @pqap_hook: the function pointer to the interception handler for the > * PQAP(AQIC) instruction. > @@ -92,7 +90,6 @@ struct ap_matrix_mdev { > struct vfio_device vdev; > struct list_head node; > struct ap_matrix matrix; > - struct notifier_block iommu_notifier; > struct kvm *kvm; > crypto_hook pqap_hook; > struct mdev_device *mdev; > diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c > index 61e71c1154be67..610bb884d9197b 100644 > --- a/drivers/vfio/vfio.c > +++ b/drivers/vfio/vfio.c > @@ -231,6 +231,9 @@ int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops) > { > struct vfio_iommu_driver *driver, *tmp; > > + if (WARN_ON(!ops->register_notifier != !ops->unregister_notifier)) > + return -EINVAL; > + > driver = kzalloc(sizeof(*driver), GFP_KERNEL); > if (!driver) > return -ENOMEM; > @@ -1077,8 +1080,20 @@ static void vfio_device_unassign_container(struct vfio_device *device) > up_write(&device->group->group_rwsem); > } > > +static int vfio_iommu_notifier(struct notifier_block *nb, unsigned long action, > + void *data) > +{ > + struct vfio_device *vfio_device = > + container_of(nb, struct vfio_device, iommu_nb); > + struct vfio_iommu_type1_dma_unmap *unmap = data; > + > + vfio_device->ops->dma_unmap(vfio_device, unmap->iova, unmap->size); > + return NOTIFY_OK; > +} > + > static struct file *vfio_device_open(struct vfio_device *device) > { > + struct vfio_iommu_driver *iommu_driver; > struct file *filep; > int ret; > > @@ -1109,6 +1124,18 @@ static struct file *vfio_device_open(struct vfio_device *device) > if (ret) > goto err_undo_count; > } > + > + iommu_driver = device->group->container->iommu_driver; > + if (device->ops->dma_unmap && iommu_driver && > + iommu_driver->ops->register_notifier) { > + unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; > + > + device->iommu_nb.notifier_call = vfio_iommu_notifier; > + iommu_driver->ops->register_notifier( > + device->group->container->iommu_data, &events, > + &device->iommu_nb); > + } > + > up_read(&device->group->group_rwsem); > } > mutex_unlock(&device->dev_set->lock); > @@ -1143,8 +1170,16 @@ static struct file *vfio_device_open(struct vfio_device *device) > err_close_device: > mutex_lock(&device->dev_set->lock); > down_read(&device->group->group_rwsem); > - if (device->open_count == 1 && device->ops->close_device) > + if (device->open_count == 1 && device->ops->close_device) { > device->ops->close_device(device); > + > + iommu_driver = device->group->container->iommu_driver; > + if (device->ops->dma_unmap && iommu_driver && > + iommu_driver->ops->unregister_notifier) > + iommu_driver->ops->unregister_notifier( > + device->group->container->iommu_data, > + &device->iommu_nb); > + } > err_undo_count: > device->open_count--; > if (device->open_count == 0 && device->kvm) > @@ -1339,12 +1374,20 @@ static const struct file_operations vfio_group_fops = { > static int vfio_device_fops_release(struct inode *inode, struct file *filep) > { > struct vfio_device *device = filep->private_data; > + struct vfio_iommu_driver *iommu_driver; > > mutex_lock(&device->dev_set->lock); > vfio_assert_device_open(device); > down_read(&device->group->group_rwsem); > if (device->open_count == 1 && device->ops->close_device) > device->ops->close_device(device); > + > + iommu_driver = device->group->container->iommu_driver; > + if (device->ops->dma_unmap && iommu_driver && > + iommu_driver->ops->unregister_notifier) > + iommu_driver->ops->unregister_notifier( > + device->group->container->iommu_data, > + &device->iommu_nb); > up_read(&device->group->group_rwsem); > device->open_count--; > if (device->open_count == 0) > @@ -2027,90 +2070,6 @@ int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data, > } > EXPORT_SYMBOL(vfio_dma_rw); > > -static int vfio_register_iommu_notifier(struct vfio_group *group, > - unsigned long *events, > - struct notifier_block *nb) > -{ > - struct vfio_container *container; > - struct vfio_iommu_driver *driver; > - int ret; > - > - lockdep_assert_held_read(&group->group_rwsem); > - > - container = group->container; > - driver = container->iommu_driver; > - if (likely(driver && driver->ops->register_notifier)) > - ret = driver->ops->register_notifier(container->iommu_data, > - events, nb); > - else > - ret = -ENOTTY; > - > - return ret; > -} > - > -static int vfio_unregister_iommu_notifier(struct vfio_group *group, > - struct notifier_block *nb) > -{ > - struct vfio_container *container; > - struct vfio_iommu_driver *driver; > - int ret; > - > - lockdep_assert_held_read(&group->group_rwsem); > - > - container = group->container; > - driver = container->iommu_driver; > - if (likely(driver && driver->ops->unregister_notifier)) > - ret = driver->ops->unregister_notifier(container->iommu_data, > - nb); > - else > - ret = -ENOTTY; > - > - return ret; > -} > - > -int vfio_register_notifier(struct vfio_device *device, > - enum vfio_notify_type type, unsigned long *events, > - struct notifier_block *nb) > -{ > - struct vfio_group *group = device->group; > - int ret; > - > - if (!nb || !events || (*events == 0) || > - !vfio_assert_device_open(device)) > - return -EINVAL; > - > - switch (type) { > - case VFIO_IOMMU_NOTIFY: > - ret = vfio_register_iommu_notifier(group, events, nb); > - break; > - default: > - ret = -EINVAL; > - } > - return ret; > -} > -EXPORT_SYMBOL(vfio_register_notifier); > - > -int vfio_unregister_notifier(struct vfio_device *device, > - enum vfio_notify_type type, > - struct notifier_block *nb) > -{ > - struct vfio_group *group = device->group; > - int ret; > - > - if (!nb || !vfio_assert_device_open(device)) > - return -EINVAL; > - > - switch (type) { > - case VFIO_IOMMU_NOTIFY: > - ret = vfio_unregister_iommu_notifier(group, nb); > - break; > - default: > - ret = -EINVAL; > - } > - return ret; > -} > -EXPORT_SYMBOL(vfio_unregister_notifier); > - > /* > * Module/class support > */ > diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h > index a6713022115155..25da02ca1568fc 100644 > --- a/drivers/vfio/vfio.h > +++ b/drivers/vfio/vfio.h > @@ -33,6 +33,9 @@ enum vfio_iommu_notify_type { > VFIO_IOMMU_CONTAINER_CLOSE = 0, > }; > > +/* events for register_notifier() */ > +#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0) > + > /** > * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks > */ > diff --git a/include/linux/vfio.h b/include/linux/vfio.h > index 49580fa2073a8d..8c5c389d42d918 100644 > --- a/include/linux/vfio.h > +++ b/include/linux/vfio.h > @@ -44,6 +44,7 @@ struct vfio_device { > unsigned int open_count; > struct completion comp; > struct list_head group_next; > + struct notifier_block iommu_nb; > }; > > /** > @@ -60,6 +61,8 @@ struct vfio_device { > * @match: Optional device name match callback (return: 0 for no-match, >0 for > * match, -errno for abort (ex. match with insufficient or incorrect > * additional args) > + * @dma_unmap: Called when userspace unmaps IOVA from the container > + * this device is attached to. > * @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl > * @migration_set_state: Optional callback to change the migration state for > * devices that support migration. It's mandatory for > @@ -85,6 +88,7 @@ struct vfio_device_ops { > int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma); > void (*request)(struct vfio_device *vdev, unsigned int count); > int (*match)(struct vfio_device *vdev, char *buf); > + void (*dma_unmap)(struct vfio_device *vdev, u64 iova, u64 length); > int (*device_feature)(struct vfio_device *device, u32 flags, > void __user *arg, size_t argsz); > struct file *(*migration_set_state)( > @@ -154,23 +158,6 @@ int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, > int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, > void *data, size_t len, bool write); > > -/* each type has independent events */ > -enum vfio_notify_type { > - VFIO_IOMMU_NOTIFY = 0, > -}; > - > -/* events for VFIO_IOMMU_NOTIFY */ > -#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0) > - > -int vfio_register_notifier(struct vfio_device *device, > - enum vfio_notify_type type, > - unsigned long *required_events, > - struct notifier_block *nb); > -int vfio_unregister_notifier(struct vfio_device *device, > - enum vfio_notify_type type, > - struct notifier_block *nb); > - > - > /* > * Sub-module helpers > */ > -- > 2.37.0 >
On Mon, 4 Jul 2022 21:59:03 -0300 Jason Gunthorpe <jgg@nvidia.com> wrote: > diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c > index b49e2e9db2dc6f..09e0ce7b72324c 100644 > --- a/drivers/s390/cio/vfio_ccw_ops.c > +++ b/drivers/s390/cio/vfio_ccw_ops.c > @@ -44,31 +44,19 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private) > return ret; > } > > -static int vfio_ccw_mdev_notifier(struct notifier_block *nb, > - unsigned long action, > - void *data) > +static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length) > { > struct vfio_ccw_private *private = > - container_of(nb, struct vfio_ccw_private, nb); > - > - /* > - * Vendor drivers MUST unpin pages in response to an > - * invalidation. > - */ > - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { > - struct vfio_iommu_type1_dma_unmap *unmap = data; > - > - if (!cp_iova_pinned(&private->cp, unmap->iova)) > - return NOTIFY_OK; > + container_of(vdev, struct vfio_ccw_private, vdev); > > - if (vfio_ccw_mdev_reset(private)) > - return NOTIFY_BAD; > + /* Drivers MUST unpin pages in response to an invalidation. */ > + if (!cp_iova_pinned(&private->cp, iova)) > + return; > > - cp_free(&private->cp); > - return NOTIFY_OK; > - } > + if (vfio_ccw_mdev_reset(private)) > + return; > > - return NOTIFY_DONE; > + cp_free(&private->cp); > } The cp_free() call is gone here with [1], so I think this function now just ends with: ... vfio_ccw_mdev_reset(private); } There are also minor contextual differences elsewhere from that series, so a quick respin to record the changes on list would be appreciated. However the above kind of highlights that NOTIFY_BAD that silently gets dropped here. I realize we weren't testing the return value of the notifier call chain and really we didn't intend that notifiers could return a failure here, but does this warrant some logging or suggest future work to allow a device to go offline here? Thanks. Alex [1]https://lore.kernel.org/all/20220707135737.720765-1-farman@linux.ibm.com/
On Thu, Jul 07, 2022 at 03:37:16PM -0600, Alex Williamson wrote: > On Mon, 4 Jul 2022 21:59:03 -0300 > Jason Gunthorpe <jgg@nvidia.com> wrote: > > diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c > > index b49e2e9db2dc6f..09e0ce7b72324c 100644 > > --- a/drivers/s390/cio/vfio_ccw_ops.c > > +++ b/drivers/s390/cio/vfio_ccw_ops.c > > @@ -44,31 +44,19 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private) > > return ret; > > } > > > > -static int vfio_ccw_mdev_notifier(struct notifier_block *nb, > > - unsigned long action, > > - void *data) > > +static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length) > > { > > struct vfio_ccw_private *private = > > - container_of(nb, struct vfio_ccw_private, nb); > > - > > - /* > > - * Vendor drivers MUST unpin pages in response to an > > - * invalidation. > > - */ > > - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { > > - struct vfio_iommu_type1_dma_unmap *unmap = data; > > - > > - if (!cp_iova_pinned(&private->cp, unmap->iova)) > > - return NOTIFY_OK; > > + container_of(vdev, struct vfio_ccw_private, vdev); > > > > - if (vfio_ccw_mdev_reset(private)) > > - return NOTIFY_BAD; > > + /* Drivers MUST unpin pages in response to an invalidation. */ > > + if (!cp_iova_pinned(&private->cp, iova)) > > + return; > > > > - cp_free(&private->cp); > > - return NOTIFY_OK; > > - } > > + if (vfio_ccw_mdev_reset(private)) > > + return; > > > > - return NOTIFY_DONE; > > + cp_free(&private->cp); > > } > > > The cp_free() call is gone here with [1], so I think this function now > just ends with: > > ... > vfio_ccw_mdev_reset(private); > } > > There are also minor contextual differences elsewhere from that series, > so a quick respin to record the changes on list would be appreciated. > > However the above kind of highlights that NOTIFY_BAD that silently gets > dropped here. I realize we weren't testing the return value of the > notifier call chain and really we didn't intend that notifiers could > return a failure here, but does this warrant some logging or suggest > future work to allow a device to go offline here? Thanks. It looks like no. If the FSM trapped in a bad state here, such as VFIO_CCW_STATE_NOT_OPER, then it means it should have already unpinned the pages and this is considered a success for this purpose The return code here exists only to return to userspace so it can detect during a VFIO_DEVICE_RESET that the device has crashed irrecoverably. Thus just continuing to silently ignore it seems like the best thing. Jason
On Tue, Jul 19 2022, Jason Gunthorpe <jgg@nvidia.com> wrote: > On Thu, Jul 07, 2022 at 03:37:16PM -0600, Alex Williamson wrote: >> On Mon, 4 Jul 2022 21:59:03 -0300 >> Jason Gunthorpe <jgg@nvidia.com> wrote: >> > diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c >> > index b49e2e9db2dc6f..09e0ce7b72324c 100644 >> > --- a/drivers/s390/cio/vfio_ccw_ops.c >> > +++ b/drivers/s390/cio/vfio_ccw_ops.c >> > @@ -44,31 +44,19 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private) >> > return ret; >> > } >> > >> > -static int vfio_ccw_mdev_notifier(struct notifier_block *nb, >> > - unsigned long action, >> > - void *data) >> > +static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length) >> > { >> > struct vfio_ccw_private *private = >> > - container_of(nb, struct vfio_ccw_private, nb); >> > - >> > - /* >> > - * Vendor drivers MUST unpin pages in response to an >> > - * invalidation. >> > - */ >> > - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { >> > - struct vfio_iommu_type1_dma_unmap *unmap = data; >> > - >> > - if (!cp_iova_pinned(&private->cp, unmap->iova)) >> > - return NOTIFY_OK; >> > + container_of(vdev, struct vfio_ccw_private, vdev); >> > >> > - if (vfio_ccw_mdev_reset(private)) >> > - return NOTIFY_BAD; >> > + /* Drivers MUST unpin pages in response to an invalidation. */ >> > + if (!cp_iova_pinned(&private->cp, iova)) >> > + return; >> > >> > - cp_free(&private->cp); >> > - return NOTIFY_OK; >> > - } >> > + if (vfio_ccw_mdev_reset(private)) >> > + return; >> > >> > - return NOTIFY_DONE; >> > + cp_free(&private->cp); >> > } >> >> >> The cp_free() call is gone here with [1], so I think this function now >> just ends with: >> >> ... >> vfio_ccw_mdev_reset(private); >> } >> >> There are also minor contextual differences elsewhere from that series, >> so a quick respin to record the changes on list would be appreciated. >> >> However the above kind of highlights that NOTIFY_BAD that silently gets >> dropped here. I realize we weren't testing the return value of the >> notifier call chain and really we didn't intend that notifiers could >> return a failure here, but does this warrant some logging or suggest >> future work to allow a device to go offline here? Thanks. > > It looks like no. > > If the FSM trapped in a bad state here, such as > VFIO_CCW_STATE_NOT_OPER, then it means it should have already unpinned > the pages and this is considered a success for this purpose A rather pathological case would be a subchannel that cannot be quiesced and does not end up being non-operational; in theory, the hardware could still try to access the buffers we provided for I/O. I'd say that is extremely unlikely, we might log it, but really cannot do anything else. > > The return code here exists only to return to userspace so it can > detect during a VFIO_DEVICE_RESET that the device has crashed > irrecoverably. Does it imply only that ("it's dead, Jim"), or can it also imply a runaway device? Not that userspace can do much in any case. > > Thus just continuing to silently ignore it seems like the best thing. > > Jason
On Wed, Jul 20, 2022 at 09:47:12AM +0200, Cornelia Huck wrote: > > If the FSM trapped in a bad state here, such as > > VFIO_CCW_STATE_NOT_OPER, then it means it should have already unpinned > > the pages and this is considered a success for this purpose > > A rather pathological case would be a subchannel that cannot be > quiesced and does not end up being non-operational; in theory, the > hardware could still try to access the buffers we provided for I/O. I'd > say that is extremely unlikely, we might log it, but really cannot do > anything else. I think if the FSM can't reach NOT_OPER then it would be appropriate to panic the kernel when it realizes it has lost control of the device. > > The return code here exists only to return to userspace so it can > > detect during a VFIO_DEVICE_RESET that the device has crashed > > irrecoverably. > > Does it imply only that ("it's dead, Jim"), or can it also imply a > runaway device? Not that userspace can do much in any case. The kernel cannot permit a runaway device, the driver must panic if it unable to quiet the device's DMA. I assume this return from RESET is for cases where quieting was successful. Jason
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index aee1a45da74bcb..705689e6401197 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -226,7 +226,6 @@ struct intel_vgpu { unsigned long nr_cache_entries; struct mutex cache_lock; - struct notifier_block iommu_notifier; atomic_t released; struct kvm_page_track_notifier_node track_node; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index e2f6c56ab3420c..ecd5bb37b63a2a 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -729,34 +729,25 @@ int intel_gvt_set_edid(struct intel_vgpu *vgpu, int port_num) return ret; } -static int intel_vgpu_iommu_notifier(struct notifier_block *nb, - unsigned long action, void *data) +static void intel_vgpu_dma_unmap(struct vfio_device *vfio_dev, u64 iova, + u64 length) { - struct intel_vgpu *vgpu = - container_of(nb, struct intel_vgpu, iommu_notifier); - - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { - struct vfio_iommu_type1_dma_unmap *unmap = data; - struct gvt_dma *entry; - unsigned long iov_pfn, end_iov_pfn; + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); + struct gvt_dma *entry; + u64 iov_pfn = iova >> PAGE_SHIFT; + u64 end_iov_pfn = iov_pfn + length / PAGE_SIZE; - iov_pfn = unmap->iova >> PAGE_SHIFT; - end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE; + mutex_lock(&vgpu->cache_lock); + for (; iov_pfn < end_iov_pfn; iov_pfn++) { + entry = __gvt_cache_find_gfn(vgpu, iov_pfn); + if (!entry) + continue; - mutex_lock(&vgpu->cache_lock); - for (; iov_pfn < end_iov_pfn; iov_pfn++) { - entry = __gvt_cache_find_gfn(vgpu, iov_pfn); - if (!entry) - continue; - - gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr, - entry->size); - __gvt_cache_remove_entry(vgpu, entry); - } - mutex_unlock(&vgpu->cache_lock); + gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr, + entry->size); + __gvt_cache_remove_entry(vgpu, entry); } - - return NOTIFY_OK; + mutex_unlock(&vgpu->cache_lock); } static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu) @@ -783,36 +774,20 @@ static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu) static int intel_vgpu_open_device(struct vfio_device *vfio_dev) { struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); - unsigned long events; - int ret; - - vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier; - events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; - ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events, - &vgpu->iommu_notifier); - if (ret != 0) { - gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n", - ret); - goto out; - } - - ret = -EEXIST; if (vgpu->attached) - goto undo_iommu; + return -EEXIST; - ret = -ESRCH; if (!vgpu->vfio_device.kvm || vgpu->vfio_device.kvm->mm != current->mm) { gvt_vgpu_err("KVM is required to use Intel vGPU\n"); - goto undo_iommu; + return -ESRCH; } kvm_get_kvm(vgpu->vfio_device.kvm); - ret = -EEXIST; if (__kvmgt_vgpu_exist(vgpu)) - goto undo_iommu; + return -EEXIST; vgpu->attached = true; @@ -831,12 +806,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) atomic_set(&vgpu->released, 0); return 0; - -undo_iommu: - vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, - &vgpu->iommu_notifier); -out: - return ret; } static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu) @@ -853,8 +822,6 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu) static void intel_vgpu_close_device(struct vfio_device *vfio_dev) { struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); - struct drm_i915_private *i915 = vgpu->gvt->gt->i915; - int ret; if (!vgpu->attached) return; @@ -864,11 +831,6 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev) intel_gvt_release_vgpu(vgpu); - ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_IOMMU_NOTIFY, - &vgpu->iommu_notifier); - drm_WARN(&i915->drm, ret, - "vfio_unregister_notifier for iommu failed: %d\n", ret); - debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs)); kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm, @@ -1610,6 +1572,7 @@ static const struct vfio_device_ops intel_vgpu_dev_ops = { .write = intel_vgpu_write, .mmap = intel_vgpu_mmap, .ioctl = intel_vgpu_ioctl, + .dma_unmap = intel_vgpu_dma_unmap, }; static int intel_vgpu_probe(struct mdev_device *mdev) diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index b49e2e9db2dc6f..09e0ce7b72324c 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -44,31 +44,19 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private) return ret; } -static int vfio_ccw_mdev_notifier(struct notifier_block *nb, - unsigned long action, - void *data) +static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length) { struct vfio_ccw_private *private = - container_of(nb, struct vfio_ccw_private, nb); - - /* - * Vendor drivers MUST unpin pages in response to an - * invalidation. - */ - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { - struct vfio_iommu_type1_dma_unmap *unmap = data; - - if (!cp_iova_pinned(&private->cp, unmap->iova)) - return NOTIFY_OK; + container_of(vdev, struct vfio_ccw_private, vdev); - if (vfio_ccw_mdev_reset(private)) - return NOTIFY_BAD; + /* Drivers MUST unpin pages in response to an invalidation. */ + if (!cp_iova_pinned(&private->cp, iova)) + return; - cp_free(&private->cp); - return NOTIFY_OK; - } + if (vfio_ccw_mdev_reset(private)) + return; - return NOTIFY_DONE; + cp_free(&private->cp); } static ssize_t name_show(struct mdev_type *mtype, @@ -178,19 +166,11 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev) { struct vfio_ccw_private *private = container_of(vdev, struct vfio_ccw_private, vdev); - unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; int ret; - private->nb.notifier_call = vfio_ccw_mdev_notifier; - - ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, - &events, &private->nb); - if (ret) - return ret; - ret = vfio_ccw_register_async_dev_regions(private); if (ret) - goto out_unregister; + return ret; ret = vfio_ccw_register_schib_dev_regions(private); if (ret) @@ -204,7 +184,6 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev) out_unregister: vfio_ccw_unregister_dev_regions(private); - vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb); return ret; } @@ -222,7 +201,6 @@ static void vfio_ccw_mdev_close_device(struct vfio_device *vdev) cp_free(&private->cp); vfio_ccw_unregister_dev_regions(private); - vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb); } static ssize_t vfio_ccw_mdev_read_io_region(struct vfio_ccw_private *private, @@ -645,6 +623,7 @@ static const struct vfio_device_ops vfio_ccw_dev_ops = { .write = vfio_ccw_mdev_write, .ioctl = vfio_ccw_mdev_ioctl, .request = vfio_ccw_mdev_request, + .dma_unmap = vfio_ccw_dma_unmap, }; struct mdev_driver vfio_ccw_mdev_driver = { diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h index b7163bac8cc75d..61418109238976 100644 --- a/drivers/s390/cio/vfio_ccw_private.h +++ b/drivers/s390/cio/vfio_ccw_private.h @@ -74,7 +74,6 @@ struct vfio_ccw_crw { * @completion: synchronization helper of the I/O completion * @avail: available for creating a mediated device * @mdev: pointer to the mediated device - * @nb: notifier for vfio events * @io_region: MMIO region to input/output I/O arguments/results * @io_mutex: protect against concurrent update of I/O regions * @region: additional regions for other subchannel operations @@ -98,7 +97,6 @@ struct vfio_ccw_private { struct completion *completion; atomic_t avail; struct mdev_device *mdev; - struct notifier_block nb; struct ccw_io_region *io_region; struct mutex io_mutex; struct vfio_ccw_region *region; diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index a7d2a95796d360..bb1a1677c5c230 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -1226,34 +1226,14 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev, return 0; } -/** - * vfio_ap_mdev_iommu_notifier - IOMMU notifier callback - * - * @nb: The notifier block - * @action: Action to be taken - * @data: data associated with the request - * - * For an UNMAP request, unpin the guest IOVA (the NIB guest address we - * pinned before). Other requests are ignored. - * - * Return: for an UNMAP request, NOFITY_OK; otherwise NOTIFY_DONE. - */ -static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb, - unsigned long action, void *data) +static void vfio_ap_mdev_dma_unmap(struct vfio_device *vdev, u64 iova, + u64 length) { - struct ap_matrix_mdev *matrix_mdev; - - matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier); - - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { - struct vfio_iommu_type1_dma_unmap *unmap = data; - unsigned long g_pfn = unmap->iova >> PAGE_SHIFT; - - vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1); - return NOTIFY_OK; - } + struct ap_matrix_mdev *matrix_mdev = + container_of(vdev, struct ap_matrix_mdev, vdev); + unsigned long g_pfn = iova >> PAGE_SHIFT; - return NOTIFY_DONE; + vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1); } /** @@ -1380,27 +1360,11 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev) { struct ap_matrix_mdev *matrix_mdev = container_of(vdev, struct ap_matrix_mdev, vdev); - unsigned long events; - int ret; if (!vdev->kvm) return -EINVAL; - ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm); - if (ret) - return ret; - - matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier; - events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; - ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events, - &matrix_mdev->iommu_notifier); - if (ret) - goto err_kvm; - return 0; - -err_kvm: - vfio_ap_mdev_unset_kvm(matrix_mdev); - return ret; + return vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm); } static void vfio_ap_mdev_close_device(struct vfio_device *vdev) @@ -1408,8 +1372,6 @@ static void vfio_ap_mdev_close_device(struct vfio_device *vdev) struct ap_matrix_mdev *matrix_mdev = container_of(vdev, struct ap_matrix_mdev, vdev); - vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, - &matrix_mdev->iommu_notifier); vfio_ap_mdev_unset_kvm(matrix_mdev); } @@ -1461,6 +1423,7 @@ static const struct vfio_device_ops vfio_ap_matrix_dev_ops = { .open_device = vfio_ap_mdev_open_device, .close_device = vfio_ap_mdev_close_device, .ioctl = vfio_ap_mdev_ioctl, + .dma_unmap = vfio_ap_mdev_dma_unmap, }; static struct mdev_driver vfio_ap_matrix_driver = { diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h index a26efd804d0df3..abb59d59f81b20 100644 --- a/drivers/s390/crypto/vfio_ap_private.h +++ b/drivers/s390/crypto/vfio_ap_private.h @@ -81,8 +81,6 @@ struct ap_matrix { * @node: allows the ap_matrix_mdev struct to be added to a list * @matrix: the adapters, usage domains and control domains assigned to the * mediated matrix device. - * @iommu_notifier: notifier block used for specifying callback function for - * handling the VFIO_IOMMU_NOTIFY_DMA_UNMAP even * @kvm: the struct holding guest's state * @pqap_hook: the function pointer to the interception handler for the * PQAP(AQIC) instruction. @@ -92,7 +90,6 @@ struct ap_matrix_mdev { struct vfio_device vdev; struct list_head node; struct ap_matrix matrix; - struct notifier_block iommu_notifier; struct kvm *kvm; crypto_hook pqap_hook; struct mdev_device *mdev; diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 61e71c1154be67..610bb884d9197b 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -231,6 +231,9 @@ int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops) { struct vfio_iommu_driver *driver, *tmp; + if (WARN_ON(!ops->register_notifier != !ops->unregister_notifier)) + return -EINVAL; + driver = kzalloc(sizeof(*driver), GFP_KERNEL); if (!driver) return -ENOMEM; @@ -1077,8 +1080,20 @@ static void vfio_device_unassign_container(struct vfio_device *device) up_write(&device->group->group_rwsem); } +static int vfio_iommu_notifier(struct notifier_block *nb, unsigned long action, + void *data) +{ + struct vfio_device *vfio_device = + container_of(nb, struct vfio_device, iommu_nb); + struct vfio_iommu_type1_dma_unmap *unmap = data; + + vfio_device->ops->dma_unmap(vfio_device, unmap->iova, unmap->size); + return NOTIFY_OK; +} + static struct file *vfio_device_open(struct vfio_device *device) { + struct vfio_iommu_driver *iommu_driver; struct file *filep; int ret; @@ -1109,6 +1124,18 @@ static struct file *vfio_device_open(struct vfio_device *device) if (ret) goto err_undo_count; } + + iommu_driver = device->group->container->iommu_driver; + if (device->ops->dma_unmap && iommu_driver && + iommu_driver->ops->register_notifier) { + unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; + + device->iommu_nb.notifier_call = vfio_iommu_notifier; + iommu_driver->ops->register_notifier( + device->group->container->iommu_data, &events, + &device->iommu_nb); + } + up_read(&device->group->group_rwsem); } mutex_unlock(&device->dev_set->lock); @@ -1143,8 +1170,16 @@ static struct file *vfio_device_open(struct vfio_device *device) err_close_device: mutex_lock(&device->dev_set->lock); down_read(&device->group->group_rwsem); - if (device->open_count == 1 && device->ops->close_device) + if (device->open_count == 1 && device->ops->close_device) { device->ops->close_device(device); + + iommu_driver = device->group->container->iommu_driver; + if (device->ops->dma_unmap && iommu_driver && + iommu_driver->ops->unregister_notifier) + iommu_driver->ops->unregister_notifier( + device->group->container->iommu_data, + &device->iommu_nb); + } err_undo_count: device->open_count--; if (device->open_count == 0 && device->kvm) @@ -1339,12 +1374,20 @@ static const struct file_operations vfio_group_fops = { static int vfio_device_fops_release(struct inode *inode, struct file *filep) { struct vfio_device *device = filep->private_data; + struct vfio_iommu_driver *iommu_driver; mutex_lock(&device->dev_set->lock); vfio_assert_device_open(device); down_read(&device->group->group_rwsem); if (device->open_count == 1 && device->ops->close_device) device->ops->close_device(device); + + iommu_driver = device->group->container->iommu_driver; + if (device->ops->dma_unmap && iommu_driver && + iommu_driver->ops->unregister_notifier) + iommu_driver->ops->unregister_notifier( + device->group->container->iommu_data, + &device->iommu_nb); up_read(&device->group->group_rwsem); device->open_count--; if (device->open_count == 0) @@ -2027,90 +2070,6 @@ int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data, } EXPORT_SYMBOL(vfio_dma_rw); -static int vfio_register_iommu_notifier(struct vfio_group *group, - unsigned long *events, - struct notifier_block *nb) -{ - struct vfio_container *container; - struct vfio_iommu_driver *driver; - int ret; - - lockdep_assert_held_read(&group->group_rwsem); - - container = group->container; - driver = container->iommu_driver; - if (likely(driver && driver->ops->register_notifier)) - ret = driver->ops->register_notifier(container->iommu_data, - events, nb); - else - ret = -ENOTTY; - - return ret; -} - -static int vfio_unregister_iommu_notifier(struct vfio_group *group, - struct notifier_block *nb) -{ - struct vfio_container *container; - struct vfio_iommu_driver *driver; - int ret; - - lockdep_assert_held_read(&group->group_rwsem); - - container = group->container; - driver = container->iommu_driver; - if (likely(driver && driver->ops->unregister_notifier)) - ret = driver->ops->unregister_notifier(container->iommu_data, - nb); - else - ret = -ENOTTY; - - return ret; -} - -int vfio_register_notifier(struct vfio_device *device, - enum vfio_notify_type type, unsigned long *events, - struct notifier_block *nb) -{ - struct vfio_group *group = device->group; - int ret; - - if (!nb || !events || (*events == 0) || - !vfio_assert_device_open(device)) - return -EINVAL; - - switch (type) { - case VFIO_IOMMU_NOTIFY: - ret = vfio_register_iommu_notifier(group, events, nb); - break; - default: - ret = -EINVAL; - } - return ret; -} -EXPORT_SYMBOL(vfio_register_notifier); - -int vfio_unregister_notifier(struct vfio_device *device, - enum vfio_notify_type type, - struct notifier_block *nb) -{ - struct vfio_group *group = device->group; - int ret; - - if (!nb || !vfio_assert_device_open(device)) - return -EINVAL; - - switch (type) { - case VFIO_IOMMU_NOTIFY: - ret = vfio_unregister_iommu_notifier(group, nb); - break; - default: - ret = -EINVAL; - } - return ret; -} -EXPORT_SYMBOL(vfio_unregister_notifier); - /* * Module/class support */ diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index a6713022115155..25da02ca1568fc 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -33,6 +33,9 @@ enum vfio_iommu_notify_type { VFIO_IOMMU_CONTAINER_CLOSE = 0, }; +/* events for register_notifier() */ +#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0) + /** * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks */ diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 49580fa2073a8d..8c5c389d42d918 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -44,6 +44,7 @@ struct vfio_device { unsigned int open_count; struct completion comp; struct list_head group_next; + struct notifier_block iommu_nb; }; /** @@ -60,6 +61,8 @@ struct vfio_device { * @match: Optional device name match callback (return: 0 for no-match, >0 for * match, -errno for abort (ex. match with insufficient or incorrect * additional args) + * @dma_unmap: Called when userspace unmaps IOVA from the container + * this device is attached to. * @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl * @migration_set_state: Optional callback to change the migration state for * devices that support migration. It's mandatory for @@ -85,6 +88,7 @@ struct vfio_device_ops { int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma); void (*request)(struct vfio_device *vdev, unsigned int count); int (*match)(struct vfio_device *vdev, char *buf); + void (*dma_unmap)(struct vfio_device *vdev, u64 iova, u64 length); int (*device_feature)(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz); struct file *(*migration_set_state)( @@ -154,23 +158,6 @@ int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data, size_t len, bool write); -/* each type has independent events */ -enum vfio_notify_type { - VFIO_IOMMU_NOTIFY = 0, -}; - -/* events for VFIO_IOMMU_NOTIFY */ -#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0) - -int vfio_register_notifier(struct vfio_device *device, - enum vfio_notify_type type, - unsigned long *required_events, - struct notifier_block *nb); -int vfio_unregister_notifier(struct vfio_device *device, - enum vfio_notify_type type, - struct notifier_block *nb); - - /* * Sub-module helpers */