Message ID | 20220517180851.166538-2-mjrosato@linux.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | vfio: remove VFIO_GROUP_NOTIFY_SET_KVM | expand |
On Tue, May 17, 2022 at 02:08:51PM -0400, Matthew Rosato wrote: > Rather than relying on a notifier for associating the KVM with > the group, let's assume that the association has already been > made prior to device_open. The first time a device is opened > associate the group KVM with the device. This also fixes a user triggerable oops in gvt > Suggested-by: Jason Gunthorpe <jgg@nvidia.com> You can add my signed-off-by as well > diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c > index cfcff7764403..c5d421eda275 100644 > +++ b/drivers/vfio/vfio.c > @@ -10,6 +10,7 @@ > * Author: Tom Lyon, pugs@cisco.com > */ > > +#include "linux/kvm_host.h" This is the wrong format of include (editor automation, sigh) > @@ -1083,6 +1084,13 @@ static struct file *vfio_device_open(struct vfio_device *device) > > mutex_lock(&device->dev_set->lock); > device->open_count++; > + down_write(&device->group->group_rwsem); Read I suppose > + if (device->open_count == 1 && device->group->kvm) { > + device->kvm = device->group->kvm; > + kvm_get_kvm(device->kvm); > + } > + up_write(&device->group->group_rwsem); Yeah, this is OK, not very elegant though I was looking at this - but it could come later too: diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 8e11d9119418be..c5d8dfe8314108 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -10,7 +10,6 @@ * Author: Tom Lyon, pugs@cisco.com */ -#include "linux/kvm_host.h" #include <linux/cdev.h> #include <linux/compat.h> #include <linux/device.h> @@ -33,6 +32,7 @@ #include <linux/vfio.h> #include <linux/wait.h> #include <linux/sched/signal.h> +#include <linux/kvm_host.h> #include "vfio.h" #define DRIVER_VERSION "0.3" @@ -1059,93 +1059,71 @@ static int vfio_device_assign_container(struct vfio_device *device) static void vfio_device_unassign_container(struct vfio_device *device) { - down_write(&device->group->group_rwsem); + lockdep_assert_held(&device->group->group_rwsem); + WARN_ON(device->group->container_users <= 1); device->group->container_users--; fput(device->group->opened_file); - up_write(&device->group->group_rwsem); } -static struct file *vfio_device_open(struct vfio_device *device) +static int vfio_device_open(struct vfio_device *device) { - struct file *filep; int ret; + lockdep_assert_held(&device->dev_set->lock); + + if (!try_module_get(device->dev->driver->owner)) + return -ENODEV; + down_write(&device->group->group_rwsem); ret = vfio_device_assign_container(device); - if (ret) { - up_write(&device->group->group_rwsem); - return ERR_PTR(ret); - } + if (ret) + goto err_unlock; if (device->ops->flags & VFIO_DEVICE_NEEDS_KVM) { - if (!device->group->kvm) { - up_write(&device->group->group_rwsem); + if (!device->group->kvm) goto err_unassign_container; - } device->kvm = device->group->kvm; kvm_get_kvm(device->kvm); } up_write(&device->group->group_rwsem); - if (!try_module_get(device->dev->driver->owner)) { - ret = -ENODEV; - goto err_put_kvm; - } - - mutex_lock(&device->dev_set->lock); - device->open_count++; - if (device->open_count == 1 && device->ops->open_device) { + if (device->ops->open_device) { ret = device->ops->open_device(device); if (ret) - goto err_undo_count; + goto err_put_kvm; } - mutex_unlock(&device->dev_set->lock); + return 0; - /* - * We can't use anon_inode_getfd() because we need to modify - * the f_mode flags directly to allow more than just ioctls - */ - filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops, - device, O_RDWR); - if (IS_ERR(filep)) { - ret = PTR_ERR(filep); - goto err_close_device; +err_put_kvm: + if (device->kvm) { + kvm_put_kvm(device->kvm); + device->kvm = NULL; } + down_write(&device->group->group_rwsem); +err_unassign_container: + vfio_device_unassign_container(device); +err_unlock: + up_write(&device->group->group_rwsem); + module_put(device->dev->driver->owner); + return ret; +} - /* - * TODO: add an anon_inode interface to do this. - * Appears to be missing by lack of need rather than - * explicitly prevented. Now there's need. - */ - filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); - - if (device->group->type == VFIO_NO_IOMMU) - dev_warn(device->dev, "vfio-noiommu device opened by user " - "(%s:%d)\n", current->comm, task_pid_nr(current)); - /* - * On success the ref of device is moved to the file and - * put in vfio_device_fops_release() - */ - return filep; +static void vfio_device_close(struct vfio_device *device) +{ + lockdep_assert_held(&device->dev_set->lock); -err_close_device: - mutex_lock(&device->dev_set->lock); - if (device->open_count == 1 && device->ops->close_device) + if (device->ops->close_device) device->ops->close_device(device); -err_undo_count: - device->open_count--; - mutex_unlock(&device->dev_set->lock); - module_put(device->dev->driver->owner); -err_put_kvm: if (device->kvm) { kvm_put_kvm(device->kvm); device->kvm = NULL; } -err_unassign_container: + down_write(&device->group->group_rwsem); vfio_device_unassign_container(device); - return ERR_PTR(ret); + up_write(&device->group->group_rwsem); + module_put(device->dev->driver->owner); } static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) @@ -1159,23 +1137,61 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) if (IS_ERR(device)) return PTR_ERR(device); - fdno = get_unused_fd_flags(O_CLOEXEC); - if (fdno < 0) { - ret = fdno; - goto err_put_device; + mutex_lock(&device->dev_set->lock); + device->open_count++; + if (device->open_count == 1) { + ret = vfio_device_open(device); + if (ret) { + device->open_count--; + mutex_unlock(&device->dev_set->lock); + goto err_put_device; + } } + mutex_unlock(&device->dev_set->lock); - filep = vfio_device_open(device); + /* + * We can't use anon_inode_getfd() because we need to modify + * the f_mode flags directly to allow more than just ioctls + */ + filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops, device, + O_RDWR); if (IS_ERR(filep)) { ret = PTR_ERR(filep); - goto err_put_fdno; + goto err_close_device; + } + + /* + * TODO: add an anon_inode interface to do this. + * Appears to be missing by lack of need rather than + * explicitly prevented. Now there's need. + */ + filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); + + fdno = get_unused_fd_flags(O_CLOEXEC); + if (fdno < 0) { + ret = fdno; + goto err_put_file; } + if (device->group->type == VFIO_NO_IOMMU) + dev_warn(device->dev, "vfio-noiommu device opened by user " + "(%s:%d)\n", current->comm, task_pid_nr(current)); + + /* + * On success the ref of device is moved to the file and put in + * vfio_device_fops_release(). + */ fd_install(fdno, filep); return fdno; -err_put_fdno: - put_unused_fd(fdno); +err_put_file: + fput(filep); +err_close_device: + mutex_lock(&device->dev_set->lock); + if (device->open_count == 1) + vfio_device_close(device); + device->open_count--; + mutex_unlock(&device->dev_set->lock); err_put_device: vfio_device_put(device); return ret; @@ -1333,19 +1349,11 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) mutex_lock(&device->dev_set->lock); vfio_assert_device_open(device); - if (device->open_count == 1 && device->ops->close_device) - device->ops->close_device(device); + if (device->open_count == 1) + vfio_device_close(device); device->open_count--; mutex_unlock(&device->dev_set->lock); - module_put(device->dev->driver->owner); - - if (device->kvm) { - kvm_put_kvm(device->kvm); - device->kvm = NULL; - } - vfio_device_unassign_container(device); - vfio_device_put(device); return 0;
> if (device->ops->flags & VFIO_DEVICE_NEEDS_KVM) > { Nit: this is not the normal brace placement. But what is you diff against anyway? The one Matthew sent did away with the VFIO_DEVICE_NEEDS_KVM flags, which does the wrong thing for zpci, so it can't be that.. Also if we want to do major code movement, it really needs to go into a separate patch or patches, as the combinations of all these moves with actual code changes is almost unreadable.
With this the release_work in gvt can go away as well: diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 633acfcf76bf2..aee1a45da74bc 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -227,7 +227,6 @@ struct intel_vgpu { struct mutex cache_lock; struct notifier_block iommu_notifier; - struct work_struct release_work; atomic_t released; struct kvm_page_track_notifier_node track_node; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index b317ae4cc7d2d..917617d7599a9 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -228,8 +228,6 @@ static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt) } } -static void intel_vgpu_release_work(struct work_struct *work); - static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, unsigned long size) { @@ -850,8 +848,9 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu) } } -static void __intel_vgpu_release(struct intel_vgpu *vgpu) +static void intel_vgpu_close_device(struct vfio_device *vfio_dev) { + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); struct drm_i915_private *i915 = vgpu->gvt->gt->i915; int ret; @@ -880,19 +879,6 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu) vgpu->attached = false; } -static void intel_vgpu_close_device(struct vfio_device *vfio_dev) -{ - __intel_vgpu_release(vfio_dev_to_vgpu(vfio_dev)); -} - -static void intel_vgpu_release_work(struct work_struct *work) -{ - struct intel_vgpu *vgpu = - container_of(work, struct intel_vgpu, release_work); - - __intel_vgpu_release(vgpu); -} - static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar) { u32 start_lo, start_hi; @@ -1639,7 +1625,6 @@ static int intel_vgpu_probe(struct mdev_device *mdev) return PTR_ERR(vgpu); } - INIT_WORK(&vgpu->release_work, intel_vgpu_release_work); vfio_init_group_dev(&vgpu->vfio_device, &mdev->dev, &intel_vgpu_dev_ops);
On Wed, May 18, 2022 at 12:39:41AM -0700, Christoph Hellwig wrote: > > if (device->ops->flags & VFIO_DEVICE_NEEDS_KVM) > > { > > Nit: this is not the normal brace placement. > > But what is you diff against anyway? The one Matthew sent did away > with the VFIO_DEVICE_NEEDS_KVM flags, which does the wrong thing for > zpci, so it can't be that.. Against what I sent before, I did this before Matthew sent his > Also if we want to do major code movement, it really needs to go into > a separate patch or patches, as the combinations of all these moves > with actual code changes is almost unreadable. Sure, just checking how things could look at this point Matthew's version first followed by the code motion is probably a good approach. Jason
On 5/17/22 2:08 PM, Matthew Rosato wrote: > Rather than relying on a notifier for associating the KVM with > the group, let's assume that the association has already been > made prior to device_open. The first time a device is opened > associate the group KVM with the device. > > Suggested-by: Jason Gunthorpe <jgg@nvidia.com> > Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> ... > diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c > index cfcff7764403..c5d421eda275 100644 > --- a/drivers/vfio/vfio.c > +++ b/drivers/vfio/vfio.c > @@ -10,6 +10,7 @@ > * Author: Tom Lyon, pugs@cisco.com > */ > > +#include "linux/kvm_host.h" > #include <linux/cdev.h> > #include <linux/compat.h> > #include <linux/device.h> > @@ -1083,6 +1084,13 @@ static struct file *vfio_device_open(struct vfio_device *device) > > mutex_lock(&device->dev_set->lock); > device->open_count++; > + down_write(&device->group->group_rwsem); > + if (device->open_count == 1 && device->group->kvm) { > + device->kvm = device->group->kvm; > + kvm_get_kvm(device->kvm); Did some more compile testing, since vfio has no hard kvm dependency, kvm_get_kvm and kvm_put_kvm are an issue if KVM is a module while vfio is built-in...
On Wed, May 18, 2022 at 10:37:48AM -0400, Matthew Rosato wrote: > On 5/17/22 2:08 PM, Matthew Rosato wrote: > > Rather than relying on a notifier for associating the KVM with > > the group, let's assume that the association has already been > > made prior to device_open. The first time a device is opened > > associate the group KVM with the device. > > > > Suggested-by: Jason Gunthorpe <jgg@nvidia.com> > > Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> > > ... > > > diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c > > index cfcff7764403..c5d421eda275 100644 > > +++ b/drivers/vfio/vfio.c > > @@ -10,6 +10,7 @@ > > * Author: Tom Lyon, pugs@cisco.com > > */ > > +#include "linux/kvm_host.h" > > #include <linux/cdev.h> > > #include <linux/compat.h> > > #include <linux/device.h> > > @@ -1083,6 +1084,13 @@ static struct file *vfio_device_open(struct vfio_device *device) > > mutex_lock(&device->dev_set->lock); > > device->open_count++; > > + down_write(&device->group->group_rwsem); > > + if (device->open_count == 1 && device->group->kvm) { > > + device->kvm = device->group->kvm; > > + kvm_get_kvm(device->kvm); > > Did some more compile testing, since vfio has no hard kvm dependency, > kvm_get_kvm and kvm_put_kvm are an issue if KVM is a module while vfio is > built-in... Ugh, my other plan was to have the driver itself capture the kvm, ie we lock the group_rwsem to keep the group->kvm valid and then pass the kvm to open_device in some way, then the driver can kvm_get_kvm() it Alternatively, I don't know why kvm_get_kvm() is an exported symbol when it is just calling refcount_inc() - inlining it would be an improvement I think. Jason
On 5/18/22 11:12 AM, Jason Gunthorpe wrote: > On Wed, May 18, 2022 at 10:37:48AM -0400, Matthew Rosato wrote: >> On 5/17/22 2:08 PM, Matthew Rosato wrote: >>> Rather than relying on a notifier for associating the KVM with >>> the group, let's assume that the association has already been >>> made prior to device_open. The first time a device is opened >>> associate the group KVM with the device. >>> >>> Suggested-by: Jason Gunthorpe <jgg@nvidia.com> >>> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> >> >> ... >> >>> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c >>> index cfcff7764403..c5d421eda275 100644 >>> +++ b/drivers/vfio/vfio.c >>> @@ -10,6 +10,7 @@ >>> * Author: Tom Lyon, pugs@cisco.com >>> */ >>> +#include "linux/kvm_host.h" >>> #include <linux/cdev.h> >>> #include <linux/compat.h> >>> #include <linux/device.h> >>> @@ -1083,6 +1084,13 @@ static struct file *vfio_device_open(struct vfio_device *device) >>> mutex_lock(&device->dev_set->lock); >>> device->open_count++; >>> + down_write(&device->group->group_rwsem); >>> + if (device->open_count == 1 && device->group->kvm) { >>> + device->kvm = device->group->kvm; >>> + kvm_get_kvm(device->kvm); >> >> Did some more compile testing, since vfio has no hard kvm dependency, >> kvm_get_kvm and kvm_put_kvm are an issue if KVM is a module while vfio is >> built-in... > > Ugh, my other plan was to have the driver itself capture the kvm, ie > we lock the group_rwsem to keep the group->kvm valid and then pass the > kvm to open_device in some way, then the driver can kvm_get_kvm() it > Hrm... If we did that we would have to re-evaluate some other usage of the rwsem e.g. if driver open_device calls vfio_register_iommu_notifier it will try to get the rwsem but it's already locked. > Alternatively, I don't know why kvm_get_kvm() is an exported symbol > when it is just calling refcount_inc() - inlining it would be an > improvement I think. > I think that would work for kvm_get_kvm, but kvm_put_kvm (which we also need) calls kvm_destroy_kvm after the refcount_dec and that can't be inlined
On Wed, May 18, 2022 at 11:33:37AM -0400, Matthew Rosato wrote: > On 5/18/22 11:12 AM, Jason Gunthorpe wrote: > > On Wed, May 18, 2022 at 10:37:48AM -0400, Matthew Rosato wrote: > > > On 5/17/22 2:08 PM, Matthew Rosato wrote: > > > > Rather than relying on a notifier for associating the KVM with > > > > the group, let's assume that the association has already been > > > > made prior to device_open. The first time a device is opened > > > > associate the group KVM with the device. > > > > > > > > Suggested-by: Jason Gunthorpe <jgg@nvidia.com> > > > > Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> > > > > > > ... > > > > > > > diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c > > > > index cfcff7764403..c5d421eda275 100644 > > > > +++ b/drivers/vfio/vfio.c > > > > @@ -10,6 +10,7 @@ > > > > * Author: Tom Lyon, pugs@cisco.com > > > > */ > > > > +#include "linux/kvm_host.h" > > > > #include <linux/cdev.h> > > > > #include <linux/compat.h> > > > > #include <linux/device.h> > > > > @@ -1083,6 +1084,13 @@ static struct file *vfio_device_open(struct vfio_device *device) > > > > mutex_lock(&device->dev_set->lock); > > > > device->open_count++; > > > > + down_write(&device->group->group_rwsem); > > > > + if (device->open_count == 1 && device->group->kvm) { > > > > + device->kvm = device->group->kvm; > > > > + kvm_get_kvm(device->kvm); > > > > > > Did some more compile testing, since vfio has no hard kvm dependency, > > > kvm_get_kvm and kvm_put_kvm are an issue if KVM is a module while vfio is > > > built-in... > > > > Ugh, my other plan was to have the driver itself capture the kvm, ie > > we lock the group_rwsem to keep the group->kvm valid and then pass the > > kvm to open_device in some way, then the driver can kvm_get_kvm() it > > > > Hrm... If we did that we would have to re-evaluate some other usage of the > rwsem e.g. if driver open_device calls vfio_register_iommu_notifier it will > try to get the rwsem but it's already locked. Ugh, yes, it means removing the other notifier callback too, which I was expecting to do as well Maybe we could split the lock for just this patch though. > > Alternatively, I don't know why kvm_get_kvm() is an exported symbol > > when it is just calling refcount_inc() - inlining it would be an > > improvement I think. > > I think that would work for kvm_get_kvm, but kvm_put_kvm (which we also > need) calls kvm_destroy_kvm after the refcount_dec and that can't be inlined Indeed. Jason
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 2af4c83e733c..633acfcf76bf 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -227,8 +227,6 @@ struct intel_vgpu { struct mutex cache_lock; struct notifier_block iommu_notifier; - struct notifier_block group_notifier; - struct kvm *kvm; struct work_struct release_work; atomic_t released; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 7655ffa97d51..b317ae4cc7d2 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -761,23 +761,6 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb, return NOTIFY_OK; } -static int intel_vgpu_group_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct intel_vgpu *vgpu = - container_of(nb, struct intel_vgpu, group_notifier); - - /* the only action we care about */ - if (action == VFIO_GROUP_NOTIFY_SET_KVM) { - vgpu->kvm = data; - - if (!data) - schedule_work(&vgpu->release_work); - } - - return NOTIFY_OK; -} - static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu) { struct intel_vgpu *itr; @@ -789,7 +772,7 @@ static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu) if (!itr->attached) continue; - if (vgpu->kvm == itr->kvm) { + if (vgpu->vfio_device.kvm == itr->vfio_device.kvm) { ret = true; goto out; } @@ -806,7 +789,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) int ret; vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier; - vgpu->group_notifier.notifier_call = intel_vgpu_group_notifier; events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events, @@ -817,38 +799,30 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) goto out; } - events = VFIO_GROUP_NOTIFY_SET_KVM; - ret = vfio_register_notifier(vfio_dev, VFIO_GROUP_NOTIFY, &events, - &vgpu->group_notifier); - if (ret != 0) { - gvt_vgpu_err("vfio_register_notifier for group failed: %d\n", - ret); - goto undo_iommu; - } - ret = -EEXIST; if (vgpu->attached) - goto undo_register; + goto undo_iommu; ret = -ESRCH; - if (!vgpu->kvm || vgpu->kvm->mm != current->mm) { + if (!vgpu->vfio_device.kvm || + vgpu->vfio_device.kvm->mm != current->mm) { gvt_vgpu_err("KVM is required to use Intel vGPU\n"); - goto undo_register; + goto undo_iommu; } ret = -EEXIST; if (__kvmgt_vgpu_exist(vgpu)) - goto undo_register; + goto undo_iommu; vgpu->attached = true; - kvm_get_kvm(vgpu->kvm); kvmgt_protect_table_init(vgpu); gvt_cache_init(vgpu); vgpu->track_node.track_write = kvmgt_page_track_write; vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot; - kvm_page_track_register_notifier(vgpu->kvm, &vgpu->track_node); + kvm_page_track_register_notifier(vgpu->vfio_device.kvm, + &vgpu->track_node); debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs, &vgpu->nr_cache_entries); @@ -858,10 +832,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) atomic_set(&vgpu->released, 0); return 0; -undo_register: - vfio_unregister_notifier(vfio_dev, VFIO_GROUP_NOTIFY, - &vgpu->group_notifier); - undo_iommu: vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &vgpu->iommu_notifier); @@ -898,21 +868,15 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu) drm_WARN(&i915->drm, ret, "vfio_unregister_notifier for iommu failed: %d\n", ret); - ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_GROUP_NOTIFY, - &vgpu->group_notifier); - drm_WARN(&i915->drm, ret, - "vfio_unregister_notifier for group failed: %d\n", ret); - debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs)); - kvm_page_track_unregister_notifier(vgpu->kvm, &vgpu->track_node); - kvm_put_kvm(vgpu->kvm); + kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm, + &vgpu->track_node); kvmgt_protect_table_destroy(vgpu); gvt_cache_destroy(vgpu); intel_vgpu_release_msi_eventfd_ctx(vgpu); - vgpu->kvm = NULL; vgpu->attached = false; } @@ -1713,7 +1677,7 @@ static struct mdev_driver intel_vgpu_mdev_driver = { int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn) { - struct kvm *kvm = info->kvm; + struct kvm *kvm = info->vfio_device.kvm; struct kvm_memory_slot *slot; int idx; @@ -1743,7 +1707,7 @@ int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn) int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn) { - struct kvm *kvm = info->kvm; + struct kvm *kvm = info->vfio_device.kvm; struct kvm_memory_slot *slot; int idx; diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index e8914024f5b1..43c68afe7e87 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -1284,25 +1284,6 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev) } } -static int vfio_ap_mdev_group_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - int notify_rc = NOTIFY_OK; - struct ap_matrix_mdev *matrix_mdev; - - if (action != VFIO_GROUP_NOTIFY_SET_KVM) - return NOTIFY_OK; - - matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier); - - if (!data) - vfio_ap_mdev_unset_kvm(matrix_mdev); - else if (vfio_ap_mdev_set_kvm(matrix_mdev, data)) - notify_rc = NOTIFY_DONE; - - return notify_rc; -} - static struct vfio_ap_queue *vfio_ap_find_queue(int apqn) { struct device *dev; @@ -1402,11 +1383,10 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev) unsigned long events; int ret; - matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier; - events = VFIO_GROUP_NOTIFY_SET_KVM; + if (!vdev->kvm) + return -EPERM; - ret = vfio_register_notifier(vdev, VFIO_GROUP_NOTIFY, &events, - &matrix_mdev->group_notifier); + ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm); if (ret) return ret; @@ -1415,12 +1395,11 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev) ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events, &matrix_mdev->iommu_notifier); if (ret) - goto out_unregister_group; + goto err_kvm; return 0; -out_unregister_group: - vfio_unregister_notifier(vdev, VFIO_GROUP_NOTIFY, - &matrix_mdev->group_notifier); +err_kvm: + vfio_ap_mdev_unset_kvm(matrix_mdev); return ret; } @@ -1431,8 +1410,6 @@ static void vfio_ap_mdev_close_device(struct vfio_device *vdev) vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &matrix_mdev->iommu_notifier); - vfio_unregister_notifier(vdev, VFIO_GROUP_NOTIFY, - &matrix_mdev->group_notifier); vfio_ap_mdev_unset_kvm(matrix_mdev); } diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h index 648fcaf8104a..a26efd804d0d 100644 --- a/drivers/s390/crypto/vfio_ap_private.h +++ b/drivers/s390/crypto/vfio_ap_private.h @@ -81,8 +81,6 @@ struct ap_matrix { * @node: allows the ap_matrix_mdev struct to be added to a list * @matrix: the adapters, usage domains and control domains assigned to the * mediated matrix device. - * @group_notifier: notifier block used for specifying callback function for - * handling the VFIO_GROUP_NOTIFY_SET_KVM event * @iommu_notifier: notifier block used for specifying callback function for * handling the VFIO_IOMMU_NOTIFY_DMA_UNMAP even * @kvm: the struct holding guest's state @@ -94,7 +92,6 @@ struct ap_matrix_mdev { struct vfio_device vdev; struct list_head node; struct ap_matrix matrix; - struct notifier_block group_notifier; struct notifier_block iommu_notifier; struct kvm *kvm; crypto_hook pqap_hook; diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index cfcff7764403..c5d421eda275 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -10,6 +10,7 @@ * Author: Tom Lyon, pugs@cisco.com */ +#include "linux/kvm_host.h" #include <linux/cdev.h> #include <linux/compat.h> #include <linux/device.h> @@ -1083,6 +1084,13 @@ static struct file *vfio_device_open(struct vfio_device *device) mutex_lock(&device->dev_set->lock); device->open_count++; + down_write(&device->group->group_rwsem); + if (device->open_count == 1 && device->group->kvm) { + device->kvm = device->group->kvm; + kvm_get_kvm(device->kvm); + } + up_write(&device->group->group_rwsem); + if (device->open_count == 1 && device->ops->open_device) { ret = device->ops->open_device(device); if (ret) @@ -1123,6 +1131,12 @@ static struct file *vfio_device_open(struct vfio_device *device) device->ops->close_device(device); err_undo_count: device->open_count--; + down_write(&device->group->group_rwsem); + if (device->open_count == 0 && device->kvm) { + kvm_put_kvm(device->kvm); + device->kvm = NULL; + } + up_write(&device->group->group_rwsem); mutex_unlock(&device->dev_set->lock); module_put(device->dev->driver->owner); err_unassign_container: @@ -1318,6 +1332,12 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) if (device->open_count == 1 && device->ops->close_device) device->ops->close_device(device); device->open_count--; + down_write(&device->group->group_rwsem); + if (device->open_count == 0 && device->kvm) { + kvm_put_kvm(device->kvm); + device->kvm = NULL; + } + up_write(&device->group->group_rwsem); mutex_unlock(&device->dev_set->lock); module_put(device->dev->driver->owner); @@ -1726,8 +1746,8 @@ EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent); * @file: VFIO group file * @kvm: KVM to link * - * The kvm pointer will be forwarded to all the vfio_device's attached to the - * VFIO file via the VFIO_GROUP_NOTIFY_SET_KVM notifier. + * When a VFIO device is first opened the KVM will be available in + * device->kvm if one was associated with the group. */ void vfio_file_set_kvm(struct file *file, struct kvm *kvm) { @@ -1738,8 +1758,6 @@ void vfio_file_set_kvm(struct file *file, struct kvm *kvm) down_write(&group->group_rwsem); group->kvm = kvm; - blocking_notifier_call_chain(&group->notifier, - VFIO_GROUP_NOTIFY_SET_KVM, kvm); up_write(&group->group_rwsem); } EXPORT_SYMBOL_GPL(vfio_file_set_kvm); @@ -2039,42 +2057,6 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group, return ret; } -static int vfio_register_group_notifier(struct vfio_group *group, - unsigned long *events, - struct notifier_block *nb) -{ - int ret; - bool set_kvm = false; - - if (*events & VFIO_GROUP_NOTIFY_SET_KVM) - set_kvm = true; - - /* clear known events */ - *events &= ~VFIO_GROUP_NOTIFY_SET_KVM; - - /* refuse to continue if still events remaining */ - if (*events) - return -EINVAL; - - ret = blocking_notifier_chain_register(&group->notifier, nb); - if (ret) - return ret; - - /* - * The attaching of kvm and vfio_group might already happen, so - * here we replay once upon registration. - */ - if (set_kvm) { - down_read(&group->group_rwsem); - if (group->kvm) - blocking_notifier_call_chain(&group->notifier, - VFIO_GROUP_NOTIFY_SET_KVM, - group->kvm); - up_read(&group->group_rwsem); - } - return 0; -} - int vfio_register_notifier(struct vfio_device *device, enum vfio_notify_type type, unsigned long *events, struct notifier_block *nb) @@ -2090,9 +2072,6 @@ int vfio_register_notifier(struct vfio_device *device, case VFIO_IOMMU_NOTIFY: ret = vfio_register_iommu_notifier(group, events, nb); break; - case VFIO_GROUP_NOTIFY: - ret = vfio_register_group_notifier(group, events, nb); - break; default: ret = -EINVAL; } @@ -2114,9 +2093,6 @@ int vfio_unregister_notifier(struct vfio_device *device, case VFIO_IOMMU_NOTIFY: ret = vfio_unregister_iommu_notifier(group, nb); break; - case VFIO_GROUP_NOTIFY: - ret = blocking_notifier_chain_unregister(&group->notifier, nb); - break; default: ret = -EINVAL; } diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 45b287826ce6..5f691453e3fb 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -36,6 +36,7 @@ struct vfio_device { struct vfio_device_set *dev_set; struct list_head dev_set_list; unsigned int migration_flags; + struct kvm *kvm; /* Members below here are private, not for driver use */ refcount_t refcount; @@ -155,15 +156,11 @@ extern int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, /* each type has independent events */ enum vfio_notify_type { VFIO_IOMMU_NOTIFY = 0, - VFIO_GROUP_NOTIFY = 1, }; /* events for VFIO_IOMMU_NOTIFY */ #define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0) -/* events for VFIO_GROUP_NOTIFY */ -#define VFIO_GROUP_NOTIFY_SET_KVM BIT(0) - extern int vfio_register_notifier(struct vfio_device *device, enum vfio_notify_type type, unsigned long *required_events,
Rather than relying on a notifier for associating the KVM with the group, let's assume that the association has already been made prior to device_open. The first time a device is opened associate the group KVM with the device. Suggested-by: Jason Gunthorpe <jgg@nvidia.com> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> --- drivers/gpu/drm/i915/gvt/gvt.h | 2 - drivers/gpu/drm/i915/gvt/kvmgt.c | 60 +++++------------------ drivers/s390/crypto/vfio_ap_ops.c | 35 +++----------- drivers/s390/crypto/vfio_ap_private.h | 3 -- drivers/vfio/vfio.c | 68 +++++++++------------------ include/linux/vfio.h | 5 +- 6 files changed, 41 insertions(+), 132 deletions(-)