diff mbox series

[1/1] vfio: remove VFIO_GROUP_NOTIFY_SET_KVM

Message ID 20220517180851.166538-2-mjrosato@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series vfio: remove VFIO_GROUP_NOTIFY_SET_KVM | expand

Commit Message

Matthew Rosato May 17, 2022, 6:08 p.m. UTC
Rather than relying on a notifier for associating the KVM with
the group, let's assume that the association has already been
made prior to device_open.  The first time a device is opened
associate the group KVM with the device.

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
---
 drivers/gpu/drm/i915/gvt/gvt.h        |  2 -
 drivers/gpu/drm/i915/gvt/kvmgt.c      | 60 +++++------------------
 drivers/s390/crypto/vfio_ap_ops.c     | 35 +++-----------
 drivers/s390/crypto/vfio_ap_private.h |  3 --
 drivers/vfio/vfio.c                   | 68 +++++++++------------------
 include/linux/vfio.h                  |  5 +-
 6 files changed, 41 insertions(+), 132 deletions(-)

Comments

Jason Gunthorpe May 17, 2022, 6:56 p.m. UTC | #1
On Tue, May 17, 2022 at 02:08:51PM -0400, Matthew Rosato wrote:
> Rather than relying on a notifier for associating the KVM with
> the group, let's assume that the association has already been
> made prior to device_open.  The first time a device is opened
> associate the group KVM with the device.

This also fixes a user triggerable oops in gvt
 
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>

You can add my signed-off-by as well

> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index cfcff7764403..c5d421eda275 100644
> +++ b/drivers/vfio/vfio.c
> @@ -10,6 +10,7 @@
>   * Author: Tom Lyon, pugs@cisco.com
>   */
>  
> +#include "linux/kvm_host.h"

This is the wrong format of include (editor automation, sigh)

> @@ -1083,6 +1084,13 @@ static struct file *vfio_device_open(struct vfio_device *device)
>  
>  	mutex_lock(&device->dev_set->lock);
>  	device->open_count++;
> +	down_write(&device->group->group_rwsem);

Read I suppose

> +	if (device->open_count == 1 && device->group->kvm) {
> +		device->kvm = device->group->kvm;
> +		kvm_get_kvm(device->kvm);
> +	}
> +	up_write(&device->group->group_rwsem);

Yeah, this is OK, not very elegant though

I was looking at this - but it could come later too:

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 8e11d9119418be..c5d8dfe8314108 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -10,7 +10,6 @@
  * Author: Tom Lyon, pugs@cisco.com
  */
 
-#include "linux/kvm_host.h"
 #include <linux/cdev.h>
 #include <linux/compat.h>
 #include <linux/device.h>
@@ -33,6 +32,7 @@
 #include <linux/vfio.h>
 #include <linux/wait.h>
 #include <linux/sched/signal.h>
+#include <linux/kvm_host.h>
 #include "vfio.h"
 
 #define DRIVER_VERSION	"0.3"
@@ -1059,93 +1059,71 @@ static int vfio_device_assign_container(struct vfio_device *device)
 
 static void vfio_device_unassign_container(struct vfio_device *device)
 {
-	down_write(&device->group->group_rwsem);
+	lockdep_assert_held(&device->group->group_rwsem);
+
 	WARN_ON(device->group->container_users <= 1);
 	device->group->container_users--;
 	fput(device->group->opened_file);
-	up_write(&device->group->group_rwsem);
 }
 
-static struct file *vfio_device_open(struct vfio_device *device)
+static int vfio_device_open(struct vfio_device *device)
 {
-	struct file *filep;
 	int ret;
 
+	lockdep_assert_held(&device->dev_set->lock);
+
+	if (!try_module_get(device->dev->driver->owner))
+		return -ENODEV;
+
 	down_write(&device->group->group_rwsem);
 	ret = vfio_device_assign_container(device);
-	if (ret) {
-		up_write(&device->group->group_rwsem);
-		return ERR_PTR(ret);
-	}
+	if (ret)
+		goto err_unlock;
 
 	if (device->ops->flags & VFIO_DEVICE_NEEDS_KVM)
 	{
-		if (!device->group->kvm) {
-			up_write(&device->group->group_rwsem);
+		if (!device->group->kvm)
 			goto err_unassign_container;
-		}
 		device->kvm = device->group->kvm;
 		kvm_get_kvm(device->kvm);
 	}
 	up_write(&device->group->group_rwsem);
 
-	if (!try_module_get(device->dev->driver->owner)) {
-		ret = -ENODEV;
-		goto err_put_kvm;
-	}
-
-	mutex_lock(&device->dev_set->lock);
-	device->open_count++;
-	if (device->open_count == 1 && device->ops->open_device) {
+	if (device->ops->open_device) {
 		ret = device->ops->open_device(device);
 		if (ret)
-			goto err_undo_count;
+			goto err_put_kvm;
 	}
-	mutex_unlock(&device->dev_set->lock);
+	return 0;
 
-	/*
-	 * We can't use anon_inode_getfd() because we need to modify
-	 * the f_mode flags directly to allow more than just ioctls
-	 */
-	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
-				   device, O_RDWR);
-	if (IS_ERR(filep)) {
-		ret = PTR_ERR(filep);
-		goto err_close_device;
+err_put_kvm:
+	if (device->kvm) {
+		kvm_put_kvm(device->kvm);
+		device->kvm = NULL;
 	}
+	down_write(&device->group->group_rwsem);
+err_unassign_container:
+	vfio_device_unassign_container(device);
+err_unlock:
+	up_write(&device->group->group_rwsem);
+	module_put(device->dev->driver->owner);
+	return ret;
+}
 
-	/*
-	 * TODO: add an anon_inode interface to do this.
-	 * Appears to be missing by lack of need rather than
-	 * explicitly prevented.  Now there's need.
-	 */
-	filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
-
-	if (device->group->type == VFIO_NO_IOMMU)
-		dev_warn(device->dev, "vfio-noiommu device opened by user "
-			 "(%s:%d)\n", current->comm, task_pid_nr(current));
-	/*
-	 * On success the ref of device is moved to the file and
-	 * put in vfio_device_fops_release()
-	 */
-	return filep;
+static void vfio_device_close(struct vfio_device *device)
+{
+	lockdep_assert_held(&device->dev_set->lock);
 
-err_close_device:
-	mutex_lock(&device->dev_set->lock);
-	if (device->open_count == 1 && device->ops->close_device)
+	if (device->ops->close_device)
 		device->ops->close_device(device);
-err_undo_count:
-	device->open_count--;
-	mutex_unlock(&device->dev_set->lock);
-	module_put(device->dev->driver->owner);
-err_put_kvm:
 	if (device->kvm) {
 		kvm_put_kvm(device->kvm);
 		device->kvm = NULL;
 	}
-err_unassign_container:
+	down_write(&device->group->group_rwsem);
 	vfio_device_unassign_container(device);
-	return ERR_PTR(ret);
+	up_write(&device->group->group_rwsem);
+	module_put(device->dev->driver->owner);
 }
 
 static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
@@ -1159,23 +1137,61 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
 	if (IS_ERR(device))
 		return PTR_ERR(device);
 
-	fdno = get_unused_fd_flags(O_CLOEXEC);
-	if (fdno < 0) {
-		ret = fdno;
-		goto err_put_device;
+	mutex_lock(&device->dev_set->lock);
+	device->open_count++;
+	if (device->open_count == 1) {
+		ret = vfio_device_open(device);
+		if (ret) {
+			device->open_count--;
+			mutex_unlock(&device->dev_set->lock);
+			goto err_put_device;
+		}
 	}
+	mutex_unlock(&device->dev_set->lock);
 
-	filep = vfio_device_open(device);
+	/*
+	 * We can't use anon_inode_getfd() because we need to modify
+	 * the f_mode flags directly to allow more than just ioctls
+	 */
+	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops, device,
+				   O_RDWR);
 	if (IS_ERR(filep)) {
 		ret = PTR_ERR(filep);
-		goto err_put_fdno;
+		goto err_close_device;
+	}
+
+	/*
+	 * TODO: add an anon_inode interface to do this.
+	 * Appears to be missing by lack of need rather than
+	 * explicitly prevented.  Now there's need.
+	 */
+	filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
+
+	fdno = get_unused_fd_flags(O_CLOEXEC);
+	if (fdno < 0) {
+		ret = fdno;
+		goto err_put_file;
 	}
 
+	if (device->group->type == VFIO_NO_IOMMU)
+		dev_warn(device->dev, "vfio-noiommu device opened by user "
+			 "(%s:%d)\n", current->comm, task_pid_nr(current));
+
+	/*
+	 * On success the ref of device is moved to the file and put in
+	 * vfio_device_fops_release().
+	 */
 	fd_install(fdno, filep);
 	return fdno;
 
-err_put_fdno:
-	put_unused_fd(fdno);
+err_put_file:
+	fput(filep);
+err_close_device:
+	mutex_lock(&device->dev_set->lock);
+	if (device->open_count == 1)
+		vfio_device_close(device);
+	device->open_count--;
+	mutex_unlock(&device->dev_set->lock);
 err_put_device:
 	vfio_device_put(device);
 	return ret;
@@ -1333,19 +1349,11 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
 
 	mutex_lock(&device->dev_set->lock);
 	vfio_assert_device_open(device);
-	if (device->open_count == 1 && device->ops->close_device)
-		device->ops->close_device(device);
+	if (device->open_count == 1)
+		vfio_device_close(device);
 	device->open_count--;
 	mutex_unlock(&device->dev_set->lock);
 
-	module_put(device->dev->driver->owner);
-
-	if (device->kvm) {
-		kvm_put_kvm(device->kvm);
-		device->kvm = NULL;
-	}
-	vfio_device_unassign_container(device);
-
 	vfio_device_put(device);
 
 	return 0;
Christoph Hellwig May 18, 2022, 7:39 a.m. UTC | #2
>  	if (device->ops->flags & VFIO_DEVICE_NEEDS_KVM)
>  	{

Nit: this is not the normal brace placement.

But what is you diff against anyway?  The one Matthew sent did away
with the VFIO_DEVICE_NEEDS_KVM flags, which does the wrong thing for
zpci, so it can't be that..

Also if we want to do major code movement, it really needs to go into
a separate patch or patches, as the combinations of all these moves
with actual code changes is almost unreadable.
Christoph Hellwig May 18, 2022, 7:47 a.m. UTC | #3
With this the release_work in gvt can go away as well:

diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 633acfcf76bf2..aee1a45da74bc 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -227,7 +227,6 @@ struct intel_vgpu {
 	struct mutex cache_lock;
 
 	struct notifier_block iommu_notifier;
-	struct work_struct release_work;
 	atomic_t released;
 
 	struct kvm_page_track_notifier_node track_node;
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index b317ae4cc7d2d..917617d7599a9 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -228,8 +228,6 @@ static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
 	}
 }
 
-static void intel_vgpu_release_work(struct work_struct *work);
-
 static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
 		unsigned long size)
 {
@@ -850,8 +848,9 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
 	}
 }
 
-static void __intel_vgpu_release(struct intel_vgpu *vgpu)
+static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
 {
+	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
 	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
 	int ret;
 
@@ -880,19 +879,6 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu)
 	vgpu->attached = false;
 }
 
-static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
-{
-	__intel_vgpu_release(vfio_dev_to_vgpu(vfio_dev));
-}
-
-static void intel_vgpu_release_work(struct work_struct *work)
-{
-	struct intel_vgpu *vgpu =
-		container_of(work, struct intel_vgpu, release_work);
-
-	__intel_vgpu_release(vgpu);
-}
-
 static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
 {
 	u32 start_lo, start_hi;
@@ -1639,7 +1625,6 @@ static int intel_vgpu_probe(struct mdev_device *mdev)
 		return PTR_ERR(vgpu);
 	}
 
-	INIT_WORK(&vgpu->release_work, intel_vgpu_release_work);
 	vfio_init_group_dev(&vgpu->vfio_device, &mdev->dev,
 			    &intel_vgpu_dev_ops);
Jason Gunthorpe May 18, 2022, 11:58 a.m. UTC | #4
On Wed, May 18, 2022 at 12:39:41AM -0700, Christoph Hellwig wrote:
> >  	if (device->ops->flags & VFIO_DEVICE_NEEDS_KVM)
> >  	{
> 
> Nit: this is not the normal brace placement.
> 
> But what is you diff against anyway?  The one Matthew sent did away
> with the VFIO_DEVICE_NEEDS_KVM flags, which does the wrong thing for
> zpci, so it can't be that..

Against what I sent before, I did this before Matthew sent his

> Also if we want to do major code movement, it really needs to go into
> a separate patch or patches, as the combinations of all these moves
> with actual code changes is almost unreadable.

Sure, just checking how things could look at this point

Matthew's version first followed by the code motion is probably a good
approach.

Jason
Matthew Rosato May 18, 2022, 2:37 p.m. UTC | #5
On 5/17/22 2:08 PM, Matthew Rosato wrote:
> Rather than relying on a notifier for associating the KVM with
> the group, let's assume that the association has already been
> made prior to device_open.  The first time a device is opened
> associate the group KVM with the device.
> 
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>

...

> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index cfcff7764403..c5d421eda275 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -10,6 +10,7 @@
>    * Author: Tom Lyon, pugs@cisco.com
>    */
>   
> +#include "linux/kvm_host.h"
>   #include <linux/cdev.h>
>   #include <linux/compat.h>
>   #include <linux/device.h>
> @@ -1083,6 +1084,13 @@ static struct file *vfio_device_open(struct vfio_device *device)
>   
>   	mutex_lock(&device->dev_set->lock);
>   	device->open_count++;
> +	down_write(&device->group->group_rwsem);
> +	if (device->open_count == 1 && device->group->kvm) {
> +		device->kvm = device->group->kvm;
> +		kvm_get_kvm(device->kvm);

Did some more compile testing, since vfio has no hard kvm dependency, 
kvm_get_kvm and kvm_put_kvm are an issue if KVM is a module while vfio 
is built-in...
Jason Gunthorpe May 18, 2022, 3:12 p.m. UTC | #6
On Wed, May 18, 2022 at 10:37:48AM -0400, Matthew Rosato wrote:
> On 5/17/22 2:08 PM, Matthew Rosato wrote:
> > Rather than relying on a notifier for associating the KVM with
> > the group, let's assume that the association has already been
> > made prior to device_open.  The first time a device is opened
> > associate the group KVM with the device.
> > 
> > Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> > Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
> 
> ...
> 
> > diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> > index cfcff7764403..c5d421eda275 100644
> > +++ b/drivers/vfio/vfio.c
> > @@ -10,6 +10,7 @@
> >    * Author: Tom Lyon, pugs@cisco.com
> >    */
> > +#include "linux/kvm_host.h"
> >   #include <linux/cdev.h>
> >   #include <linux/compat.h>
> >   #include <linux/device.h>
> > @@ -1083,6 +1084,13 @@ static struct file *vfio_device_open(struct vfio_device *device)
> >   	mutex_lock(&device->dev_set->lock);
> >   	device->open_count++;
> > +	down_write(&device->group->group_rwsem);
> > +	if (device->open_count == 1 && device->group->kvm) {
> > +		device->kvm = device->group->kvm;
> > +		kvm_get_kvm(device->kvm);
> 
> Did some more compile testing, since vfio has no hard kvm dependency,
> kvm_get_kvm and kvm_put_kvm are an issue if KVM is a module while vfio is
> built-in...

Ugh, my other plan was to have the driver itself capture the kvm, ie
we lock the group_rwsem to keep the group->kvm valid and then pass the
kvm to open_device in some way, then the driver can kvm_get_kvm() it

Alternatively, I don't know why kvm_get_kvm() is an exported symbol
when it is just calling refcount_inc() - inlining it would be an
improvement I think.

Jason
Matthew Rosato May 18, 2022, 3:33 p.m. UTC | #7
On 5/18/22 11:12 AM, Jason Gunthorpe wrote:
> On Wed, May 18, 2022 at 10:37:48AM -0400, Matthew Rosato wrote:
>> On 5/17/22 2:08 PM, Matthew Rosato wrote:
>>> Rather than relying on a notifier for associating the KVM with
>>> the group, let's assume that the association has already been
>>> made prior to device_open.  The first time a device is opened
>>> associate the group KVM with the device.
>>>
>>> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
>>> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
>>
>> ...
>>
>>> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
>>> index cfcff7764403..c5d421eda275 100644
>>> +++ b/drivers/vfio/vfio.c
>>> @@ -10,6 +10,7 @@
>>>     * Author: Tom Lyon, pugs@cisco.com
>>>     */
>>> +#include "linux/kvm_host.h"
>>>    #include <linux/cdev.h>
>>>    #include <linux/compat.h>
>>>    #include <linux/device.h>
>>> @@ -1083,6 +1084,13 @@ static struct file *vfio_device_open(struct vfio_device *device)
>>>    	mutex_lock(&device->dev_set->lock);
>>>    	device->open_count++;
>>> +	down_write(&device->group->group_rwsem);
>>> +	if (device->open_count == 1 && device->group->kvm) {
>>> +		device->kvm = device->group->kvm;
>>> +		kvm_get_kvm(device->kvm);
>>
>> Did some more compile testing, since vfio has no hard kvm dependency,
>> kvm_get_kvm and kvm_put_kvm are an issue if KVM is a module while vfio is
>> built-in...
> 
> Ugh, my other plan was to have the driver itself capture the kvm, ie
> we lock the group_rwsem to keep the group->kvm valid and then pass the
> kvm to open_device in some way, then the driver can kvm_get_kvm() it
> 

Hrm... If we did that we would have to re-evaluate some other usage of 
the rwsem e.g. if driver open_device calls vfio_register_iommu_notifier 
it will try to get the rwsem but it's already locked.

> Alternatively, I don't know why kvm_get_kvm() is an exported symbol
> when it is just calling refcount_inc() - inlining it would be an
> improvement I think.
> 

I think that would work for kvm_get_kvm, but kvm_put_kvm (which we also 
need) calls kvm_destroy_kvm after the refcount_dec and that can't be inlined
Jason Gunthorpe May 18, 2022, 3:38 p.m. UTC | #8
On Wed, May 18, 2022 at 11:33:37AM -0400, Matthew Rosato wrote:
> On 5/18/22 11:12 AM, Jason Gunthorpe wrote:
> > On Wed, May 18, 2022 at 10:37:48AM -0400, Matthew Rosato wrote:
> > > On 5/17/22 2:08 PM, Matthew Rosato wrote:
> > > > Rather than relying on a notifier for associating the KVM with
> > > > the group, let's assume that the association has already been
> > > > made prior to device_open.  The first time a device is opened
> > > > associate the group KVM with the device.
> > > > 
> > > > Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> > > > Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
> > > 
> > > ...
> > > 
> > > > diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> > > > index cfcff7764403..c5d421eda275 100644
> > > > +++ b/drivers/vfio/vfio.c
> > > > @@ -10,6 +10,7 @@
> > > >     * Author: Tom Lyon, pugs@cisco.com
> > > >     */
> > > > +#include "linux/kvm_host.h"
> > > >    #include <linux/cdev.h>
> > > >    #include <linux/compat.h>
> > > >    #include <linux/device.h>
> > > > @@ -1083,6 +1084,13 @@ static struct file *vfio_device_open(struct vfio_device *device)
> > > >    	mutex_lock(&device->dev_set->lock);
> > > >    	device->open_count++;
> > > > +	down_write(&device->group->group_rwsem);
> > > > +	if (device->open_count == 1 && device->group->kvm) {
> > > > +		device->kvm = device->group->kvm;
> > > > +		kvm_get_kvm(device->kvm);
> > > 
> > > Did some more compile testing, since vfio has no hard kvm dependency,
> > > kvm_get_kvm and kvm_put_kvm are an issue if KVM is a module while vfio is
> > > built-in...
> > 
> > Ugh, my other plan was to have the driver itself capture the kvm, ie
> > we lock the group_rwsem to keep the group->kvm valid and then pass the
> > kvm to open_device in some way, then the driver can kvm_get_kvm() it
> > 
> 
> Hrm... If we did that we would have to re-evaluate some other usage of the
> rwsem e.g. if driver open_device calls vfio_register_iommu_notifier it will
> try to get the rwsem but it's already locked.

Ugh, yes, it means removing the other notifier callback too, which I
was expecting to do as well

Maybe we could split the lock for just this patch though.

> > Alternatively, I don't know why kvm_get_kvm() is an exported symbol
> > when it is just calling refcount_inc() - inlining it would be an
> > improvement I think.
> 
> I think that would work for kvm_get_kvm, but kvm_put_kvm (which we also
> need) calls kvm_destroy_kvm after the refcount_dec and that can't be inlined

Indeed.

Jason
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 2af4c83e733c..633acfcf76bf 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -227,8 +227,6 @@  struct intel_vgpu {
 	struct mutex cache_lock;
 
 	struct notifier_block iommu_notifier;
-	struct notifier_block group_notifier;
-	struct kvm *kvm;
 	struct work_struct release_work;
 	atomic_t released;
 
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 7655ffa97d51..b317ae4cc7d2 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -761,23 +761,6 @@  static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
-static int intel_vgpu_group_notifier(struct notifier_block *nb,
-				     unsigned long action, void *data)
-{
-	struct intel_vgpu *vgpu =
-		container_of(nb, struct intel_vgpu, group_notifier);
-
-	/* the only action we care about */
-	if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
-		vgpu->kvm = data;
-
-		if (!data)
-			schedule_work(&vgpu->release_work);
-	}
-
-	return NOTIFY_OK;
-}
-
 static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
 {
 	struct intel_vgpu *itr;
@@ -789,7 +772,7 @@  static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
 		if (!itr->attached)
 			continue;
 
-		if (vgpu->kvm == itr->kvm) {
+		if (vgpu->vfio_device.kvm == itr->vfio_device.kvm) {
 			ret = true;
 			goto out;
 		}
@@ -806,7 +789,6 @@  static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
 	int ret;
 
 	vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
-	vgpu->group_notifier.notifier_call = intel_vgpu_group_notifier;
 
 	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
 	ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events,
@@ -817,38 +799,30 @@  static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
 		goto out;
 	}
 
-	events = VFIO_GROUP_NOTIFY_SET_KVM;
-	ret = vfio_register_notifier(vfio_dev, VFIO_GROUP_NOTIFY, &events,
-				     &vgpu->group_notifier);
-	if (ret != 0) {
-		gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
-			ret);
-		goto undo_iommu;
-	}
-
 	ret = -EEXIST;
 	if (vgpu->attached)
-		goto undo_register;
+		goto undo_iommu;
 
 	ret = -ESRCH;
-	if (!vgpu->kvm || vgpu->kvm->mm != current->mm) {
+	if (!vgpu->vfio_device.kvm ||
+	    vgpu->vfio_device.kvm->mm != current->mm) {
 		gvt_vgpu_err("KVM is required to use Intel vGPU\n");
-		goto undo_register;
+		goto undo_iommu;
 	}
 
 	ret = -EEXIST;
 	if (__kvmgt_vgpu_exist(vgpu))
-		goto undo_register;
+		goto undo_iommu;
 
 	vgpu->attached = true;
-	kvm_get_kvm(vgpu->kvm);
 
 	kvmgt_protect_table_init(vgpu);
 	gvt_cache_init(vgpu);
 
 	vgpu->track_node.track_write = kvmgt_page_track_write;
 	vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
-	kvm_page_track_register_notifier(vgpu->kvm, &vgpu->track_node);
+	kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
+					 &vgpu->track_node);
 
 	debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs,
 			     &vgpu->nr_cache_entries);
@@ -858,10 +832,6 @@  static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
 	atomic_set(&vgpu->released, 0);
 	return 0;
 
-undo_register:
-	vfio_unregister_notifier(vfio_dev, VFIO_GROUP_NOTIFY,
-				 &vgpu->group_notifier);
-
 undo_iommu:
 	vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY,
 				 &vgpu->iommu_notifier);
@@ -898,21 +868,15 @@  static void __intel_vgpu_release(struct intel_vgpu *vgpu)
 	drm_WARN(&i915->drm, ret,
 		 "vfio_unregister_notifier for iommu failed: %d\n", ret);
 
-	ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_GROUP_NOTIFY,
-				       &vgpu->group_notifier);
-	drm_WARN(&i915->drm, ret,
-		 "vfio_unregister_notifier for group failed: %d\n", ret);
-
 	debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));
 
-	kvm_page_track_unregister_notifier(vgpu->kvm, &vgpu->track_node);
-	kvm_put_kvm(vgpu->kvm);
+	kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
+					   &vgpu->track_node);
 	kvmgt_protect_table_destroy(vgpu);
 	gvt_cache_destroy(vgpu);
 
 	intel_vgpu_release_msi_eventfd_ctx(vgpu);
 
-	vgpu->kvm = NULL;
 	vgpu->attached = false;
 }
 
@@ -1713,7 +1677,7 @@  static struct mdev_driver intel_vgpu_mdev_driver = {
 
 int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
 {
-	struct kvm *kvm = info->kvm;
+	struct kvm *kvm = info->vfio_device.kvm;
 	struct kvm_memory_slot *slot;
 	int idx;
 
@@ -1743,7 +1707,7 @@  int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
 
 int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn)
 {
-	struct kvm *kvm = info->kvm;
+	struct kvm *kvm = info->vfio_device.kvm;
 	struct kvm_memory_slot *slot;
 	int idx;
 
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index e8914024f5b1..43c68afe7e87 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -1284,25 +1284,6 @@  static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
 	}
 }
 
-static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
-				       unsigned long action, void *data)
-{
-	int notify_rc = NOTIFY_OK;
-	struct ap_matrix_mdev *matrix_mdev;
-
-	if (action != VFIO_GROUP_NOTIFY_SET_KVM)
-		return NOTIFY_OK;
-
-	matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
-
-	if (!data)
-		vfio_ap_mdev_unset_kvm(matrix_mdev);
-	else if (vfio_ap_mdev_set_kvm(matrix_mdev, data))
-		notify_rc = NOTIFY_DONE;
-
-	return notify_rc;
-}
-
 static struct vfio_ap_queue *vfio_ap_find_queue(int apqn)
 {
 	struct device *dev;
@@ -1402,11 +1383,10 @@  static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
 	unsigned long events;
 	int ret;
 
-	matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier;
-	events = VFIO_GROUP_NOTIFY_SET_KVM;
+	if (!vdev->kvm)
+		return -EPERM;
 
-	ret = vfio_register_notifier(vdev, VFIO_GROUP_NOTIFY, &events,
-				     &matrix_mdev->group_notifier);
+	ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
 	if (ret)
 		return ret;
 
@@ -1415,12 +1395,11 @@  static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
 	ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events,
 				     &matrix_mdev->iommu_notifier);
 	if (ret)
-		goto out_unregister_group;
+		goto err_kvm;
 	return 0;
 
-out_unregister_group:
-	vfio_unregister_notifier(vdev, VFIO_GROUP_NOTIFY,
-				 &matrix_mdev->group_notifier);
+err_kvm:
+	vfio_ap_mdev_unset_kvm(matrix_mdev);
 	return ret;
 }
 
@@ -1431,8 +1410,6 @@  static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
 
 	vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY,
 				 &matrix_mdev->iommu_notifier);
-	vfio_unregister_notifier(vdev, VFIO_GROUP_NOTIFY,
-				 &matrix_mdev->group_notifier);
 	vfio_ap_mdev_unset_kvm(matrix_mdev);
 }
 
diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
index 648fcaf8104a..a26efd804d0d 100644
--- a/drivers/s390/crypto/vfio_ap_private.h
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -81,8 +81,6 @@  struct ap_matrix {
  * @node:	allows the ap_matrix_mdev struct to be added to a list
  * @matrix:	the adapters, usage domains and control domains assigned to the
  *		mediated matrix device.
- * @group_notifier: notifier block used for specifying callback function for
- *		    handling the VFIO_GROUP_NOTIFY_SET_KVM event
  * @iommu_notifier: notifier block used for specifying callback function for
  *		    handling the VFIO_IOMMU_NOTIFY_DMA_UNMAP even
  * @kvm:	the struct holding guest's state
@@ -94,7 +92,6 @@  struct ap_matrix_mdev {
 	struct vfio_device vdev;
 	struct list_head node;
 	struct ap_matrix matrix;
-	struct notifier_block group_notifier;
 	struct notifier_block iommu_notifier;
 	struct kvm *kvm;
 	crypto_hook pqap_hook;
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index cfcff7764403..c5d421eda275 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -10,6 +10,7 @@ 
  * Author: Tom Lyon, pugs@cisco.com
  */
 
+#include "linux/kvm_host.h"
 #include <linux/cdev.h>
 #include <linux/compat.h>
 #include <linux/device.h>
@@ -1083,6 +1084,13 @@  static struct file *vfio_device_open(struct vfio_device *device)
 
 	mutex_lock(&device->dev_set->lock);
 	device->open_count++;
+	down_write(&device->group->group_rwsem);
+	if (device->open_count == 1 && device->group->kvm) {
+		device->kvm = device->group->kvm;
+		kvm_get_kvm(device->kvm);
+	}
+	up_write(&device->group->group_rwsem);
+
 	if (device->open_count == 1 && device->ops->open_device) {
 		ret = device->ops->open_device(device);
 		if (ret)
@@ -1123,6 +1131,12 @@  static struct file *vfio_device_open(struct vfio_device *device)
 		device->ops->close_device(device);
 err_undo_count:
 	device->open_count--;
+	down_write(&device->group->group_rwsem);
+	if (device->open_count == 0 && device->kvm) {
+		kvm_put_kvm(device->kvm);
+		device->kvm = NULL;
+	}
+	up_write(&device->group->group_rwsem);
 	mutex_unlock(&device->dev_set->lock);
 	module_put(device->dev->driver->owner);
 err_unassign_container:
@@ -1318,6 +1332,12 @@  static int vfio_device_fops_release(struct inode *inode, struct file *filep)
 	if (device->open_count == 1 && device->ops->close_device)
 		device->ops->close_device(device);
 	device->open_count--;
+	down_write(&device->group->group_rwsem);
+	if (device->open_count == 0 && device->kvm) {
+		kvm_put_kvm(device->kvm);
+		device->kvm = NULL;
+	}
+	up_write(&device->group->group_rwsem);
 	mutex_unlock(&device->dev_set->lock);
 
 	module_put(device->dev->driver->owner);
@@ -1726,8 +1746,8 @@  EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
  * @file: VFIO group file
  * @kvm: KVM to link
  *
- * The kvm pointer will be forwarded to all the vfio_device's attached to the
- * VFIO file via the VFIO_GROUP_NOTIFY_SET_KVM notifier.
+ * When a VFIO device is first opened the KVM will be available in
+ * device->kvm if one was associated with the group.
  */
 void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
 {
@@ -1738,8 +1758,6 @@  void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
 
 	down_write(&group->group_rwsem);
 	group->kvm = kvm;
-	blocking_notifier_call_chain(&group->notifier,
-				     VFIO_GROUP_NOTIFY_SET_KVM, kvm);
 	up_write(&group->group_rwsem);
 }
 EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
@@ -2039,42 +2057,6 @@  static int vfio_unregister_iommu_notifier(struct vfio_group *group,
 	return ret;
 }
 
-static int vfio_register_group_notifier(struct vfio_group *group,
-					unsigned long *events,
-					struct notifier_block *nb)
-{
-	int ret;
-	bool set_kvm = false;
-
-	if (*events & VFIO_GROUP_NOTIFY_SET_KVM)
-		set_kvm = true;
-
-	/* clear known events */
-	*events &= ~VFIO_GROUP_NOTIFY_SET_KVM;
-
-	/* refuse to continue if still events remaining */
-	if (*events)
-		return -EINVAL;
-
-	ret = blocking_notifier_chain_register(&group->notifier, nb);
-	if (ret)
-		return ret;
-
-	/*
-	 * The attaching of kvm and vfio_group might already happen, so
-	 * here we replay once upon registration.
-	 */
-	if (set_kvm) {
-		down_read(&group->group_rwsem);
-		if (group->kvm)
-			blocking_notifier_call_chain(&group->notifier,
-						     VFIO_GROUP_NOTIFY_SET_KVM,
-						     group->kvm);
-		up_read(&group->group_rwsem);
-	}
-	return 0;
-}
-
 int vfio_register_notifier(struct vfio_device *device,
 			   enum vfio_notify_type type, unsigned long *events,
 			   struct notifier_block *nb)
@@ -2090,9 +2072,6 @@  int vfio_register_notifier(struct vfio_device *device,
 	case VFIO_IOMMU_NOTIFY:
 		ret = vfio_register_iommu_notifier(group, events, nb);
 		break;
-	case VFIO_GROUP_NOTIFY:
-		ret = vfio_register_group_notifier(group, events, nb);
-		break;
 	default:
 		ret = -EINVAL;
 	}
@@ -2114,9 +2093,6 @@  int vfio_unregister_notifier(struct vfio_device *device,
 	case VFIO_IOMMU_NOTIFY:
 		ret = vfio_unregister_iommu_notifier(group, nb);
 		break;
-	case VFIO_GROUP_NOTIFY:
-		ret = blocking_notifier_chain_unregister(&group->notifier, nb);
-		break;
 	default:
 		ret = -EINVAL;
 	}
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 45b287826ce6..5f691453e3fb 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -36,6 +36,7 @@  struct vfio_device {
 	struct vfio_device_set *dev_set;
 	struct list_head dev_set_list;
 	unsigned int migration_flags;
+	struct kvm *kvm;
 
 	/* Members below here are private, not for driver use */
 	refcount_t refcount;
@@ -155,15 +156,11 @@  extern int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova,
 /* each type has independent events */
 enum vfio_notify_type {
 	VFIO_IOMMU_NOTIFY = 0,
-	VFIO_GROUP_NOTIFY = 1,
 };
 
 /* events for VFIO_IOMMU_NOTIFY */
 #define VFIO_IOMMU_NOTIFY_DMA_UNMAP	BIT(0)
 
-/* events for VFIO_GROUP_NOTIFY */
-#define VFIO_GROUP_NOTIFY_SET_KVM	BIT(0)
-
 extern int vfio_register_notifier(struct vfio_device *device,
 				  enum vfio_notify_type type,
 				  unsigned long *required_events,