diff mbox series

[v1,01/14] vfio: Create vfio_fs_type with inode per device

Message ID 161524004828.3480.1817334832614722574.stgit@gimli.home (mailing list archive)
State New, archived
Headers show
Series vfio: Device memory DMA mapping improvements | expand

Commit Message

Alex Williamson March 8, 2021, 9:47 p.m. UTC
By linking all the device fds we provide to userspace to an
address space through a new pseudo fs, we can use tools like
unmap_mapping_range() to zap all vmas associated with a device.

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio.c |   54 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

Comments

Christoph Hellwig March 9, 2021, 8:36 a.m. UTC | #1
On Mon, Mar 08, 2021 at 02:47:28PM -0700, Alex Williamson wrote:
> By linking all the device fds we provide to userspace to an
> address space through a new pseudo fs, we can use tools like
> unmap_mapping_range() to zap all vmas associated with a device.
> 
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>

I'd much prefer to just piggy back on the anon_inode fs rather than
duplicating this logic all over.  Something like the trivial patch below
should be all that is needed:

diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index a280156138ed89..6fe404aab0f0dd 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -225,6 +225,12 @@ int anon_inode_getfd_secure(const char *name, const struct file_operations *fops
 }
 EXPORT_SYMBOL_GPL(anon_inode_getfd_secure);
 
+struct inode *alloc_anon_inode_simple(void)
+{
+	return alloc_anon_inode(anon_inode_mnt->mnt_sb);
+}
+EXPORT_SYMBOL_GPL(alloc_anon_inode_simple);
+
 static int __init anon_inode_init(void)
 {
 	anon_inode_mnt = kern_mount(&anon_inode_fs_type);
diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h
index 71881a2b6f7860..6b2fb7d0abc57a 100644
--- a/include/linux/anon_inodes.h
+++ b/include/linux/anon_inodes.h
@@ -21,6 +21,7 @@ int anon_inode_getfd_secure(const char *name,
 			    const struct file_operations *fops,
 			    void *priv, int flags,
 			    const struct inode *context_inode);
+struct inode *alloc_anon_inode_simple(void);
 
 #endif /* _LINUX_ANON_INODES_H */
Zengtao (B) April 9, 2021, 4:54 a.m. UTC | #2
> -----邮件原件-----
> 发件人: Alex Williamson [mailto:alex.williamson@redhat.com]
> 发送时间: 2021年3月9日 5:47
> 收件人: alex.williamson@redhat.com
> 抄送: cohuck@redhat.com; kvm@vger.kernel.org;
> linux-kernel@vger.kernel.org; jgg@nvidia.com; peterx@redhat.com
> 主题: [PATCH v1 01/14] vfio: Create vfio_fs_type with inode per device
> 
> By linking all the device fds we provide to userspace to an address space
> through a new pseudo fs, we can use tools like
> unmap_mapping_range() to zap all vmas associated with a device.
> 
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> ---
>  drivers/vfio/vfio.c |   54
> +++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 54 insertions(+)
> 
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index
> 38779e6fd80c..abdf8d52a911 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -32,11 +32,18 @@
>  #include <linux/vfio.h>
>  #include <linux/wait.h>
>  #include <linux/sched/signal.h>
> +#include <linux/pseudo_fs.h>
> +#include <linux/mount.h>
Minor: keep the headers in alphabetical order.

> 
>  #define DRIVER_VERSION	"0.3"
>  #define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
>  #define DRIVER_DESC	"VFIO - User Level meta-driver"
> 
> +#define VFIO_MAGIC 0x5646494f /* "VFIO" */
Move to include/uapi/linux/magic.h ? 

> +
> +static int vfio_fs_cnt;
> +static struct vfsmount *vfio_fs_mnt;
> +
>  static struct vfio {
>  	struct class			*class;
>  	struct list_head		iommu_drivers_list;
> @@ -97,6 +104,7 @@ struct vfio_device {
>  	struct vfio_group		*group;
>  	struct list_head		group_next;
>  	void				*device_data;
> +	struct inode			*inode;
>  };
> 
>  #ifdef CONFIG_VFIO_NOIOMMU
> @@ -529,6 +537,34 @@ static struct vfio_group
> *vfio_group_get_from_dev(struct device *dev)
>  	return group;
>  }
> 
> +static int vfio_fs_init_fs_context(struct fs_context *fc) {
> +	return init_pseudo(fc, VFIO_MAGIC) ? 0 : -ENOMEM; }
> +
> +static struct file_system_type vfio_fs_type = {
> +	.name = "vfio",
> +	.owner = THIS_MODULE,
> +	.init_fs_context = vfio_fs_init_fs_context,
> +	.kill_sb = kill_anon_super,
> +};
> +
> +static struct inode *vfio_fs_inode_new(void) {
> +	struct inode *inode;
> +	int ret;
> +
> +	ret = simple_pin_fs(&vfio_fs_type, &vfio_fs_mnt, &vfio_fs_cnt);
> +	if (ret)
> +		return ERR_PTR(ret);
> +
> +	inode = alloc_anon_inode(vfio_fs_mnt->mnt_sb);
> +	if (IS_ERR(inode))
> +		simple_release_fs(&vfio_fs_mnt, &vfio_fs_cnt);
> +
> +	return inode;
> +}
> +
>  /**
>   * Device objects - create, release, get, put, search
>   */
> @@ -539,11 +575,19 @@ struct vfio_device
> *vfio_group_create_device(struct vfio_group *group,
>  					     void *device_data)
>  {
>  	struct vfio_device *device;
> +	struct inode *inode;
> 
>  	device = kzalloc(sizeof(*device), GFP_KERNEL);
>  	if (!device)
>  		return ERR_PTR(-ENOMEM);
> 
> +	inode = vfio_fs_inode_new();
> +	if (IS_ERR(inode)) {
> +		kfree(device);
> +		return ERR_CAST(inode);
> +	}
> +	device->inode = inode;
> +
>  	kref_init(&device->kref);
>  	device->dev = dev;
>  	device->group = group;
> @@ -574,6 +618,9 @@ static void vfio_device_release(struct kref *kref)
> 
>  	dev_set_drvdata(device->dev, NULL);
> 
> +	iput(device->inode);
> +	simple_release_fs(&vfio_fs_mnt, &vfio_fs_cnt);
> +
>  	kfree(device);
> 
>  	/* vfio_del_group_dev may be waiting for this device */ @@ -1488,6
> +1535,13 @@ static int vfio_group_get_device_fd(struct vfio_group *group,
> char *buf)
>  	 */
>  	filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
> 
> +	/*
> +	 * Use the pseudo fs inode on the device to link all mmaps
> +	 * to the same address space, allowing us to unmap all vmas
> +	 * associated to this device using unmap_mapping_range().
> +	 */
> +	filep->f_mapping = device->inode->i_mapping;
> +
>  	atomic_inc(&group->container_users);
> 
>  	fd_install(ret, filep);
Alex Williamson April 9, 2021, 2:24 p.m. UTC | #3
On Fri, 9 Apr 2021 04:54:23 +0000
"Zengtao (B)" <prime.zeng@hisilicon.com> wrote:

> > -----邮件原件-----
> > 发件人: Alex Williamson [mailto:alex.williamson@redhat.com]
> > 发送时间: 2021年3月9日 5:47
> > 收件人: alex.williamson@redhat.com
> > 抄送: cohuck@redhat.com; kvm@vger.kernel.org;
> > linux-kernel@vger.kernel.org; jgg@nvidia.com; peterx@redhat.com
> > 主题: [PATCH v1 01/14] vfio: Create vfio_fs_type with inode per device
> > 
> > By linking all the device fds we provide to userspace to an address space
> > through a new pseudo fs, we can use tools like
> > unmap_mapping_range() to zap all vmas associated with a device.
> > 
> > Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > ---
> >  drivers/vfio/vfio.c |   54
> > +++++++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 54 insertions(+)
> > 
> > diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index
> > 38779e6fd80c..abdf8d52a911 100644
> > --- a/drivers/vfio/vfio.c
> > +++ b/drivers/vfio/vfio.c
> > @@ -32,11 +32,18 @@
> >  #include <linux/vfio.h>
> >  #include <linux/wait.h>
> >  #include <linux/sched/signal.h>
> > +#include <linux/pseudo_fs.h>
> > +#include <linux/mount.h>  
> Minor: keep the headers in alphabetical order.

They started out that way, but various tree-wide changes ignoring that,
and likely oversights on my part as well, has left us with numerous
breaks in that rule already.

> > 
> >  #define DRIVER_VERSION	"0.3"
> >  #define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
> >  #define DRIVER_DESC	"VFIO - User Level meta-driver"
> > 
> > +#define VFIO_MAGIC 0x5646494f /* "VFIO" */  
> Move to include/uapi/linux/magic.h ? 

Hmm, yeah, I suppose it probably should go there.  Thanks.

FWIW, I'm still working on a next version of this series, currently
struggling how to handle an arbitrary number of vmas per user DMA
mapping.  Thanks,

Alex
Jason Gunthorpe April 9, 2021, 5:32 p.m. UTC | #4
On Fri, Apr 09, 2021 at 08:24:00AM -0600, Alex Williamson wrote:

> > >  #define DRIVER_VERSION	"0.3"
> > >  #define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
> > >  #define DRIVER_DESC	"VFIO - User Level meta-driver"
> > > 
> > > +#define VFIO_MAGIC 0x5646494f /* "VFIO" */  
> > Move to include/uapi/linux/magic.h ? 
> 
> Hmm, yeah, I suppose it probably should go there.  Thanks.

From my review we haven't been doing that unless it is actually uapi
relavent for some reason (this is not)

In particular with CH having a patch series to eliminate all of these
cases and drop the constants..

Jason
Zengtao (B) April 12, 2021, 4:03 a.m. UTC | #5
> -----邮件原件-----
> 发件人: Jason Gunthorpe [mailto:jgg@nvidia.com]
> 发送时间: 2021年4月10日 1:32
> 收件人: Alex Williamson <alex.williamson@redhat.com>
> 抄送: Zengtao (B) <prime.zeng@hisilicon.com>; cohuck@redhat.com;
> kvm@vger.kernel.org; linux-kernel@vger.kernel.org; peterx@redhat.com
> 主题: Re: [PATCH v1 01/14] vfio: Create vfio_fs_type with inode per device
> 
> On Fri, Apr 09, 2021 at 08:24:00AM -0600, Alex Williamson wrote:
> 
> > > >  #define DRIVER_VERSION	"0.3"
> > > >  #define DRIVER_AUTHOR	"Alex Williamson
> <alex.williamson@redhat.com>"
> > > >  #define DRIVER_DESC	"VFIO - User Level meta-driver"
> > > >
> > > > +#define VFIO_MAGIC 0x5646494f /* "VFIO" */
> > > Move to include/uapi/linux/magic.h ?
> >
> > Hmm, yeah, I suppose it probably should go there.  Thanks.
> 
> From my review we haven't been doing that unless it is actually uapi relavent
> for some reason (this is not)
>
Yes, it's not uapi relavent, and I still think it's better to put magic
 in a single header, and currently not all micros in magic.h are for
 uapi, some work should be done for that, but no ideas now, :)
 
> In particular with CH having a patch series to eliminate all of these cases and
> drop the constants..
> 
Interested, could you share the links for that?

Thanks 
Zengtao 

> Jason
Zengtao (B) April 12, 2021, 4:09 a.m. UTC | #6
> -----邮件原件-----
> 发件人: Alex Williamson [mailto:alex.williamson@redhat.com]
> 发送时间: 2021年4月9日 22:24
> 收件人: Zengtao (B) <prime.zeng@hisilicon.com>
> 抄送: cohuck@redhat.com; kvm@vger.kernel.org;
> linux-kernel@vger.kernel.org; jgg@nvidia.com; peterx@redhat.com
> 主题: Re: [PATCH v1 01/14] vfio: Create vfio_fs_type with inode per device
> 
> On Fri, 9 Apr 2021 04:54:23 +0000
> "Zengtao (B)" <prime.zeng@hisilicon.com> wrote:
> 
> > > -----邮件原件-----
> > > 发件人: Alex Williamson [mailto:alex.williamson@redhat.com]
> > > 发送时间: 2021年3月9日 5:47
> > > 收件人: alex.williamson@redhat.com
> > > 抄送: cohuck@redhat.com; kvm@vger.kernel.org;
> > > linux-kernel@vger.kernel.org; jgg@nvidia.com; peterx@redhat.com
> > > 主题: [PATCH v1 01/14] vfio: Create vfio_fs_type with inode per device
> > >
> > > By linking all the device fds we provide to userspace to an address
> > > space through a new pseudo fs, we can use tools like
> > > unmap_mapping_range() to zap all vmas associated with a device.
> > >
> > > Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> > > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > > ---
> > >  drivers/vfio/vfio.c |   54
> > > +++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  1 file changed, 54 insertions(+)
> > >
> > > diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index
> > > 38779e6fd80c..abdf8d52a911 100644
> > > --- a/drivers/vfio/vfio.c
> > > +++ b/drivers/vfio/vfio.c
> > > @@ -32,11 +32,18 @@
> > >  #include <linux/vfio.h>
> > >  #include <linux/wait.h>
> > >  #include <linux/sched/signal.h>
> > > +#include <linux/pseudo_fs.h>
> > > +#include <linux/mount.h>
> > Minor: keep the headers in alphabetical order.
> 
> They started out that way, but various tree-wide changes ignoring that, and
> likely oversights on my part as well, has left us with numerous breaks in that
> rule already.
> 
> > >
> > >  #define DRIVER_VERSION	"0.3"
> > >  #define DRIVER_AUTHOR	"Alex Williamson
> <alex.williamson@redhat.com>"
> > >  #define DRIVER_DESC	"VFIO - User Level meta-driver"
> > >
> > > +#define VFIO_MAGIC 0x5646494f /* "VFIO" */
> > Move to include/uapi/linux/magic.h ?
> 
> Hmm, yeah, I suppose it probably should go there.  Thanks.
> 
> FWIW, I'm still working on a next version of this series, currently struggling
> how to handle an arbitrary number of vmas per user DMA mapping.  Thanks,
> 

Will do some testing on our platform, and want to make sure the issue
 I reported is included : 
 https://patchwork.kernel.org/project/kvm/patch/1615201890-887-1-git-send-email-prime.zeng@hisilicon.com/
 
 Thanks
 zengtao


> Alex
diff mbox series

Patch

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 38779e6fd80c..abdf8d52a911 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -32,11 +32,18 @@ 
 #include <linux/vfio.h>
 #include <linux/wait.h>
 #include <linux/sched/signal.h>
+#include <linux/pseudo_fs.h>
+#include <linux/mount.h>
 
 #define DRIVER_VERSION	"0.3"
 #define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
 #define DRIVER_DESC	"VFIO - User Level meta-driver"
 
+#define VFIO_MAGIC 0x5646494f /* "VFIO" */
+
+static int vfio_fs_cnt;
+static struct vfsmount *vfio_fs_mnt;
+
 static struct vfio {
 	struct class			*class;
 	struct list_head		iommu_drivers_list;
@@ -97,6 +104,7 @@  struct vfio_device {
 	struct vfio_group		*group;
 	struct list_head		group_next;
 	void				*device_data;
+	struct inode			*inode;
 };
 
 #ifdef CONFIG_VFIO_NOIOMMU
@@ -529,6 +537,34 @@  static struct vfio_group *vfio_group_get_from_dev(struct device *dev)
 	return group;
 }
 
+static int vfio_fs_init_fs_context(struct fs_context *fc)
+{
+	return init_pseudo(fc, VFIO_MAGIC) ? 0 : -ENOMEM;
+}
+
+static struct file_system_type vfio_fs_type = {
+	.name = "vfio",
+	.owner = THIS_MODULE,
+	.init_fs_context = vfio_fs_init_fs_context,
+	.kill_sb = kill_anon_super,
+};
+
+static struct inode *vfio_fs_inode_new(void)
+{
+	struct inode *inode;
+	int ret;
+
+	ret = simple_pin_fs(&vfio_fs_type, &vfio_fs_mnt, &vfio_fs_cnt);
+	if (ret)
+		return ERR_PTR(ret);
+
+	inode = alloc_anon_inode(vfio_fs_mnt->mnt_sb);
+	if (IS_ERR(inode))
+		simple_release_fs(&vfio_fs_mnt, &vfio_fs_cnt);
+
+	return inode;
+}
+
 /**
  * Device objects - create, release, get, put, search
  */
@@ -539,11 +575,19 @@  struct vfio_device *vfio_group_create_device(struct vfio_group *group,
 					     void *device_data)
 {
 	struct vfio_device *device;
+	struct inode *inode;
 
 	device = kzalloc(sizeof(*device), GFP_KERNEL);
 	if (!device)
 		return ERR_PTR(-ENOMEM);
 
+	inode = vfio_fs_inode_new();
+	if (IS_ERR(inode)) {
+		kfree(device);
+		return ERR_CAST(inode);
+	}
+	device->inode = inode;
+
 	kref_init(&device->kref);
 	device->dev = dev;
 	device->group = group;
@@ -574,6 +618,9 @@  static void vfio_device_release(struct kref *kref)
 
 	dev_set_drvdata(device->dev, NULL);
 
+	iput(device->inode);
+	simple_release_fs(&vfio_fs_mnt, &vfio_fs_cnt);
+
 	kfree(device);
 
 	/* vfio_del_group_dev may be waiting for this device */
@@ -1488,6 +1535,13 @@  static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
 	 */
 	filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
 
+	/*
+	 * Use the pseudo fs inode on the device to link all mmaps
+	 * to the same address space, allowing us to unmap all vmas
+	 * associated to this device using unmap_mapping_range().
+	 */
+	filep->f_mapping = device->inode->i_mapping;
+
 	atomic_inc(&group->container_users);
 
 	fd_install(ret, filep);