diff mbox series

[RFC,01/10] vfio: Create vfio_fs_type with inode per device

Message ID 161401263517.16443.7534035240372538844.stgit@gimli.home (mailing list archive)
State New, archived
Headers show
Series vfio: Device memory DMA mapping improvements | expand

Commit Message

Alex Williamson Feb. 22, 2021, 4:50 p.m. UTC
By linking all the device fds we provide to userspace to an
address space through a new pseudo fs, we can use tools like
unmap_mapping_range() to zap all vmas associated with a device.

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio.c |   54 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

Comments

Christoph Hellwig Feb. 26, 2021, 5:38 a.m. UTC | #1
On Mon, Feb 22, 2021 at 09:50:35AM -0700, Alex Williamson wrote:
> By linking all the device fds we provide to userspace to an
> address space through a new pseudo fs, we can use tools like
> unmap_mapping_range() to zap all vmas associated with a device.
> 
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>

Adding Al:

I hate how we're are growing these tiny file systems just to allocate an
anonymous inode all over.  Shouldn't we allow to enhance fs/anon_inodes.c
to add a new API to allocate a new specific inode from anon_inodefs
instead?
Jason Gunthorpe Feb. 26, 2021, 1:15 p.m. UTC | #2
On Fri, Feb 26, 2021 at 05:38:04AM +0000, Christoph Hellwig wrote:
> On Mon, Feb 22, 2021 at 09:50:35AM -0700, Alex Williamson wrote:
> > By linking all the device fds we provide to userspace to an
> > address space through a new pseudo fs, we can use tools like
> > unmap_mapping_range() to zap all vmas associated with a device.
> > 
> > Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> 
> Adding Al:
> 
> I hate how we're are growing these tiny file systems just to allocate an
> anonymous inode all over.  Shouldn't we allow to enhance fs/anon_inodes.c
> to add a new API to allocate a new specific inode from anon_inodefs
> instead?

+1 when I was researching this I also felt this was alot of
boilerplate to just get an inode. With vfio and rdma getting this it
is at least 5 places now.

Jason
diff mbox series

Patch

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 38779e6fd80c..464caef97aff 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -32,11 +32,18 @@ 
 #include <linux/vfio.h>
 #include <linux/wait.h>
 #include <linux/sched/signal.h>
+#include <linux/pseudo_fs.h>
+#include <linux/mount.h>
 
 #define DRIVER_VERSION	"0.3"
 #define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
 #define DRIVER_DESC	"VFIO - User Level meta-driver"
 
+#define VFIO_MAGIC 0x5646494f /* "VFIO" */
+
+static int vfio_fs_cnt;
+static struct vfsmount *vfio_fs_mnt;
+
 static struct vfio {
 	struct class			*class;
 	struct list_head		iommu_drivers_list;
@@ -97,6 +104,7 @@  struct vfio_device {
 	struct vfio_group		*group;
 	struct list_head		group_next;
 	void				*device_data;
+	struct inode			*inode;
 };
 
 #ifdef CONFIG_VFIO_NOIOMMU
@@ -529,6 +537,34 @@  static struct vfio_group *vfio_group_get_from_dev(struct device *dev)
 	return group;
 }
 
+static int vfio_fs_init_fs_context(struct fs_context *fc)
+{
+	return init_pseudo(fc, VFIO_MAGIC) ? 0 : -ENOMEM;
+}
+
+static struct file_system_type vfio_fs_type = {
+	.name = "vfio",
+	.owner = THIS_MODULE,
+	.init_fs_context = vfio_fs_init_fs_context,
+	.kill_sb = kill_anon_super,
+};
+
+static struct inode *vfio_fs_inode_new(void)
+{
+	struct inode *inode;
+	int ret;
+
+	ret = simple_pin_fs(&vfio_fs_type, &vfio_fs_mnt, &vfio_fs_cnt);
+	if (ret)
+		return ERR_PTR(ret);
+
+	inode = alloc_anon_inode(vfio_fs_mnt->mnt_sb);
+	if (IS_ERR(inode))
+		simple_release_fs(&vfio_fs_mnt, &vfio_fs_cnt);
+
+	return inode;
+}
+
 /**
  * Device objects - create, release, get, put, search
  */
@@ -539,11 +575,19 @@  struct vfio_device *vfio_group_create_device(struct vfio_group *group,
 					     void *device_data)
 {
 	struct vfio_device *device;
+	struct inode *inode;
 
 	device = kzalloc(sizeof(*device), GFP_KERNEL);
 	if (!device)
 		return ERR_PTR(-ENOMEM);
 
+	inode = vfio_fs_inode_new();
+	if (IS_ERR(inode)) {
+		kfree(device);
+		return (struct vfio_device *)inode;
+	}
+	device->inode = inode;
+
 	kref_init(&device->kref);
 	device->dev = dev;
 	device->group = group;
@@ -574,6 +618,9 @@  static void vfio_device_release(struct kref *kref)
 
 	dev_set_drvdata(device->dev, NULL);
 
+	iput(device->inode);
+	simple_release_fs(&vfio_fs_mnt, &vfio_fs_cnt);
+
 	kfree(device);
 
 	/* vfio_del_group_dev may be waiting for this device */
@@ -1488,6 +1535,13 @@  static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
 	 */
 	filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
 
+	/*
+	 * Use the pseudo fs inode on the device to link all mmaps
+	 * to the same address space, allowing us to unmap all vmas
+	 * associated to this device using unmap_mapping_range().
+	 */
+	filep->f_mapping = device->inode->i_mapping;
+
 	atomic_inc(&group->container_users);
 
 	fd_install(ret, filep);