diff mbox series

[v14,22/26] vfio: Add VFIO_DEVICE_BIND_IOMMUFD

Message ID 20230711025928.6438-23-yi.l.liu@intel.com (mailing list archive)
State New, archived
Headers show
Series Add vfio_device cdev for iommufd support | expand

Commit Message

Yi Liu July 11, 2023, 2:59 a.m. UTC
This adds ioctl for userspace to bind device cdev fd to iommufd.

    VFIO_DEVICE_BIND_IOMMUFD: bind device to an iommufd, hence gain DMA
			      control provided by the iommufd. open_device
			      op is called after bind_iommufd op.

Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Tested-by: Yanting Jiang <yanting.jiang@intel.com>
Tested-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Tested-by: Terrence Xu <terrence.xu@intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
 drivers/vfio/device_cdev.c | 107 +++++++++++++++++++++++++++++++++++++
 drivers/vfio/vfio.h        |  13 +++++
 drivers/vfio/vfio_main.c   |   5 ++
 include/linux/vfio.h       |   5 +-
 include/uapi/linux/vfio.h  |  27 ++++++++++
 5 files changed, 155 insertions(+), 2 deletions(-)

Comments

Jason Gunthorpe July 14, 2023, 2:42 p.m. UTC | #1
On Mon, Jul 10, 2023 at 07:59:24PM -0700, Yi Liu wrote:

> +static inline long vfio_df_ioctl_bind_iommufd(struct vfio_device_file *df,
> +					      struct vfio_device_bind_iommufd __user *arg)
> +{
> +	return -EOPNOTSUPP;
> +}

This should be -ENOTTY

> @@ -1149,6 +1151,9 @@ static long vfio_device_fops_unl_ioctl(struct file *filep,
>  	void __user *uptr = (void __user *)arg;
>  	int ret;
>  
> +	if (cmd == VFIO_DEVICE_BIND_IOMMUFD)
> +		return vfio_df_ioctl_bind_iommufd(df, uptr);
> +

And this function has a mistake too:

	default:
		if (unlikely(!device->ops->ioctl))
			ret = -EINVAL;

Should also be -ENOTTY

All the implementations of the ops already return -ENOTTY

However, I think this is all slightly not quite right, the proper
return code is supposed to be ENOIOCTLCMD which vfs_ioctl() then
translates into ENOTTY for some reason..

It looks Ok otherwise

Jason
Yi Liu July 15, 2023, 4:16 a.m. UTC | #2
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Friday, July 14, 2023 10:42 PM
> 
> On Mon, Jul 10, 2023 at 07:59:24PM -0700, Yi Liu wrote:
> 
> > +static inline long vfio_df_ioctl_bind_iommufd(struct vfio_device_file *df,
> > +					      struct vfio_device_bind_iommufd __user
> *arg)
> > +{
> > +	return -EOPNOTSUPP;
> > +}
> 
> This should be -ENOTTY

Okay. Since there are quite a few stub functions in the drivers/vfio/vfio.h.
Let me check the rule. All the stub functions should return -ENOTTY in
the !IS_ENABLED(CONFIG_XXX) case, if the function returns int., is it?

> > @@ -1149,6 +1151,9 @@ static long vfio_device_fops_unl_ioctl(struct file *filep,
> >  	void __user *uptr = (void __user *)arg;
> >  	int ret;
> >
> > +	if (cmd == VFIO_DEVICE_BIND_IOMMUFD)
> > +		return vfio_df_ioctl_bind_iommufd(df, uptr);
> > +
> 
> And this function has a mistake too:
> 
> 	default:
> 		if (unlikely(!device->ops->ioctl))
> 			ret = -EINVAL;
> 
> Should also be -ENOTTY
> 
> All the implementations of the ops already return -ENOTTY
> 
> However, I think this is all slightly not quite right, the proper
> return code is supposed to be ENOIOCTLCMD which vfs_ioctl() then
> translates into ENOTTY for some reason..
> 
> It looks Ok otherwise

This is not in the scope of this series. May need a separate fix patch. @Alex?

Regards,
Yi Liu
Jason Gunthorpe July 15, 2023, 1:04 p.m. UTC | #3
On Sat, Jul 15, 2023 at 04:16:52AM +0000, Liu, Yi L wrote:
> > From: Jason Gunthorpe <jgg@nvidia.com>
> > Sent: Friday, July 14, 2023 10:42 PM
> > 
> > On Mon, Jul 10, 2023 at 07:59:24PM -0700, Yi Liu wrote:
> > 
> > > +static inline long vfio_df_ioctl_bind_iommufd(struct vfio_device_file *df,
> > > +					      struct vfio_device_bind_iommufd __user
> > *arg)
> > > +{
> > > +	return -EOPNOTSUPP;
> > > +}
> > 
> > This should be -ENOTTY
> 
> Okay. Since there are quite a few stub functions in the drivers/vfio/vfio.h.
> Let me check the rule. All the stub functions should return -ENOTTY in
> the !IS_ENABLED(CONFIG_XXX) case, if the function returns int., is
> it?

No, just ioctl returns ENOTTY, so really just this function.

Jason
Yi Liu July 15, 2023, 2:19 p.m. UTC | #4
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Saturday, July 15, 2023 9:05 PM
> 
> On Sat, Jul 15, 2023 at 04:16:52AM +0000, Liu, Yi L wrote:
> > > From: Jason Gunthorpe <jgg@nvidia.com>
> > > Sent: Friday, July 14, 2023 10:42 PM
> > >
> > > On Mon, Jul 10, 2023 at 07:59:24PM -0700, Yi Liu wrote:
> > >
> > > > +static inline long vfio_df_ioctl_bind_iommufd(struct vfio_device_file *df,
> > > > +					      struct vfio_device_bind_iommufd __user
> > > *arg)
> > > > +{
> > > > +	return -EOPNOTSUPP;
> > > > +}
> > >
> > > This should be -ENOTTY
> >
> > Okay. Since there are quite a few stub functions in the drivers/vfio/vfio.h.
> > Let me check the rule. All the stub functions should return -ENOTTY in
> > the !IS_ENABLED(CONFIG_XXX) case, if the function returns int., is
> > it?
> 
> No, just ioctl returns ENOTTY, so really just this function.

Ok. I see.

Regards,
Yi Liu
diff mbox series

Patch

diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c
index bf1032d00107..f40784dd5561 100644
--- a/drivers/vfio/device_cdev.c
+++ b/drivers/vfio/device_cdev.c
@@ -3,6 +3,7 @@ 
  * Copyright (c) 2023 Intel Corporation.
  */
 #include <linux/vfio.h>
+#include <linux/iommufd.h>
 
 #include "vfio.h"
 
@@ -45,6 +46,112 @@  int vfio_device_fops_cdev_open(struct inode *inode, struct file *filep)
 	return ret;
 }
 
+static void vfio_df_get_kvm_safe(struct vfio_device_file *df)
+{
+	spin_lock(&df->kvm_ref_lock);
+	vfio_device_get_kvm_safe(df->device, df->kvm);
+	spin_unlock(&df->kvm_ref_lock);
+}
+
+long vfio_df_ioctl_bind_iommufd(struct vfio_device_file *df,
+				struct vfio_device_bind_iommufd __user *arg)
+{
+	struct vfio_device *device = df->device;
+	struct vfio_device_bind_iommufd bind;
+	unsigned long minsz;
+	int ret;
+
+	static_assert(__same_type(arg->out_devid, df->devid));
+
+	minsz = offsetofend(struct vfio_device_bind_iommufd, out_devid);
+
+	if (copy_from_user(&bind, arg, minsz))
+		return -EFAULT;
+
+	if (bind.argsz < minsz || bind.flags || bind.iommufd < 0)
+		return -EINVAL;
+
+	/* BIND_IOMMUFD only allowed for cdev fds */
+	if (df->group)
+		return -EINVAL;
+
+	ret = vfio_device_block_group(device);
+	if (ret)
+		return ret;
+
+	mutex_lock(&device->dev_set->lock);
+	/* one device cannot be bound twice */
+	if (df->access_granted) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	df->iommufd = iommufd_ctx_from_fd(bind.iommufd);
+	if (IS_ERR(df->iommufd)) {
+		ret = PTR_ERR(df->iommufd);
+		df->iommufd = NULL;
+		goto out_unlock;
+	}
+
+	/*
+	 * Before the device open, get the KVM pointer currently
+	 * associated with the device file (if there is) and obtain
+	 * a reference.  This reference is held until device closed.
+	 * Save the pointer in the device for use by drivers.
+	 */
+	vfio_df_get_kvm_safe(df);
+
+	ret = vfio_df_open(df);
+	if (ret)
+		goto out_put_kvm;
+
+	ret = copy_to_user(&arg->out_devid, &df->devid,
+			   sizeof(df->devid)) ? -EFAULT : 0;
+	if (ret)
+		goto out_close_device;
+
+	device->cdev_opened = true;
+	/*
+	 * Paired with smp_load_acquire() in vfio_device_fops::ioctl/
+	 * read/write/mmap
+	 */
+	smp_store_release(&df->access_granted, true);
+	mutex_unlock(&device->dev_set->lock);
+	return 0;
+
+out_close_device:
+	vfio_df_close(df);
+out_put_kvm:
+	vfio_device_put_kvm(device);
+	iommufd_ctx_put(df->iommufd);
+	df->iommufd = NULL;
+out_unlock:
+	mutex_unlock(&device->dev_set->lock);
+	vfio_device_unblock_group(device);
+	return ret;
+}
+
+void vfio_df_unbind_iommufd(struct vfio_device_file *df)
+{
+	struct vfio_device *device = df->device;
+
+	/*
+	 * In the time of close, there is no contention with another one
+	 * changing this flag.  So read df->access_granted without lock
+	 * and no smp_load_acquire() is ok.
+	 */
+	if (!df->access_granted)
+		return;
+
+	mutex_lock(&device->dev_set->lock);
+	vfio_df_close(df);
+	vfio_device_put_kvm(device);
+	iommufd_ctx_put(df->iommufd);
+	device->cdev_opened = false;
+	mutex_unlock(&device->dev_set->lock);
+	vfio_device_unblock_group(device);
+}
+
 static char *vfio_device_devnode(const struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "vfio/devices/%s", dev_name(dev));
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index c2aa65382592..137d3e9ca798 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -287,6 +287,9 @@  static inline void vfio_device_del(struct vfio_device *device)
 }
 
 int vfio_device_fops_cdev_open(struct inode *inode, struct file *filep);
+long vfio_df_ioctl_bind_iommufd(struct vfio_device_file *df,
+				struct vfio_device_bind_iommufd __user *arg);
+void vfio_df_unbind_iommufd(struct vfio_device_file *df);
 int vfio_cdev_init(struct class *device_class);
 void vfio_cdev_cleanup(void);
 #else
@@ -310,6 +313,16 @@  static inline int vfio_device_fops_cdev_open(struct inode *inode,
 	return 0;
 }
 
+static inline long vfio_df_ioctl_bind_iommufd(struct vfio_device_file *df,
+					      struct vfio_device_bind_iommufd __user *arg)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void vfio_df_unbind_iommufd(struct vfio_device_file *df)
+{
+}
+
 static inline int vfio_cdev_init(struct class *device_class)
 {
 	return 0;
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index a2744cb64c6d..9fdf93ff17cf 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -575,6 +575,8 @@  static int vfio_device_fops_release(struct inode *inode, struct file *filep)
 
 	if (df->group)
 		vfio_df_group_close(df);
+	else
+		vfio_df_unbind_iommufd(df);
 
 	vfio_device_put_registration(device);
 
@@ -1149,6 +1151,9 @@  static long vfio_device_fops_unl_ioctl(struct file *filep,
 	void __user *uptr = (void __user *)arg;
 	int ret;
 
+	if (cmd == VFIO_DEVICE_BIND_IOMMUFD)
+		return vfio_df_ioctl_bind_iommufd(df, uptr);
+
 	/* Paired with smp_store_release() following vfio_df_open() */
 	if (!smp_load_acquire(&df->access_granted))
 		return -EINVAL;
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index e0069f26488d..d6228c839c44 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -64,8 +64,9 @@  struct vfio_device {
 	void (*put_kvm)(struct kvm *kvm);
 #if IS_ENABLED(CONFIG_IOMMUFD)
 	struct iommufd_device *iommufd_device;
-	bool iommufd_attached;
+	u8 iommufd_attached:1;
 #endif
+	u8 cdev_opened:1;
 };
 
 /**
@@ -168,7 +169,7 @@  vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx)
 
 static inline bool vfio_device_cdev_opened(struct vfio_device *device)
 {
-	return false;
+	return device->cdev_opened;
 }
 
 /**
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 4c3d548e9c96..098946b23e86 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -897,6 +897,33 @@  struct vfio_device_feature {
 
 #define VFIO_DEVICE_FEATURE		_IO(VFIO_TYPE, VFIO_BASE + 17)
 
+/*
+ * VFIO_DEVICE_BIND_IOMMUFD - _IOR(VFIO_TYPE, VFIO_BASE + 18,
+ *				   struct vfio_device_bind_iommufd)
+ * @argsz:	 User filled size of this data.
+ * @flags:	 Must be 0.
+ * @iommufd:	 iommufd to bind.
+ * @out_devid:	 The device id generated by this bind. devid is a handle for
+ *		 this device/iommufd bond and can be used in IOMMUFD commands.
+ *
+ * Bind a vfio_device to the specified iommufd.
+ *
+ * User is restricted from accessing the device before the binding operation
+ * is completed.  Only allowed on cdev fds.
+ *
+ * Unbind is automatically conducted when device fd is closed.
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+struct vfio_device_bind_iommufd {
+	__u32		argsz;
+	__u32		flags;
+	__s32		iommufd;
+	__u32		out_devid;
+};
+
+#define VFIO_DEVICE_BIND_IOMMUFD	_IO(VFIO_TYPE, VFIO_BASE + 18)
+
 /*
  * Provide support for setting a PCI VF Token, which is used as a shared
  * secret between PF and VF drivers.  This feature may only be set on a