diff mbox series

[v12,21/24] vfio: Determine noiommu device in __vfio_register_dev()

Message ID 20230602121653.80017-22-yi.l.liu@intel.com (mailing list archive)
State New, archived
Headers show
Series Add vfio_device cdev for iommufd support | expand

Commit Message

Yi Liu June 2, 2023, 12:16 p.m. UTC
This moves the noiommu device determination and noiommu taint out of
vfio_group_find_or_alloc(). noiommu device is determined in
__vfio_register_dev() and result is stored in flag vfio_device->noiommu,
the noiommu taint is added in the end of __vfio_register_dev().

This is also a preparation for compiling out vfio_group infrastructure
as it makes the noiommu detection and taint common between the cdev path
and group path though cdev path does not support noiommu.

Suggested-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
 drivers/vfio/group.c     | 15 ---------------
 drivers/vfio/vfio_main.c | 31 ++++++++++++++++++++++++++++++-
 include/linux/vfio.h     |  1 +
 3 files changed, 31 insertions(+), 16 deletions(-)

Comments

Alex Williamson June 12, 2023, 10:42 p.m. UTC | #1
On Fri,  2 Jun 2023 05:16:50 -0700
Yi Liu <yi.l.liu@intel.com> wrote:

> This moves the noiommu device determination and noiommu taint out of
> vfio_group_find_or_alloc(). noiommu device is determined in
> __vfio_register_dev() and result is stored in flag vfio_device->noiommu,
> the noiommu taint is added in the end of __vfio_register_dev().
> 
> This is also a preparation for compiling out vfio_group infrastructure
> as it makes the noiommu detection and taint common between the cdev path
> and group path though cdev path does not support noiommu.

Does this really still make sense?  The motivation for the change is
really not clear without cdev support for noiommu.  Thanks,

Alex
 
> Suggested-by: Alex Williamson <alex.williamson@redhat.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> ---
>  drivers/vfio/group.c     | 15 ---------------
>  drivers/vfio/vfio_main.c | 31 ++++++++++++++++++++++++++++++-
>  include/linux/vfio.h     |  1 +
>  3 files changed, 31 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c
> index 653b62f93474..64cdd0ea8825 100644
> --- a/drivers/vfio/group.c
> +++ b/drivers/vfio/group.c
> @@ -668,21 +668,6 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
>  	struct vfio_group *group;
>  
>  	iommu_group = iommu_group_get(dev);
> -	if (!iommu_group && vfio_noiommu) {
> -		/*
> -		 * With noiommu enabled, create an IOMMU group for devices that
> -		 * don't already have one, implying no IOMMU hardware/driver
> -		 * exists.  Taint the kernel because we're about to give a DMA
> -		 * capable device to a user without IOMMU protection.
> -		 */
> -		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
> -		if (!IS_ERR(group)) {
> -			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> -			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
> -		}
> -		return group;
> -	}
> -
>  	if (!iommu_group)
>  		return ERR_PTR(-EINVAL);
>  
> diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
> index 6d8f9b0f3637..00a699b9f76b 100644
> --- a/drivers/vfio/vfio_main.c
> +++ b/drivers/vfio/vfio_main.c
> @@ -265,6 +265,18 @@ static int vfio_init_device(struct vfio_device *device, struct device *dev,
>  	return ret;
>  }
>  
> +static int vfio_device_set_noiommu(struct vfio_device *device)
> +{
> +	struct iommu_group *iommu_group = iommu_group_get(device->dev);
> +
> +	if (!iommu_group && !vfio_noiommu)
> +		return -EINVAL;
> +
> +	device->noiommu = !iommu_group;
> +	iommu_group_put(iommu_group); /* Accepts NULL */
> +	return 0;
> +}
> +
>  static int __vfio_register_dev(struct vfio_device *device,
>  			       enum vfio_group_type type)
>  {
> @@ -277,6 +289,13 @@ static int __vfio_register_dev(struct vfio_device *device,
>  		     !device->ops->detach_ioas)))
>  		return -EINVAL;
>  
> +	/* Only physical devices can be noiommu device */
> +	if (type == VFIO_IOMMU) {
> +		ret = vfio_device_set_noiommu(device);
> +		if (ret)
> +			return ret;
> +	}
> +
>  	/*
>  	 * If the driver doesn't specify a set then the device is added to a
>  	 * singleton set just for itself.
> @@ -288,7 +307,8 @@ static int __vfio_register_dev(struct vfio_device *device,
>  	if (ret)
>  		return ret;
>  
> -	ret = vfio_device_set_group(device, type);
> +	ret = vfio_device_set_group(device,
> +				    device->noiommu ? VFIO_NO_IOMMU : type);
>  	if (ret)
>  		return ret;
>  
> @@ -301,6 +321,15 @@ static int __vfio_register_dev(struct vfio_device *device,
>  
>  	vfio_device_group_register(device);
>  
> +	if (device->noiommu) {
> +		/*
> +		 * noiommu deivces have no IOMMU hardware/driver.  Taint the
> +		 * kernel because we're about to give a DMA capable device to
> +		 * a user without IOMMU protection.
> +		 */
> +		add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> +		dev_warn(device->dev, "Adding kernel taint for vfio-noiommu on device\n");
> +	}
>  	return 0;
>  err_out:
>  	vfio_device_remove_group(device);
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index e80a8ac86e46..183e620009e7 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -67,6 +67,7 @@ struct vfio_device {
>  	bool iommufd_attached;
>  #endif
>  	bool cdev_opened:1;
> +	bool noiommu:1;
>  };
>  
>  /**
Yi Liu June 13, 2023, 5:53 a.m. UTC | #2
> From: Alex Williamson <alex.williamson@redhat.com>
> Sent: Tuesday, June 13, 2023 6:42 AM
> 
> On Fri,  2 Jun 2023 05:16:50 -0700
> Yi Liu <yi.l.liu@intel.com> wrote:
> 
> > This moves the noiommu device determination and noiommu taint out of
> > vfio_group_find_or_alloc(). noiommu device is determined in
> > __vfio_register_dev() and result is stored in flag vfio_device->noiommu,
> > the noiommu taint is added in the end of __vfio_register_dev().
> >
> > This is also a preparation for compiling out vfio_group infrastructure
> > as it makes the noiommu detection and taint common between the cdev path
> > and group path though cdev path does not support noiommu.
> 
> Does this really still make sense?  The motivation for the change is
> really not clear without cdev support for noiommu.  Thanks,

I think it still makes sense. When CONFIG_VFIO_GROUP==n, the kernel
only supports cdev interface. If there is noiommu device, vfio should
fail the registration. So, the noiommu determination is still needed. But
I'd admit the taint might still be in the group code.

Regards,
Yi Liu

> Alex
> 
> > Suggested-by: Alex Williamson <alex.williamson@redhat.com>
> > Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> > ---
> >  drivers/vfio/group.c     | 15 ---------------
> >  drivers/vfio/vfio_main.c | 31 ++++++++++++++++++++++++++++++-
> >  include/linux/vfio.h     |  1 +
> >  3 files changed, 31 insertions(+), 16 deletions(-)
> >
> > diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c
> > index 653b62f93474..64cdd0ea8825 100644
> > --- a/drivers/vfio/group.c
> > +++ b/drivers/vfio/group.c
> > @@ -668,21 +668,6 @@ static struct vfio_group *vfio_group_find_or_alloc(struct
> device *dev)
> >  	struct vfio_group *group;
> >
> >  	iommu_group = iommu_group_get(dev);
> > -	if (!iommu_group && vfio_noiommu) {
> > -		/*
> > -		 * With noiommu enabled, create an IOMMU group for devices that
> > -		 * don't already have one, implying no IOMMU hardware/driver
> > -		 * exists.  Taint the kernel because we're about to give a DMA
> > -		 * capable device to a user without IOMMU protection.
> > -		 */
> > -		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
> > -		if (!IS_ERR(group)) {
> > -			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> > -			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on
> device\n");
> > -		}
> > -		return group;
> > -	}
> > -
> >  	if (!iommu_group)
> >  		return ERR_PTR(-EINVAL);
> >
> > diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
> > index 6d8f9b0f3637..00a699b9f76b 100644
> > --- a/drivers/vfio/vfio_main.c
> > +++ b/drivers/vfio/vfio_main.c
> > @@ -265,6 +265,18 @@ static int vfio_init_device(struct vfio_device *device, struct
> device *dev,
> >  	return ret;
> >  }
> >
> > +static int vfio_device_set_noiommu(struct vfio_device *device)
> > +{
> > +	struct iommu_group *iommu_group = iommu_group_get(device->dev);
> > +
> > +	if (!iommu_group && !vfio_noiommu)
> > +		return -EINVAL;
> > +
> > +	device->noiommu = !iommu_group;
> > +	iommu_group_put(iommu_group); /* Accepts NULL */
> > +	return 0;
> > +}
> > +
> >  static int __vfio_register_dev(struct vfio_device *device,
> >  			       enum vfio_group_type type)
> >  {
> > @@ -277,6 +289,13 @@ static int __vfio_register_dev(struct vfio_device *device,
> >  		     !device->ops->detach_ioas)))
> >  		return -EINVAL;
> >
> > +	/* Only physical devices can be noiommu device */
> > +	if (type == VFIO_IOMMU) {
> > +		ret = vfio_device_set_noiommu(device);
> > +		if (ret)
> > +			return ret;
> > +	}
> > +
> >  	/*
> >  	 * If the driver doesn't specify a set then the device is added to a
> >  	 * singleton set just for itself.
> > @@ -288,7 +307,8 @@ static int __vfio_register_dev(struct vfio_device *device,
> >  	if (ret)
> >  		return ret;
> >
> > -	ret = vfio_device_set_group(device, type);
> > +	ret = vfio_device_set_group(device,
> > +				    device->noiommu ? VFIO_NO_IOMMU : type);
> >  	if (ret)
> >  		return ret;
> >
> > @@ -301,6 +321,15 @@ static int __vfio_register_dev(struct vfio_device *device,
> >
> >  	vfio_device_group_register(device);
> >
> > +	if (device->noiommu) {
> > +		/*
> > +		 * noiommu deivces have no IOMMU hardware/driver.  Taint the
> > +		 * kernel because we're about to give a DMA capable device to
> > +		 * a user without IOMMU protection.
> > +		 */
> > +		add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> > +		dev_warn(device->dev, "Adding kernel taint for vfio-noiommu on
> device\n");
> > +	}
> >  	return 0;
> >  err_out:
> >  	vfio_device_remove_group(device);
> > diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> > index e80a8ac86e46..183e620009e7 100644
> > --- a/include/linux/vfio.h
> > +++ b/include/linux/vfio.h
> > @@ -67,6 +67,7 @@ struct vfio_device {
> >  	bool iommufd_attached;
> >  #endif
> >  	bool cdev_opened:1;
> > +	bool noiommu:1;
> >  };
> >
> >  /**
Alex Williamson June 13, 2023, 2:19 p.m. UTC | #3
On Tue, 13 Jun 2023 05:53:42 +0000
"Liu, Yi L" <yi.l.liu@intel.com> wrote:

> > From: Alex Williamson <alex.williamson@redhat.com>
> > Sent: Tuesday, June 13, 2023 6:42 AM
> > 
> > On Fri,  2 Jun 2023 05:16:50 -0700
> > Yi Liu <yi.l.liu@intel.com> wrote:
> >   
> > > This moves the noiommu device determination and noiommu taint out of
> > > vfio_group_find_or_alloc(). noiommu device is determined in
> > > __vfio_register_dev() and result is stored in flag vfio_device->noiommu,
> > > the noiommu taint is added in the end of __vfio_register_dev().
> > >
> > > This is also a preparation for compiling out vfio_group infrastructure
> > > as it makes the noiommu detection and taint common between the cdev path
> > > and group path though cdev path does not support noiommu.  
> > 
> > Does this really still make sense?  The motivation for the change is
> > really not clear without cdev support for noiommu.  Thanks,  
> 
> I think it still makes sense. When CONFIG_VFIO_GROUP==n, the kernel
> only supports cdev interface. If there is noiommu device, vfio should
> fail the registration. So, the noiommu determination is still needed. But
> I'd admit the taint might still be in the group code.

How is there going to be a noiommu device when VFIO_GROUP is unset?
Thanks,

Alex


> > > Suggested-by: Alex Williamson <alex.williamson@redhat.com>
> > > Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> > > ---
> > >  drivers/vfio/group.c     | 15 ---------------
> > >  drivers/vfio/vfio_main.c | 31 ++++++++++++++++++++++++++++++-
> > >  include/linux/vfio.h     |  1 +
> > >  3 files changed, 31 insertions(+), 16 deletions(-)
> > >
> > > diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c
> > > index 653b62f93474..64cdd0ea8825 100644
> > > --- a/drivers/vfio/group.c
> > > +++ b/drivers/vfio/group.c
> > > @@ -668,21 +668,6 @@ static struct vfio_group *vfio_group_find_or_alloc(struct  
> > device *dev)  
> > >  	struct vfio_group *group;
> > >
> > >  	iommu_group = iommu_group_get(dev);
> > > -	if (!iommu_group && vfio_noiommu) {
> > > -		/*
> > > -		 * With noiommu enabled, create an IOMMU group for devices that
> > > -		 * don't already have one, implying no IOMMU hardware/driver
> > > -		 * exists.  Taint the kernel because we're about to give a DMA
> > > -		 * capable device to a user without IOMMU protection.
> > > -		 */
> > > -		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
> > > -		if (!IS_ERR(group)) {
> > > -			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> > > -			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on  
> > device\n");  
> > > -		}
> > > -		return group;
> > > -	}
> > > -
> > >  	if (!iommu_group)
> > >  		return ERR_PTR(-EINVAL);
> > >
> > > diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
> > > index 6d8f9b0f3637..00a699b9f76b 100644
> > > --- a/drivers/vfio/vfio_main.c
> > > +++ b/drivers/vfio/vfio_main.c
> > > @@ -265,6 +265,18 @@ static int vfio_init_device(struct vfio_device *device, struct  
> > device *dev,  
> > >  	return ret;
> > >  }
> > >
> > > +static int vfio_device_set_noiommu(struct vfio_device *device)
> > > +{
> > > +	struct iommu_group *iommu_group = iommu_group_get(device->dev);
> > > +
> > > +	if (!iommu_group && !vfio_noiommu)
> > > +		return -EINVAL;
> > > +
> > > +	device->noiommu = !iommu_group;
> > > +	iommu_group_put(iommu_group); /* Accepts NULL */
> > > +	return 0;
> > > +}
> > > +
> > >  static int __vfio_register_dev(struct vfio_device *device,
> > >  			       enum vfio_group_type type)
> > >  {
> > > @@ -277,6 +289,13 @@ static int __vfio_register_dev(struct vfio_device *device,
> > >  		     !device->ops->detach_ioas)))
> > >  		return -EINVAL;
> > >
> > > +	/* Only physical devices can be noiommu device */
> > > +	if (type == VFIO_IOMMU) {
> > > +		ret = vfio_device_set_noiommu(device);
> > > +		if (ret)
> > > +			return ret;
> > > +	}
> > > +
> > >  	/*
> > >  	 * If the driver doesn't specify a set then the device is added to a
> > >  	 * singleton set just for itself.
> > > @@ -288,7 +307,8 @@ static int __vfio_register_dev(struct vfio_device *device,
> > >  	if (ret)
> > >  		return ret;
> > >
> > > -	ret = vfio_device_set_group(device, type);
> > > +	ret = vfio_device_set_group(device,
> > > +				    device->noiommu ? VFIO_NO_IOMMU : type);
> > >  	if (ret)
> > >  		return ret;
> > >
> > > @@ -301,6 +321,15 @@ static int __vfio_register_dev(struct vfio_device *device,
> > >
> > >  	vfio_device_group_register(device);
> > >
> > > +	if (device->noiommu) {
> > > +		/*
> > > +		 * noiommu deivces have no IOMMU hardware/driver.  Taint the
> > > +		 * kernel because we're about to give a DMA capable device to
> > > +		 * a user without IOMMU protection.
> > > +		 */
> > > +		add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> > > +		dev_warn(device->dev, "Adding kernel taint for vfio-noiommu on  
> > device\n");  
> > > +	}
> > >  	return 0;
> > >  err_out:
> > >  	vfio_device_remove_group(device);
> > > diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> > > index e80a8ac86e46..183e620009e7 100644
> > > --- a/include/linux/vfio.h
> > > +++ b/include/linux/vfio.h
> > > @@ -67,6 +67,7 @@ struct vfio_device {
> > >  	bool iommufd_attached;
> > >  #endif
> > >  	bool cdev_opened:1;
> > > +	bool noiommu:1;
> > >  };
> > >
> > >  /**  
>
Yi Liu June 13, 2023, 2:33 p.m. UTC | #4
> From: Alex Williamson <alex.williamson@redhat.com>
> Sent: Tuesday, June 13, 2023 10:19 PM
> 
> On Tue, 13 Jun 2023 05:53:42 +0000
> "Liu, Yi L" <yi.l.liu@intel.com> wrote:
> 
> > > From: Alex Williamson <alex.williamson@redhat.com>
> > > Sent: Tuesday, June 13, 2023 6:42 AM
> > >
> > > On Fri,  2 Jun 2023 05:16:50 -0700
> > > Yi Liu <yi.l.liu@intel.com> wrote:
> > >
> > > > This moves the noiommu device determination and noiommu taint out of
> > > > vfio_group_find_or_alloc(). noiommu device is determined in
> > > > __vfio_register_dev() and result is stored in flag vfio_device->noiommu,
> > > > the noiommu taint is added in the end of __vfio_register_dev().
> > > >
> > > > This is also a preparation for compiling out vfio_group infrastructure
> > > > as it makes the noiommu detection and taint common between the cdev path
> > > > and group path though cdev path does not support noiommu.
> > >
> > > Does this really still make sense?  The motivation for the change is
> > > really not clear without cdev support for noiommu.  Thanks,
> >
> > I think it still makes sense. When CONFIG_VFIO_GROUP==n, the kernel
> > only supports cdev interface. If there is noiommu device, vfio should
> > fail the registration. So, the noiommu determination is still needed. But
> > I'd admit the taint might still be in the group code.
> 
> How is there going to be a noiommu device when VFIO_GROUP is unset?

How about booting a kernel with iommu disabled, then all the devices
are not protected by iommu. I suppose they are noiommu devices. If
user wants to bound them to vfio, the kernel should have VFIO_GROUP.
Otherwise, needs to fail.

Regards,
Yi Liu

> Thanks,
> 
> Alex
> 
> 
> > > > Suggested-by: Alex Williamson <alex.williamson@redhat.com>
> > > > Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> > > > ---
> > > >  drivers/vfio/group.c     | 15 ---------------
> > > >  drivers/vfio/vfio_main.c | 31 ++++++++++++++++++++++++++++++-
> > > >  include/linux/vfio.h     |  1 +
> > > >  3 files changed, 31 insertions(+), 16 deletions(-)
> > > >
> > > > diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c
> > > > index 653b62f93474..64cdd0ea8825 100644
> > > > --- a/drivers/vfio/group.c
> > > > +++ b/drivers/vfio/group.c
> > > > @@ -668,21 +668,6 @@ static struct vfio_group *vfio_group_find_or_alloc(struct
> > > device *dev)
> > > >  	struct vfio_group *group;
> > > >
> > > >  	iommu_group = iommu_group_get(dev);
> > > > -	if (!iommu_group && vfio_noiommu) {
> > > > -		/*
> > > > -		 * With noiommu enabled, create an IOMMU group for devices that
> > > > -		 * don't already have one, implying no IOMMU hardware/driver
> > > > -		 * exists.  Taint the kernel because we're about to give a DMA
> > > > -		 * capable device to a user without IOMMU protection.
> > > > -		 */
> > > > -		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
> > > > -		if (!IS_ERR(group)) {
> > > > -			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> > > > -			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on
> > > device\n");
> > > > -		}
> > > > -		return group;
> > > > -	}
> > > > -
> > > >  	if (!iommu_group)
> > > >  		return ERR_PTR(-EINVAL);
> > > >
> > > > diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
> > > > index 6d8f9b0f3637..00a699b9f76b 100644
> > > > --- a/drivers/vfio/vfio_main.c
> > > > +++ b/drivers/vfio/vfio_main.c
> > > > @@ -265,6 +265,18 @@ static int vfio_init_device(struct vfio_device *device,
> struct
> > > device *dev,
> > > >  	return ret;
> > > >  }
> > > >
> > > > +static int vfio_device_set_noiommu(struct vfio_device *device)
> > > > +{
> > > > +	struct iommu_group *iommu_group = iommu_group_get(device->dev);
> > > > +
> > > > +	if (!iommu_group && !vfio_noiommu)
> > > > +		return -EINVAL;
> > > > +
> > > > +	device->noiommu = !iommu_group;
> > > > +	iommu_group_put(iommu_group); /* Accepts NULL */
> > > > +	return 0;
> > > > +}
> > > > +
> > > >  static int __vfio_register_dev(struct vfio_device *device,
> > > >  			       enum vfio_group_type type)
> > > >  {
> > > > @@ -277,6 +289,13 @@ static int __vfio_register_dev(struct vfio_device *device,
> > > >  		     !device->ops->detach_ioas)))
> > > >  		return -EINVAL;
> > > >
> > > > +	/* Only physical devices can be noiommu device */
> > > > +	if (type == VFIO_IOMMU) {
> > > > +		ret = vfio_device_set_noiommu(device);
> > > > +		if (ret)
> > > > +			return ret;
> > > > +	}
> > > > +
> > > >  	/*
> > > >  	 * If the driver doesn't specify a set then the device is added to a
> > > >  	 * singleton set just for itself.
> > > > @@ -288,7 +307,8 @@ static int __vfio_register_dev(struct vfio_device *device,
> > > >  	if (ret)
> > > >  		return ret;
> > > >
> > > > -	ret = vfio_device_set_group(device, type);
> > > > +	ret = vfio_device_set_group(device,
> > > > +				    device->noiommu ? VFIO_NO_IOMMU : type);
> > > >  	if (ret)
> > > >  		return ret;
> > > >
> > > > @@ -301,6 +321,15 @@ static int __vfio_register_dev(struct vfio_device *device,
> > > >
> > > >  	vfio_device_group_register(device);
> > > >
> > > > +	if (device->noiommu) {
> > > > +		/*
> > > > +		 * noiommu deivces have no IOMMU hardware/driver.  Taint the
> > > > +		 * kernel because we're about to give a DMA capable device to
> > > > +		 * a user without IOMMU protection.
> > > > +		 */
> > > > +		add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> > > > +		dev_warn(device->dev, "Adding kernel taint for vfio-noiommu on
> > > device\n");
> > > > +	}
> > > >  	return 0;
> > > >  err_out:
> > > >  	vfio_device_remove_group(device);
> > > > diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> > > > index e80a8ac86e46..183e620009e7 100644
> > > > --- a/include/linux/vfio.h
> > > > +++ b/include/linux/vfio.h
> > > > @@ -67,6 +67,7 @@ struct vfio_device {
> > > >  	bool iommufd_attached;
> > > >  #endif
> > > >  	bool cdev_opened:1;
> > > > +	bool noiommu:1;
> > > >  };
> > > >
> > > >  /**
> >
Alex Williamson June 13, 2023, 2:48 p.m. UTC | #5
On Tue, 13 Jun 2023 14:33:01 +0000
"Liu, Yi L" <yi.l.liu@intel.com> wrote:

> > From: Alex Williamson <alex.williamson@redhat.com>
> > Sent: Tuesday, June 13, 2023 10:19 PM
> > 
> > On Tue, 13 Jun 2023 05:53:42 +0000
> > "Liu, Yi L" <yi.l.liu@intel.com> wrote:
> >   
> > > > From: Alex Williamson <alex.williamson@redhat.com>
> > > > Sent: Tuesday, June 13, 2023 6:42 AM
> > > >
> > > > On Fri,  2 Jun 2023 05:16:50 -0700
> > > > Yi Liu <yi.l.liu@intel.com> wrote:
> > > >  
> > > > > This moves the noiommu device determination and noiommu taint out of
> > > > > vfio_group_find_or_alloc(). noiommu device is determined in
> > > > > __vfio_register_dev() and result is stored in flag vfio_device->noiommu,
> > > > > the noiommu taint is added in the end of __vfio_register_dev().
> > > > >
> > > > > This is also a preparation for compiling out vfio_group infrastructure
> > > > > as it makes the noiommu detection and taint common between the cdev path
> > > > > and group path though cdev path does not support noiommu.  
> > > >
> > > > Does this really still make sense?  The motivation for the change is
> > > > really not clear without cdev support for noiommu.  Thanks,  
> > >
> > > I think it still makes sense. When CONFIG_VFIO_GROUP==n, the kernel
> > > only supports cdev interface. If there is noiommu device, vfio should
> > > fail the registration. So, the noiommu determination is still needed. But
> > > I'd admit the taint might still be in the group code.  
> > 
> > How is there going to be a noiommu device when VFIO_GROUP is unset?  
> 
> How about booting a kernel with iommu disabled, then all the devices
> are not protected by iommu. I suppose they are noiommu devices. If
> user wants to bound them to vfio, the kernel should have VFIO_GROUP.
> Otherwise, needs to fail.

"noiommu" is a vfio designation of a device, it must be created by
vfio.  There can certainly be devices which are not IOMMU backed, but
without vfio designating them as noiommu devices, which is only done
via the legacy and compat paths, there's no such thing as a noiommu
device.  Devices without an IOMMU are simply out of scope for cdev,
there should never be a vfio cdev entry created for them.  Thanks,

Alex
Yi Liu June 13, 2023, 3:01 p.m. UTC | #6
> From: Alex Williamson <alex.williamson@redhat.com>
> Sent: Tuesday, June 13, 2023 10:48 PM
> 
> On Tue, 13 Jun 2023 14:33:01 +0000
> "Liu, Yi L" <yi.l.liu@intel.com> wrote:
> 
> > > From: Alex Williamson <alex.williamson@redhat.com>
> > > Sent: Tuesday, June 13, 2023 10:19 PM
> > >
> > > On Tue, 13 Jun 2023 05:53:42 +0000
> > > "Liu, Yi L" <yi.l.liu@intel.com> wrote:
> > >
> > > > > From: Alex Williamson <alex.williamson@redhat.com>
> > > > > Sent: Tuesday, June 13, 2023 6:42 AM
> > > > >
> > > > > On Fri,  2 Jun 2023 05:16:50 -0700
> > > > > Yi Liu <yi.l.liu@intel.com> wrote:
> > > > >
> > > > > > This moves the noiommu device determination and noiommu taint out of
> > > > > > vfio_group_find_or_alloc(). noiommu device is determined in
> > > > > > __vfio_register_dev() and result is stored in flag vfio_device->noiommu,
> > > > > > the noiommu taint is added in the end of __vfio_register_dev().
> > > > > >
> > > > > > This is also a preparation for compiling out vfio_group infrastructure
> > > > > > as it makes the noiommu detection and taint common between the cdev path
> > > > > > and group path though cdev path does not support noiommu.
> > > > >
> > > > > Does this really still make sense?  The motivation for the change is
> > > > > really not clear without cdev support for noiommu.  Thanks,
> > > >
> > > > I think it still makes sense. When CONFIG_VFIO_GROUP==n, the kernel
> > > > only supports cdev interface. If there is noiommu device, vfio should
> > > > fail the registration. So, the noiommu determination is still needed. But
> > > > I'd admit the taint might still be in the group code.
> > >
> > > How is there going to be a noiommu device when VFIO_GROUP is unset?
> >
> > How about booting a kernel with iommu disabled, then all the devices
> > are not protected by iommu. I suppose they are noiommu devices. If
> > user wants to bound them to vfio, the kernel should have VFIO_GROUP.
> > Otherwise, needs to fail.
> 
> "noiommu" is a vfio designation of a device, it must be created by
> vfio.  

Sure.

> There can certainly be devices which are not IOMMU backed, but
> without vfio designating them as noiommu devices, which is only done
> via the legacy and compat paths, there's no such thing as a noiommu
> device. 

Yes.

> Devices without an IOMMU are simply out of scope for cdev,
> there should never be a vfio cdev entry created for them.  Thanks,

Actually, this is what I want to solve. I need to check if a device is
IOMMU backed or not, and based on this info to prevent creating
cdev entry for them in the coming cdev support or may need to
fail registration if VFIO_GROUP is unset.

If this patch is not good. I can use the vfio_device_is_noiommu()
written like below when VFIO_GROUP is unset. What about your
opinion?

static inline bool vfio_device_is_noiommu(struct vfio_device *vdev)
{
	struct iommu_group *iommu_group;

	iommu_group = iommu_group_get(vdev->dev);
	iommu_group_put(iommu_group); /* Accepts NULL */
	return !iommu_group;
}

Regards,
Yi Liu
Alex Williamson June 13, 2023, 3:13 p.m. UTC | #7
On Tue, 13 Jun 2023 15:01:35 +0000
"Liu, Yi L" <yi.l.liu@intel.com> wrote:

> > From: Alex Williamson <alex.williamson@redhat.com>
> > Sent: Tuesday, June 13, 2023 10:48 PM
> > 
> > On Tue, 13 Jun 2023 14:33:01 +0000
> > "Liu, Yi L" <yi.l.liu@intel.com> wrote:
> >   
> > > > From: Alex Williamson <alex.williamson@redhat.com>
> > > > Sent: Tuesday, June 13, 2023 10:19 PM
> > > >
> > > > On Tue, 13 Jun 2023 05:53:42 +0000
> > > > "Liu, Yi L" <yi.l.liu@intel.com> wrote:
> > > >  
> > > > > > From: Alex Williamson <alex.williamson@redhat.com>
> > > > > > Sent: Tuesday, June 13, 2023 6:42 AM
> > > > > >
> > > > > > On Fri,  2 Jun 2023 05:16:50 -0700
> > > > > > Yi Liu <yi.l.liu@intel.com> wrote:
> > > > > >  
> > > > > > > This moves the noiommu device determination and noiommu taint out of
> > > > > > > vfio_group_find_or_alloc(). noiommu device is determined in
> > > > > > > __vfio_register_dev() and result is stored in flag vfio_device->noiommu,
> > > > > > > the noiommu taint is added in the end of __vfio_register_dev().
> > > > > > >
> > > > > > > This is also a preparation for compiling out vfio_group infrastructure
> > > > > > > as it makes the noiommu detection and taint common between the cdev path
> > > > > > > and group path though cdev path does not support noiommu.  
> > > > > >
> > > > > > Does this really still make sense?  The motivation for the change is
> > > > > > really not clear without cdev support for noiommu.  Thanks,  
> > > > >
> > > > > I think it still makes sense. When CONFIG_VFIO_GROUP==n, the kernel
> > > > > only supports cdev interface. If there is noiommu device, vfio should
> > > > > fail the registration. So, the noiommu determination is still needed. But
> > > > > I'd admit the taint might still be in the group code.  
> > > >
> > > > How is there going to be a noiommu device when VFIO_GROUP is unset?  
> > >
> > > How about booting a kernel with iommu disabled, then all the devices
> > > are not protected by iommu. I suppose they are noiommu devices. If
> > > user wants to bound them to vfio, the kernel should have VFIO_GROUP.
> > > Otherwise, needs to fail.  
> > 
> > "noiommu" is a vfio designation of a device, it must be created by
> > vfio.    
> 
> Sure.
> 
> > There can certainly be devices which are not IOMMU backed, but
> > without vfio designating them as noiommu devices, which is only done
> > via the legacy and compat paths, there's no such thing as a noiommu
> > device.   
> 
> Yes.
> 
> > Devices without an IOMMU are simply out of scope for cdev,
> > there should never be a vfio cdev entry created for them.  Thanks,  
> 
> Actually, this is what I want to solve. I need to check if a device is
> IOMMU backed or not, and based on this info to prevent creating
> cdev entry for them in the coming cdev support or may need to
> fail registration if VFIO_GROUP is unset.
> 
> If this patch is not good. I can use the vfio_device_is_noiommu()
> written like below when VFIO_GROUP is unset. What about your
> opinion?
> 
> static inline bool vfio_device_is_noiommu(struct vfio_device *vdev)
> {
> 	struct iommu_group *iommu_group;
> 
> 	iommu_group = iommu_group_get(vdev->dev);
> 	iommu_group_put(iommu_group); /* Accepts NULL */
> 	return !iommu_group;
> }


No, please do not confuse the issue.  As we agreed above "noiommu"
means a specific thing, it's a device without IOMMU backing that vfio
has artificially included in the environment.  If we don't have
VFIO_NOIOMMU then there's no such thing as a "noiommu" device.

You can certainly use an iommu_group test to decide if a device should
be represented, but there absolutely should never be a vfio_device
created without IOMMU backing and without VFIO_NOIOMMU.  Thanks,

Alex
Alex Williamson June 13, 2023, 5:15 p.m. UTC | #8
[Sorry for breaking threading, replying to my own message id with reply
 content from Yi since the Cc list got broken]

On Tue, 13 Jun 2023 15:28:06 +0000
"Liu, Yi L" <yi.l.liu@intel.com> wrote:

> > From: Alex Williamson <alex.williamson@redhat.com>
> > Sent: Tuesday, June 13, 2023 11:13 PM
> > 
> > On Tue, 13 Jun 2023 15:01:35 +0000
> > "Liu, Yi L" <yi.l.liu@intel.com> wrote:
> >   
> > > > From: Alex Williamson <alex.williamson@redhat.com>
> > > > Sent: Tuesday, June 13, 2023 10:48 PM
> > > >
> > > > On Tue, 13 Jun 2023 14:33:01 +0000
> > > > "Liu, Yi L" <yi.l.liu@intel.com> wrote:
> > > >  
> > > > > > From: Alex Williamson <alex.williamson@redhat.com>
> > > > > > Sent: Tuesday, June 13, 2023 10:19 PM
> > > > > >
> > > > > > On Tue, 13 Jun 2023 05:53:42 +0000
> > > > > > "Liu, Yi L" <yi.l.liu@intel.com> wrote:
> > > > > >  
> > > > > > > > From: Alex Williamson <alex.williamson@redhat.com>
> > > > > > > > Sent: Tuesday, June 13, 2023 6:42 AM
> > > > > > > >
> > > > > > > > On Fri,  2 Jun 2023 05:16:50 -0700
> > > > > > > > Yi Liu <yi.l.liu@intel.com> wrote:
> > > > > > > >  
> > > > > > > > > This moves the noiommu device determination and noiommu taint out of
> > > > > > > > > vfio_group_find_or_alloc(). noiommu device is determined in
> > > > > > > > > __vfio_register_dev() and result is stored in flag vfio_device->noiommu,
> > > > > > > > > the noiommu taint is added in the end of __vfio_register_dev().
> > > > > > > > >
> > > > > > > > > This is also a preparation for compiling out vfio_group infrastructure
> > > > > > > > > as it makes the noiommu detection and taint common between the cdev  
> > path  
> > > > > > > > > and group path though cdev path does not support noiommu.  
> > > > > > > >
> > > > > > > > Does this really still make sense?  The motivation for the change is
> > > > > > > > really not clear without cdev support for noiommu.  Thanks,  
> > > > > > >
> > > > > > > I think it still makes sense. When CONFIG_VFIO_GROUP==n, the kernel
> > > > > > > only supports cdev interface. If there is noiommu device, vfio should
> > > > > > > fail the registration. So, the noiommu determination is still needed. But
> > > > > > > I'd admit the taint might still be in the group code.  
> > > > > >
> > > > > > How is there going to be a noiommu device when VFIO_GROUP is unset?  
> > > > >
> > > > > How about booting a kernel with iommu disabled, then all the devices
> > > > > are not protected by iommu. I suppose they are noiommu devices. If
> > > > > user wants to bound them to vfio, the kernel should have VFIO_GROUP.
> > > > > Otherwise, needs to fail.  
> > > >
> > > > "noiommu" is a vfio designation of a device, it must be created by
> > > > vfio.  
> > >
> > > Sure.
> > >  
> > > > There can certainly be devices which are not IOMMU backed, but
> > > > without vfio designating them as noiommu devices, which is only done
> > > > via the legacy and compat paths, there's no such thing as a noiommu
> > > > device.  
> > >
> > > Yes.
> > >  
> > > > Devices without an IOMMU are simply out of scope for cdev,
> > > > there should never be a vfio cdev entry created for them.  Thanks,  
> > >
> > > Actually, this is what I want to solve. I need to check if a device is
> > > IOMMU backed or not, and based on this info to prevent creating
> > > cdev entry for them in the coming cdev support or may need to
> > > fail registration if VFIO_GROUP is unset.
> > >
> > > If this patch is not good. I can use the vfio_device_is_noiommu()
> > > written like below when VFIO_GROUP is unset. What about your
> > > opinion?
> > >
> > > static inline bool vfio_device_is_noiommu(struct vfio_device *vdev)
> > > {
> > > 	struct iommu_group *iommu_group;
> > >
> > > 	iommu_group = iommu_group_get(vdev->dev);
> > > 	iommu_group_put(iommu_group); /* Accepts NULL */
> > > 	return !iommu_group;
> > > }  
> > 
> > 
> > No, please do not confuse the issue.  As we agreed above "noiommu"
> > means a specific thing, it's a device without IOMMU backing that vfio
> > has artificially included in the environment.  If we don't have
> > VFIO_NOIOMMU then there's no such thing as a "noiommu" device.
> > 
> > You can certainly use an iommu_group test to decide if a device should
> > be represented, but there absolutely should never be a vfio_device
> > created without IOMMU backing and without VFIO_NOIOMMU.  Thanks,  
> 
> Hmmm. So your suggestion is to fail the vfio_alloc_device() if the input
> device is not IOMMU backed. right? But at this point, we don't know if
> the caller is trying to allocate vfio_device for a physical device or an
> emulated device. For emulated devices, cdev entry can always be created.
> Is it? I think the iommu_group test should be done for only physical devices
> in the register time.
> 
> Can I have an iommu_backed flag to store the iommu_group test result
> and check it when trying to create/remove cdev entry?

Ok, let me rephrase, the probe function needs to fail for a physical
(VFIO_IOMMU) device when VFIO_NO_IOIMMU is not configured and
vfio_noiommu is not enabled, there should never be a vfio group or cdev
device file created and the vfio_device should never be fully registered.
I overreacted a bit that we should never have a vfio_device at all, we
clearly need one leading up to determining if we can proceed.

If we renamed your function above to vfio_device_has_iommu_group(),
couldn't we just wrap device_add like below instead to not have cdev
setup for a noiommu device, generate an error for a physical device w/o
IOMMU backing, and otherwise setup the cdev device?

static inline int vfio_device_add(struct vfio_device *device, enum vfio_group_type type)
{
#if IS_ENABLED(CONFIG_VFIO_GROUP)
	if (device->group->type == VFIO_NO_IOMMU)
		return device_add(&device->device);
#else
	if (type == VFIO_IOMMU && !vfio_device_has_iommu_group(device))
		return -EINVAL;
#endif
	vfio_init_device_cdev(device);
	return cdev_device_add(&device->cdev, &device->device);
}

static inline void vfio_device_del(struct vfio_device *device)
{
#if IS_ENABLED(CONFIG_VFIO_GROUP)
	if (device->group->type == VFIO_NO_IOMMU)
		return device_del(&device->device);
#endif
	cdev_device_del(&device->cdev, &device->device);
}

I think this is the only extent to which noiommu needs to be a factor
here, skip cdev setup for a noiommu device.  Thanks,

Alex
Jason Gunthorpe June 13, 2023, 5:35 p.m. UTC | #9
On Tue, Jun 13, 2023 at 11:15:11AM -0600, Alex Williamson wrote:
> [Sorry for breaking threading, replying to my own message id with reply
>  content from Yi since the Cc list got broken]

Yikes it is really busted, I think I fixed it?

> If we renamed your function above to vfio_device_has_iommu_group(),
> couldn't we just wrap device_add like below instead to not have cdev
> setup for a noiommu device, generate an error for a physical device w/o
> IOMMU backing, and otherwise setup the cdev device?
> 
> static inline int vfio_device_add(struct vfio_device *device, enum vfio_group_type type)
> {
> #if IS_ENABLED(CONFIG_VFIO_GROUP)
> 	if (device->group->type == VFIO_NO_IOMMU)
> 		return device_add(&device->device);

vfio_device_is_noiommu() embeds the IS_ENABLED

> #else
> 	if (type == VFIO_IOMMU && !vfio_device_has_iommu_group(device))
> 		return -EINVAL;
> #endif

The require test is this from the group code:

 	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {

We could lift it out of the group code and call it from vfio_main.c like:

if (type == VFIO_IOMMU && !vfio_device_is_noiommu(vdev) && !device_iommu_capable(dev,
     IOMMU_CAP_CACHE_COHERENCY))
   FAIL

Jason
Alex Williamson June 13, 2023, 8:10 p.m. UTC | #10
On Tue, 13 Jun 2023 14:35:09 -0300
Jason Gunthorpe <jgg@nvidia.com> wrote:

> On Tue, Jun 13, 2023 at 11:15:11AM -0600, Alex Williamson wrote:
> > [Sorry for breaking threading, replying to my own message id with reply
> >  content from Yi since the Cc list got broken]  
> 
> Yikes it is really busted, I think I fixed it?
> 
> > If we renamed your function above to vfio_device_has_iommu_group(),
> > couldn't we just wrap device_add like below instead to not have cdev
> > setup for a noiommu device, generate an error for a physical device w/o
> > IOMMU backing, and otherwise setup the cdev device?
> > 
> > static inline int vfio_device_add(struct vfio_device *device, enum vfio_group_type type)
> > {
> > #if IS_ENABLED(CONFIG_VFIO_GROUP)
> > 	if (device->group->type == VFIO_NO_IOMMU)
> > 		return device_add(&device->device);  
> 
> vfio_device_is_noiommu() embeds the IS_ENABLED

But patch 23/ makes the definition of struct vfio_group conditional on
CONFIG_VFIO_GROUP, so while CONFIG_VFIO_NOIOMMU depends on
CONFIG_VFIO_GROUP and the result could be determined, I think the
compiler is still unhappy about the undefined reference.  We'd need a
!CONFIG_VFIO_GROUP stub for the function.

> > #else
> > 	if (type == VFIO_IOMMU && !vfio_device_has_iommu_group(device))
> > 		return -EINVAL;
> > #endif  
> 
> The require test is this from the group code:
> 
>  	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
> 
> We could lift it out of the group code and call it from vfio_main.c like:
> 
> if (type == VFIO_IOMMU && !vfio_device_is_noiommu(vdev) && !device_iommu_capable(dev,
>      IOMMU_CAP_CACHE_COHERENCY))
>    FAIL

Ack.  Thanks,

Alex
Yi Liu June 14, 2023, 3:24 a.m. UTC | #11
> From: Alex Williamson <alex.williamson@redhat.com>
> Sent: Wednesday, June 14, 2023 4:11 AM
> 
> On Tue, 13 Jun 2023 14:35:09 -0300
> Jason Gunthorpe <jgg@nvidia.com> wrote:
> 
> > On Tue, Jun 13, 2023 at 11:15:11AM -0600, Alex Williamson wrote:
> > > [Sorry for breaking threading, replying to my own message id with reply
> > >  content from Yi since the Cc list got broken]
> >
> > Yikes it is really busted, I think I fixed it?
> >
> > > If we renamed your function above to vfio_device_has_iommu_group(),
> > > couldn't we just wrap device_add like below instead to not have cdev
> > > setup for a noiommu device, generate an error for a physical device w/o
> > > IOMMU backing, and otherwise setup the cdev device?
> > >
> > > static inline int vfio_device_add(struct vfio_device *device, enum vfio_group_type
> type)
> > > {
> > > #if IS_ENABLED(CONFIG_VFIO_GROUP)
> > > 	if (device->group->type == VFIO_NO_IOMMU)
> > > 		return device_add(&device->device);
> >
> > vfio_device_is_noiommu() embeds the IS_ENABLED
> 
> But patch 23/ makes the definition of struct vfio_group conditional on
> CONFIG_VFIO_GROUP, so while CONFIG_VFIO_NOIOMMU depends on
> CONFIG_VFIO_GROUP and the result could be determined, I think the
> compiler is still unhappy about the undefined reference.  We'd need a
> !CONFIG_VFIO_GROUP stub for the function.
> 
> > > #else
> > > 	if (type == VFIO_IOMMU && !vfio_device_has_iommu_group(device))
> > > 		return -EINVAL;
> > > #endif
> >
> > The require test is this from the group code:
> >
> >  	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
> >
> > We could lift it out of the group code and call it from vfio_main.c like:
> >
> > if (type == VFIO_IOMMU && !vfio_device_is_noiommu(vdev)
> && !device_iommu_capable(dev,
> >      IOMMU_CAP_CACHE_COHERENCY))
> >    FAIL
> 
> Ack.  Thanks,

So, what I got is:

1) Add bellow check in __vfio_register_dev() to fail the physical devices that
    don't have IOMMU protection.

	/*
	  * noiommu device is a special type supported by the group interface.
	  * Such type represents the physical devices  that are not iommu backed.
	  */
	if (type == VFIO_IOMMU && !vfio_device_is_noiommu(device)) &&
	    !vfio_device_has_iommu_group(device))
		return -EINVAL; //or maybe -EOPNOTSUPP?

Nit: require a vfio_device_is_noiommu() stub which returns false for
the VFIO_GROUP unset case.

2) Have below functions to add device

#if IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV)
static inline int vfio_device_add(struct vfio_device *device)
{
	if (vfio_device_is_noiommu(device))
		return device_add(&device->device);
	vfio_init_device_cdev(device);
	return cdev_device_add(&device->cdev, &device->device);
}

static inline void vfio_device_del(struct vfio_device *device)
{
	if (vfio_device_is_noiommu(device))
		return device_del(&device->device);
	cdev_device_del(&device->cdev, &device->device);
}
#else
blabla
#endif

Regards,
Yi Liu
Tian, Kevin June 14, 2023, 5:42 a.m. UTC | #12
> From: Liu, Yi L <yi.l.liu@intel.com>
> Sent: Wednesday, June 14, 2023 11:24 AM
> 
> > From: Alex Williamson <alex.williamson@redhat.com>
> > Sent: Wednesday, June 14, 2023 4:11 AM
> >
> > On Tue, 13 Jun 2023 14:35:09 -0300
> > Jason Gunthorpe <jgg@nvidia.com> wrote:
> >
> > > On Tue, Jun 13, 2023 at 11:15:11AM -0600, Alex Williamson wrote:
> > > > [Sorry for breaking threading, replying to my own message id with reply
> > > >  content from Yi since the Cc list got broken]
> > >
> > > Yikes it is really busted, I think I fixed it?
> > >
> > > > If we renamed your function above to vfio_device_has_iommu_group(),
> > > > couldn't we just wrap device_add like below instead to not have cdev
> > > > setup for a noiommu device, generate an error for a physical device
> w/o
> > > > IOMMU backing, and otherwise setup the cdev device?
> > > >
> > > > static inline int vfio_device_add(struct vfio_device *device, enum
> vfio_group_type
> > type)
> > > > {
> > > > #if IS_ENABLED(CONFIG_VFIO_GROUP)
> > > > 	if (device->group->type == VFIO_NO_IOMMU)
> > > > 		return device_add(&device->device);
> > >
> > > vfio_device_is_noiommu() embeds the IS_ENABLED
> >
> > But patch 23/ makes the definition of struct vfio_group conditional on
> > CONFIG_VFIO_GROUP, so while CONFIG_VFIO_NOIOMMU depends on
> > CONFIG_VFIO_GROUP and the result could be determined, I think the
> > compiler is still unhappy about the undefined reference.  We'd need a
> > !CONFIG_VFIO_GROUP stub for the function.
> >
> > > > #else
> > > > 	if (type == VFIO_IOMMU && !vfio_device_has_iommu_group(device))
> > > > 		return -EINVAL;
> > > > #endif
> > >
> > > The require test is this from the group code:
> > >
> > >  	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
> {
> > >
> > > We could lift it out of the group code and call it from vfio_main.c like:
> > >
> > > if (type == VFIO_IOMMU && !vfio_device_is_noiommu(vdev)
> > && !device_iommu_capable(dev,
> > >      IOMMU_CAP_CACHE_COHERENCY))
> > >    FAIL
> >
> > Ack.  Thanks,
> 
> So, what I got is:
> 
> 1) Add bellow check in __vfio_register_dev() to fail the physical devices that
>     don't have IOMMU protection.
> 
> 	/*
> 	  * noiommu device is a special type supported by the group interface.
> 	  * Such type represents the physical devices  that are not iommu
> backed.
> 	  */
> 	if (type == VFIO_IOMMU && !vfio_device_is_noiommu(device)) &&
> 	    !vfio_device_has_iommu_group(device))
> 		return -EINVAL; //or maybe -EOPNOTSUPP?
> 
> Nit: require a vfio_device_is_noiommu() stub which returns false for
> the VFIO_GROUP unset case.

device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY) is valid
only for cases with iommu groups. So that check already  covers the
group verification indirectly.

With that I think Jason's suggestion is to lift that test into main.c:

int vfio_register_group_dev(struct vfio_device *device)
{
	/*
	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
	 * restore cache coherency. It has to be checked here because it is only
	 * valid for cases where we are using iommu groups.
	 */
	if (type == VFIO_IOMMU && !vfio_device_is_noiommu(device) &&
	    !device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
		return ERR_PTR(-EINVAL);

	return __vfio_register_dev(device, VFIO_IOMMU);
}

> 
> 2) Have below functions to add device
> 
> #if IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV)
> static inline int vfio_device_add(struct vfio_device *device)
> {
> 	if (vfio_device_is_noiommu(device))
> 		return device_add(&device->device);
> 	vfio_init_device_cdev(device);
> 	return cdev_device_add(&device->cdev, &device->device);
> }
> 
> static inline void vfio_device_del(struct vfio_device *device)
> {
> 	if (vfio_device_is_noiommu(device))
> 		return device_del(&device->device);
> 	cdev_device_del(&device->cdev, &device->device);
> }

Correct
Yi Liu June 14, 2023, 6:14 a.m. UTC | #13
> From: Tian, Kevin <kevin.tian@intel.com>
> Sent: Wednesday, June 14, 2023 1:42 PM
> 
> > From: Liu, Yi L <yi.l.liu@intel.com>
> > Sent: Wednesday, June 14, 2023 11:24 AM
> >
> > > From: Alex Williamson <alex.williamson@redhat.com>
> > > Sent: Wednesday, June 14, 2023 4:11 AM
> > >
> > > On Tue, 13 Jun 2023 14:35:09 -0300
> > > Jason Gunthorpe <jgg@nvidia.com> wrote:
> > >
> > > > On Tue, Jun 13, 2023 at 11:15:11AM -0600, Alex Williamson wrote:
> > > > > [Sorry for breaking threading, replying to my own message id with reply
> > > > >  content from Yi since the Cc list got broken]
> > > >
> > > > Yikes it is really busted, I think I fixed it?
> > > >
> > > > > If we renamed your function above to vfio_device_has_iommu_group(),
> > > > > couldn't we just wrap device_add like below instead to not have cdev
> > > > > setup for a noiommu device, generate an error for a physical device
> > w/o
> > > > > IOMMU backing, and otherwise setup the cdev device?
> > > > >
> > > > > static inline int vfio_device_add(struct vfio_device *device, enum
> > vfio_group_type
> > > type)
> > > > > {
> > > > > #if IS_ENABLED(CONFIG_VFIO_GROUP)
> > > > > 	if (device->group->type == VFIO_NO_IOMMU)
> > > > > 		return device_add(&device->device);
> > > >
> > > > vfio_device_is_noiommu() embeds the IS_ENABLED
> > >
> > > But patch 23/ makes the definition of struct vfio_group conditional on
> > > CONFIG_VFIO_GROUP, so while CONFIG_VFIO_NOIOMMU depends on
> > > CONFIG_VFIO_GROUP and the result could be determined, I think the
> > > compiler is still unhappy about the undefined reference.  We'd need a
> > > !CONFIG_VFIO_GROUP stub for the function.
> > >
> > > > > #else
> > > > > 	if (type == VFIO_IOMMU && !vfio_device_has_iommu_group(device))
> > > > > 		return -EINVAL;
> > > > > #endif
> > > >
> > > > The require test is this from the group code:
> > > >
> > > >  	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
> > {
> > > >
> > > > We could lift it out of the group code and call it from vfio_main.c like:
> > > >
> > > > if (type == VFIO_IOMMU && !vfio_device_is_noiommu(vdev)
> > > && !device_iommu_capable(dev,
> > > >      IOMMU_CAP_CACHE_COHERENCY))
> > > >    FAIL
> > >
> > > Ack.  Thanks,
> >
> > So, what I got is:
> >
> > 1) Add bellow check in __vfio_register_dev() to fail the physical devices that
> >     don't have IOMMU protection.
> >
> > 	/*
> > 	  * noiommu device is a special type supported by the group interface.
> > 	  * Such type represents the physical devices  that are not iommu
> > backed.
> > 	  */
> > 	if (type == VFIO_IOMMU && !vfio_device_is_noiommu(device)) &&
> > 	    !vfio_device_has_iommu_group(device))
> > 		return -EINVAL; //or maybe -EOPNOTSUPP?
> >
> > Nit: require a vfio_device_is_noiommu() stub which returns false for
> > the VFIO_GROUP unset case.
> 
> device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY) is valid
> only for cases with iommu groups. So that check already  covers the
> group verification indirectly.

Okay. This IOMMU_CAP_CACHE_COHERENCY check is missed in the cdev
path.

> With that I think Jason's suggestion is to lift that test into main.c:
> 
> int vfio_register_group_dev(struct vfio_device *device)
> {
> 	/*
> 	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
> 	 * restore cache coherency. It has to be checked here because it is only
> 	 * valid for cases where we are using iommu groups.
> 	 */
> 	if (type == VFIO_IOMMU && !vfio_device_is_noiommu(device) &&
> 	    !device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
> 		return ERR_PTR(-EINVAL);

vfio_device_is_noiommu() needs to be called after vfio_device_set_group().
Otherwise, it's always false. So still needs to call it in the __vfio_register_dev().

> 	return __vfio_register_dev(device, VFIO_IOMMU);
> }
> 
> >
> > 2) Have below functions to add device
> >
> > #if IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV)
> > static inline int vfio_device_add(struct vfio_device *device)
> > {
> > 	if (vfio_device_is_noiommu(device))
> > 		return device_add(&device->device);
> > 	vfio_init_device_cdev(device);
> > 	return cdev_device_add(&device->cdev, &device->device);
> > }
> >
> > static inline void vfio_device_del(struct vfio_device *device)
> > {
> > 	if (vfio_device_is_noiommu(device))
> > 		return device_del(&device->device);
> > 	cdev_device_del(&device->cdev, &device->device);
> > }
> 
> Correct

Regards,
Yi Liu
Tian, Kevin June 14, 2023, 6:20 a.m. UTC | #14
> From: Liu, Yi L <yi.l.liu@intel.com>
> Sent: Wednesday, June 14, 2023 2:14 PM
> 
> 
> > With that I think Jason's suggestion is to lift that test into main.c:
> >
> > int vfio_register_group_dev(struct vfio_device *device)
> > {
> > 	/*
> > 	 * VFIO always sets IOMMU_CACHE because we offer no way for
> userspace to
> > 	 * restore cache coherency. It has to be checked here because it is
> only
> > 	 * valid for cases where we are using iommu groups.
> > 	 */
> > 	if (type == VFIO_IOMMU && !vfio_device_is_noiommu(device) &&
> > 	    !device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
> > 		return ERR_PTR(-EINVAL);
> 
> vfio_device_is_noiommu() needs to be called after vfio_device_set_group().
> Otherwise, it's always false. So still needs to call it in the
> __vfio_register_dev().

yes
Jason Gunthorpe June 14, 2023, 12:23 p.m. UTC | #15
On Wed, Jun 14, 2023 at 06:20:10AM +0000, Tian, Kevin wrote:
> > From: Liu, Yi L <yi.l.liu@intel.com>
> > Sent: Wednesday, June 14, 2023 2:14 PM
> > 
> > 
> > > With that I think Jason's suggestion is to lift that test into main.c:
> > >
> > > int vfio_register_group_dev(struct vfio_device *device)
> > > {
> > > 	/*
> > > 	 * VFIO always sets IOMMU_CACHE because we offer no way for
> > userspace to
> > > 	 * restore cache coherency. It has to be checked here because it is
> > only
> > > 	 * valid for cases where we are using iommu groups.
> > > 	 */
> > > 	if (type == VFIO_IOMMU && !vfio_device_is_noiommu(device) &&
> > > 	    !device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
> > > 		return ERR_PTR(-EINVAL);
> > 
> > vfio_device_is_noiommu() needs to be called after vfio_device_set_group().
> > Otherwise, it's always false. So still needs to call it in the
> > __vfio_register_dev().
> 
> yes

Right, but it needs to be in vfio_main.c, not in the group.c - so
another patch should be added to move it.

I prefer the idea that vfio_device_is_noiommu() works in all the
kconfig scenarios rather than adding #ifdefs.

Jason
Yi Liu June 14, 2023, 1:12 p.m. UTC | #16
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Wednesday, June 14, 2023 8:23 PM
> On Wed, Jun 14, 2023 at 06:20:10AM +0000, Tian, Kevin wrote:
> > > From: Liu, Yi L <yi.l.liu@intel.com>
> > > Sent: Wednesday, June 14, 2023 2:14 PM
> > >
> > >
> > > > With that I think Jason's suggestion is to lift that test into main.c:
> > > >
> > > > int vfio_register_group_dev(struct vfio_device *device)
> > > > {
> > > > 	/*
> > > > 	 * VFIO always sets IOMMU_CACHE because we offer no way for
> > > userspace to
> > > > 	 * restore cache coherency. It has to be checked here because it is
> > > only
> > > > 	 * valid for cases where we are using iommu groups.
> > > > 	 */
> > > > 	if (type == VFIO_IOMMU && !vfio_device_is_noiommu(device) &&
> > > > 	    !device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
> > > > 		return ERR_PTR(-EINVAL);
> > >
> > > vfio_device_is_noiommu() needs to be called after vfio_device_set_group().
> > > Otherwise, it's always false. So still needs to call it in the
> > > __vfio_register_dev().
> >
> > yes
> 
> Right, but it needs to be in vfio_main.c, not in the group.c - so
> another patch should be added to move it.

I've got a patch as below to move it.

From 306e71325d255eef34a1c44312bf9cdc8c302faa Mon Sep 17 00:00:00 2001
From: Yi Liu <yi.l.liu@intel.com>
Date: Wed, 14 Jun 2023 00:37:52 -0700
Subject: [PATCH] vfio: Move the IOMMU_CAP_CACHE_COHERENCY check in
 __vfio_register_dev()

The IOMMU_CAP_CACHE_COHERENCY check only applies to the physical devices
that are IOMMU-backed. This change prepares for compiling the vfio_group
infrastructure optionally as cdev does not support the physical devices
that are not IOMMU-backed. This check help to fail the device registration
for such devices if only vfio_group infrastructure is compiled out.

Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
 drivers/vfio/group.c     | 10 ----------
 drivers/vfio/vfio_main.c | 11 +++++++++++
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c
index 41a09a2df690..c2e0128323a7 100644
--- a/drivers/vfio/group.c
+++ b/drivers/vfio/group.c
@@ -687,16 +687,6 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
 	if (!iommu_group)
 		return ERR_PTR(-EINVAL);
 
-	/*
-	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
-	 * restore cache coherency. It has to be checked here because it is only
-	 * valid for cases where we are using iommu groups.
-	 */
-	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
-		iommu_group_put(iommu_group);
-		return ERR_PTR(-EINVAL);
-	}
-
 	mutex_lock(&vfio.group_lock);
 	group = vfio_group_find_from_iommu(iommu_group);
 	if (group) {
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 51c80eb32af6..ffb4585b7f0e 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -292,6 +292,17 @@ static int __vfio_register_dev(struct vfio_device *device,
 	if (ret)
 		return ret;
 
+	/*
+	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
+	 * restore cache coherency. It has to be checked here because it is only
+	 * valid for cases where we are using iommu groups.
+	 */
+	if (type == VFIO_IOMMU && !vfio_device_is_noiommu(device) &&
+	    !device_iommu_capable(device->dev, IOMMU_CAP_CACHE_COHERENCY)) {
+		ret = -EINVAL;
+		goto err_out;
+	}
+
 	ret = vfio_device_add(device);
 	if (ret)
 		goto err_out;
Jason Gunthorpe June 14, 2023, 5:30 p.m. UTC | #17
On Wed, Jun 14, 2023 at 01:12:50PM +0000, Liu, Yi L wrote:
> diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c
> index 41a09a2df690..c2e0128323a7 100644
> --- a/drivers/vfio/group.c
> +++ b/drivers/vfio/group.c
> @@ -687,16 +687,6 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
>  	if (!iommu_group)
>  		return ERR_PTR(-EINVAL);
>  
> -	/*
> -	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
> -	 * restore cache coherency. It has to be checked here because it is only
> -	 * valid for cases where we are using iommu groups.
> -	 */
> -	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
> -		iommu_group_put(iommu_group);
> -		return ERR_PTR(-EINVAL);
> -	}
> -
>  	mutex_lock(&vfio.group_lock);
>  	group = vfio_group_find_from_iommu(iommu_group);
>  	if (group) {
> diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
> index 51c80eb32af6..ffb4585b7f0e 100644
> --- a/drivers/vfio/vfio_main.c
> +++ b/drivers/vfio/vfio_main.c
> @@ -292,6 +292,17 @@ static int __vfio_register_dev(struct vfio_device *device,
>  	if (ret)
>  		return ret;
>  
> +	/*
> +	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
> +	 * restore cache coherency. It has to be checked here because it is only
> +	 * valid for cases where we are using iommu groups.
> +	 */
> +	if (type == VFIO_IOMMU && !vfio_device_is_noiommu(device) &&
> +	    !device_iommu_capable(device->dev, IOMMU_CAP_CACHE_COHERENCY)) {
> +		ret = -EINVAL;
> +		goto err_out;
> +	}
> +
>  	ret = vfio_device_add(device);
>  	if (ret)
>  		goto err_out;

Yes that looks right

> 
> > I prefer the idea that vfio_device_is_noiommu() works in all the
> > kconfig scenarios rather than adding #ifdefs.
> 
> But the vfio_group would be empty when CONFIG_VFIO_GROUP is unset.
> From what I got now, when CONFIG_VFIO_GROUP is unset, the stub
> function always returns false.

It seems fine, you could also put the ifdef inside the stub

Jason
diff mbox series

Patch

diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c
index 653b62f93474..64cdd0ea8825 100644
--- a/drivers/vfio/group.c
+++ b/drivers/vfio/group.c
@@ -668,21 +668,6 @@  static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
 	struct vfio_group *group;
 
 	iommu_group = iommu_group_get(dev);
-	if (!iommu_group && vfio_noiommu) {
-		/*
-		 * With noiommu enabled, create an IOMMU group for devices that
-		 * don't already have one, implying no IOMMU hardware/driver
-		 * exists.  Taint the kernel because we're about to give a DMA
-		 * capable device to a user without IOMMU protection.
-		 */
-		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
-		if (!IS_ERR(group)) {
-			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
-			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
-		}
-		return group;
-	}
-
 	if (!iommu_group)
 		return ERR_PTR(-EINVAL);
 
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 6d8f9b0f3637..00a699b9f76b 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -265,6 +265,18 @@  static int vfio_init_device(struct vfio_device *device, struct device *dev,
 	return ret;
 }
 
+static int vfio_device_set_noiommu(struct vfio_device *device)
+{
+	struct iommu_group *iommu_group = iommu_group_get(device->dev);
+
+	if (!iommu_group && !vfio_noiommu)
+		return -EINVAL;
+
+	device->noiommu = !iommu_group;
+	iommu_group_put(iommu_group); /* Accepts NULL */
+	return 0;
+}
+
 static int __vfio_register_dev(struct vfio_device *device,
 			       enum vfio_group_type type)
 {
@@ -277,6 +289,13 @@  static int __vfio_register_dev(struct vfio_device *device,
 		     !device->ops->detach_ioas)))
 		return -EINVAL;
 
+	/* Only physical devices can be noiommu device */
+	if (type == VFIO_IOMMU) {
+		ret = vfio_device_set_noiommu(device);
+		if (ret)
+			return ret;
+	}
+
 	/*
 	 * If the driver doesn't specify a set then the device is added to a
 	 * singleton set just for itself.
@@ -288,7 +307,8 @@  static int __vfio_register_dev(struct vfio_device *device,
 	if (ret)
 		return ret;
 
-	ret = vfio_device_set_group(device, type);
+	ret = vfio_device_set_group(device,
+				    device->noiommu ? VFIO_NO_IOMMU : type);
 	if (ret)
 		return ret;
 
@@ -301,6 +321,15 @@  static int __vfio_register_dev(struct vfio_device *device,
 
 	vfio_device_group_register(device);
 
+	if (device->noiommu) {
+		/*
+		 * noiommu deivces have no IOMMU hardware/driver.  Taint the
+		 * kernel because we're about to give a DMA capable device to
+		 * a user without IOMMU protection.
+		 */
+		add_taint(TAINT_USER, LOCKDEP_STILL_OK);
+		dev_warn(device->dev, "Adding kernel taint for vfio-noiommu on device\n");
+	}
 	return 0;
 err_out:
 	vfio_device_remove_group(device);
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index e80a8ac86e46..183e620009e7 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -67,6 +67,7 @@  struct vfio_device {
 	bool iommufd_attached;
 #endif
 	bool cdev_opened:1;
+	bool noiommu:1;
 };
 
 /**