diff mbox series

[v7,9/9] vfio/pci: Allow passing zero-length fd array in VFIO_DEVICE_PCI_HOT_RESET

Message ID 20230602121515.79374-10-yi.l.liu@intel.com (mailing list archive)
State New, archived
Headers show
Series Enhance vfio PCI hot reset for vfio cdev device | expand

Commit Message

Yi Liu June 2, 2023, 12:15 p.m. UTC
This is the way for a user to invoke hot-reset for devices opened via the
cdev interface. The user should check the flag
VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED in the output of the
VFIO_DEVICE_GET_PCI_HOT_RESET_INFO ioctl before doing a hot-reset for cdev
devices.

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Yanting Jiang <yanting.jiang@intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
 drivers/vfio/pci/vfio_pci_core.c | 61 ++++++++++++++++++++++++++------
 include/uapi/linux/vfio.h        | 14 ++++++++
 2 files changed, 64 insertions(+), 11 deletions(-)

Comments

Alex Williamson June 8, 2023, 10:30 p.m. UTC | #1
On Fri,  2 Jun 2023 05:15:15 -0700
Yi Liu <yi.l.liu@intel.com> wrote:

> This is the way user to invoke hot-reset for the devices opened by cdev
> interface. User should check the flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED
> in the output of VFIO_DEVICE_GET_PCI_HOT_RESET_INFO ioctl before doing
> hot-reset for cdev devices.
> 
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> Tested-by: Yanting Jiang <yanting.jiang@intel.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> ---
>  drivers/vfio/pci/vfio_pci_core.c | 61 ++++++++++++++++++++++++++------
>  include/uapi/linux/vfio.h        | 14 ++++++++
>  2 files changed, 64 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> index a615a223cdef..b0eadafcbcf5 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -181,7 +181,8 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev)
>  struct vfio_pci_group_info;
>  static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set);
>  static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
> -				      struct vfio_pci_group_info *groups);
> +				      struct vfio_pci_group_info *groups,
> +				      struct iommufd_ctx *iommufd_ctx);
>  
>  /*
>   * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
> @@ -1308,8 +1309,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct vfio_pci_core_device *vdev,
>  	if (ret)
>  		return ret;
>  
> -	/* Somewhere between 1 and count is OK */
> -	if (!array_count || array_count > count)
> +	if (array_count > count)
>  		return -EINVAL;
>  
>  	group_fds = kcalloc(array_count, sizeof(*group_fds), GFP_KERNEL);
> @@ -1358,7 +1358,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct vfio_pci_core_device *vdev,
>  	info.count = array_count;
>  	info.files = files;
>  
> -	ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info);
> +	ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info, NULL);
>  
>  hot_reset_release:
>  	for (file_idx--; file_idx >= 0; file_idx--)
> @@ -1381,13 +1381,21 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
>  	if (hdr.argsz < minsz || hdr.flags)
>  		return -EINVAL;
>  
> +	/* zero-length array is only for cdev opened devices */
> +	if (!!hdr.count == vfio_device_cdev_opened(&vdev->vdev))
> +		return -EINVAL;
> +
>  	/* Can we do a slot or bus reset or neither? */
>  	if (!pci_probe_reset_slot(vdev->pdev->slot))
>  		slot = true;
>  	else if (pci_probe_reset_bus(vdev->pdev->bus))
>  		return -ENODEV;
>  
> -	return vfio_pci_ioctl_pci_hot_reset_groups(vdev, hdr.count, slot, arg);
> +	if (hdr.count)
> +		return vfio_pci_ioctl_pci_hot_reset_groups(vdev, hdr.count, slot, arg);
> +
> +	return vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, NULL,
> +					  vfio_iommufd_device_ictx(&vdev->vdev));
>  }
>  
>  static int vfio_pci_ioctl_ioeventfd(struct vfio_pci_core_device *vdev,
> @@ -2354,13 +2362,16 @@ const struct pci_error_handlers vfio_pci_core_err_handlers = {
>  };
>  EXPORT_SYMBOL_GPL(vfio_pci_core_err_handlers);
>  
> -static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev,
> +static bool vfio_dev_in_groups(struct vfio_device *vdev,
>  			       struct vfio_pci_group_info *groups)
>  {
>  	unsigned int i;
>  
> +	if (!groups)
> +		return false;
> +
>  	for (i = 0; i < groups->count; i++)
> -		if (vfio_file_has_dev(groups->files[i], &vdev->vdev))
> +		if (vfio_file_has_dev(groups->files[i], vdev))
>  			return true;
>  	return false;
>  }
> @@ -2436,7 +2447,8 @@ static int vfio_pci_dev_set_pm_runtime_get(struct vfio_device_set *dev_set)
>   * get each memory_lock.
>   */
>  static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
> -				      struct vfio_pci_group_info *groups)
> +				      struct vfio_pci_group_info *groups,
> +				      struct iommufd_ctx *iommufd_ctx)
>  {
>  	struct vfio_pci_core_device *cur_mem;
>  	struct vfio_pci_core_device *cur_vma;
> @@ -2466,11 +2478,38 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
>  		goto err_unlock;
>  
>  	list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) {
> +		bool owned;
> +
>  		/*
> -		 * Test whether all the affected devices are contained by the
> -		 * set of groups provided by the user.
> +		 * Test whether all the affected devices can be reset by the
> +		 * user.
> +		 *
> +		 * If called from a group opened device and the user provides
> +		 * a set of groups, all the devices in the dev_set should be
> +		 * contained by the set of groups provided by the user.
> +		 *
> +		 * If called from a cdev opened device and the user provides
> +		 * a zero-length array, all the devices in the dev_set must
> +		 * be bound to the same iommufd_ctx as the input iommufd_ctx.
> +		 * If there is any device that has not been bound to any
> +		 * iommufd_ctx yet, check if its iommu_group has any device
> +		 * bound to the input iommufd_ctx.  Such devices can be
> +		 * considered owned by the input iommufd_ctx as the device
> +		 * cannot be owned by another iommufd_ctx when its iommu_group
> +		 * is owned.
> +		 *
> +		 * Otherwise, reset is not allowed.
>  		 */
> -		if (!vfio_dev_in_groups(cur_vma, groups)) {
> +		if (iommufd_ctx) {
> +			int devid = vfio_iommufd_device_hot_reset_devid(&cur_vma->vdev,
> +									iommufd_ctx);
> +
> +			owned = (devid != VFIO_PCI_DEVID_NOT_OWNED);
> +		} else {
> +			owned = vfio_dev_in_groups(&cur_vma->vdev, groups);
> +		}
> +
> +		if (!owned) {
>  			ret = -EINVAL;
>  			goto err_undo;
>  		}
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 70cc31e6b1ce..f753124e1c82 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -690,6 +690,9 @@ enum {
>   *	  affected devices are represented in the dev_set and also owned by
>   *	  the user.  This flag is available only when
>   *	  flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID is set, otherwise reserved.
> + *	  When set, user could invoke VFIO_DEVICE_PCI_HOT_RESET with a zero
> + *	  length fd array on the calling device as the ownership is validated
> + *	  by iommufd_ctx.
>   *
>   * Return: 0 on success, -errno on failure:
>   *	-enospc = insufficient buffer, -enodev = unsupported for device.
> @@ -721,6 +724,17 @@ struct vfio_pci_hot_reset_info {
>   * VFIO_DEVICE_PCI_HOT_RESET - _IOW(VFIO_TYPE, VFIO_BASE + 13,
>   *				    struct vfio_pci_hot_reset)
>   *
> + * Userspace requests hot reset for the devices it operates.  Due to the
> + * underlying topology, multiple devices can be affected in the reset
> + * while some might be opened by another user.  To avoid interference
> + * the calling user must ensure all affected devices are owned by itself.

This phrasing suggests to me that we're placing the responsibility on
the user to avoid resetting another user's devices.  Perhaps these
paragraphs could be replaced with:

  A PCI hot reset results in either a bus or slot reset which may affect
  other devices sharing the bus/slot.  The calling user must have
  ownership of the full set of affected devices as determined by the
  VFIO_DEVICE_GET_PCI_HOT_RESET_INFO ioctl.

  When called on a device file descriptor acquired through the vfio
  group interface, the user is required to provide proof of ownership
  of those affected devices via the group_fds array in struct
  vfio_pci_hot_reset.

  When called on a direct cdev opened vfio device, the flags field of
  struct vfio_pci_hot_reset_info reports the ownership status of the
  affected devices and this ioctl must be called with an empty group_fds
  array.  See above INFO ioctl definition for ownership requirements.

  Mixed usage of legacy groups and cdevs across the set of affected
  devices is not supported.

Other than this and the couple other comments, the series looks ok to
me.  We still need acks from Jason for iommufd on 3-5.  Thanks,

Alex

> + *
> + * As the ownership described by VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, the
> + * cdev opened devices must exclusively provide a zero-length fd array and
> + * the group opened devices must exclusively use an array of group fds for
> + * proof of ownership.  Mixed access to devices between cdev and legacy
> + * groups are not supported by this interface.
> + *
>   * Return: 0 on success, -errno on failure.
>   */
>  struct vfio_pci_hot_reset {
Yi Liu June 9, 2023, 12:13 a.m. UTC | #2
> From: Alex Williamson <alex.williamson@redhat.com>
> Sent: Friday, June 9, 2023 6:30 AM
> 
> On Fri,  2 Jun 2023 05:15:15 -0700
> Yi Liu <yi.l.liu@intel.com> wrote:
> 
> > This is the way user to invoke hot-reset for the devices opened by cdev
> > interface. User should check the flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED
> > in the output of VFIO_DEVICE_GET_PCI_HOT_RESET_INFO ioctl before doing
> > hot-reset for cdev devices.
> >
> > Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> > Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> > Tested-by: Yanting Jiang <yanting.jiang@intel.com>
> > Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> > ---
> >  drivers/vfio/pci/vfio_pci_core.c | 61 ++++++++++++++++++++++++++------
> >  include/uapi/linux/vfio.h        | 14 ++++++++
> >  2 files changed, 64 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> > index a615a223cdef..b0eadafcbcf5 100644
> > --- a/drivers/vfio/pci/vfio_pci_core.c
> > +++ b/drivers/vfio/pci/vfio_pci_core.c
> > @@ -181,7 +181,8 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device
> *vdev)
> >  struct vfio_pci_group_info;
> >  static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set);
> >  static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
> > -				      struct vfio_pci_group_info *groups);
> > +				      struct vfio_pci_group_info *groups,
> > +				      struct iommufd_ctx *iommufd_ctx);
> >
> >  /*
> >   * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
> > @@ -1308,8 +1309,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct
> vfio_pci_core_device *vdev,
> >  	if (ret)
> >  		return ret;
> >
> > -	/* Somewhere between 1 and count is OK */
> > -	if (!array_count || array_count > count)
> > +	if (array_count > count)
> >  		return -EINVAL;
> >
> >  	group_fds = kcalloc(array_count, sizeof(*group_fds), GFP_KERNEL);
> > @@ -1358,7 +1358,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct
> vfio_pci_core_device *vdev,
> >  	info.count = array_count;
> >  	info.files = files;
> >
> > -	ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info);
> > +	ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info, NULL);
> >
> >  hot_reset_release:
> >  	for (file_idx--; file_idx >= 0; file_idx--)
> > @@ -1381,13 +1381,21 @@ static int vfio_pci_ioctl_pci_hot_reset(struct
> vfio_pci_core_device *vdev,
> >  	if (hdr.argsz < minsz || hdr.flags)
> >  		return -EINVAL;
> >
> > +	/* zero-length array is only for cdev opened devices */
> > +	if (!!hdr.count == vfio_device_cdev_opened(&vdev->vdev))
> > +		return -EINVAL;
> > +
> >  	/* Can we do a slot or bus reset or neither? */
> >  	if (!pci_probe_reset_slot(vdev->pdev->slot))
> >  		slot = true;
> >  	else if (pci_probe_reset_bus(vdev->pdev->bus))
> >  		return -ENODEV;
> >
> > -	return vfio_pci_ioctl_pci_hot_reset_groups(vdev, hdr.count, slot, arg);
> > +	if (hdr.count)
> > +		return vfio_pci_ioctl_pci_hot_reset_groups(vdev, hdr.count, slot, arg);
> > +
> > +	return vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, NULL,
> > +					  vfio_iommufd_device_ictx(&vdev->vdev));
> >  }
> >
> >  static int vfio_pci_ioctl_ioeventfd(struct vfio_pci_core_device *vdev,
> > @@ -2354,13 +2362,16 @@ const struct pci_error_handlers
> vfio_pci_core_err_handlers = {
> >  };
> >  EXPORT_SYMBOL_GPL(vfio_pci_core_err_handlers);
> >
> > -static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev,
> > +static bool vfio_dev_in_groups(struct vfio_device *vdev,
> >  			       struct vfio_pci_group_info *groups)
> >  {
> >  	unsigned int i;
> >
> > +	if (!groups)
> > +		return false;
> > +
> >  	for (i = 0; i < groups->count; i++)
> > -		if (vfio_file_has_dev(groups->files[i], &vdev->vdev))
> > +		if (vfio_file_has_dev(groups->files[i], vdev))
> >  			return true;
> >  	return false;
> >  }
> > @@ -2436,7 +2447,8 @@ static int vfio_pci_dev_set_pm_runtime_get(struct
> vfio_device_set *dev_set)
> >   * get each memory_lock.
> >   */
> >  static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
> > -				      struct vfio_pci_group_info *groups)
> > +				      struct vfio_pci_group_info *groups,
> > +				      struct iommufd_ctx *iommufd_ctx)
> >  {
> >  	struct vfio_pci_core_device *cur_mem;
> >  	struct vfio_pci_core_device *cur_vma;
> > @@ -2466,11 +2478,38 @@ static int vfio_pci_dev_set_hot_reset(struct
> vfio_device_set *dev_set,
> >  		goto err_unlock;
> >
> >  	list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) {
> > +		bool owned;
> > +
> >  		/*
> > -		 * Test whether all the affected devices are contained by the
> > -		 * set of groups provided by the user.
> > +		 * Test whether all the affected devices can be reset by the
> > +		 * user.
> > +		 *
> > +		 * If called from a group opened device and the user provides
> > +		 * a set of groups, all the devices in the dev_set should be
> > +		 * contained by the set of groups provided by the user.
> > +		 *
> > +		 * If called from a cdev opened device and the user provides
> > +		 * a zero-length array, all the devices in the dev_set must
> > +		 * be bound to the same iommufd_ctx as the input iommufd_ctx.
> > +		 * If there is any device that has not been bound to any
> > +		 * iommufd_ctx yet, check if its iommu_group has any device
> > +		 * bound to the input iommufd_ctx.  Such devices can be
> > +		 * considered owned by the input iommufd_ctx as the device
> > +		 * cannot be owned by another iommufd_ctx when its iommu_group
> > +		 * is owned.
> > +		 *
> > +		 * Otherwise, reset is not allowed.
> >  		 */
> > -		if (!vfio_dev_in_groups(cur_vma, groups)) {
> > +		if (iommufd_ctx) {
> > +			int devid = vfio_iommufd_device_hot_reset_devid(&cur_vma-
> >vdev,
> > +									iommufd_ctx);
> > +
> > +			owned = (devid != VFIO_PCI_DEVID_NOT_OWNED);
> > +		} else {
> > +			owned = vfio_dev_in_groups(&cur_vma->vdev, groups);
> > +		}
> > +
> > +		if (!owned) {
> >  			ret = -EINVAL;
> >  			goto err_undo;
> >  		}
> > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> > index 70cc31e6b1ce..f753124e1c82 100644
> > --- a/include/uapi/linux/vfio.h
> > +++ b/include/uapi/linux/vfio.h
> > @@ -690,6 +690,9 @@ enum {
> >   *	  affected devices are represented in the dev_set and also owned by
> >   *	  the user.  This flag is available only when
> >   *	  flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID is set, otherwise reserved.
> > + *	  When set, user could invoke VFIO_DEVICE_PCI_HOT_RESET with a zero
> > + *	  length fd array on the calling device as the ownership is validated
> > + *	  by iommufd_ctx.
> >   *
> >   * Return: 0 on success, -errno on failure:
> >   *	-enospc = insufficient buffer, -enodev = unsupported for device.
> > @@ -721,6 +724,17 @@ struct vfio_pci_hot_reset_info {
> >   * VFIO_DEVICE_PCI_HOT_RESET - _IOW(VFIO_TYPE, VFIO_BASE + 13,
> >   *				    struct vfio_pci_hot_reset)
> >   *
> > + * Userspace requests hot reset for the devices it operates.  Due to the
> > + * underlying topology, multiple devices can be affected in the reset
> > + * while some might be opened by another user.  To avoid interference
> > + * the calling user must ensure all affected devices are owned by itself.
> 
> This phrasing suggest to me that we're placing the responsibility on
> the user to avoid resetting another user's devices.

This responsibility is not new. Is it? 
Jason Gunthorpe June 9, 2023, 2:38 p.m. UTC | #3
On Fri, Jun 09, 2023 at 12:13:58AM +0000, Liu, Yi L wrote:

> > Other than this and the couple other comments, the series looks ok to
> > me.  We still need acks from Jason for iommufd on 3-5.  Thanks,
> 
> Thanks, perhaps one more version after getting feedback from Jason.

Yes, perhaps today I can reach it

Jason
Jason Gunthorpe June 13, 2023, 6:09 p.m. UTC | #4
On Fri, Jun 02, 2023 at 05:15:15AM -0700, Yi Liu wrote:
> This is the way user to invoke hot-reset for the devices opened by cdev
> interface. User should check the flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED
> in the output of VFIO_DEVICE_GET_PCI_HOT_RESET_INFO ioctl before doing
> hot-reset for cdev devices.
> 
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> Tested-by: Yanting Jiang <yanting.jiang@intel.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> ---
>  drivers/vfio/pci/vfio_pci_core.c | 61 ++++++++++++++++++++++++++------
>  include/uapi/linux/vfio.h        | 14 ++++++++
>  2 files changed, 64 insertions(+), 11 deletions(-)

This looks OK, but it should be in the cdev series.

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>

Jason
diff mbox series

Patch

diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index a615a223cdef..b0eadafcbcf5 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -181,7 +181,8 @@  static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev)
 struct vfio_pci_group_info;
 static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set);
 static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
-				      struct vfio_pci_group_info *groups);
+				      struct vfio_pci_group_info *groups,
+				      struct iommufd_ctx *iommufd_ctx);
 
 /*
  * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
@@ -1308,8 +1309,7 @@  vfio_pci_ioctl_pci_hot_reset_groups(struct vfio_pci_core_device *vdev,
 	if (ret)
 		return ret;
 
-	/* Somewhere between 1 and count is OK */
-	if (!array_count || array_count > count)
+	if (array_count > count)
 		return -EINVAL;
 
 	group_fds = kcalloc(array_count, sizeof(*group_fds), GFP_KERNEL);
@@ -1358,7 +1358,7 @@  vfio_pci_ioctl_pci_hot_reset_groups(struct vfio_pci_core_device *vdev,
 	info.count = array_count;
 	info.files = files;
 
-	ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info);
+	ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info, NULL);
 
 hot_reset_release:
 	for (file_idx--; file_idx >= 0; file_idx--)
@@ -1381,13 +1381,21 @@  static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
 	if (hdr.argsz < minsz || hdr.flags)
 		return -EINVAL;
 
+	/* zero-length array is only for cdev opened devices */
+	if (!!hdr.count == vfio_device_cdev_opened(&vdev->vdev))
+		return -EINVAL;
+
 	/* Can we do a slot or bus reset or neither? */
 	if (!pci_probe_reset_slot(vdev->pdev->slot))
 		slot = true;
 	else if (pci_probe_reset_bus(vdev->pdev->bus))
 		return -ENODEV;
 
-	return vfio_pci_ioctl_pci_hot_reset_groups(vdev, hdr.count, slot, arg);
+	if (hdr.count)
+		return vfio_pci_ioctl_pci_hot_reset_groups(vdev, hdr.count, slot, arg);
+
+	return vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, NULL,
+					  vfio_iommufd_device_ictx(&vdev->vdev));
 }
 
 static int vfio_pci_ioctl_ioeventfd(struct vfio_pci_core_device *vdev,
@@ -2354,13 +2362,16 @@  const struct pci_error_handlers vfio_pci_core_err_handlers = {
 };
 EXPORT_SYMBOL_GPL(vfio_pci_core_err_handlers);
 
-static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev,
+static bool vfio_dev_in_groups(struct vfio_device *vdev,
 			       struct vfio_pci_group_info *groups)
 {
 	unsigned int i;
 
+	if (!groups)
+		return false;
+
 	for (i = 0; i < groups->count; i++)
-		if (vfio_file_has_dev(groups->files[i], &vdev->vdev))
+		if (vfio_file_has_dev(groups->files[i], vdev))
 			return true;
 	return false;
 }
@@ -2436,7 +2447,8 @@  static int vfio_pci_dev_set_pm_runtime_get(struct vfio_device_set *dev_set)
  * get each memory_lock.
  */
 static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
-				      struct vfio_pci_group_info *groups)
+				      struct vfio_pci_group_info *groups,
+				      struct iommufd_ctx *iommufd_ctx)
 {
 	struct vfio_pci_core_device *cur_mem;
 	struct vfio_pci_core_device *cur_vma;
@@ -2466,11 +2478,38 @@  static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
 		goto err_unlock;
 
 	list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) {
+		bool owned;
+
 		/*
-		 * Test whether all the affected devices are contained by the
-		 * set of groups provided by the user.
+		 * Test whether all the affected devices can be reset by the
+		 * user.
+		 *
+		 * If called from a group opened device and the user provides
+		 * a set of groups, all the devices in the dev_set should be
+		 * contained by the set of groups provided by the user.
+		 *
+		 * If called from a cdev opened device and the user provides
+		 * a zero-length array, all the devices in the dev_set must
+		 * be bound to the same iommufd_ctx as the input iommufd_ctx.
+		 * If there is any device that has not been bound to any
+		 * iommufd_ctx yet, check if its iommu_group has any device
+		 * bound to the input iommufd_ctx.  Such devices can be
+		 * considered owned by the input iommufd_ctx as the device
+		 * cannot be owned by another iommufd_ctx when its iommu_group
+		 * is owned.
+		 *
+		 * Otherwise, reset is not allowed.
 		 */
-		if (!vfio_dev_in_groups(cur_vma, groups)) {
+		if (iommufd_ctx) {
+			int devid = vfio_iommufd_device_hot_reset_devid(&cur_vma->vdev,
+									iommufd_ctx);
+
+			owned = (devid != VFIO_PCI_DEVID_NOT_OWNED);
+		} else {
+			owned = vfio_dev_in_groups(&cur_vma->vdev, groups);
+		}
+
+		if (!owned) {
 			ret = -EINVAL;
 			goto err_undo;
 		}
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 70cc31e6b1ce..f753124e1c82 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -690,6 +690,9 @@  enum {
  *	  affected devices are represented in the dev_set and also owned by
  *	  the user.  This flag is available only when
  *	  flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID is set, otherwise reserved.
+ *	  When set, user could invoke VFIO_DEVICE_PCI_HOT_RESET with a zero
+ *	  length fd array on the calling device as the ownership is validated
+ *	  by iommufd_ctx.
  *
  * Return: 0 on success, -errno on failure:
  *	-enospc = insufficient buffer, -enodev = unsupported for device.
@@ -721,6 +724,17 @@  struct vfio_pci_hot_reset_info {
  * VFIO_DEVICE_PCI_HOT_RESET - _IOW(VFIO_TYPE, VFIO_BASE + 13,
  *				    struct vfio_pci_hot_reset)
  *
+ * Userspace requests hot reset for the devices it operates.  Due to the
+ * underlying topology, multiple devices can be affected in the reset
+ * while some might be opened by another user.  To avoid interference
+ * the calling user must ensure all affected devices are owned by itself.
+ *
+ * Per the ownership described by VFIO_DEVICE_GET_PCI_HOT_RESET_INFO,
+ * cdev opened devices must exclusively provide a zero-length fd array and
+ * group opened devices must exclusively use an array of group fds for
+ * proof of ownership.  Mixed access to devices between cdev and legacy
+ * groups is not supported by this interface.
+ *
  * Return: 0 on success, -errno on failure.
  */
 struct vfio_pci_hot_reset {