diff mbox series

[v5,10/20] vfio/pci: Make vfio cdev pre-openable by passing a file handle

Message ID 20231109114529.1904193-11-zhenzhong.duan@intel.com (mailing list archive)
State New, archived
Headers show
Series vfio: Adopt iommufd | expand

Commit Message

Duan, Zhenzhong Nov. 9, 2023, 11:45 a.m. UTC
This gives management tools like libvirt a chance to open the vfio
cdev with privilege and pass FD to qemu. This way qemu never needs
to have privilege to open a VFIO or iommu cdev node.

Together with the earlier support of pre-opening /dev/iommu device,
now we have full support of passing a vfio device to unprivileged
qemu by management tool. This mode is no more considered for the
legacy backend. So let's remove the "TODO" comment.

Add a helper function vfio_device_get_name() to check fd and get
device name, it will also be used by other vfio devices.

There is no easy way to check if a device is mdev with FD passing,
so fail the x-balloon-allowed check unconditionally in this case.

There is also no easy way to get BDF as name with FD passing, so
we fake a name by VFIO_FD[fd].

Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 include/hw/vfio/vfio-common.h |  1 +
 hw/vfio/helpers.c             | 34 +++++++++++++++++++++++++++++
 hw/vfio/iommufd.c             | 12 +++++++----
 hw/vfio/pci.c                 | 40 ++++++++++++++++++++++++-----------
 4 files changed, 71 insertions(+), 16 deletions(-)

Comments

Cédric Le Goater Nov. 10, 2023, 10:52 a.m. UTC | #1
On 11/9/23 12:45, Zhenzhong Duan wrote:
> This gives management tools like libvirt a chance to open the vfio
> cdev with privilege and pass FD to qemu. This way qemu never needs
> to have privilege to open a VFIO or iommu cdev node.
> 
> Together with the earlier support of pre-opening /dev/iommu device,
> now we have full support of passing a vfio device to unprivileged
> qemu by management tool. This mode is no more considered for the
> legacy backend. So let's remove the "TODO" comment.
> 
> Add a helper function vfio_device_get_name() to check fd and get
> device name, it will also be used by other vfio devices.
> 
> There is no easy way to check if a device is mdev with FD passing,
> so fail the x-balloon-allowed check unconditionally in this case.
> 
> There is also no easy way to get BDF as name with FD passing, so
> we fake a name by VFIO_FD[fd].
> 
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>   include/hw/vfio/vfio-common.h |  1 +
>   hw/vfio/helpers.c             | 34 +++++++++++++++++++++++++++++
>   hw/vfio/iommufd.c             | 12 +++++++----
>   hw/vfio/pci.c                 | 40 ++++++++++++++++++++++++-----------
>   4 files changed, 71 insertions(+), 16 deletions(-)
> 
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 3dac5c167e..960a14e8d8 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -238,6 +238,7 @@ struct vfio_info_cap_header *
>   vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id);
>   struct vfio_info_cap_header *
>   vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id);
> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>   #endif
>   
>   bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
> index 168847e7c5..d80aa58719 100644
> --- a/hw/vfio/helpers.c
> +++ b/hw/vfio/helpers.c
> @@ -20,6 +20,7 @@
>    */
>   
>   #include "qemu/osdep.h"
> +#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
>   #include <sys/ioctl.h>
>   
>   #include "hw/vfio/vfio-common.h"
> @@ -609,3 +610,36 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
>   
>       return ret;
>   }
> +
> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
> +{
> +    struct stat st;
> +
> +    if (vbasedev->fd < 0) {
> +        if (stat(vbasedev->sysfsdev, &st) < 0) {
> +            error_setg_errno(errp, errno, "no such host device");
> +            error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
> +            return -errno;
> +        }
> +        /* User may specify a name, e.g: VFIO platform device */
> +        if (!vbasedev->name) {
> +            vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
> +        }
> +    }
> +#ifdef CONFIG_IOMMUFD
> +    else {
> +        if (!vbasedev->iommufd) {


Can we handle with this case without CONFIG_IOMMUFD, simply by
testing vbasedev->iommufd ?

> +            error_setg(errp, "Use FD passing only with iommufd backend");
> +            return -EINVAL;
> +        }
> +        /*
> +         * Give a name with fd so any function printing out vbasedev->name
> +         * will not break.
> +         */
> +        if (!vbasedev->name) {
> +            vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
> +        }
> +    }
> +#endif
> +    return 0;
> +}
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 44dc6848bf..fd30477275 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -326,11 +326,15 @@ static int iommufd_attach_device(const char *name, VFIODevice *vbasedev,
>       uint32_t ioas_id;
>       Error *err = NULL;
>   
> -    devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
> -    if (devfd < 0) {
> -        return devfd;
> +    if (vbasedev->fd < 0) {
> +        devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
> +        if (devfd < 0) {
> +            return devfd;
> +        }
> +        vbasedev->fd = devfd;
> +    } else {
> +        devfd = vbasedev->fd;
>       }
> -    vbasedev->fd = devfd;
>   
>       ret = iommufd_connect_and_bind(vbasedev, errp);
>       if (ret) {
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index e9a426200b..f95725ed16 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -44,6 +44,7 @@
>   #include "migration/blocker.h"
>   #include "migration/qemu-file.h"
>   #include "sysemu/iommufd.h"
> +#include "monitor/monitor.h"
>   
>   #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
>   
> @@ -2934,18 +2935,23 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>       VFIODevice *vbasedev = &vdev->vbasedev;
>       char *tmp, *subsys;
>       Error *err = NULL;
> -    struct stat st;
>       int i, ret;
>       bool is_mdev;
>       char uuid[UUID_STR_LEN];
>       char *name;
>   
> -    if (!vbasedev->sysfsdev) {
> +    if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
>           if (!(~vdev->host.domain || ~vdev->host.bus ||
>                 ~vdev->host.slot || ~vdev->host.function)) {
>               error_setg(errp, "No provided host device");
> +#ifdef CONFIG_IOMMUFD
> +            error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F, "
> +                              "-device vfio-pci,sysfsdev=PATH_TO_DEVICE "
> +                              "or -device vfio-pci,fd=DEVICE_FD\n");
> +#else
>               error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
>                                 "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
> +#endif

or simply :


                error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
  +#ifdef CONFIG_IOMMUFD
  +                              "or -device vfio-pci,fd=DEVICE_FD "
  +#endif
                                  "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");



>               return;
>           }
>           vbasedev->sysfsdev =
> @@ -2954,13 +2960,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>                               vdev->host.slot, vdev->host.function);
>       }
>   
> -    if (stat(vbasedev->sysfsdev, &st) < 0) {
> -        error_setg_errno(errp, errno, "no such host device");
> -        error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
> +    if (vfio_device_get_name(vbasedev, errp)) {
>           return;
>       }
> -
> -    vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
>       vbasedev->ops = &vfio_pci_ops;
>       vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>       vbasedev->dev = DEVICE(vdev);
> @@ -3320,6 +3322,7 @@ static void vfio_instance_init(Object *obj)
>       vdev->host.bus = ~0U;
>       vdev->host.slot = ~0U;
>       vdev->host.function = ~0U;
> +    vdev->vbasedev.fd = -1;
We should probably move the all VFIODevice initializations :

     vbasedev->ops = &vfio_pci_ops;
     vbasedev->type = VFIO_DEVICE_TYPE_PCI;
     vbasedev->dev = DEVICE(vdev);

under vfio_instance_init (should be called vfio_pci_instance_init).

This is true for all other VFIO devices. May be not for this series,
it can come later.


>   
>       vdev->nv_gpudirect_clique = 0xFF;
>   
> @@ -3373,11 +3376,6 @@ static Property vfio_pci_dev_properties[] = {
>                                      qdev_prop_nv_gpudirect_clique, uint8_t),
>       DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo,
>                                   OFF_AUTOPCIBAR_OFF),
> -    /*
> -     * TODO - support passed fds... is this necessary?
> -     * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
> -     * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name),
> -     */
>   #ifdef CONFIG_IOMMUFD
>       DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd,
>                        TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
> @@ -3385,6 +3383,21 @@ static Property vfio_pci_dev_properties[] = {
>       DEFINE_PROP_END_OF_LIST(),
>   };
>   
> +#ifdef CONFIG_IOMMUFD
> +static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
> +{
> +    VFIOPCIDevice *vdev = VFIO_PCI(obj);
> +    int fd = -1;
> +
> +    fd = monitor_fd_param(monitor_cur(), str, errp);
> +    if (fd == -1) {
> +        error_prepend(errp, "Could not parse remote object fd %s:", str);
> +        return;
> +    }
> +    vdev->vbasedev.fd = fd;

We could introduce a common helper in hw/vfio/common.c to remove code
duplication :

#ifdef CONFIG_IOMMUFD
static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
{
     vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp);
}
#endif
  

Thanks,

C.



> +}
> +#endif
> +
>   static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
>   {
>       DeviceClass *dc = DEVICE_CLASS(klass);
> @@ -3392,6 +3405,9 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
>   
>       dc->reset = vfio_pci_reset;
>       device_class_set_props(dc, vfio_pci_dev_properties);
> +#ifdef CONFIG_IOMMUFD
> +    object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd);
> +#endif
>       dc->desc = "VFIO-based PCI device assignment";
>       set_bit(DEVICE_CATEGORY_MISC, dc->categories);
>       pdc->realize = vfio_realize;
Duan, Zhenzhong Nov. 13, 2023, 3 a.m. UTC | #2
>-----Original Message-----
>From: Cédric Le Goater <clg@redhat.com>
>Sent: Friday, November 10, 2023 6:53 PM
>Subject: Re: [PATCH v5 10/20] vfio/pci: Make vfio cdev pre-openable by passing a
>file handle
>
>On 11/9/23 12:45, Zhenzhong Duan wrote:
>> This gives management tools like libvirt a chance to open the vfio
>> cdev with privilege and pass FD to qemu. This way qemu never needs
>> to have privilege to open a VFIO or iommu cdev node.
>>
>> Together with the earlier support of pre-opening /dev/iommu device,
>> now we have full support of passing a vfio device to unprivileged
>> qemu by management tool. This mode is no more considered for the
>> legacy backend. So let's remove the "TODO" comment.
>>
>> Add a helper function vfio_device_get_name() to check fd and get
>> device name, it will also be used by other vfio devices.
>>
>> There is no easy way to check if a device is mdev with FD passing,
>> so fail the x-balloon-allowed check unconditionally in this case.
>>
>> There is also no easy way to get BDF as name with FD passing, so
>> we fake a name by VFIO_FD[fd].
>>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>   include/hw/vfio/vfio-common.h |  1 +
>>   hw/vfio/helpers.c             | 34 +++++++++++++++++++++++++++++
>>   hw/vfio/iommufd.c             | 12 +++++++----
>>   hw/vfio/pci.c                 | 40 ++++++++++++++++++++++++-----------
>>   4 files changed, 71 insertions(+), 16 deletions(-)
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index 3dac5c167e..960a14e8d8 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -238,6 +238,7 @@ struct vfio_info_cap_header *
>>   vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id);
>>   struct vfio_info_cap_header *
>>   vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id);
>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>   #endif
>>
>>   bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>> index 168847e7c5..d80aa58719 100644
>> --- a/hw/vfio/helpers.c
>> +++ b/hw/vfio/helpers.c
>> @@ -20,6 +20,7 @@
>>    */
>>
>>   #include "qemu/osdep.h"
>> +#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
>>   #include <sys/ioctl.h>
>>
>>   #include "hw/vfio/vfio-common.h"
>> @@ -609,3 +610,36 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int
>region, uint16_t cap_type)
>>
>>       return ret;
>>   }
>> +
>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
>> +{
>> +    struct stat st;
>> +
>> +    if (vbasedev->fd < 0) {
>> +        if (stat(vbasedev->sysfsdev, &st) < 0) {
>> +            error_setg_errno(errp, errno, "no such host device");
>> +            error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
>> +            return -errno;
>> +        }
>> +        /* User may specify a name, e.g: VFIO platform device */
>> +        if (!vbasedev->name) {
>> +            vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
>> +        }
>> +    }
>> +#ifdef CONFIG_IOMMUFD
>> +    else {
>> +        if (!vbasedev->iommufd) {
>
>
>Can we handle with this case without CONFIG_IOMMUFD, simply by
>testing vbasedev->iommufd ?

Sure, will do.

>
>> +            error_setg(errp, "Use FD passing only with iommufd backend");
>> +            return -EINVAL;
>> +        }
>> +        /*
>> +         * Give a name with fd so any function printing out vbasedev->name
>> +         * will not break.
>> +         */
>> +        if (!vbasedev->name) {
>> +            vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
>> +        }
>> +    }
>> +#endif
>> +    return 0;
>> +}
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index 44dc6848bf..fd30477275 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -326,11 +326,15 @@ static int iommufd_attach_device(const char *name,
>VFIODevice *vbasedev,
>>       uint32_t ioas_id;
>>       Error *err = NULL;
>>
>> -    devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
>> -    if (devfd < 0) {
>> -        return devfd;
>> +    if (vbasedev->fd < 0) {
>> +        devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
>> +        if (devfd < 0) {
>> +            return devfd;
>> +        }
>> +        vbasedev->fd = devfd;
>> +    } else {
>> +        devfd = vbasedev->fd;
>>       }
>> -    vbasedev->fd = devfd;
>>
>>       ret = iommufd_connect_and_bind(vbasedev, errp);
>>       if (ret) {
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index e9a426200b..f95725ed16 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -44,6 +44,7 @@
>>   #include "migration/blocker.h"
>>   #include "migration/qemu-file.h"
>>   #include "sysemu/iommufd.h"
>> +#include "monitor/monitor.h"
>>
>>   #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
>>
>> @@ -2934,18 +2935,23 @@ static void vfio_realize(PCIDevice *pdev, Error
>**errp)
>>       VFIODevice *vbasedev = &vdev->vbasedev;
>>       char *tmp, *subsys;
>>       Error *err = NULL;
>> -    struct stat st;
>>       int i, ret;
>>       bool is_mdev;
>>       char uuid[UUID_STR_LEN];
>>       char *name;
>>
>> -    if (!vbasedev->sysfsdev) {
>> +    if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
>>           if (!(~vdev->host.domain || ~vdev->host.bus ||
>>                 ~vdev->host.slot || ~vdev->host.function)) {
>>               error_setg(errp, "No provided host device");
>> +#ifdef CONFIG_IOMMUFD
>> +            error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F, "
>> +                              "-device vfio-pci,sysfsdev=PATH_TO_DEVICE "
>> +                              "or -device vfio-pci,fd=DEVICE_FD\n");
>> +#else
>>               error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
>>                                 "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
>> +#endif
>
>or simply :
>
>
>                error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
>  +#ifdef CONFIG_IOMMUFD
>  +                              "or -device vfio-pci,fd=DEVICE_FD "
>  +#endif
>                                  "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");

Good idea, will do.

>
>
>
>>               return;
>>           }
>>           vbasedev->sysfsdev =
>> @@ -2954,13 +2960,9 @@ static void vfio_realize(PCIDevice *pdev, Error
>**errp)
>>                               vdev->host.slot, vdev->host.function);
>>       }
>>
>> -    if (stat(vbasedev->sysfsdev, &st) < 0) {
>> -        error_setg_errno(errp, errno, "no such host device");
>> -        error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
>> +    if (vfio_device_get_name(vbasedev, errp)) {
>>           return;
>>       }
>> -
>> -    vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
>>       vbasedev->ops = &vfio_pci_ops;
>>       vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>>       vbasedev->dev = DEVICE(vdev);
>> @@ -3320,6 +3322,7 @@ static void vfio_instance_init(Object *obj)
>>       vdev->host.bus = ~0U;
>>       vdev->host.slot = ~0U;
>>       vdev->host.function = ~0U;
>> +    vdev->vbasedev.fd = -1;
>We should probably move the all VFIODevice initializations :
>
>     vbasedev->ops = &vfio_pci_ops;
>     vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>     vbasedev->dev = DEVICE(vdev);
>
>under vfio_instance_init (should be called vfio_pci_instance_init).
>
>This is true for all other VFIO devices. May be not for this series,
>it can come later.

Sure, Let me send a separate series addressing this.

>
>
>>
>>       vdev->nv_gpudirect_clique = 0xFF;
>>
>> @@ -3373,11 +3376,6 @@ static Property vfio_pci_dev_properties[] = {
>>                                      qdev_prop_nv_gpudirect_clique, uint8_t),
>>       DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice,
>msix_relo,
>>                                   OFF_AUTOPCIBAR_OFF),
>> -    /*
>> -     * TODO - support passed fds... is this necessary?
>> -     * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
>> -     * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name),
>> -     */
>>   #ifdef CONFIG_IOMMUFD
>>       DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd,
>>                        TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
>> @@ -3385,6 +3383,21 @@ static Property vfio_pci_dev_properties[] = {
>>       DEFINE_PROP_END_OF_LIST(),
>>   };
>>
>> +#ifdef CONFIG_IOMMUFD
>> +static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
>> +{
>> +    VFIOPCIDevice *vdev = VFIO_PCI(obj);
>> +    int fd = -1;
>> +
>> +    fd = monitor_fd_param(monitor_cur(), str, errp);
>> +    if (fd == -1) {
>> +        error_prepend(errp, "Could not parse remote object fd %s:", str);
>> +        return;
>> +    }
>> +    vdev->vbasedev.fd = fd;
>
>We could introduce a common helper in hw/vfio/common.c to remove code
>duplication :
>
>#ifdef CONFIG_IOMMUFD
>static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
>{
>     vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp);
>}
>#endif

Good suggestions! Will do.

Thanks
Zhenzhong
Cédric Le Goater Nov. 13, 2023, 11:08 a.m. UTC | #3
On 11/13/23 04:00, Duan, Zhenzhong wrote:
> 
> 
>> -----Original Message-----
>> From: Cédric Le Goater <clg@redhat.com>
>> Sent: Friday, November 10, 2023 6:53 PM
>> Subject: Re: [PATCH v5 10/20] vfio/pci: Make vfio cdev pre-openable by passing a
>> file handle
>>
>> On 11/9/23 12:45, Zhenzhong Duan wrote:
>>> This gives management tools like libvirt a chance to open the vfio
>>> cdev with privilege and pass FD to qemu. This way qemu never needs
>>> to have privilege to open a VFIO or iommu cdev node.
>>>
>>> Together with the earlier support of pre-opening /dev/iommu device,
>>> now we have full support of passing a vfio device to unprivileged
>>> qemu by management tool. This mode is no more considered for the
>>> legacy backend. So let's remove the "TODO" comment.
>>>
>>> Add a helper function vfio_device_get_name() to check fd and get
>>> device name, it will also be used by other vfio devices.
>>>
>>> There is no easy way to check if a device is mdev with FD passing,
>>> so fail the x-balloon-allowed check unconditionally in this case.
>>>
>>> There is also no easy way to get BDF as name with FD passing, so
>>> we fake a name by VFIO_FD[fd].
>>>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>>    include/hw/vfio/vfio-common.h |  1 +
>>>    hw/vfio/helpers.c             | 34 +++++++++++++++++++++++++++++
>>>    hw/vfio/iommufd.c             | 12 +++++++----
>>>    hw/vfio/pci.c                 | 40 ++++++++++++++++++++++++-----------
>>>    4 files changed, 71 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>> index 3dac5c167e..960a14e8d8 100644
>>> --- a/include/hw/vfio/vfio-common.h
>>> +++ b/include/hw/vfio/vfio-common.h
>>> @@ -238,6 +238,7 @@ struct vfio_info_cap_header *
>>>    vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id);
>>>    struct vfio_info_cap_header *
>>>    vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id);
>>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>>    #endif
>>>
>>>    bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
>>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>>> index 168847e7c5..d80aa58719 100644
>>> --- a/hw/vfio/helpers.c
>>> +++ b/hw/vfio/helpers.c
>>> @@ -20,6 +20,7 @@
>>>     */
>>>
>>>    #include "qemu/osdep.h"
>>> +#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
>>>    #include <sys/ioctl.h>
>>>
>>>    #include "hw/vfio/vfio-common.h"
>>> @@ -609,3 +610,36 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int
>> region, uint16_t cap_type)
>>>
>>>        return ret;
>>>    }
>>> +
>>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
>>> +{
>>> +    struct stat st;
>>> +
>>> +    if (vbasedev->fd < 0) {
>>> +        if (stat(vbasedev->sysfsdev, &st) < 0) {
>>> +            error_setg_errno(errp, errno, "no such host device");
>>> +            error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
>>> +            return -errno;
>>> +        }
>>> +        /* User may specify a name, e.g: VFIO platform device */
>>> +        if (!vbasedev->name) {
>>> +            vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
>>> +        }
>>> +    }
>>> +#ifdef CONFIG_IOMMUFD
>>> +    else {
>>> +        if (!vbasedev->iommufd) {
>>
>>
>> Can we handle with this case without CONFIG_IOMMUFD, simply by
>> testing vbasedev->iommufd ?
> 
> Sure, will do.
> 
>>
>>> +            error_setg(errp, "Use FD passing only with iommufd backend");
>>> +            return -EINVAL;
>>> +        }
>>> +        /*
>>> +         * Give a name with fd so any function printing out vbasedev->name
>>> +         * will not break.
>>> +         */
>>> +        if (!vbasedev->name) {
>>> +            vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
>>> +        }
>>> +    }
>>> +#endif
>>> +    return 0;
>>> +}
>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>> index 44dc6848bf..fd30477275 100644
>>> --- a/hw/vfio/iommufd.c
>>> +++ b/hw/vfio/iommufd.c
>>> @@ -326,11 +326,15 @@ static int iommufd_attach_device(const char *name,
>> VFIODevice *vbasedev,
>>>        uint32_t ioas_id;
>>>        Error *err = NULL;
>>>
>>> -    devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
>>> -    if (devfd < 0) {
>>> -        return devfd;
>>> +    if (vbasedev->fd < 0) {
>>> +        devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
>>> +        if (devfd < 0) {
>>> +            return devfd;
>>> +        }
>>> +        vbasedev->fd = devfd;
>>> +    } else {
>>> +        devfd = vbasedev->fd;
>>>        }
>>> -    vbasedev->fd = devfd;
>>>
>>>        ret = iommufd_connect_and_bind(vbasedev, errp);
>>>        if (ret) {
>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>> index e9a426200b..f95725ed16 100644
>>> --- a/hw/vfio/pci.c
>>> +++ b/hw/vfio/pci.c
>>> @@ -44,6 +44,7 @@
>>>    #include "migration/blocker.h"
>>>    #include "migration/qemu-file.h"
>>>    #include "sysemu/iommufd.h"
>>> +#include "monitor/monitor.h"
>>>
>>>    #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
>>>
>>> @@ -2934,18 +2935,23 @@ static void vfio_realize(PCIDevice *pdev, Error
>> **errp)
>>>        VFIODevice *vbasedev = &vdev->vbasedev;
>>>        char *tmp, *subsys;
>>>        Error *err = NULL;
>>> -    struct stat st;
>>>        int i, ret;
>>>        bool is_mdev;
>>>        char uuid[UUID_STR_LEN];
>>>        char *name;
>>>
>>> -    if (!vbasedev->sysfsdev) {
>>> +    if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
>>>            if (!(~vdev->host.domain || ~vdev->host.bus ||
>>>                  ~vdev->host.slot || ~vdev->host.function)) {
>>>                error_setg(errp, "No provided host device");
>>> +#ifdef CONFIG_IOMMUFD
>>> +            error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F, "
>>> +                              "-device vfio-pci,sysfsdev=PATH_TO_DEVICE "
>>> +                              "or -device vfio-pci,fd=DEVICE_FD\n");
>>> +#else
>>>                error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
>>>                                  "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
>>> +#endif
>>
>> or simply :
>>
>>
>>                 error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
>>   +#ifdef CONFIG_IOMMUFD
>>   +                              "or -device vfio-pci,fd=DEVICE_FD "
>>   +#endif
>>                                   "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
> 
> Good idea, will do.
> 
>>
>>
>>
>>>                return;
>>>            }
>>>            vbasedev->sysfsdev =
>>> @@ -2954,13 +2960,9 @@ static void vfio_realize(PCIDevice *pdev, Error
>> **errp)
>>>                                vdev->host.slot, vdev->host.function);
>>>        }
>>>
>>> -    if (stat(vbasedev->sysfsdev, &st) < 0) {
>>> -        error_setg_errno(errp, errno, "no such host device");
>>> -        error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
>>> +    if (vfio_device_get_name(vbasedev, errp)) {
>>>            return;
>>>        }
>>> -
>>> -    vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
>>>        vbasedev->ops = &vfio_pci_ops;
>>>        vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>>>        vbasedev->dev = DEVICE(vdev);
>>> @@ -3320,6 +3322,7 @@ static void vfio_instance_init(Object *obj)
>>>        vdev->host.bus = ~0U;
>>>        vdev->host.slot = ~0U;
>>>        vdev->host.function = ~0U;
>>> +    vdev->vbasedev.fd = -1;
>> We should probably move the all VFIODevice initializations :
>>
>>      vbasedev->ops = &vfio_pci_ops;
>>      vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>>      vbasedev->dev = DEVICE(vdev);
>>
>> under vfio_instance_init (should be called vfio_pci_instance_init).
>>
>> This is true for all other VFIO devices. May be not for this series,
>> it can come later.
> 
> Sure, Let me send a separate series addressing this.
> 
>>
>>
>>>
>>>        vdev->nv_gpudirect_clique = 0xFF;
>>>
>>> @@ -3373,11 +3376,6 @@ static Property vfio_pci_dev_properties[] = {
>>>                                       qdev_prop_nv_gpudirect_clique, uint8_t),
>>>        DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice,
>> msix_relo,
>>>                                    OFF_AUTOPCIBAR_OFF),
>>> -    /*
>>> -     * TODO - support passed fds... is this necessary?
>>> -     * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
>>> -     * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name),
>>> -     */
>>>    #ifdef CONFIG_IOMMUFD
>>>        DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd,
>>>                         TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
>>> @@ -3385,6 +3383,21 @@ static Property vfio_pci_dev_properties[] = {
>>>        DEFINE_PROP_END_OF_LIST(),
>>>    };
>>>
>>> +#ifdef CONFIG_IOMMUFD
>>> +static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
>>> +{
>>> +    VFIOPCIDevice *vdev = VFIO_PCI(obj);
>>> +    int fd = -1;
>>> +
>>> +    fd = monitor_fd_param(monitor_cur(), str, errp);
>>> +    if (fd == -1) {
>>> +        error_prepend(errp, "Could not parse remote object fd %s:", str);
>>> +        return;
>>> +    }
>>> +    vdev->vbasedev.fd = fd;
>>
>> We could introduce a common helper in hw/vfio/common.c to remove code
>> duplication :
>>
>> #ifdef CONFIG_IOMMUFD
>> static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
>> {
>>      vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp);
>> }
>> #endif
> 
> Good suggestions! Will do.


The idead is to isolate the iommufd addition in some common extension.
I tried with a QOM interface but it is not really satifying. See
previous email. I am not loosing faith though to find a solution for
this multi inheritance issue with VFIO devices


Thanks,

C.



> 
> Thanks
> Zhenzhong
Duan, Zhenzhong Nov. 14, 2023, 3 a.m. UTC | #4
>-----Original Message-----
>From: Cédric Le Goater <clg@redhat.com>
>Sent: Monday, November 13, 2023 7:08 PM
>Subject: Re: [PATCH v5 10/20] vfio/pci: Make vfio cdev pre-openable by passing a
>file handle
>
>On 11/13/23 04:00, Duan, Zhenzhong wrote:
>>
>>
>>> -----Original Message-----
>>> From: Cédric Le Goater <clg@redhat.com>
>>> Sent: Friday, November 10, 2023 6:53 PM
>>> Subject: Re: [PATCH v5 10/20] vfio/pci: Make vfio cdev pre-openable by
>passing a
>>> file handle
>>>
>>> On 11/9/23 12:45, Zhenzhong Duan wrote:
>>>> This gives management tools like libvirt a chance to open the vfio
>>>> cdev with privilege and pass FD to qemu. This way qemu never needs
>>>> to have privilege to open a VFIO or iommu cdev node.
>>>>
>>>> Together with the earlier support of pre-opening /dev/iommu device,
>>>> now we have full support of passing a vfio device to unprivileged
>>>> qemu by management tool. This mode is no more considered for the
>>>> legacy backend. So let's remove the "TODO" comment.
>>>>
>>>> Add a helper function vfio_device_get_name() to check fd and get
>>>> device name, it will also be used by other vfio devices.
>>>>
>>>> There is no easy way to check if a device is mdev with FD passing,
>>>> so fail the x-balloon-allowed check unconditionally in this case.
>>>>
>>>> There is also no easy way to get BDF as name with FD passing, so
>>>> we fake a name by VFIO_FD[fd].
>>>>
>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>> ---
>>>>    include/hw/vfio/vfio-common.h |  1 +
>>>>    hw/vfio/helpers.c             | 34 +++++++++++++++++++++++++++++
>>>>    hw/vfio/iommufd.c             | 12 +++++++----
>>>>    hw/vfio/pci.c                 | 40 ++++++++++++++++++++++++-----------
>>>>    4 files changed, 71 insertions(+), 16 deletions(-)
>>>>
>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-
>common.h
>>>> index 3dac5c167e..960a14e8d8 100644
>>>> --- a/include/hw/vfio/vfio-common.h
>>>> +++ b/include/hw/vfio/vfio-common.h
>>>> @@ -238,6 +238,7 @@ struct vfio_info_cap_header *
>>>>    vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id);
>>>>    struct vfio_info_cap_header *
>>>>    vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id);
>>>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>>>    #endif
>>>>
>>>>    bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
>>>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>>>> index 168847e7c5..d80aa58719 100644
>>>> --- a/hw/vfio/helpers.c
>>>> +++ b/hw/vfio/helpers.c
>>>> @@ -20,6 +20,7 @@
>>>>     */
>>>>
>>>>    #include "qemu/osdep.h"
>>>> +#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
>>>>    #include <sys/ioctl.h>
>>>>
>>>>    #include "hw/vfio/vfio-common.h"
>>>> @@ -609,3 +610,36 @@ bool vfio_has_region_cap(VFIODevice *vbasedev,
>int
>>> region, uint16_t cap_type)
>>>>
>>>>        return ret;
>>>>    }
>>>> +
>>>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
>>>> +{
>>>> +    struct stat st;
>>>> +
>>>> +    if (vbasedev->fd < 0) {
>>>> +        if (stat(vbasedev->sysfsdev, &st) < 0) {
>>>> +            error_setg_errno(errp, errno, "no such host device");
>>>> +            error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
>>>> +            return -errno;
>>>> +        }
>>>> +        /* User may specify a name, e.g: VFIO platform device */
>>>> +        if (!vbasedev->name) {
>>>> +            vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
>>>> +        }
>>>> +    }
>>>> +#ifdef CONFIG_IOMMUFD
>>>> +    else {
>>>> +        if (!vbasedev->iommufd) {
>>>
>>>
>>> Can we handle with this case without CONFIG_IOMMUFD, simply by
>>> testing vbasedev->iommufd ?
>>
>> Sure, will do.
>>
>>>
>>>> +            error_setg(errp, "Use FD passing only with iommufd backend");
>>>> +            return -EINVAL;
>>>> +        }
>>>> +        /*
>>>> +         * Give a name with fd so any function printing out vbasedev->name
>>>> +         * will not break.
>>>> +         */
>>>> +        if (!vbasedev->name) {
>>>> +            vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
>>>> +        }
>>>> +    }
>>>> +#endif
>>>> +    return 0;
>>>> +}
>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>> index 44dc6848bf..fd30477275 100644
>>>> --- a/hw/vfio/iommufd.c
>>>> +++ b/hw/vfio/iommufd.c
>>>> @@ -326,11 +326,15 @@ static int iommufd_attach_device(const char
>*name,
>>> VFIODevice *vbasedev,
>>>>        uint32_t ioas_id;
>>>>        Error *err = NULL;
>>>>
>>>> -    devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
>>>> -    if (devfd < 0) {
>>>> -        return devfd;
>>>> +    if (vbasedev->fd < 0) {
>>>> +        devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
>>>> +        if (devfd < 0) {
>>>> +            return devfd;
>>>> +        }
>>>> +        vbasedev->fd = devfd;
>>>> +    } else {
>>>> +        devfd = vbasedev->fd;
>>>>        }
>>>> -    vbasedev->fd = devfd;
>>>>
>>>>        ret = iommufd_connect_and_bind(vbasedev, errp);
>>>>        if (ret) {
>>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>>> index e9a426200b..f95725ed16 100644
>>>> --- a/hw/vfio/pci.c
>>>> +++ b/hw/vfio/pci.c
>>>> @@ -44,6 +44,7 @@
>>>>    #include "migration/blocker.h"
>>>>    #include "migration/qemu-file.h"
>>>>    #include "sysemu/iommufd.h"
>>>> +#include "monitor/monitor.h"
>>>>
>>>>    #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
>>>>
>>>> @@ -2934,18 +2935,23 @@ static void vfio_realize(PCIDevice *pdev, Error
>>> **errp)
>>>>        VFIODevice *vbasedev = &vdev->vbasedev;
>>>>        char *tmp, *subsys;
>>>>        Error *err = NULL;
>>>> -    struct stat st;
>>>>        int i, ret;
>>>>        bool is_mdev;
>>>>        char uuid[UUID_STR_LEN];
>>>>        char *name;
>>>>
>>>> -    if (!vbasedev->sysfsdev) {
>>>> +    if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
>>>>            if (!(~vdev->host.domain || ~vdev->host.bus ||
>>>>                  ~vdev->host.slot || ~vdev->host.function)) {
>>>>                error_setg(errp, "No provided host device");
>>>> +#ifdef CONFIG_IOMMUFD
>>>> +            error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F, "
>>>> +                              "-device vfio-pci,sysfsdev=PATH_TO_DEVICE "
>>>> +                              "or -device vfio-pci,fd=DEVICE_FD\n");
>>>> +#else
>>>>                error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
>>>>                                  "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
>>>> +#endif
>>>
>>> or simply :
>>>
>>>
>>>                 error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
>>>   +#ifdef CONFIG_IOMMUFD
>>>   +                              "or -device vfio-pci,fd=DEVICE_FD "
>>>   +#endif
>>>                                   "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
>>
>> Good idea, will do.
>>
>>>
>>>
>>>
>>>>                return;
>>>>            }
>>>>            vbasedev->sysfsdev =
>>>> @@ -2954,13 +2960,9 @@ static void vfio_realize(PCIDevice *pdev, Error
>>> **errp)
>>>>                                vdev->host.slot, vdev->host.function);
>>>>        }
>>>>
>>>> -    if (stat(vbasedev->sysfsdev, &st) < 0) {
>>>> -        error_setg_errno(errp, errno, "no such host device");
>>>> -        error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
>>>> +    if (vfio_device_get_name(vbasedev, errp)) {
>>>>            return;
>>>>        }
>>>> -
>>>> -    vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
>>>>        vbasedev->ops = &vfio_pci_ops;
>>>>        vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>>>>        vbasedev->dev = DEVICE(vdev);
>>>> @@ -3320,6 +3322,7 @@ static void vfio_instance_init(Object *obj)
>>>>        vdev->host.bus = ~0U;
>>>>        vdev->host.slot = ~0U;
>>>>        vdev->host.function = ~0U;
>>>> +    vdev->vbasedev.fd = -1;
>>> We should probably move the all VFIODevice initializations :
>>>
>>>      vbasedev->ops = &vfio_pci_ops;
>>>      vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>>>      vbasedev->dev = DEVICE(vdev);
>>>
>>> under vfio_instance_init (should be called vfio_pci_instance_init).
>>>
>>> This is true for all other VFIO devices. May be not for this series,
>>> it can come later.
>>
>> Sure, Let me send a separate series addressing this.
>>
>>>
>>>
>>>>
>>>>        vdev->nv_gpudirect_clique = 0xFF;
>>>>
>>>> @@ -3373,11 +3376,6 @@ static Property vfio_pci_dev_properties[] = {
>>>>                                       qdev_prop_nv_gpudirect_clique, uint8_t),
>>>>        DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice,
>>> msix_relo,
>>>>                                    OFF_AUTOPCIBAR_OFF),
>>>> -    /*
>>>> -     * TODO - support passed fds... is this necessary?
>>>> -     * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
>>>> -     * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice,
>vfiogroupfd_name),
>>>> -     */
>>>>    #ifdef CONFIG_IOMMUFD
>>>>        DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd,
>>>>                         TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
>>>> @@ -3385,6 +3383,21 @@ static Property vfio_pci_dev_properties[] = {
>>>>        DEFINE_PROP_END_OF_LIST(),
>>>>    };
>>>>
>>>> +#ifdef CONFIG_IOMMUFD
>>>> +static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
>>>> +{
>>>> +    VFIOPCIDevice *vdev = VFIO_PCI(obj);
>>>> +    int fd = -1;
>>>> +
>>>> +    fd = monitor_fd_param(monitor_cur(), str, errp);
>>>> +    if (fd == -1) {
>>>> +        error_prepend(errp, "Could not parse remote object fd %s:", str);
>>>> +        return;
>>>> +    }
>>>> +    vdev->vbasedev.fd = fd;
>>>
>>> We could introduce a common helper in hw/vfio/common.c to remove code
>>> duplication :
>>>
>>> #ifdef CONFIG_IOMMUFD
>>> static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
>>> {
>>>      vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp);
>>> }
>>> #endif
>>
>> Good suggestions! Will do.
>
>
>The idead is to isolate the iommufd addition in some common extension.
>I tried with a QOM interface but it is not really satifying. See
>previous email. I am not loosing faith though to find a solution for
>this multi inheritance issue with VFIO devices

I know, always keep this in mind
diff mbox series

Patch

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 3dac5c167e..960a14e8d8 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -238,6 +238,7 @@  struct vfio_info_cap_header *
 vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id);
 struct vfio_info_cap_header *
 vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id);
+int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
 #endif
 
 bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index 168847e7c5..d80aa58719 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -20,6 +20,7 @@ 
  */
 
 #include "qemu/osdep.h"
+#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
 #include <sys/ioctl.h>
 
 #include "hw/vfio/vfio-common.h"
@@ -609,3 +610,36 @@  bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
 
     return ret;
 }
+
+int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
+{
+    struct stat st;
+
+    if (vbasedev->fd < 0) {
+        if (stat(vbasedev->sysfsdev, &st) < 0) {
+            error_setg_errno(errp, errno, "no such host device");
+            error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
+            return -errno;
+        }
+        /* User may specify a name, e.g: VFIO platform device */
+        if (!vbasedev->name) {
+            vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
+        }
+    }
+#ifdef CONFIG_IOMMUFD
+    else {
+        if (!vbasedev->iommufd) {
+            error_setg(errp, "Use FD passing only with iommufd backend");
+            return -EINVAL;
+        }
+        /*
+         * Give a name with fd so any function printing out vbasedev->name
+         * will not break.
+         */
+        if (!vbasedev->name) {
+            vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
+        }
+    }
+#endif
+    return 0;
+}
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 44dc6848bf..fd30477275 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -326,11 +326,15 @@  static int iommufd_attach_device(const char *name, VFIODevice *vbasedev,
     uint32_t ioas_id;
     Error *err = NULL;
 
-    devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
-    if (devfd < 0) {
-        return devfd;
+    if (vbasedev->fd < 0) {
+        devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
+        if (devfd < 0) {
+            return devfd;
+        }
+        vbasedev->fd = devfd;
+    } else {
+        devfd = vbasedev->fd;
     }
-    vbasedev->fd = devfd;
 
     ret = iommufd_connect_and_bind(vbasedev, errp);
     if (ret) {
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index e9a426200b..f95725ed16 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -44,6 +44,7 @@ 
 #include "migration/blocker.h"
 #include "migration/qemu-file.h"
 #include "sysemu/iommufd.h"
+#include "monitor/monitor.h"
 
 #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
 
@@ -2934,18 +2935,23 @@  static void vfio_realize(PCIDevice *pdev, Error **errp)
     VFIODevice *vbasedev = &vdev->vbasedev;
     char *tmp, *subsys;
     Error *err = NULL;
-    struct stat st;
     int i, ret;
     bool is_mdev;
     char uuid[UUID_STR_LEN];
     char *name;
 
-    if (!vbasedev->sysfsdev) {
+    if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
         if (!(~vdev->host.domain || ~vdev->host.bus ||
               ~vdev->host.slot || ~vdev->host.function)) {
             error_setg(errp, "No provided host device");
+#ifdef CONFIG_IOMMUFD
+            error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F, "
+                              "-device vfio-pci,sysfsdev=PATH_TO_DEVICE "
+                              "or -device vfio-pci,fd=DEVICE_FD\n");
+#else
             error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
                               "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
+#endif
             return;
         }
         vbasedev->sysfsdev =
@@ -2954,13 +2960,9 @@  static void vfio_realize(PCIDevice *pdev, Error **errp)
                             vdev->host.slot, vdev->host.function);
     }
 
-    if (stat(vbasedev->sysfsdev, &st) < 0) {
-        error_setg_errno(errp, errno, "no such host device");
-        error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
+    if (vfio_device_get_name(vbasedev, errp)) {
         return;
     }
-
-    vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
     vbasedev->ops = &vfio_pci_ops;
     vbasedev->type = VFIO_DEVICE_TYPE_PCI;
     vbasedev->dev = DEVICE(vdev);
@@ -3320,6 +3322,7 @@  static void vfio_instance_init(Object *obj)
     vdev->host.bus = ~0U;
     vdev->host.slot = ~0U;
     vdev->host.function = ~0U;
+    vdev->vbasedev.fd = -1;
 
     vdev->nv_gpudirect_clique = 0xFF;
 
@@ -3373,11 +3376,6 @@  static Property vfio_pci_dev_properties[] = {
                                    qdev_prop_nv_gpudirect_clique, uint8_t),
     DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo,
                                 OFF_AUTOPCIBAR_OFF),
-    /*
-     * TODO - support passed fds... is this necessary?
-     * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
-     * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name),
-     */
 #ifdef CONFIG_IOMMUFD
     DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd,
                      TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
@@ -3385,6 +3383,21 @@  static Property vfio_pci_dev_properties[] = {
     DEFINE_PROP_END_OF_LIST(),
 };
 
+#ifdef CONFIG_IOMMUFD
+static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
+{
+    VFIOPCIDevice *vdev = VFIO_PCI(obj);
+    int fd = -1;
+
+    fd = monitor_fd_param(monitor_cur(), str, errp);
+    if (fd == -1) {
+        error_prepend(errp, "Could not parse remote object fd %s:", str);
+        return;
+    }
+    vdev->vbasedev.fd = fd;
+}
+#endif
+
 static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
@@ -3392,6 +3405,9 @@  static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
 
     dc->reset = vfio_pci_reset;
     device_class_set_props(dc, vfio_pci_dev_properties);
+#ifdef CONFIG_IOMMUFD
+    object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd);
+#endif
     dc->desc = "VFIO-based PCI device assignment";
     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
     pdc->realize = vfio_realize;