diff mbox series

[v2,06/12] vfio/pci: Introduce vfio_[attach/detach]_device

Message ID 20230926113255.1177834-7-zhenzhong.duan@intel.com (mailing list archive)
State New, archived
Headers show
Series Prerequisite change for IOMMUFD support | expand

Commit Message

Duan, Zhenzhong Sept. 26, 2023, 11:32 a.m. UTC
From: Eric Auger <eric.auger@redhat.com>

We want the VFIO devices to be able to use two different
IOMMU backends, the legacy VFIO one and the new iommufd one.

Introduce vfio_[attach/detach]_device which aim at hiding the
underlying IOMMU backend (IOCTLs, datatypes, ...).

Once vfio_attach_device completes, the device is attached
to a security context and its fd can be used. Conversely
When vfio_detach_device completes, the device has been
detached from the security context.

At the moment only the implementation based on the legacy
container/group exists. Let's use it from the vfio-pci device.
Subsequent patches will handle other devices.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 include/hw/vfio/vfio-common.h |  3 ++
 hw/vfio/common.c              | 68 +++++++++++++++++++++++++++++++++++
 hw/vfio/pci.c                 | 50 +++-----------------------
 hw/vfio/trace-events          |  2 +-
 4 files changed, 77 insertions(+), 46 deletions(-)

Comments

Eric Auger Sept. 27, 2023, 7:35 a.m. UTC | #1
Hi Zhenzhong,

On 9/26/23 13:32, Zhenzhong Duan wrote:
> From: Eric Auger <eric.auger@redhat.com>
>
> We want the VFIO devices to be able to use two different
> IOMMU backends, the legacy VFIO one and the new iommufd one.
>
> Introduce vfio_[attach/detach]_device which aim at hiding the
> underlying IOMMU backend (IOCTLs, datatypes, ...).
>
> Once vfio_attach_device completes, the device is attached
> to a security context and its fd can be used. Conversely
> When vfio_detach_device completes, the device has been
> detached from the security context.
>
> At the moment only the implementation based on the legacy
> container/group exists. Let's use it from the vfio-pci device.
> Subsequent patches will handle other devices.

you may add: no functional change intended
>
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>  include/hw/vfio/vfio-common.h |  3 ++
>  hw/vfio/common.c              | 68 +++++++++++++++++++++++++++++++++++
>  hw/vfio/pci.c                 | 50 +++-----------------------
>  hw/vfio/trace-events          |  2 +-
>  4 files changed, 77 insertions(+), 46 deletions(-)
>
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index c4e7c3b4a7..12fbfbc37d 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -225,6 +225,9 @@ void vfio_put_group(VFIOGroup *group);
>  struct vfio_device_info *vfio_get_device_info(int fd);
>  int vfio_get_device(VFIOGroup *group, const char *name,
>                      VFIODevice *vbasedev, Error **errp);
> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
> +                       AddressSpace *as, Error **errp);
> +void vfio_detach_device(VFIODevice *vbasedev);
>  
>  int vfio_kvm_device_add_fd(int fd, Error **errp);
>  int vfio_kvm_device_del_fd(int fd, Error **errp);
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 959b1362bb..7f3798b152 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -2611,3 +2611,71 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
>      }
>      return vfio_eeh_container_op(container, op);
>  }
> +
> +static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
> +{
> +    char *tmp, group_path[PATH_MAX], *group_name;
> +    int ret, groupid;
> +    ssize_t len;
> +
> +    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
> +    len = readlink(tmp, group_path, sizeof(group_path));
> +    g_free(tmp);
> +
> +    if (len <= 0 || len >= sizeof(group_path)) {
> +        ret = len < 0 ? -errno : -ENAMETOOLONG;
> +        error_setg_errno(errp, -ret, "no iommu_group found");
> +        return ret;
> +    }
> +
> +    group_path[len] = 0;
> +
> +    group_name = basename(group_path);
> +    if (sscanf(group_name, "%d", &groupid) != 1) {
> +        error_setg_errno(errp, errno, "failed to read %s", group_path);
> +        return -errno;
> +    }
> +    return groupid;
> +}
> +
> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
> +                       AddressSpace *as, Error **errp)
> +{
> +    int groupid = vfio_device_groupid(vbasedev, errp);
> +    VFIODevice *vbasedev_iter;
> +    VFIOGroup *group;
> +    int ret;
> +
> +    if (groupid < 0) {
> +        return groupid;
> +    }
> +
> +    trace_vfio_attach_device(vbasedev->name, groupid);
hum looking at that again, I was confused by the fact we passed the name
arg in

vfio_attach_device() whereas vbasedev->name was already filled. Looking at pci vfio_realize()
both are sometimes different

    if (!qemu_uuid_is_null(&vdev->vf_token)) {
        qemu_uuid_unparse(&vdev->vf_token, uuid);
        name = g_strdup_printf("%s vf_token=%s", vbasedev->name, uuid);
    } else {
        name = g_strdup(vbasedev->name);
    }
This may be worth a doc comment.


> +
> +    group = vfio_get_group(groupid, as, errp);
> +    if (!group) {
> +        return -ENOENT;
> +    }
> +
> +    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> +        if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
> +            error_setg(errp, "device is already attached");
> +            vfio_put_group(group);
> +            return -EBUSY;
> +        }
> +    }
> +    ret = vfio_get_device(group, name, vbasedev, errp);
> +    if (ret) {
> +        vfio_put_group(group);
> +    }
> +
> +    return ret;
> +}
> +
> +void vfio_detach_device(VFIODevice *vbasedev)
> +{
> +    VFIOGroup *group = vbasedev->group;
> +
nit we could add a  trace_vfio_detach_device tracepoint here for symetry
sake
> +    vfio_put_base_device(vbasedev);
> +    vfio_put_group(group);
> +}
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 3b2ca3c24c..fe56789893 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -2828,10 +2828,10 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
>  
>  static void vfio_put_device(VFIOPCIDevice *vdev)
>  {
> +    vfio_detach_device(&vdev->vbasedev);
> +
>      g_free(vdev->vbasedev.name);
>      g_free(vdev->msix);
> -
> -    vfio_put_base_device(&vdev->vbasedev);
>  }
>  
>  static void vfio_err_notifier_handler(void *opaque)
> @@ -2978,13 +2978,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>  {
>      VFIOPCIDevice *vdev = VFIO_PCI(pdev);
>      VFIODevice *vbasedev = &vdev->vbasedev;
> -    VFIODevice *vbasedev_iter;
> -    VFIOGroup *group;
> -    char *tmp, *subsys, group_path[PATH_MAX], *group_name;
> +    char *tmp, *subsys;
>      Error *err = NULL;
> -    ssize_t len;
>      struct stat st;
> -    int groupid;
>      int i, ret;
>      bool is_mdev;
>      char uuid[UUID_FMT_LEN];
> @@ -3015,39 +3011,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>      vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>      vbasedev->dev = DEVICE(vdev);
>  
> -    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
> -    len = readlink(tmp, group_path, sizeof(group_path));
> -    g_free(tmp);
> -
> -    if (len <= 0 || len >= sizeof(group_path)) {
> -        error_setg_errno(errp, len < 0 ? errno : ENAMETOOLONG,
> -                         "no iommu_group found");
> -        goto error;
> -    }
> -
> -    group_path[len] = 0;
> -
> -    group_name = basename(group_path);
> -    if (sscanf(group_name, "%d", &groupid) != 1) {
> -        error_setg_errno(errp, errno, "failed to read %s", group_path);
> -        goto error;
> -    }
> -
> -    trace_vfio_realize(vbasedev->name, groupid);
> -
> -    group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev), errp);
> -    if (!group) {
> -        goto error;
> -    }
> -
> -    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> -        if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
> -            error_setg(errp, "device is already attached");
> -            vfio_put_group(group);
> -            goto error;
> -        }
> -    }
> -
>      /*
>       * Mediated devices *might* operate compatibly with discarding of RAM, but
>       * we cannot know for certain, it depends on whether the mdev vendor driver
> @@ -3065,7 +3028,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>      if (vbasedev->ram_block_discard_allowed && !is_mdev) {
>          error_setg(errp, "x-balloon-allowed only potentially compatible "
>                     "with mdev devices");
> -        vfio_put_group(group);
>          goto error;
>      }
>  
> @@ -3076,10 +3038,10 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>          name = g_strdup(vbasedev->name);
>      }
>  
> -    ret = vfio_get_device(group, name, vbasedev, errp);
> +    ret = vfio_attach_device(name, vbasedev,
> +                             pci_device_iommu_address_space(pdev), errp);
>      g_free(name);
>      if (ret) {
> -        vfio_put_group(group);
>          goto error;
>      }
>  
> @@ -3304,7 +3266,6 @@ error:
>  static void vfio_instance_finalize(Object *obj)
>  {
>      VFIOPCIDevice *vdev = VFIO_PCI(obj);
> -    VFIOGroup *group = vdev->vbasedev.group;
>  
>      vfio_display_finalize(vdev);
>      vfio_bars_finalize(vdev);
> @@ -3318,7 +3279,6 @@ static void vfio_instance_finalize(Object *obj)
>       * g_free(vdev->igd_opregion);
>       */
>      vfio_put_device(vdev);
> -    vfio_put_group(group);
>  }
>  
>  static void vfio_exitfn(PCIDevice *pdev)
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index e64ca4a019..e710026a73 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -37,7 +37,7 @@ vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int
>  vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s"
>  vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n  size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
>  vfio_populate_device_get_irq_info_failure(const char *errstr) "VFIO_DEVICE_GET_IRQ_INFO failure: %s"
> -vfio_realize(const char *name, int group_id) " (%s) group %d"
> +vfio_attach_device(const char *name, int group_id) " (%s) group %d"
>  vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d"
>  vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x"
>  vfio_pci_reset(const char *name) " (%s)"
Thanks

Eric
Eric Auger Sept. 27, 2023, 9:01 a.m. UTC | #2
Hi Zhenzhong,

On 9/26/23 13:32, Zhenzhong Duan wrote:
> From: Eric Auger <eric.auger@redhat.com>
>
> We want the VFIO devices to be able to use two different
> IOMMU backends, the legacy VFIO one and the new iommufd one.
>
> Introduce vfio_[attach/detach]_device which aim at hiding the
> underlying IOMMU backend (IOCTLs, datatypes, ...).
>
> Once vfio_attach_device completes, the device is attached
> to a security context and its fd can be used. Conversely
> When vfio_detach_device completes, the device has been
> detached from the security context.
>
> At the moment only the implementation based on the legacy
> container/group exists. Let's use it from the vfio-pci device.
> Subsequent patches will handle other devices.
>
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>  include/hw/vfio/vfio-common.h |  3 ++
>  hw/vfio/common.c              | 68 +++++++++++++++++++++++++++++++++++
>  hw/vfio/pci.c                 | 50 +++-----------------------
>  hw/vfio/trace-events          |  2 +-
>  4 files changed, 77 insertions(+), 46 deletions(-)
>
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index c4e7c3b4a7..12fbfbc37d 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -225,6 +225,9 @@ void vfio_put_group(VFIOGroup *group);
>  struct vfio_device_info *vfio_get_device_info(int fd);
>  int vfio_get_device(VFIOGroup *group, const char *name,
>                      VFIODevice *vbasedev, Error **errp);
> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
> +                       AddressSpace *as, Error **errp);
> +void vfio_detach_device(VFIODevice *vbasedev);
>  
>  int vfio_kvm_device_add_fd(int fd, Error **errp);
>  int vfio_kvm_device_del_fd(int fd, Error **errp);
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 959b1362bb..7f3798b152 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -2611,3 +2611,71 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
>      }
>      return vfio_eeh_container_op(container, op);
>  }
> +
> +static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
> +{
> +    char *tmp, group_path[PATH_MAX], *group_name;
> +    int ret, groupid;
> +    ssize_t len;
> +
> +    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
> +    len = readlink(tmp, group_path, sizeof(group_path));
> +    g_free(tmp);
> +
> +    if (len <= 0 || len >= sizeof(group_path)) {
> +        ret = len < 0 ? -errno : -ENAMETOOLONG;
> +        error_setg_errno(errp, -ret, "no iommu_group found");
> +        return ret;
> +    }
> +
> +    group_path[len] = 0;
> +
> +    group_name = basename(group_path);
> +    if (sscanf(group_name, "%d", &groupid) != 1) {
> +        error_setg_errno(errp, errno, "failed to read %s", group_path);
> +        return -errno;
> +    }
> +    return groupid;
> +}
> +
> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
> +                       AddressSpace *as, Error **errp)
> +{
> +    int groupid = vfio_device_groupid(vbasedev, errp);
> +    VFIODevice *vbasedev_iter;
> +    VFIOGroup *group;
> +    int ret;
> +
> +    if (groupid < 0) {
> +        return groupid;
> +    }
> +
> +    trace_vfio_attach_device(vbasedev->name, groupid);
> +
> +    group = vfio_get_group(groupid, as, errp);
> +    if (!group) {
> +        return -ENOENT;
> +    }
> +
> +    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> +        if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
> +            error_setg(errp, "device is already attached");
> +            vfio_put_group(group);
> +            return -EBUSY;
> +        }
> +    }
> +    ret = vfio_get_device(group, name, vbasedev, errp);
> +    if (ret) {
> +        vfio_put_group(group);
> +    }
> +
> +    return ret;
> +}
> +
> +void vfio_detach_device(VFIODevice *vbasedev)
> +{
> +    VFIOGroup *group = vbasedev->group;
> +
> +    vfio_put_base_device(vbasedev);
> +    vfio_put_group(group);
> +}
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 3b2ca3c24c..fe56789893 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -2828,10 +2828,10 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
>  
>  static void vfio_put_device(VFIOPCIDevice *vdev)
>  {
> +    vfio_detach_device(&vdev->vbasedev);
> +
>      g_free(vdev->vbasedev.name);
>      g_free(vdev->msix);
> -
> -    vfio_put_base_device(&vdev->vbasedev);
>  }
>  
>  static void vfio_err_notifier_handler(void *opaque)
> @@ -2978,13 +2978,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>  {
>      VFIOPCIDevice *vdev = VFIO_PCI(pdev);
>      VFIODevice *vbasedev = &vdev->vbasedev;
> -    VFIODevice *vbasedev_iter;
> -    VFIOGroup *group;
> -    char *tmp, *subsys, group_path[PATH_MAX], *group_name;
> +    char *tmp, *subsys;
>      Error *err = NULL;
> -    ssize_t len;
>      struct stat st;
> -    int groupid;
>      int i, ret;
>      bool is_mdev;
>      char uuid[UUID_FMT_LEN];
> @@ -3015,39 +3011,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>      vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>      vbasedev->dev = DEVICE(vdev);
>  
> -    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
> -    len = readlink(tmp, group_path, sizeof(group_path));
> -    g_free(tmp);
> -
> -    if (len <= 0 || len >= sizeof(group_path)) {
> -        error_setg_errno(errp, len < 0 ? errno : ENAMETOOLONG,
> -                         "no iommu_group found");
> -        goto error;
> -    }
> -
> -    group_path[len] = 0;
> -
> -    group_name = basename(group_path);
> -    if (sscanf(group_name, "%d", &groupid) != 1) {
> -        error_setg_errno(errp, errno, "failed to read %s", group_path);
> -        goto error;
> -    }
> -
> -    trace_vfio_realize(vbasedev->name, groupid);
> -
> -    group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev), errp);
> -    if (!group) {
> -        goto error;
> -    }
> -
> -    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> -        if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
> -            error_setg(errp, "device is already attached");
> -            vfio_put_group(group);
> -            goto error;
> -        }
> -    }
> -
>      /*
>       * Mediated devices *might* operate compatibly with discarding of RAM, but
>       * we cannot know for certain, it depends on whether the mdev vendor driver
> @@ -3065,7 +3028,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>      if (vbasedev->ram_block_discard_allowed && !is_mdev) {
>          error_setg(errp, "x-balloon-allowed only potentially compatible "
>                     "with mdev devices");
> -        vfio_put_group(group);
>          goto error;
>      }
>  
> @@ -3076,10 +3038,10 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>          name = g_strdup(vbasedev->name);
>      }
>  
> -    ret = vfio_get_device(group, name, vbasedev, errp);
> +    ret = vfio_attach_device(name, vbasedev,
> +                             pci_device_iommu_address_space(pdev), errp);
>      g_free(name);
>      if (ret) {
> -        vfio_put_group(group);
independently on this patch, I think in case of error we leak
vbasedev->name. Please have a look and if confirmed you can send a
separate patch.

Also I think if any subsequent action fail we shoudl properly detach the
detach the device so introduce an extra error goto label, my bad sorry.

Thanks

Eric
>          goto error;
>      }
>  
> @@ -3304,7 +3266,6 @@ error:
>  static void vfio_instance_finalize(Object *obj)
>  {
>      VFIOPCIDevice *vdev = VFIO_PCI(obj);
> -    VFIOGroup *group = vdev->vbasedev.group;
>  
>      vfio_display_finalize(vdev);
>      vfio_bars_finalize(vdev);
> @@ -3318,7 +3279,6 @@ static void vfio_instance_finalize(Object *obj)
>       * g_free(vdev->igd_opregion);
>       */
>      vfio_put_device(vdev);
> -    vfio_put_group(group);
>  }
>  
>  static void vfio_exitfn(PCIDevice *pdev)
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index e64ca4a019..e710026a73 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -37,7 +37,7 @@ vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int
>  vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s"
>  vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n  size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
>  vfio_populate_device_get_irq_info_failure(const char *errstr) "VFIO_DEVICE_GET_IRQ_INFO failure: %s"
> -vfio_realize(const char *name, int group_id) " (%s) group %d"
> +vfio_attach_device(const char *name, int group_id) " (%s) group %d"
>  vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d"
>  vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x"
>  vfio_pci_reset(const char *name) " (%s)"
Duan, Zhenzhong Sept. 27, 2023, 9:58 a.m. UTC | #3
>-----Original Message-----
>From: Eric Auger <eric.auger@redhat.com>
>Sent: Wednesday, September 27, 2023 3:36 PM
>Subject: Re: [PATCH v2 06/12] vfio/pci: Introduce vfio_[attach/detach]_device
>
>Hi Zhenzhong,
>
>On 9/26/23 13:32, Zhenzhong Duan wrote:
>> From: Eric Auger <eric.auger@redhat.com>
>>
>> We want the VFIO devices to be able to use two different
>> IOMMU backends, the legacy VFIO one and the new iommufd one.
>>
>> Introduce vfio_[attach/detach]_device which aim at hiding the
>> underlying IOMMU backend (IOCTLs, datatypes, ...).
>>
>> Once vfio_attach_device completes, the device is attached
>> to a security context and its fd can be used. Conversely
>> When vfio_detach_device completes, the device has been
>> detached from the security context.
>>
>> At the moment only the implementation based on the legacy
>> container/group exists. Let's use it from the vfio-pci device.
>> Subsequent patches will handle other devices.
>
>you may add: no functional change intended

Will do.

>>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>  include/hw/vfio/vfio-common.h |  3 ++
>>  hw/vfio/common.c              | 68 +++++++++++++++++++++++++++++++++++
>>  hw/vfio/pci.c                 | 50 +++-----------------------
>>  hw/vfio/trace-events          |  2 +-
>>  4 files changed, 77 insertions(+), 46 deletions(-)
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index c4e7c3b4a7..12fbfbc37d 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -225,6 +225,9 @@ void vfio_put_group(VFIOGroup *group);
>>  struct vfio_device_info *vfio_get_device_info(int fd);
>>  int vfio_get_device(VFIOGroup *group, const char *name,
>>                      VFIODevice *vbasedev, Error **errp);
>> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
>> +                       AddressSpace *as, Error **errp);
>> +void vfio_detach_device(VFIODevice *vbasedev);
>>
>>  int vfio_kvm_device_add_fd(int fd, Error **errp);
>>  int vfio_kvm_device_del_fd(int fd, Error **errp);
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 959b1362bb..7f3798b152 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -2611,3 +2611,71 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
>>      }
>>      return vfio_eeh_container_op(container, op);
>>  }
>> +
>> +static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
>> +{
>> +    char *tmp, group_path[PATH_MAX], *group_name;
>> +    int ret, groupid;
>> +    ssize_t len;
>> +
>> +    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
>> +    len = readlink(tmp, group_path, sizeof(group_path));
>> +    g_free(tmp);
>> +
>> +    if (len <= 0 || len >= sizeof(group_path)) {
>> +        ret = len < 0 ? -errno : -ENAMETOOLONG;
>> +        error_setg_errno(errp, -ret, "no iommu_group found");
>> +        return ret;
>> +    }
>> +
>> +    group_path[len] = 0;
>> +
>> +    group_name = basename(group_path);
>> +    if (sscanf(group_name, "%d", &groupid) != 1) {
>> +        error_setg_errno(errp, errno, "failed to read %s", group_path);
>> +        return -errno;
>> +    }
>> +    return groupid;
>> +}
>> +
>> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
>> +                       AddressSpace *as, Error **errp)
>> +{
>> +    int groupid = vfio_device_groupid(vbasedev, errp);
>> +    VFIODevice *vbasedev_iter;
>> +    VFIOGroup *group;
>> +    int ret;
>> +
>> +    if (groupid < 0) {
>> +        return groupid;
>> +    }
>> +
>> +    trace_vfio_attach_device(vbasedev->name, groupid);
>hum looking at that again, I was confused by the fact we passed the name
>arg in
>
>vfio_attach_device() whereas vbasedev->name was already filled. Looking at pci
>vfio_realize()
>both are sometimes different
>
>    if (!qemu_uuid_is_null(&vdev->vf_token)) {
>        qemu_uuid_unparse(&vdev->vf_token, uuid);
>        name = g_strdup_printf("%s vf_token=%s", vbasedev->name, uuid);
>    } else {
>        name = g_strdup(vbasedev->name);
>    }
>This may be worth a doc comment.

Yes, agree this is confusing. Just want to ask about the doc comment?
Should I create a vfio doc or just a small comment on call site of vfio_attach_device()?

Thanks
Zhenzhong
Eric Auger Sept. 27, 2023, 10:02 a.m. UTC | #4
On 9/27/23 11:58, Duan, Zhenzhong wrote:
>
>> -----Original Message-----
>> From: Eric Auger <eric.auger@redhat.com>
>> Sent: Wednesday, September 27, 2023 3:36 PM
>> Subject: Re: [PATCH v2 06/12] vfio/pci: Introduce vfio_[attach/detach]_device
>>
>> Hi Zhenzhong,
>>
>> On 9/26/23 13:32, Zhenzhong Duan wrote:
>>> From: Eric Auger <eric.auger@redhat.com>
>>>
>>> We want the VFIO devices to be able to use two different
>>> IOMMU backends, the legacy VFIO one and the new iommufd one.
>>>
>>> Introduce vfio_[attach/detach]_device which aim at hiding the
>>> underlying IOMMU backend (IOCTLs, datatypes, ...).
>>>
>>> Once vfio_attach_device completes, the device is attached
>>> to a security context and its fd can be used. Conversely
>>> When vfio_detach_device completes, the device has been
>>> detached from the security context.
>>>
>>> At the moment only the implementation based on the legacy
>>> container/group exists. Let's use it from the vfio-pci device.
>>> Subsequent patches will handle other devices.
>> you may add: no functional change intended
> Will do.
>
>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>>  include/hw/vfio/vfio-common.h |  3 ++
>>>  hw/vfio/common.c              | 68 +++++++++++++++++++++++++++++++++++
>>>  hw/vfio/pci.c                 | 50 +++-----------------------
>>>  hw/vfio/trace-events          |  2 +-
>>>  4 files changed, 77 insertions(+), 46 deletions(-)
>>>
>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>> index c4e7c3b4a7..12fbfbc37d 100644
>>> --- a/include/hw/vfio/vfio-common.h
>>> +++ b/include/hw/vfio/vfio-common.h
>>> @@ -225,6 +225,9 @@ void vfio_put_group(VFIOGroup *group);
>>>  struct vfio_device_info *vfio_get_device_info(int fd);
>>>  int vfio_get_device(VFIOGroup *group, const char *name,
>>>                      VFIODevice *vbasedev, Error **errp);
>>> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
>>> +                       AddressSpace *as, Error **errp);
>>> +void vfio_detach_device(VFIODevice *vbasedev);
>>>
>>>  int vfio_kvm_device_add_fd(int fd, Error **errp);
>>>  int vfio_kvm_device_del_fd(int fd, Error **errp);
>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>> index 959b1362bb..7f3798b152 100644
>>> --- a/hw/vfio/common.c
>>> +++ b/hw/vfio/common.c
>>> @@ -2611,3 +2611,71 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
>>>      }
>>>      return vfio_eeh_container_op(container, op);
>>>  }
>>> +
>>> +static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
>>> +{
>>> +    char *tmp, group_path[PATH_MAX], *group_name;
>>> +    int ret, groupid;
>>> +    ssize_t len;
>>> +
>>> +    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
>>> +    len = readlink(tmp, group_path, sizeof(group_path));
>>> +    g_free(tmp);
>>> +
>>> +    if (len <= 0 || len >= sizeof(group_path)) {
>>> +        ret = len < 0 ? -errno : -ENAMETOOLONG;
>>> +        error_setg_errno(errp, -ret, "no iommu_group found");
>>> +        return ret;
>>> +    }
>>> +
>>> +    group_path[len] = 0;
>>> +
>>> +    group_name = basename(group_path);
>>> +    if (sscanf(group_name, "%d", &groupid) != 1) {
>>> +        error_setg_errno(errp, errno, "failed to read %s", group_path);
>>> +        return -errno;
>>> +    }
>>> +    return groupid;
>>> +}
>>> +
>>> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
>>> +                       AddressSpace *as, Error **errp)
>>> +{
>>> +    int groupid = vfio_device_groupid(vbasedev, errp);
>>> +    VFIODevice *vbasedev_iter;
>>> +    VFIOGroup *group;
>>> +    int ret;
>>> +
>>> +    if (groupid < 0) {
>>> +        return groupid;
>>> +    }
>>> +
>>> +    trace_vfio_attach_device(vbasedev->name, groupid);
>> hum looking at that again, I was confused by the fact we passed the name
>> arg in
>>
>> vfio_attach_device() whereas vbasedev->name was already filled. Looking at pci
>> vfio_realize()
>> both are sometimes different
>>
>>    if (!qemu_uuid_is_null(&vdev->vf_token)) {
>>        qemu_uuid_unparse(&vdev->vf_token, uuid);
>>        name = g_strdup_printf("%s vf_token=%s", vbasedev->name, uuid);
>>    } else {
>>        name = g_strdup(vbasedev->name);
>>    }
>> This may be worth a doc comment.
> Yes, agree this is confusing. Just want to ask about the doc comment?
> Should I create a vfio doc or just a small comment on call site of vfio_attach_device()?
I meant a comment associated to this vfio_attach_device helper
indicating what is the purpose/semantic of name versus vbasedev->name

See my last comment on the ccw conversion which is even more confusing now to me :-(

Eric

>
> Thanks
> Zhenzhong
Duan, Zhenzhong Sept. 27, 2023, 10:07 a.m. UTC | #5
Hi Eric,

>-----Original Message-----
>From: Eric Auger <eric.auger@redhat.com>
>Sent: Wednesday, September 27, 2023 5:02 PM
>Subject: Re: [PATCH v2 06/12] vfio/pci: Introduce vfio_[attach/detach]_device
>
>Hi Zhenzhong,
>
>On 9/26/23 13:32, Zhenzhong Duan wrote:
>> From: Eric Auger <eric.auger@redhat.com>
>>
>> We want the VFIO devices to be able to use two different
>> IOMMU backends, the legacy VFIO one and the new iommufd one.
>>
>> Introduce vfio_[attach/detach]_device which aim at hiding the
>> underlying IOMMU backend (IOCTLs, datatypes, ...).
>>
>> Once vfio_attach_device completes, the device is attached
>> to a security context and its fd can be used. Conversely
>> When vfio_detach_device completes, the device has been
>> detached from the security context.
>>
>> At the moment only the implementation based on the legacy
>> container/group exists. Let's use it from the vfio-pci device.
>> Subsequent patches will handle other devices.
>>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>  include/hw/vfio/vfio-common.h |  3 ++
>>  hw/vfio/common.c              | 68 +++++++++++++++++++++++++++++++++++
>>  hw/vfio/pci.c                 | 50 +++-----------------------
>>  hw/vfio/trace-events          |  2 +-
>>  4 files changed, 77 insertions(+), 46 deletions(-)
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index c4e7c3b4a7..12fbfbc37d 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -225,6 +225,9 @@ void vfio_put_group(VFIOGroup *group);
>>  struct vfio_device_info *vfio_get_device_info(int fd);
>>  int vfio_get_device(VFIOGroup *group, const char *name,
>>                      VFIODevice *vbasedev, Error **errp);
>> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
>> +                       AddressSpace *as, Error **errp);
>> +void vfio_detach_device(VFIODevice *vbasedev);
>>
>>  int vfio_kvm_device_add_fd(int fd, Error **errp);
>>  int vfio_kvm_device_del_fd(int fd, Error **errp);
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 959b1362bb..7f3798b152 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -2611,3 +2611,71 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
>>      }
>>      return vfio_eeh_container_op(container, op);
>>  }
>> +
>> +static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
>> +{
>> +    char *tmp, group_path[PATH_MAX], *group_name;
>> +    int ret, groupid;
>> +    ssize_t len;
>> +
>> +    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
>> +    len = readlink(tmp, group_path, sizeof(group_path));
>> +    g_free(tmp);
>> +
>> +    if (len <= 0 || len >= sizeof(group_path)) {
>> +        ret = len < 0 ? -errno : -ENAMETOOLONG;
>> +        error_setg_errno(errp, -ret, "no iommu_group found");
>> +        return ret;
>> +    }
>> +
>> +    group_path[len] = 0;
>> +
>> +    group_name = basename(group_path);
>> +    if (sscanf(group_name, "%d", &groupid) != 1) {
>> +        error_setg_errno(errp, errno, "failed to read %s", group_path);
>> +        return -errno;
>> +    }
>> +    return groupid;
>> +}
>> +
>> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
>> +                       AddressSpace *as, Error **errp)
>> +{
>> +    int groupid = vfio_device_groupid(vbasedev, errp);
>> +    VFIODevice *vbasedev_iter;
>> +    VFIOGroup *group;
>> +    int ret;
>> +
>> +    if (groupid < 0) {
>> +        return groupid;
>> +    }
>> +
>> +    trace_vfio_attach_device(vbasedev->name, groupid);
>> +
>> +    group = vfio_get_group(groupid, as, errp);
>> +    if (!group) {
>> +        return -ENOENT;
>> +    }
>> +
>> +    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>> +        if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
>> +            error_setg(errp, "device is already attached");
>> +            vfio_put_group(group);
>> +            return -EBUSY;
>> +        }
>> +    }
>> +    ret = vfio_get_device(group, name, vbasedev, errp);
>> +    if (ret) {
>> +        vfio_put_group(group);
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +void vfio_detach_device(VFIODevice *vbasedev)
>> +{
>> +    VFIOGroup *group = vbasedev->group;
>> +
>> +    vfio_put_base_device(vbasedev);
>> +    vfio_put_group(group);
>> +}
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 3b2ca3c24c..fe56789893 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -2828,10 +2828,10 @@ static void vfio_populate_device(VFIOPCIDevice
>*vdev, Error **errp)
>>
>>  static void vfio_put_device(VFIOPCIDevice *vdev)
>>  {
>> +    vfio_detach_device(&vdev->vbasedev);
>> +
>>      g_free(vdev->vbasedev.name);
>>      g_free(vdev->msix);
>> -
>> -    vfio_put_base_device(&vdev->vbasedev);
>>  }
>>
>>  static void vfio_err_notifier_handler(void *opaque)
>> @@ -2978,13 +2978,9 @@ static void vfio_realize(PCIDevice *pdev, Error
>**errp)
>>  {
>>      VFIOPCIDevice *vdev = VFIO_PCI(pdev);
>>      VFIODevice *vbasedev = &vdev->vbasedev;
>> -    VFIODevice *vbasedev_iter;
>> -    VFIOGroup *group;
>> -    char *tmp, *subsys, group_path[PATH_MAX], *group_name;
>> +    char *tmp, *subsys;
>>      Error *err = NULL;
>> -    ssize_t len;
>>      struct stat st;
>> -    int groupid;
>>      int i, ret;
>>      bool is_mdev;
>>      char uuid[UUID_FMT_LEN];
>> @@ -3015,39 +3011,6 @@ static void vfio_realize(PCIDevice *pdev, Error
>**errp)
>>      vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>>      vbasedev->dev = DEVICE(vdev);
>>
>> -    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
>> -    len = readlink(tmp, group_path, sizeof(group_path));
>> -    g_free(tmp);
>> -
>> -    if (len <= 0 || len >= sizeof(group_path)) {
>> -        error_setg_errno(errp, len < 0 ? errno : ENAMETOOLONG,
>> -                         "no iommu_group found");
>> -        goto error;
>> -    }
>> -
>> -    group_path[len] = 0;
>> -
>> -    group_name = basename(group_path);
>> -    if (sscanf(group_name, "%d", &groupid) != 1) {
>> -        error_setg_errno(errp, errno, "failed to read %s", group_path);
>> -        goto error;
>> -    }
>> -
>> -    trace_vfio_realize(vbasedev->name, groupid);
>> -
>> -    group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev),
>errp);
>> -    if (!group) {
>> -        goto error;
>> -    }
>> -
>> -    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>> -        if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
>> -            error_setg(errp, "device is already attached");
>> -            vfio_put_group(group);
>> -            goto error;
>> -        }
>> -    }
>> -
>>      /*
>>       * Mediated devices *might* operate compatibly with discarding of RAM, but
>>       * we cannot know for certain, it depends on whether the mdev vendor
>driver
>> @@ -3065,7 +3028,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>>      if (vbasedev->ram_block_discard_allowed && !is_mdev) {
>>          error_setg(errp, "x-balloon-allowed only potentially compatible "
>>                     "with mdev devices");
>> -        vfio_put_group(group);
>>          goto error;
>>      }
>>
>> @@ -3076,10 +3038,10 @@ static void vfio_realize(PCIDevice *pdev, Error
>**errp)
>>          name = g_strdup(vbasedev->name);
>>      }
>>
>> -    ret = vfio_get_device(group, name, vbasedev, errp);
>> +    ret = vfio_attach_device(name, vbasedev,
>> +                             pci_device_iommu_address_space(pdev), errp);
>>      g_free(name);
>>      if (ret) {
>> -        vfio_put_group(group);
>independently on this patch, I think in case of error we leak
>vbasedev->name. Please have a look and if confirmed you can send a
>separate patch.

In case of error, vbasedev->name is freed in vfio_put_device().

>
>Also I think if any subsequent action fail we shoudl properly detach the
>detach the device so introduce an extra error goto label, my bad sorry.

vfio_detach_device is called in vfio_instance_finalize(), this is just to follow
the old code, group and container resource allocated in vfio_realize() are
freed in vfio_instance_finalize().

I agree this is strange, but I guess there may be some reason I'm unclear.

Thanks
Zhenzhong
Cédric Le Goater Sept. 27, 2023, 10:16 a.m. UTC | #6
On 9/26/23 13:32, Zhenzhong Duan wrote:
> From: Eric Auger <eric.auger@redhat.com>
> 
> We want the VFIO devices to be able to use two different
> IOMMU backends, the legacy VFIO one and the new iommufd one.
> 
> Introduce vfio_[attach/detach]_device which aim at hiding the
> underlying IOMMU backend (IOCTLs, datatypes, ...).
> 
> Once vfio_attach_device completes, the device is attached
> to a security context and its fd can be used. Conversely
> When vfio_detach_device completes, the device has been
> detached from the security context.
> 
> At the moment only the implementation based on the legacy
> container/group exists. Let's use it from the vfio-pci device.
> Subsequent patches will handle other devices.
> 
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>   include/hw/vfio/vfio-common.h |  3 ++
>   hw/vfio/common.c              | 68 +++++++++++++++++++++++++++++++++++
>   hw/vfio/pci.c                 | 50 +++-----------------------
>   hw/vfio/trace-events          |  2 +-
>   4 files changed, 77 insertions(+), 46 deletions(-)
> 
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index c4e7c3b4a7..12fbfbc37d 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -225,6 +225,9 @@ void vfio_put_group(VFIOGroup *group);
>   struct vfio_device_info *vfio_get_device_info(int fd);
>   int vfio_get_device(VFIOGroup *group, const char *name,
>                       VFIODevice *vbasedev, Error **errp);
> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
> +                       AddressSpace *as, Error **errp);
> +void vfio_detach_device(VFIODevice *vbasedev);
>   
>   int vfio_kvm_device_add_fd(int fd, Error **errp);
>   int vfio_kvm_device_del_fd(int fd, Error **errp);
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 959b1362bb..7f3798b152 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -2611,3 +2611,71 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
>       }
>       return vfio_eeh_container_op(container, op);
>   }
> +
> +static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
> +{
> +    char *tmp, group_path[PATH_MAX], *group_name;
> +    int ret, groupid;
> +    ssize_t len;
> +
> +    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
> +    len = readlink(tmp, group_path, sizeof(group_path));
> +    g_free(tmp);
> +
> +    if (len <= 0 || len >= sizeof(group_path)) {
> +        ret = len < 0 ? -errno : -ENAMETOOLONG;
> +        error_setg_errno(errp, -ret, "no iommu_group found");
> +        return ret;
> +    }
> +
> +    group_path[len] = 0;
> +
> +    group_name = basename(group_path);
> +    if (sscanf(group_name, "%d", &groupid) != 1) {
> +        error_setg_errno(errp, errno, "failed to read %s", group_path);
> +        return -errno;
> +    }
> +    return groupid;
> +}
> +
> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
> +                       AddressSpace *as, Error **errp)
> +{
> +    int groupid = vfio_device_groupid(vbasedev, errp);
> +    VFIODevice *vbasedev_iter;
> +    VFIOGroup *group;
> +    int ret;
> +
> +    if (groupid < 0) {
> +        return groupid;
> +    }
> +
> +    trace_vfio_attach_device(vbasedev->name, groupid);
> +
> +    group = vfio_get_group(groupid, as, errp);
> +    if (!group) {
> +        return -ENOENT;
> +    }
> +
> +    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> +        if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
> +            error_setg(errp, "device is already attached");
> +            vfio_put_group(group);
> +            return -EBUSY;
> +        }
> +    }
> +    ret = vfio_get_device(group, name, vbasedev, errp);
> +    if (ret) {
> +        vfio_put_group(group);
> +    }
> +
> +    return ret;
> +}
> +
> +void vfio_detach_device(VFIODevice *vbasedev)
> +{
> +    VFIOGroup *group = vbasedev->group;
> +
> +    vfio_put_base_device(vbasedev);
> +    vfio_put_group(group);
> +}
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 3b2ca3c24c..fe56789893 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -2828,10 +2828,10 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
>   
>   static void vfio_put_device(VFIOPCIDevice *vdev)
>   {
> +    vfio_detach_device(&vdev->vbasedev);
> +
>       g_free(vdev->vbasedev.name);
>       g_free(vdev->msix);
> -
> -    vfio_put_base_device(&vdev->vbasedev);
>   }
>   
>   static void vfio_err_notifier_handler(void *opaque)
> @@ -2978,13 +2978,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>   {
>       VFIOPCIDevice *vdev = VFIO_PCI(pdev);
>       VFIODevice *vbasedev = &vdev->vbasedev;
> -    VFIODevice *vbasedev_iter;
> -    VFIOGroup *group;
> -    char *tmp, *subsys, group_path[PATH_MAX], *group_name;
> +    char *tmp, *subsys;
>       Error *err = NULL;
> -    ssize_t len;
>       struct stat st;
> -    int groupid;
>       int i, ret;
>       bool is_mdev;
>       char uuid[UUID_FMT_LEN];
> @@ -3015,39 +3011,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>       vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>       vbasedev->dev = DEVICE(vdev);
>   
> -    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
> -    len = readlink(tmp, group_path, sizeof(group_path));
> -    g_free(tmp);
> -
> -    if (len <= 0 || len >= sizeof(group_path)) {
> -        error_setg_errno(errp, len < 0 ? errno : ENAMETOOLONG,
> -                         "no iommu_group found");
> -        goto error;
> -    }
> -
> -    group_path[len] = 0;
> -
> -    group_name = basename(group_path);
> -    if (sscanf(group_name, "%d", &groupid) != 1) {
> -        error_setg_errno(errp, errno, "failed to read %s", group_path);
> -        goto error;
> -    }
> -
> -    trace_vfio_realize(vbasedev->name, groupid);
> -
> -    group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev), errp);
> -    if (!group) {
> -        goto error;
> -    }
> -
> -    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> -        if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
> -            error_setg(errp, "device is already attached");
> -            vfio_put_group(group);
> -            goto error;
> -        }
> -    }
> -
>       /*
>        * Mediated devices *might* operate compatibly with discarding of RAM, but
>        * we cannot know for certain, it depends on whether the mdev vendor driver
> @@ -3065,7 +3028,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>       if (vbasedev->ram_block_discard_allowed && !is_mdev) {
>           error_setg(errp, "x-balloon-allowed only potentially compatible "
>                      "with mdev devices");
> -        vfio_put_group(group);
>           goto error;
>       }
>   
> @@ -3076,10 +3038,10 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>           name = g_strdup(vbasedev->name);
>       }
>   
> -    ret = vfio_get_device(group, name, vbasedev, errp);
> +    ret = vfio_attach_device(name, vbasedev,
> +                             pci_device_iommu_address_space(pdev), errp);
>       g_free(name);
>       if (ret) {
> -        vfio_put_group(group);
>           goto error;
>       }
>   
> @@ -3304,7 +3266,6 @@ error:
>   static void vfio_instance_finalize(Object *obj)
>   {
>       VFIOPCIDevice *vdev = VFIO_PCI(obj);
> -    VFIOGroup *group = vdev->vbasedev.group;
>   
>       vfio_display_finalize(vdev);
>       vfio_bars_finalize(vdev);
> @@ -3318,7 +3279,6 @@ static void vfio_instance_finalize(Object *obj)
>        * g_free(vdev->igd_opregion);
>        */
>       vfio_put_device(vdev);

Please rebase v2 on vfio-next. I merged your patch since :)

Thanks,

C.



> -    vfio_put_group(group);
>   }
>   
>   static void vfio_exitfn(PCIDevice *pdev)
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index e64ca4a019..e710026a73 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -37,7 +37,7 @@ vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int
>   vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s"
>   vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n  size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
>   vfio_populate_device_get_irq_info_failure(const char *errstr) "VFIO_DEVICE_GET_IRQ_INFO failure: %s"
> -vfio_realize(const char *name, int group_id) " (%s) group %d"
> +vfio_attach_device(const char *name, int group_id) " (%s) group %d"
>   vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d"
>   vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x"
>   vfio_pci_reset(const char *name) " (%s)"
Duan, Zhenzhong Sept. 27, 2023, 12:15 p.m. UTC | #7
>-----Original Message-----
>From: Cédric Le Goater <clg@redhat.com>
>Sent: Wednesday, September 27, 2023 6:17 PM
>Subject: Re: [PATCH v2 06/12] vfio/pci: Introduce vfio_[attach/detach]_device
>
>On 9/26/23 13:32, Zhenzhong Duan wrote:
>> From: Eric Auger <eric.auger@redhat.com>
>>
>> We want the VFIO devices to be able to use two different
>> IOMMU backends, the legacy VFIO one and the new iommufd one.
>>
>> Introduce vfio_[attach/detach]_device which aim at hiding the
>> underlying IOMMU backend (IOCTLs, datatypes, ...).
>>
>> Once vfio_attach_device completes, the device is attached
>> to a security context and its fd can be used. Conversely
>> When vfio_detach_device completes, the device has been
>> detached from the security context.
>>
>> At the moment only the implementation based on the legacy
>> container/group exists. Let's use it from the vfio-pci device.
>> Subsequent patches will handle other devices.
>>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>   include/hw/vfio/vfio-common.h |  3 ++
>>   hw/vfio/common.c              | 68 +++++++++++++++++++++++++++++++++++
>>   hw/vfio/pci.c                 | 50 +++-----------------------
>>   hw/vfio/trace-events          |  2 +-
>>   4 files changed, 77 insertions(+), 46 deletions(-)
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index c4e7c3b4a7..12fbfbc37d 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -225,6 +225,9 @@ void vfio_put_group(VFIOGroup *group);
>>   struct vfio_device_info *vfio_get_device_info(int fd);
>>   int vfio_get_device(VFIOGroup *group, const char *name,
>>                       VFIODevice *vbasedev, Error **errp);
>> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
>> +                       AddressSpace *as, Error **errp);
>> +void vfio_detach_device(VFIODevice *vbasedev);
>>
>>   int vfio_kvm_device_add_fd(int fd, Error **errp);
>>   int vfio_kvm_device_del_fd(int fd, Error **errp);
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 959b1362bb..7f3798b152 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -2611,3 +2611,71 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
>>       }
>>       return vfio_eeh_container_op(container, op);
>>   }
>> +
>> +static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
>> +{
>> +    char *tmp, group_path[PATH_MAX], *group_name;
>> +    int ret, groupid;
>> +    ssize_t len;
>> +
>> +    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
>> +    len = readlink(tmp, group_path, sizeof(group_path));
>> +    g_free(tmp);
>> +
>> +    if (len <= 0 || len >= sizeof(group_path)) {
>> +        ret = len < 0 ? -errno : -ENAMETOOLONG;
>> +        error_setg_errno(errp, -ret, "no iommu_group found");
>> +        return ret;
>> +    }
>> +
>> +    group_path[len] = 0;
>> +
>> +    group_name = basename(group_path);
>> +    if (sscanf(group_name, "%d", &groupid) != 1) {
>> +        error_setg_errno(errp, errno, "failed to read %s", group_path);
>> +        return -errno;
>> +    }
>> +    return groupid;
>> +}
>> +
>> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
>> +                       AddressSpace *as, Error **errp)
>> +{
>> +    int groupid = vfio_device_groupid(vbasedev, errp);
>> +    VFIODevice *vbasedev_iter;
>> +    VFIOGroup *group;
>> +    int ret;
>> +
>> +    if (groupid < 0) {
>> +        return groupid;
>> +    }
>> +
>> +    trace_vfio_attach_device(vbasedev->name, groupid);
>> +
>> +    group = vfio_get_group(groupid, as, errp);
>> +    if (!group) {
>> +        return -ENOENT;
>> +    }
>> +
>> +    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>> +        if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
>> +            error_setg(errp, "device is already attached");
>> +            vfio_put_group(group);
>> +            return -EBUSY;
>> +        }
>> +    }
>> +    ret = vfio_get_device(group, name, vbasedev, errp);
>> +    if (ret) {
>> +        vfio_put_group(group);
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +void vfio_detach_device(VFIODevice *vbasedev)
>> +{
>> +    VFIOGroup *group = vbasedev->group;
>> +
>> +    vfio_put_base_device(vbasedev);
>> +    vfio_put_group(group);
>> +}
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 3b2ca3c24c..fe56789893 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -2828,10 +2828,10 @@ static void vfio_populate_device(VFIOPCIDevice
>*vdev, Error **errp)
>>
>>   static void vfio_put_device(VFIOPCIDevice *vdev)
>>   {
>> +    vfio_detach_device(&vdev->vbasedev);
>> +
>>       g_free(vdev->vbasedev.name);
>>       g_free(vdev->msix);
>> -
>> -    vfio_put_base_device(&vdev->vbasedev);
>>   }
>>
>>   static void vfio_err_notifier_handler(void *opaque)
>> @@ -2978,13 +2978,9 @@ static void vfio_realize(PCIDevice *pdev, Error
>**errp)
>>   {
>>       VFIOPCIDevice *vdev = VFIO_PCI(pdev);
>>       VFIODevice *vbasedev = &vdev->vbasedev;
>> -    VFIODevice *vbasedev_iter;
>> -    VFIOGroup *group;
>> -    char *tmp, *subsys, group_path[PATH_MAX], *group_name;
>> +    char *tmp, *subsys;
>>       Error *err = NULL;
>> -    ssize_t len;
>>       struct stat st;
>> -    int groupid;
>>       int i, ret;
>>       bool is_mdev;
>>       char uuid[UUID_FMT_LEN];
>> @@ -3015,39 +3011,6 @@ static void vfio_realize(PCIDevice *pdev, Error
>**errp)
>>       vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>>       vbasedev->dev = DEVICE(vdev);
>>
>> -    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
>> -    len = readlink(tmp, group_path, sizeof(group_path));
>> -    g_free(tmp);
>> -
>> -    if (len <= 0 || len >= sizeof(group_path)) {
>> -        error_setg_errno(errp, len < 0 ? errno : ENAMETOOLONG,
>> -                         "no iommu_group found");
>> -        goto error;
>> -    }
>> -
>> -    group_path[len] = 0;
>> -
>> -    group_name = basename(group_path);
>> -    if (sscanf(group_name, "%d", &groupid) != 1) {
>> -        error_setg_errno(errp, errno, "failed to read %s", group_path);
>> -        goto error;
>> -    }
>> -
>> -    trace_vfio_realize(vbasedev->name, groupid);
>> -
>> -    group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev),
>errp);
>> -    if (!group) {
>> -        goto error;
>> -    }
>> -
>> -    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>> -        if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
>> -            error_setg(errp, "device is already attached");
>> -            vfio_put_group(group);
>> -            goto error;
>> -        }
>> -    }
>> -
>>       /*
>>        * Mediated devices *might* operate compatibly with discarding of RAM,
>but
>>        * we cannot know for certain, it depends on whether the mdev vendor
>driver
>> @@ -3065,7 +3028,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>>       if (vbasedev->ram_block_discard_allowed && !is_mdev) {
>>           error_setg(errp, "x-balloon-allowed only potentially compatible "
>>                      "with mdev devices");
>> -        vfio_put_group(group);
>>           goto error;
>>       }
>>
>> @@ -3076,10 +3038,10 @@ static void vfio_realize(PCIDevice *pdev, Error
>**errp)
>>           name = g_strdup(vbasedev->name);
>>       }
>>
>> -    ret = vfio_get_device(group, name, vbasedev, errp);
>> +    ret = vfio_attach_device(name, vbasedev,
>> +                             pci_device_iommu_address_space(pdev), errp);
>>       g_free(name);
>>       if (ret) {
>> -        vfio_put_group(group);
>>           goto error;
>>       }
>>
>> @@ -3304,7 +3266,6 @@ error:
>>   static void vfio_instance_finalize(Object *obj)
>>   {
>>       VFIOPCIDevice *vdev = VFIO_PCI(obj);
>> -    VFIOGroup *group = vdev->vbasedev.group;
>>
>>       vfio_display_finalize(vdev);
>>       vfio_bars_finalize(vdev);
>> @@ -3318,7 +3279,6 @@ static void vfio_instance_finalize(Object *obj)
>>        * g_free(vdev->igd_opregion);
>>        */
>>       vfio_put_device(vdev);
>
>Please rebase v2 on vfio-next. I merged your patch since :)

Sure, will do. Maybe will be a little late. We will have national holiday between Sep 29 and Oct 6.

Thanks
Zhenzhong
Eric Auger Sept. 27, 2023, 12:22 p.m. UTC | #8
On 9/27/23 12:07, Duan, Zhenzhong wrote:
> Hi Eric,
>
>> -----Original Message-----
>> From: Eric Auger <eric.auger@redhat.com>
>> Sent: Wednesday, September 27, 2023 5:02 PM
>> Subject: Re: [PATCH v2 06/12] vfio/pci: Introduce vfio_[attach/detach]_device
>>
>> Hi Zhenzhong,
>>
>> On 9/26/23 13:32, Zhenzhong Duan wrote:
>>> From: Eric Auger <eric.auger@redhat.com>
>>>
>>> We want the VFIO devices to be able to use two different
>>> IOMMU backends, the legacy VFIO one and the new iommufd one.
>>>
>>> Introduce vfio_[attach/detach]_device which aim at hiding the
>>> underlying IOMMU backend (IOCTLs, datatypes, ...).
>>>
>>> Once vfio_attach_device completes, the device is attached
>>> to a security context and its fd can be used. Conversely
>>> When vfio_detach_device completes, the device has been
>>> detached from the security context.
>>>
>>> At the moment only the implementation based on the legacy
>>> container/group exists. Let's use it from the vfio-pci device.
>>> Subsequent patches will handle other devices.
>>>
>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>>  include/hw/vfio/vfio-common.h |  3 ++
>>>  hw/vfio/common.c              | 68 +++++++++++++++++++++++++++++++++++
>>>  hw/vfio/pci.c                 | 50 +++-----------------------
>>>  hw/vfio/trace-events          |  2 +-
>>>  4 files changed, 77 insertions(+), 46 deletions(-)
>>>
>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>> index c4e7c3b4a7..12fbfbc37d 100644
>>> --- a/include/hw/vfio/vfio-common.h
>>> +++ b/include/hw/vfio/vfio-common.h
>>> @@ -225,6 +225,9 @@ void vfio_put_group(VFIOGroup *group);
>>>  struct vfio_device_info *vfio_get_device_info(int fd);
>>>  int vfio_get_device(VFIOGroup *group, const char *name,
>>>                      VFIODevice *vbasedev, Error **errp);
>>> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
>>> +                       AddressSpace *as, Error **errp);
>>> +void vfio_detach_device(VFIODevice *vbasedev);
>>>
>>>  int vfio_kvm_device_add_fd(int fd, Error **errp);
>>>  int vfio_kvm_device_del_fd(int fd, Error **errp);
>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>> index 959b1362bb..7f3798b152 100644
>>> --- a/hw/vfio/common.c
>>> +++ b/hw/vfio/common.c
>>> @@ -2611,3 +2611,71 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
>>>      }
>>>      return vfio_eeh_container_op(container, op);
>>>  }
>>> +
>>> +static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
>>> +{
>>> +    char *tmp, group_path[PATH_MAX], *group_name;
>>> +    int ret, groupid;
>>> +    ssize_t len;
>>> +
>>> +    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
>>> +    len = readlink(tmp, group_path, sizeof(group_path));
>>> +    g_free(tmp);
>>> +
>>> +    if (len <= 0 || len >= sizeof(group_path)) {
>>> +        ret = len < 0 ? -errno : -ENAMETOOLONG;
>>> +        error_setg_errno(errp, -ret, "no iommu_group found");
>>> +        return ret;
>>> +    }
>>> +
>>> +    group_path[len] = 0;
>>> +
>>> +    group_name = basename(group_path);
>>> +    if (sscanf(group_name, "%d", &groupid) != 1) {
>>> +        error_setg_errno(errp, errno, "failed to read %s", group_path);
>>> +        return -errno;
>>> +    }
>>> +    return groupid;
>>> +}
>>> +
>>> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
>>> +                       AddressSpace *as, Error **errp)
>>> +{
>>> +    int groupid = vfio_device_groupid(vbasedev, errp);
>>> +    VFIODevice *vbasedev_iter;
>>> +    VFIOGroup *group;
>>> +    int ret;
>>> +
>>> +    if (groupid < 0) {
>>> +        return groupid;
>>> +    }
>>> +
>>> +    trace_vfio_attach_device(vbasedev->name, groupid);
>>> +
>>> +    group = vfio_get_group(groupid, as, errp);
>>> +    if (!group) {
>>> +        return -ENOENT;
>>> +    }
>>> +
>>> +    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>>> +        if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
>>> +            error_setg(errp, "device is already attached");
>>> +            vfio_put_group(group);
>>> +            return -EBUSY;
>>> +        }
>>> +    }
>>> +    ret = vfio_get_device(group, name, vbasedev, errp);
>>> +    if (ret) {
>>> +        vfio_put_group(group);
>>> +    }
>>> +
>>> +    return ret;
>>> +}
>>> +
>>> +void vfio_detach_device(VFIODevice *vbasedev)
>>> +{
>>> +    VFIOGroup *group = vbasedev->group;
>>> +
>>> +    vfio_put_base_device(vbasedev);
>>> +    vfio_put_group(group);
>>> +}
>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>> index 3b2ca3c24c..fe56789893 100644
>>> --- a/hw/vfio/pci.c
>>> +++ b/hw/vfio/pci.c
>>> @@ -2828,10 +2828,10 @@ static void vfio_populate_device(VFIOPCIDevice
>> *vdev, Error **errp)
>>>  static void vfio_put_device(VFIOPCIDevice *vdev)
>>>  {
>>> +    vfio_detach_device(&vdev->vbasedev);
>>> +
>>>      g_free(vdev->vbasedev.name);
>>>      g_free(vdev->msix);
>>> -
>>> -    vfio_put_base_device(&vdev->vbasedev);
>>>  }
>>>
>>>  static void vfio_err_notifier_handler(void *opaque)
>>> @@ -2978,13 +2978,9 @@ static void vfio_realize(PCIDevice *pdev, Error
>> **errp)
>>>  {
>>>      VFIOPCIDevice *vdev = VFIO_PCI(pdev);
>>>      VFIODevice *vbasedev = &vdev->vbasedev;
>>> -    VFIODevice *vbasedev_iter;
>>> -    VFIOGroup *group;
>>> -    char *tmp, *subsys, group_path[PATH_MAX], *group_name;
>>> +    char *tmp, *subsys;
>>>      Error *err = NULL;
>>> -    ssize_t len;
>>>      struct stat st;
>>> -    int groupid;
>>>      int i, ret;
>>>      bool is_mdev;
>>>      char uuid[UUID_FMT_LEN];
>>> @@ -3015,39 +3011,6 @@ static void vfio_realize(PCIDevice *pdev, Error
>> **errp)
>>>      vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>>>      vbasedev->dev = DEVICE(vdev);
>>>
>>> -    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
>>> -    len = readlink(tmp, group_path, sizeof(group_path));
>>> -    g_free(tmp);
>>> -
>>> -    if (len <= 0 || len >= sizeof(group_path)) {
>>> -        error_setg_errno(errp, len < 0 ? errno : ENAMETOOLONG,
>>> -                         "no iommu_group found");
>>> -        goto error;
>>> -    }
>>> -
>>> -    group_path[len] = 0;
>>> -
>>> -    group_name = basename(group_path);
>>> -    if (sscanf(group_name, "%d", &groupid) != 1) {
>>> -        error_setg_errno(errp, errno, "failed to read %s", group_path);
>>> -        goto error;
>>> -    }
>>> -
>>> -    trace_vfio_realize(vbasedev->name, groupid);
>>> -
>>> -    group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev),
>> errp);
>>> -    if (!group) {
>>> -        goto error;
>>> -    }
>>> -
>>> -    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>>> -        if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
>>> -            error_setg(errp, "device is already attached");
>>> -            vfio_put_group(group);
>>> -            goto error;
>>> -        }
>>> -    }
>>> -
>>>      /*
>>>       * Mediated devices *might* operate compatibly with discarding of RAM, but
>>>       * we cannot know for certain, it depends on whether the mdev vendor
>> driver
>>> @@ -3065,7 +3028,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>>>      if (vbasedev->ram_block_discard_allowed && !is_mdev) {
>>>          error_setg(errp, "x-balloon-allowed only potentially compatible "
>>>                     "with mdev devices");
>>> -        vfio_put_group(group);
>>>          goto error;
>>>      }
>>>
>>> @@ -3076,10 +3038,10 @@ static void vfio_realize(PCIDevice *pdev, Error
>> **errp)
>>>          name = g_strdup(vbasedev->name);
>>>      }
>>>
>>> -    ret = vfio_get_device(group, name, vbasedev, errp);
>>> +    ret = vfio_attach_device(name, vbasedev,
>>> +                             pci_device_iommu_address_space(pdev), errp);
>>>      g_free(name);
>>>      if (ret) {
>>> -        vfio_put_group(group);
>> independently on this patch, I think in case of error we leak
>> vbasedev->name. Please have a look and if confirmed you can send a
>> separate patch.
> In case of error, vbasedev->name is freed in vfio_put_device().
called in vfio_instance_finalize only. See comment below
>
>> Also I think if any subsequent action fail we shoudl properly detach the
>> detach the device so introduce an extra error goto label, my bad sorry.
> vfio_detach_device is called in vfio_instance_finalize(), this is just to follow
> the old code, group and container resource allocated in vfio_realize() are
> freed in vfio_instance_finalize().
>
> I agree this is strange, but I guess there may be some reason I'm unclear.
Yeah but if vfio_realize does fail, I am not sure the
vfio_instance_finalize gets called

Eric
>
> Thanks
> Zhenzhong
>
Duan, Zhenzhong Sept. 27, 2023, 12:32 p.m. UTC | #9
>-----Original Message-----
>From: Eric Auger <eric.auger@redhat.com>
>Sent: Wednesday, September 27, 2023 8:23 PM
>Subject: Re: [PATCH v2 06/12] vfio/pci: Introduce vfio_[attach/detach]_device
>
>
>
>On 9/27/23 12:07, Duan, Zhenzhong wrote:
>> Hi Eric,
>>
>>> -----Original Message-----
>>> From: Eric Auger <eric.auger@redhat.com>
>>> Sent: Wednesday, September 27, 2023 5:02 PM
>>> Subject: Re: [PATCH v2 06/12] vfio/pci: Introduce vfio_[attach/detach]_device
>>>
>>> Hi Zhenzhong,
>>>
>>> On 9/26/23 13:32, Zhenzhong Duan wrote:
>>>> From: Eric Auger <eric.auger@redhat.com>
>>>>
>>>> We want the VFIO devices to be able to use two different
>>>> IOMMU backends, the legacy VFIO one and the new iommufd one.
>>>>
>>>> Introduce vfio_[attach/detach]_device which aim at hiding the
>>>> underlying IOMMU backend (IOCTLs, datatypes, ...).
>>>>
>>>> Once vfio_attach_device completes, the device is attached
>>>> to a security context and its fd can be used. Conversely
>>>> When vfio_detach_device completes, the device has been
>>>> detached from the security context.
>>>>
>>>> At the moment only the implementation based on the legacy
>>>> container/group exists. Let's use it from the vfio-pci device.
>>>> Subsequent patches will handle other devices.
>>>>
>>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>> ---
>>>>  include/hw/vfio/vfio-common.h |  3 ++
>>>>  hw/vfio/common.c              | 68 +++++++++++++++++++++++++++++++++++
>>>>  hw/vfio/pci.c                 | 50 +++-----------------------
>>>>  hw/vfio/trace-events          |  2 +-
>>>>  4 files changed, 77 insertions(+), 46 deletions(-)
>>>>
>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-
>common.h
>>>> index c4e7c3b4a7..12fbfbc37d 100644
>>>> --- a/include/hw/vfio/vfio-common.h
>>>> +++ b/include/hw/vfio/vfio-common.h
>>>> @@ -225,6 +225,9 @@ void vfio_put_group(VFIOGroup *group);
>>>>  struct vfio_device_info *vfio_get_device_info(int fd);
>>>>  int vfio_get_device(VFIOGroup *group, const char *name,
>>>>                      VFIODevice *vbasedev, Error **errp);
>>>> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>> +                       AddressSpace *as, Error **errp);
>>>> +void vfio_detach_device(VFIODevice *vbasedev);
>>>>
>>>>  int vfio_kvm_device_add_fd(int fd, Error **errp);
>>>>  int vfio_kvm_device_del_fd(int fd, Error **errp);
>>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>>> index 959b1362bb..7f3798b152 100644
>>>> --- a/hw/vfio/common.c
>>>> +++ b/hw/vfio/common.c
>>>> @@ -2611,3 +2611,71 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t
>op)
>>>>      }
>>>>      return vfio_eeh_container_op(container, op);
>>>>  }
>>>> +
>>>> +static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
>>>> +{
>>>> +    char *tmp, group_path[PATH_MAX], *group_name;
>>>> +    int ret, groupid;
>>>> +    ssize_t len;
>>>> +
>>>> +    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
>>>> +    len = readlink(tmp, group_path, sizeof(group_path));
>>>> +    g_free(tmp);
>>>> +
>>>> +    if (len <= 0 || len >= sizeof(group_path)) {
>>>> +        ret = len < 0 ? -errno : -ENAMETOOLONG;
>>>> +        error_setg_errno(errp, -ret, "no iommu_group found");
>>>> +        return ret;
>>>> +    }
>>>> +
>>>> +    group_path[len] = 0;
>>>> +
>>>> +    group_name = basename(group_path);
>>>> +    if (sscanf(group_name, "%d", &groupid) != 1) {
>>>> +        error_setg_errno(errp, errno, "failed to read %s", group_path);
>>>> +        return -errno;
>>>> +    }
>>>> +    return groupid;
>>>> +}
>>>> +
>>>> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>> +                       AddressSpace *as, Error **errp)
>>>> +{
>>>> +    int groupid = vfio_device_groupid(vbasedev, errp);
>>>> +    VFIODevice *vbasedev_iter;
>>>> +    VFIOGroup *group;
>>>> +    int ret;
>>>> +
>>>> +    if (groupid < 0) {
>>>> +        return groupid;
>>>> +    }
>>>> +
>>>> +    trace_vfio_attach_device(vbasedev->name, groupid);
>>>> +
>>>> +    group = vfio_get_group(groupid, as, errp);
>>>> +    if (!group) {
>>>> +        return -ENOENT;
>>>> +    }
>>>> +
>>>> +    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>>>> +        if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
>>>> +            error_setg(errp, "device is already attached");
>>>> +            vfio_put_group(group);
>>>> +            return -EBUSY;
>>>> +        }
>>>> +    }
>>>> +    ret = vfio_get_device(group, name, vbasedev, errp);
>>>> +    if (ret) {
>>>> +        vfio_put_group(group);
>>>> +    }
>>>> +
>>>> +    return ret;
>>>> +}
>>>> +
>>>> +void vfio_detach_device(VFIODevice *vbasedev)
>>>> +{
>>>> +    VFIOGroup *group = vbasedev->group;
>>>> +
>>>> +    vfio_put_base_device(vbasedev);
>>>> +    vfio_put_group(group);
>>>> +}
>>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>>> index 3b2ca3c24c..fe56789893 100644
>>>> --- a/hw/vfio/pci.c
>>>> +++ b/hw/vfio/pci.c
>>>> @@ -2828,10 +2828,10 @@ static void vfio_populate_device(VFIOPCIDevice
>>> *vdev, Error **errp)
>>>>  static void vfio_put_device(VFIOPCIDevice *vdev)
>>>>  {
>>>> +    vfio_detach_device(&vdev->vbasedev);
>>>> +
>>>>      g_free(vdev->vbasedev.name);
>>>>      g_free(vdev->msix);
>>>> -
>>>> -    vfio_put_base_device(&vdev->vbasedev);
>>>>  }
>>>>
>>>>  static void vfio_err_notifier_handler(void *opaque)
>>>> @@ -2978,13 +2978,9 @@ static void vfio_realize(PCIDevice *pdev, Error
>>> **errp)
>>>>  {
>>>>      VFIOPCIDevice *vdev = VFIO_PCI(pdev);
>>>>      VFIODevice *vbasedev = &vdev->vbasedev;
>>>> -    VFIODevice *vbasedev_iter;
>>>> -    VFIOGroup *group;
>>>> -    char *tmp, *subsys, group_path[PATH_MAX], *group_name;
>>>> +    char *tmp, *subsys;
>>>>      Error *err = NULL;
>>>> -    ssize_t len;
>>>>      struct stat st;
>>>> -    int groupid;
>>>>      int i, ret;
>>>>      bool is_mdev;
>>>>      char uuid[UUID_FMT_LEN];
>>>> @@ -3015,39 +3011,6 @@ static void vfio_realize(PCIDevice *pdev, Error
>>> **errp)
>>>>      vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>>>>      vbasedev->dev = DEVICE(vdev);
>>>>
>>>> -    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
>>>> -    len = readlink(tmp, group_path, sizeof(group_path));
>>>> -    g_free(tmp);
>>>> -
>>>> -    if (len <= 0 || len >= sizeof(group_path)) {
>>>> -        error_setg_errno(errp, len < 0 ? errno : ENAMETOOLONG,
>>>> -                         "no iommu_group found");
>>>> -        goto error;
>>>> -    }
>>>> -
>>>> -    group_path[len] = 0;
>>>> -
>>>> -    group_name = basename(group_path);
>>>> -    if (sscanf(group_name, "%d", &groupid) != 1) {
>>>> -        error_setg_errno(errp, errno, "failed to read %s", group_path);
>>>> -        goto error;
>>>> -    }
>>>> -
>>>> -    trace_vfio_realize(vbasedev->name, groupid);
>>>> -
>>>> -    group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev),
>>> errp);
>>>> -    if (!group) {
>>>> -        goto error;
>>>> -    }
>>>> -
>>>> -    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>>>> -        if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
>>>> -            error_setg(errp, "device is already attached");
>>>> -            vfio_put_group(group);
>>>> -            goto error;
>>>> -        }
>>>> -    }
>>>> -
>>>>      /*
>>>>       * Mediated devices *might* operate compatibly with discarding of RAM,
>but
>>>>       * we cannot know for certain, it depends on whether the mdev vendor
>>> driver
>>>> @@ -3065,7 +3028,6 @@ static void vfio_realize(PCIDevice *pdev, Error
>**errp)
>>>>      if (vbasedev->ram_block_discard_allowed && !is_mdev) {
>>>>          error_setg(errp, "x-balloon-allowed only potentially compatible "
>>>>                     "with mdev devices");
>>>> -        vfio_put_group(group);
>>>>          goto error;
>>>>      }
>>>>
>>>> @@ -3076,10 +3038,10 @@ static void vfio_realize(PCIDevice *pdev, Error
>>> **errp)
>>>>          name = g_strdup(vbasedev->name);
>>>>      }
>>>>
>>>> -    ret = vfio_get_device(group, name, vbasedev, errp);
>>>> +    ret = vfio_attach_device(name, vbasedev,
>>>> +                             pci_device_iommu_address_space(pdev), errp);
>>>>      g_free(name);
>>>>      if (ret) {
>>>> -        vfio_put_group(group);
>>> independently on this patch, I think in case of error we leak
>>> vbasedev->name. Please have a look and if confirmed you can send a
>>> separate patch.
>> In case of error, vbasedev->name is freed in vfio_put_device().
>called in vfio_instance_finalize only. See comment below
>>
>>> Also I think if any subsequent action fail we shoudl properly detach the
>>> detach the device so introduce an extra error goto label, my bad sorry.
>> vfio_detach_device is called in vfio_instance_finalize(), this is just to follow
>> the old code, group and container resource allocated in vfio_realize() are
>> freed in vfio_instance_finalize().
>>
>> I agree this is strange, but I guess there may be some reason I'm unclear.
>Yeah but if vfio_realize does fail, I am not sure the
>vfio_instance_finalize gets called

If vfio_realize fails, fio_exitfn() will not be called but vfio_instance_finalize will.
Note vfio_instance_finalize() is called by object_unref().

Thanks
Zhenzhong
Eric Auger Sept. 27, 2023, 12:38 p.m. UTC | #10
On 9/27/23 14:32, Duan, Zhenzhong wrote:
>
>> -----Original Message-----
>> From: Eric Auger <eric.auger@redhat.com>
>> Sent: Wednesday, September 27, 2023 8:23 PM
>> Subject: Re: [PATCH v2 06/12] vfio/pci: Introduce vfio_[attach/detach]_device
>>
>>
>>
>> On 9/27/23 12:07, Duan, Zhenzhong wrote:
>>> Hi Eric,
>>>
>>>> -----Original Message-----
>>>> From: Eric Auger <eric.auger@redhat.com>
>>>> Sent: Wednesday, September 27, 2023 5:02 PM
>>>> Subject: Re: [PATCH v2 06/12] vfio/pci: Introduce vfio_[attach/detach]_device
>>>>
>>>> Hi Zhenzhong,
>>>>
>>>> On 9/26/23 13:32, Zhenzhong Duan wrote:
>>>>> From: Eric Auger <eric.auger@redhat.com>
>>>>>
>>>>> We want the VFIO devices to be able to use two different
>>>>> IOMMU backends, the legacy VFIO one and the new iommufd one.
>>>>>
>>>>> Introduce vfio_[attach/detach]_device which aim at hiding the
>>>>> underlying IOMMU backend (IOCTLs, datatypes, ...).
>>>>>
>>>>> Once vfio_attach_device completes, the device is attached
>>>>> to a security context and its fd can be used. Conversely
>>>>> When vfio_detach_device completes, the device has been
>>>>> detached from the security context.
>>>>>
>>>>> At the moment only the implementation based on the legacy
>>>>> container/group exists. Let's use it from the vfio-pci device.
>>>>> Subsequent patches will handle other devices.
>>>>>
>>>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>>> ---
>>>>>  include/hw/vfio/vfio-common.h |  3 ++
>>>>>  hw/vfio/common.c              | 68 +++++++++++++++++++++++++++++++++++
>>>>>  hw/vfio/pci.c                 | 50 +++-----------------------
>>>>>  hw/vfio/trace-events          |  2 +-
>>>>>  4 files changed, 77 insertions(+), 46 deletions(-)
>>>>>
>>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-
>> common.h
>>>>> index c4e7c3b4a7..12fbfbc37d 100644
>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>> @@ -225,6 +225,9 @@ void vfio_put_group(VFIOGroup *group);
>>>>>  struct vfio_device_info *vfio_get_device_info(int fd);
>>>>>  int vfio_get_device(VFIOGroup *group, const char *name,
>>>>>                      VFIODevice *vbasedev, Error **errp);
>>>>> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>>> +                       AddressSpace *as, Error **errp);
>>>>> +void vfio_detach_device(VFIODevice *vbasedev);
>>>>>
>>>>>  int vfio_kvm_device_add_fd(int fd, Error **errp);
>>>>>  int vfio_kvm_device_del_fd(int fd, Error **errp);
>>>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>>>> index 959b1362bb..7f3798b152 100644
>>>>> --- a/hw/vfio/common.c
>>>>> +++ b/hw/vfio/common.c
>>>>> @@ -2611,3 +2611,71 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t
>> op)
>>>>>      }
>>>>>      return vfio_eeh_container_op(container, op);
>>>>>  }
>>>>> +
>>>>> +static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
>>>>> +{
>>>>> +    char *tmp, group_path[PATH_MAX], *group_name;
>>>>> +    int ret, groupid;
>>>>> +    ssize_t len;
>>>>> +
>>>>> +    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
>>>>> +    len = readlink(tmp, group_path, sizeof(group_path));
>>>>> +    g_free(tmp);
>>>>> +
>>>>> +    if (len <= 0 || len >= sizeof(group_path)) {
>>>>> +        ret = len < 0 ? -errno : -ENAMETOOLONG;
>>>>> +        error_setg_errno(errp, -ret, "no iommu_group found");
>>>>> +        return ret;
>>>>> +    }
>>>>> +
>>>>> +    group_path[len] = 0;
>>>>> +
>>>>> +    group_name = basename(group_path);
>>>>> +    if (sscanf(group_name, "%d", &groupid) != 1) {
>>>>> +        error_setg_errno(errp, errno, "failed to read %s", group_path);
>>>>> +        return -errno;
>>>>> +    }
>>>>> +    return groupid;
>>>>> +}
>>>>> +
>>>>> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>>> +                       AddressSpace *as, Error **errp)
>>>>> +{
>>>>> +    int groupid = vfio_device_groupid(vbasedev, errp);
>>>>> +    VFIODevice *vbasedev_iter;
>>>>> +    VFIOGroup *group;
>>>>> +    int ret;
>>>>> +
>>>>> +    if (groupid < 0) {
>>>>> +        return groupid;
>>>>> +    }
>>>>> +
>>>>> +    trace_vfio_attach_device(vbasedev->name, groupid);
>>>>> +
>>>>> +    group = vfio_get_group(groupid, as, errp);
>>>>> +    if (!group) {
>>>>> +        return -ENOENT;
>>>>> +    }
>>>>> +
>>>>> +    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>>>>> +        if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
>>>>> +            error_setg(errp, "device is already attached");
>>>>> +            vfio_put_group(group);
>>>>> +            return -EBUSY;
>>>>> +        }
>>>>> +    }
>>>>> +    ret = vfio_get_device(group, name, vbasedev, errp);
>>>>> +    if (ret) {
>>>>> +        vfio_put_group(group);
>>>>> +    }
>>>>> +
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>> +void vfio_detach_device(VFIODevice *vbasedev)
>>>>> +{
>>>>> +    VFIOGroup *group = vbasedev->group;
>>>>> +
>>>>> +    vfio_put_base_device(vbasedev);
>>>>> +    vfio_put_group(group);
>>>>> +}
>>>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>>>> index 3b2ca3c24c..fe56789893 100644
>>>>> --- a/hw/vfio/pci.c
>>>>> +++ b/hw/vfio/pci.c
>>>>> @@ -2828,10 +2828,10 @@ static void vfio_populate_device(VFIOPCIDevice
>>>> *vdev, Error **errp)
>>>>>  static void vfio_put_device(VFIOPCIDevice *vdev)
>>>>>  {
>>>>> +    vfio_detach_device(&vdev->vbasedev);
>>>>> +
>>>>>      g_free(vdev->vbasedev.name);
>>>>>      g_free(vdev->msix);
>>>>> -
>>>>> -    vfio_put_base_device(&vdev->vbasedev);
>>>>>  }
>>>>>
>>>>>  static void vfio_err_notifier_handler(void *opaque)
>>>>> @@ -2978,13 +2978,9 @@ static void vfio_realize(PCIDevice *pdev, Error
>>>> **errp)
>>>>>  {
>>>>>      VFIOPCIDevice *vdev = VFIO_PCI(pdev);
>>>>>      VFIODevice *vbasedev = &vdev->vbasedev;
>>>>> -    VFIODevice *vbasedev_iter;
>>>>> -    VFIOGroup *group;
>>>>> -    char *tmp, *subsys, group_path[PATH_MAX], *group_name;
>>>>> +    char *tmp, *subsys;
>>>>>      Error *err = NULL;
>>>>> -    ssize_t len;
>>>>>      struct stat st;
>>>>> -    int groupid;
>>>>>      int i, ret;
>>>>>      bool is_mdev;
>>>>>      char uuid[UUID_FMT_LEN];
>>>>> @@ -3015,39 +3011,6 @@ static void vfio_realize(PCIDevice *pdev, Error
>>>> **errp)
>>>>>      vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>>>>>      vbasedev->dev = DEVICE(vdev);
>>>>>
>>>>> -    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
>>>>> -    len = readlink(tmp, group_path, sizeof(group_path));
>>>>> -    g_free(tmp);
>>>>> -
>>>>> -    if (len <= 0 || len >= sizeof(group_path)) {
>>>>> -        error_setg_errno(errp, len < 0 ? errno : ENAMETOOLONG,
>>>>> -                         "no iommu_group found");
>>>>> -        goto error;
>>>>> -    }
>>>>> -
>>>>> -    group_path[len] = 0;
>>>>> -
>>>>> -    group_name = basename(group_path);
>>>>> -    if (sscanf(group_name, "%d", &groupid) != 1) {
>>>>> -        error_setg_errno(errp, errno, "failed to read %s", group_path);
>>>>> -        goto error;
>>>>> -    }
>>>>> -
>>>>> -    trace_vfio_realize(vbasedev->name, groupid);
>>>>> -
>>>>> -    group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev),
>>>> errp);
>>>>> -    if (!group) {
>>>>> -        goto error;
>>>>> -    }
>>>>> -
>>>>> -    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>>>>> -        if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
>>>>> -            error_setg(errp, "device is already attached");
>>>>> -            vfio_put_group(group);
>>>>> -            goto error;
>>>>> -        }
>>>>> -    }
>>>>> -
>>>>>      /*
>>>>>       * Mediated devices *might* operate compatibly with discarding of RAM,
>> but
>>>>>       * we cannot know for certain, it depends on whether the mdev vendor
>>>> driver
>>>>> @@ -3065,7 +3028,6 @@ static void vfio_realize(PCIDevice *pdev, Error
>> **errp)
>>>>>      if (vbasedev->ram_block_discard_allowed && !is_mdev) {
>>>>>          error_setg(errp, "x-balloon-allowed only potentially compatible "
>>>>>                     "with mdev devices");
>>>>> -        vfio_put_group(group);
>>>>>          goto error;
>>>>>      }
>>>>>
>>>>> @@ -3076,10 +3038,10 @@ static void vfio_realize(PCIDevice *pdev, Error
>>>> **errp)
>>>>>          name = g_strdup(vbasedev->name);
>>>>>      }
>>>>>
>>>>> -    ret = vfio_get_device(group, name, vbasedev, errp);
>>>>> +    ret = vfio_attach_device(name, vbasedev,
>>>>> +                             pci_device_iommu_address_space(pdev), errp);
>>>>>      g_free(name);
>>>>>      if (ret) {
>>>>> -        vfio_put_group(group);
>>>> independently on this patch, I think in case of error we leak
>>>> vbasedev->name. Please have a look and if confirmed you can send a
>>>> separate patch.
>>> In case of error, vbasedev->name is freed in vfio_put_device().
>> called in vfio_instance_finalize only. See comment below
>>>> Also I think if any subsequent action fail we shoudl properly detach the
>>>> detach the device so introduce an extra error goto label, my bad sorry.
>>> vfio_detach_device is called in vfio_instance_finalize(), this is just to follow
>>> the old code, group and container resource allocated in vfio_realize() are
>>> freed in vfio_instance_finalize().
>>>
>>> I agree this is strange, but I guess there may be some reason I'm unclear.
>> Yeah but if vfio_realize does fail, I am not sure the
>> vfio_instance_finalize gets called
> If vfio_realize fails, fio_exitfn() will not be called but vfio_instance_finalize will.
> Note vfio_instance_finalize() is called by object_unref().
ah OK that's what I forgot then :-(

Eric
>
> Thanks
> Zhenzhong
>
Eric Auger Oct. 2, 2023, 1:57 p.m. UTC | #11
Hi Zhenzhong,

On 9/27/23 14:32, Duan, Zhenzhong wrote:
>
>> -----Original Message-----
>> From: Eric Auger <eric.auger@redhat.com>
>> Sent: Wednesday, September 27, 2023 8:23 PM
>> Subject: Re: [PATCH v2 06/12] vfio/pci: Introduce vfio_[attach/detach]_device
>>
>>
>>
>> On 9/27/23 12:07, Duan, Zhenzhong wrote:
>>> Hi Eric,
>>>
>>>> -----Original Message-----
>>>> From: Eric Auger <eric.auger@redhat.com>
>>>> Sent: Wednesday, September 27, 2023 5:02 PM
>>>> Subject: Re: [PATCH v2 06/12] vfio/pci: Introduce vfio_[attach/detach]_device
>>>>
>>>> Hi Zhenzhong,
>>>>
>>>> On 9/26/23 13:32, Zhenzhong Duan wrote:
>>>>> From: Eric Auger <eric.auger@redhat.com>
>>>>>
>>>>> We want the VFIO devices to be able to use two different
>>>>> IOMMU backends, the legacy VFIO one and the new iommufd one.
>>>>>
>>>>> Introduce vfio_[attach/detach]_device which aim at hiding the
>>>>> underlying IOMMU backend (IOCTLs, datatypes, ...).
>>>>>
>>>>> Once vfio_attach_device completes, the device is attached
>>>>> to a security context and its fd can be used. Conversely
>>>>> When vfio_detach_device completes, the device has been
>>>>> detached from the security context.
>>>>>
>>>>> At the moment only the implementation based on the legacy
>>>>> container/group exists. Let's use it from the vfio-pci device.
>>>>> Subsequent patches will handle other devices.
>>>>>
>>>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>>> ---
>>>>>  include/hw/vfio/vfio-common.h |  3 ++
>>>>>  hw/vfio/common.c              | 68 +++++++++++++++++++++++++++++++++++
>>>>>  hw/vfio/pci.c                 | 50 +++-----------------------
>>>>>  hw/vfio/trace-events          |  2 +-
>>>>>  4 files changed, 77 insertions(+), 46 deletions(-)
>>>>>
>>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-
>> common.h
>>>>> index c4e7c3b4a7..12fbfbc37d 100644
>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>> @@ -225,6 +225,9 @@ void vfio_put_group(VFIOGroup *group);
>>>>>  struct vfio_device_info *vfio_get_device_info(int fd);
>>>>>  int vfio_get_device(VFIOGroup *group, const char *name,
>>>>>                      VFIODevice *vbasedev, Error **errp);
>>>>> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>>> +                       AddressSpace *as, Error **errp);
>>>>> +void vfio_detach_device(VFIODevice *vbasedev);
>>>>>
>>>>>  int vfio_kvm_device_add_fd(int fd, Error **errp);
>>>>>  int vfio_kvm_device_del_fd(int fd, Error **errp);
>>>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>>>> index 959b1362bb..7f3798b152 100644
>>>>> --- a/hw/vfio/common.c
>>>>> +++ b/hw/vfio/common.c
>>>>> @@ -2611,3 +2611,71 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t
>> op)
>>>>>      }
>>>>>      return vfio_eeh_container_op(container, op);
>>>>>  }
>>>>> +
>>>>> +static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
>>>>> +{
>>>>> +    char *tmp, group_path[PATH_MAX], *group_name;
>>>>> +    int ret, groupid;
>>>>> +    ssize_t len;
>>>>> +
>>>>> +    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
>>>>> +    len = readlink(tmp, group_path, sizeof(group_path));
>>>>> +    g_free(tmp);
>>>>> +
>>>>> +    if (len <= 0 || len >= sizeof(group_path)) {
>>>>> +        ret = len < 0 ? -errno : -ENAMETOOLONG;
>>>>> +        error_setg_errno(errp, -ret, "no iommu_group found");
>>>>> +        return ret;
>>>>> +    }
>>>>> +
>>>>> +    group_path[len] = 0;
>>>>> +
>>>>> +    group_name = basename(group_path);
>>>>> +    if (sscanf(group_name, "%d", &groupid) != 1) {
>>>>> +        error_setg_errno(errp, errno, "failed to read %s", group_path);
>>>>> +        return -errno;
>>>>> +    }
>>>>> +    return groupid;
>>>>> +}
>>>>> +
>>>>> +int vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>>> +                       AddressSpace *as, Error **errp)
>>>>> +{
>>>>> +    int groupid = vfio_device_groupid(vbasedev, errp);
>>>>> +    VFIODevice *vbasedev_iter;
>>>>> +    VFIOGroup *group;
>>>>> +    int ret;
>>>>> +
>>>>> +    if (groupid < 0) {
>>>>> +        return groupid;
>>>>> +    }
>>>>> +
>>>>> +    trace_vfio_attach_device(vbasedev->name, groupid);
>>>>> +
>>>>> +    group = vfio_get_group(groupid, as, errp);
>>>>> +    if (!group) {
>>>>> +        return -ENOENT;
>>>>> +    }
>>>>> +
>>>>> +    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>>>>> +        if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
>>>>> +            error_setg(errp, "device is already attached");
>>>>> +            vfio_put_group(group);
>>>>> +            return -EBUSY;
>>>>> +        }
>>>>> +    }
>>>>> +    ret = vfio_get_device(group, name, vbasedev, errp);
>>>>> +    if (ret) {
>>>>> +        vfio_put_group(group);
>>>>> +    }
>>>>> +
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>> +void vfio_detach_device(VFIODevice *vbasedev)
>>>>> +{
>>>>> +    VFIOGroup *group = vbasedev->group;
>>>>> +
>>>>> +    vfio_put_base_device(vbasedev);
>>>>> +    vfio_put_group(group);
>>>>> +}
>>>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>>>> index 3b2ca3c24c..fe56789893 100644
>>>>> --- a/hw/vfio/pci.c
>>>>> +++ b/hw/vfio/pci.c
>>>>> @@ -2828,10 +2828,10 @@ static void vfio_populate_device(VFIOPCIDevice
>>>> *vdev, Error **errp)
>>>>>  static void vfio_put_device(VFIOPCIDevice *vdev)
>>>>>  {
>>>>> +    vfio_detach_device(&vdev->vbasedev);
>>>>> +
>>>>>      g_free(vdev->vbasedev.name);
>>>>>      g_free(vdev->msix);
>>>>> -
>>>>> -    vfio_put_base_device(&vdev->vbasedev);
>>>>>  }
>>>>>
>>>>>  static void vfio_err_notifier_handler(void *opaque)
>>>>> @@ -2978,13 +2978,9 @@ static void vfio_realize(PCIDevice *pdev, Error
>>>> **errp)
>>>>>  {
>>>>>      VFIOPCIDevice *vdev = VFIO_PCI(pdev);
>>>>>      VFIODevice *vbasedev = &vdev->vbasedev;
>>>>> -    VFIODevice *vbasedev_iter;
>>>>> -    VFIOGroup *group;
>>>>> -    char *tmp, *subsys, group_path[PATH_MAX], *group_name;
>>>>> +    char *tmp, *subsys;
>>>>>      Error *err = NULL;
>>>>> -    ssize_t len;
>>>>>      struct stat st;
>>>>> -    int groupid;
>>>>>      int i, ret;
>>>>>      bool is_mdev;
>>>>>      char uuid[UUID_FMT_LEN];
>>>>> @@ -3015,39 +3011,6 @@ static void vfio_realize(PCIDevice *pdev, Error
>>>> **errp)
>>>>>      vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>>>>>      vbasedev->dev = DEVICE(vdev);
>>>>>
>>>>> -    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
>>>>> -    len = readlink(tmp, group_path, sizeof(group_path));
>>>>> -    g_free(tmp);
>>>>> -
>>>>> -    if (len <= 0 || len >= sizeof(group_path)) {
>>>>> -        error_setg_errno(errp, len < 0 ? errno : ENAMETOOLONG,
>>>>> -                         "no iommu_group found");
>>>>> -        goto error;
>>>>> -    }
>>>>> -
>>>>> -    group_path[len] = 0;
>>>>> -
>>>>> -    group_name = basename(group_path);
>>>>> -    if (sscanf(group_name, "%d", &groupid) != 1) {
>>>>> -        error_setg_errno(errp, errno, "failed to read %s", group_path);
>>>>> -        goto error;
>>>>> -    }
>>>>> -
>>>>> -    trace_vfio_realize(vbasedev->name, groupid);
>>>>> -
>>>>> -    group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev),
>>>> errp);
>>>>> -    if (!group) {
>>>>> -        goto error;
>>>>> -    }
>>>>> -
>>>>> -    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>>>>> -        if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
>>>>> -            error_setg(errp, "device is already attached");
>>>>> -            vfio_put_group(group);
>>>>> -            goto error;
>>>>> -        }
>>>>> -    }
>>>>> -
>>>>>      /*
>>>>>       * Mediated devices *might* operate compatibly with discarding of RAM,
>> but
>>>>>       * we cannot know for certain, it depends on whether the mdev vendor
>>>> driver
>>>>> @@ -3065,7 +3028,6 @@ static void vfio_realize(PCIDevice *pdev, Error
>> **errp)
>>>>>      if (vbasedev->ram_block_discard_allowed && !is_mdev) {
>>>>>          error_setg(errp, "x-balloon-allowed only potentially compatible "
>>>>>                     "with mdev devices");
>>>>> -        vfio_put_group(group);
>>>>>          goto error;
>>>>>      }
>>>>>
>>>>> @@ -3076,10 +3038,10 @@ static void vfio_realize(PCIDevice *pdev, Error
>>>> **errp)
>>>>>          name = g_strdup(vbasedev->name);
>>>>>      }
>>>>>
>>>>> -    ret = vfio_get_device(group, name, vbasedev, errp);
>>>>> +    ret = vfio_attach_device(name, vbasedev,
>>>>> +                             pci_device_iommu_address_space(pdev), errp);
>>>>>      g_free(name);
>>>>>      if (ret) {
>>>>> -        vfio_put_group(group);
>>>> independently on this patch, I think in case of error we leak
>>>> vbasedev->name. Please have a look and if confirmed you can send a
>>>> separate patch.
>>> In case of error, vbasedev->name is freed in vfio_put_device().
>> called in vfio_instance_finalize only. See comment below
>>>> Also I think if any subsequent action fail we shoudl properly detach the
>>>> detach the device so introduce an extra error goto label, my bad sorry.
>>> vfio_detach_device is called in vfio_instance_finalize(), this is just to follow
>>> the old code, group and container resource allocated in vfio_realize() are
>>> freed in vfio_instance_finalize().
>>>
>>> I agree this is strange, but I guess there may be some reason I'm unclear.
>> Yeah but if vfio_realize does fail, I am not sure the
>> vfio_instance_finalize gets called
> If vfio_realize fails, fio_exitfn() will not be called but vfio_instance_finalize will.
> Note vfio_instance_finalize() is called by object_unref().

I think this holds if the object has been properly initialized. With gdb
I don't see

vfio_instance_finalize() gets called on vfio_realize() failure.

Thanks

Eric

>
> Thanks
> Zhenzhong
>
diff mbox series

Patch

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index c4e7c3b4a7..12fbfbc37d 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -225,6 +225,9 @@  void vfio_put_group(VFIOGroup *group);
 struct vfio_device_info *vfio_get_device_info(int fd);
 int vfio_get_device(VFIOGroup *group, const char *name,
                     VFIODevice *vbasedev, Error **errp);
+int vfio_attach_device(char *name, VFIODevice *vbasedev,
+                       AddressSpace *as, Error **errp);
+void vfio_detach_device(VFIODevice *vbasedev);
 
 int vfio_kvm_device_add_fd(int fd, Error **errp);
 int vfio_kvm_device_del_fd(int fd, Error **errp);
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 959b1362bb..7f3798b152 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -2611,3 +2611,71 @@  int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
     }
     return vfio_eeh_container_op(container, op);
 }
+
+static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
+{
+    char *tmp, group_path[PATH_MAX], *group_name;
+    int ret, groupid;
+    ssize_t len;
+
+    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
+    len = readlink(tmp, group_path, sizeof(group_path));
+    g_free(tmp);
+
+    if (len <= 0 || len >= sizeof(group_path)) {
+        ret = len < 0 ? -errno : -ENAMETOOLONG;
+        error_setg_errno(errp, -ret, "no iommu_group found");
+        return ret;
+    }
+
+    group_path[len] = 0;
+
+    group_name = basename(group_path);
+    if (sscanf(group_name, "%d", &groupid) != 1) {
+        error_setg_errno(errp, errno, "failed to read %s", group_path);
+        return -errno;
+    }
+    return groupid;
+}
+
+int vfio_attach_device(char *name, VFIODevice *vbasedev,
+                       AddressSpace *as, Error **errp)
+{
+    int groupid = vfio_device_groupid(vbasedev, errp);
+    VFIODevice *vbasedev_iter;
+    VFIOGroup *group;
+    int ret;
+
+    if (groupid < 0) {
+        return groupid;
+    }
+
+    trace_vfio_attach_device(vbasedev->name, groupid);
+
+    group = vfio_get_group(groupid, as, errp);
+    if (!group) {
+        return -ENOENT;
+    }
+
+    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
+        if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
+            error_setg(errp, "device is already attached");
+            vfio_put_group(group);
+            return -EBUSY;
+        }
+    }
+    ret = vfio_get_device(group, name, vbasedev, errp);
+    if (ret) {
+        vfio_put_group(group);
+    }
+
+    return ret;
+}
+
+void vfio_detach_device(VFIODevice *vbasedev)
+{
+    VFIOGroup *group = vbasedev->group;
+
+    vfio_put_base_device(vbasedev);
+    vfio_put_group(group);
+}
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 3b2ca3c24c..fe56789893 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2828,10 +2828,10 @@  static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
 
 static void vfio_put_device(VFIOPCIDevice *vdev)
 {
+    vfio_detach_device(&vdev->vbasedev);
+
     g_free(vdev->vbasedev.name);
     g_free(vdev->msix);
-
-    vfio_put_base_device(&vdev->vbasedev);
 }
 
 static void vfio_err_notifier_handler(void *opaque)
@@ -2978,13 +2978,9 @@  static void vfio_realize(PCIDevice *pdev, Error **errp)
 {
     VFIOPCIDevice *vdev = VFIO_PCI(pdev);
     VFIODevice *vbasedev = &vdev->vbasedev;
-    VFIODevice *vbasedev_iter;
-    VFIOGroup *group;
-    char *tmp, *subsys, group_path[PATH_MAX], *group_name;
+    char *tmp, *subsys;
     Error *err = NULL;
-    ssize_t len;
     struct stat st;
-    int groupid;
     int i, ret;
     bool is_mdev;
     char uuid[UUID_FMT_LEN];
@@ -3015,39 +3011,6 @@  static void vfio_realize(PCIDevice *pdev, Error **errp)
     vbasedev->type = VFIO_DEVICE_TYPE_PCI;
     vbasedev->dev = DEVICE(vdev);
 
-    tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
-    len = readlink(tmp, group_path, sizeof(group_path));
-    g_free(tmp);
-
-    if (len <= 0 || len >= sizeof(group_path)) {
-        error_setg_errno(errp, len < 0 ? errno : ENAMETOOLONG,
-                         "no iommu_group found");
-        goto error;
-    }
-
-    group_path[len] = 0;
-
-    group_name = basename(group_path);
-    if (sscanf(group_name, "%d", &groupid) != 1) {
-        error_setg_errno(errp, errno, "failed to read %s", group_path);
-        goto error;
-    }
-
-    trace_vfio_realize(vbasedev->name, groupid);
-
-    group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev), errp);
-    if (!group) {
-        goto error;
-    }
-
-    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
-        if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
-            error_setg(errp, "device is already attached");
-            vfio_put_group(group);
-            goto error;
-        }
-    }
-
     /*
      * Mediated devices *might* operate compatibly with discarding of RAM, but
      * we cannot know for certain, it depends on whether the mdev vendor driver
@@ -3065,7 +3028,6 @@  static void vfio_realize(PCIDevice *pdev, Error **errp)
     if (vbasedev->ram_block_discard_allowed && !is_mdev) {
         error_setg(errp, "x-balloon-allowed only potentially compatible "
                    "with mdev devices");
-        vfio_put_group(group);
         goto error;
     }
 
@@ -3076,10 +3038,10 @@  static void vfio_realize(PCIDevice *pdev, Error **errp)
         name = g_strdup(vbasedev->name);
     }
 
-    ret = vfio_get_device(group, name, vbasedev, errp);
+    ret = vfio_attach_device(name, vbasedev,
+                             pci_device_iommu_address_space(pdev), errp);
     g_free(name);
     if (ret) {
-        vfio_put_group(group);
         goto error;
     }
 
@@ -3304,7 +3266,6 @@  error:
 static void vfio_instance_finalize(Object *obj)
 {
     VFIOPCIDevice *vdev = VFIO_PCI(obj);
-    VFIOGroup *group = vdev->vbasedev.group;
 
     vfio_display_finalize(vdev);
     vfio_bars_finalize(vdev);
@@ -3318,7 +3279,6 @@  static void vfio_instance_finalize(Object *obj)
      * g_free(vdev->igd_opregion);
      */
     vfio_put_device(vdev);
-    vfio_put_group(group);
 }
 
 static void vfio_exitfn(PCIDevice *pdev)
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index e64ca4a019..e710026a73 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -37,7 +37,7 @@  vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int
 vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s"
 vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n  size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
 vfio_populate_device_get_irq_info_failure(const char *errstr) "VFIO_DEVICE_GET_IRQ_INFO failure: %s"
-vfio_realize(const char *name, int group_id) " (%s) group %d"
+vfio_attach_device(const char *name, int group_id) " (%s) group %d"
 vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d"
 vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x"
 vfio_pci_reset(const char *name) " (%s)"