diff mbox series

[v6,4/9] vfio/{iommufd, container}: Invoke HostIOMMUDevice::realize() during attach_device()

Message ID 20240722211326.70162-5-joao.m.martins@oracle.com (mailing list archive)
State New, archived
Headers show
Series hw/iommufd: IOMMUFD Dirty Tracking | expand

Commit Message

Joao Martins July 22, 2024, 9:13 p.m. UTC
Move the HostIOMMUDevice::realize() to be invoked during the attach of the device
before we allocate IOMMUFD hardware pagetable objects (HWPT). This allows the use
of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially tell if the IOMMU
behind the device supports dirty tracking.

Note: The HostIOMMUDevice data from legacy backend is static and doesn't
need any information from the (type1-iommu) backend to be initialized.
In contrast however, the IOMMUFD HostIOMMUDevice data requires the
iommufd FD to be connected and having a devid to be able to successfully
GET_HW_INFO. This means vfio_device_hiod_realize() is called in
different places within the backend .attach_device() implementation.

Suggested-by: Cédric Le Goater <clg@redhat.cm>
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 include/hw/vfio/vfio-common.h |  1 +
 hw/vfio/common.c              | 16 ++++++----------
 hw/vfio/container.c           |  4 ++++
 hw/vfio/helpers.c             | 11 +++++++++++
 hw/vfio/iommufd.c             |  4 ++++
 5 files changed, 26 insertions(+), 10 deletions(-)

Comments

Eric Auger July 23, 2024, 7:38 a.m. UTC | #1
Hi Joao,

On 7/22/24 23:13, Joao Martins wrote:
> Move the HostIOMMUDevice::realize() to be invoked during the attach of the device
> before we allocate IOMMUFD hardware pagetable objects (HWPT). This allows the use
> of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially tell if the IOMMU
> behind the device supports dirty tracking.
>
> Note: The HostIOMMUDevice data from legacy backend is static and doesn't
> need any information from the (type1-iommu) backend to be initialized.
> In contrast however, the IOMMUFD HostIOMMUDevice data requires the
> iommufd FD to be connected and having a devid to be able to successfully
Nit: maybe this comment shall be also added in iommufd.c before the call
to vfio_device_hiod_realize() to avoid someone else to move that call
earlier at some point
> GET_HW_INFO. This means vfio_device_hiod_realize() is called in
> different places within the backend .attach_device() implementation.
>
> Suggested-by: Cédric Le Goater <clg@redhat.cm>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>  include/hw/vfio/vfio-common.h |  1 +
>  hw/vfio/common.c              | 16 ++++++----------
>  hw/vfio/container.c           |  4 ++++
>  hw/vfio/helpers.c             | 11 +++++++++++
>  hw/vfio/iommufd.c             |  4 ++++
>  5 files changed, 26 insertions(+), 10 deletions(-)
>
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 1a96678f8c38..4e44b26d3c45 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -242,6 +242,7 @@ void vfio_region_finalize(VFIORegion *region);
>  void vfio_reset_handler(void *opaque);
>  struct vfio_device_info *vfio_get_device_info(int fd);
>  bool vfio_device_is_mdev(VFIODevice *vbasedev);
> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
>  bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>                          AddressSpace *as, Error **errp);
>  void vfio_detach_device(VFIODevice *vbasedev);
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 784e266e6aab..da12cbd56408 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -1537,7 +1537,7 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>  {
>      const VFIOIOMMUClass *ops =
>          VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
> -    HostIOMMUDevice *hiod;
> +    HostIOMMUDevice *hiod = NULL;
>  
>      if (vbasedev->iommufd) {
>          ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
> @@ -1545,21 +1545,17 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>  
>      assert(ops);
>  
> -    if (!ops->attach_device(name, vbasedev, as, errp)) {
> -        return false;
> -    }
>  
> -    if (vbasedev->mdev) {
> -        return true;
> +    if (!vbasedev->mdev) {
> +        hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
> +        vbasedev->hiod = hiod;
>      }
>  
> -    hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
> -    if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp)) {
> +    if (!ops->attach_device(name, vbasedev, as, errp)) {
>          object_unref(hiod);
> -        ops->detach_device(vbasedev);
> +        vbasedev->hiod = NULL;
>          return false;
>      }
> -    vbasedev->hiod = hiod;
>  
>      return true;
>  }
> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
> index 10cb4b4320ac..9ccdb639ac84 100644
> --- a/hw/vfio/container.c
> +++ b/hw/vfio/container.c
> @@ -914,6 +914,10 @@ static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev,
>  
>      trace_vfio_attach_device(vbasedev->name, groupid);
>  
> +    if (!vfio_device_hiod_realize(vbasedev, errp)) {
> +        return false;
don't you want to go to err_alloc_ioas instead?
> +    }
> +
>      group = vfio_get_group(groupid, as, errp);
>      if (!group) {
>          return false;
> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
> index 7e23e9080c9d..ea15c79db0a3 100644
> --- a/hw/vfio/helpers.c
> +++ b/hw/vfio/helpers.c
> @@ -689,3 +689,14 @@ bool vfio_device_is_mdev(VFIODevice *vbasedev)
>      subsys = realpath(tmp, NULL);
>      return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
>  }
> +
> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
> +{
> +    HostIOMMUDevice *hiod = vbasedev->hiod;
> +
> +    if (!hiod) {
> +        return true;
> +    }
> +
> +    return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp);
> +}
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 5e2fc1ce089d..2324bf892c56 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -403,6 +403,10 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
>  
>      space = vfio_get_address_space(as);
>  
> +    if (!vfio_device_hiod_realize(vbasedev, errp)) {
> +        return false;
> +    }
> +
>      /* try to attach to an existing container in this space */
>      QLIST_FOREACH(bcontainer, &space->containers, next) {
>          container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
Eric
Cédric Le Goater July 23, 2024, 7:44 a.m. UTC | #2
On 7/23/24 09:38, Eric Auger wrote:
> Hi Joao,
> 
> On 7/22/24 23:13, Joao Martins wrote:
>> Move the HostIOMMUDevice::realize() to be invoked during the attach of the device
>> before we allocate IOMMUFD hardware pagetable objects (HWPT). This allows the use
>> of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially tell if the IOMMU
>> behind the device supports dirty tracking.
>>
>> Note: The HostIOMMUDevice data from legacy backend is static and doesn't
>> need any information from the (type1-iommu) backend to be initialized.
>> In contrast however, the IOMMUFD HostIOMMUDevice data requires the
>> iommufd FD to be connected and having a devid to be able to successfully
> Nit: maybe this comment shall be also added in iommufd.c before the call
> to vfio_device_hiod_realize() to avoid someone else to move that call
> earlier at some point
>> GET_HW_INFO. This means vfio_device_hiod_realize() is called in
>> different places within the backend .attach_device() implementation.
>>
>> Suggested-by: Cédric Le Goater <clg@redhat.cm>
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>   include/hw/vfio/vfio-common.h |  1 +
>>   hw/vfio/common.c              | 16 ++++++----------
>>   hw/vfio/container.c           |  4 ++++
>>   hw/vfio/helpers.c             | 11 +++++++++++
>>   hw/vfio/iommufd.c             |  4 ++++
>>   5 files changed, 26 insertions(+), 10 deletions(-)
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index 1a96678f8c38..4e44b26d3c45 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -242,6 +242,7 @@ void vfio_region_finalize(VFIORegion *region);
>>   void vfio_reset_handler(void *opaque);
>>   struct vfio_device_info *vfio_get_device_info(int fd);
>>   bool vfio_device_is_mdev(VFIODevice *vbasedev);
>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
>>   bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>                           AddressSpace *as, Error **errp);
>>   void vfio_detach_device(VFIODevice *vbasedev);
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 784e266e6aab..da12cbd56408 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -1537,7 +1537,7 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>   {
>>       const VFIOIOMMUClass *ops =
>>           VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
>> -    HostIOMMUDevice *hiod;
>> +    HostIOMMUDevice *hiod = NULL;
>>   
>>       if (vbasedev->iommufd) {
>>           ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
>> @@ -1545,21 +1545,17 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>   
>>       assert(ops);
>>   
>> -    if (!ops->attach_device(name, vbasedev, as, errp)) {
>> -        return false;
>> -    }
>>   
>> -    if (vbasedev->mdev) {
>> -        return true;
>> +    if (!vbasedev->mdev) {
>> +        hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>> +        vbasedev->hiod = hiod;
>>       }
>>   
>> -    hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>> -    if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp)) {
>> +    if (!ops->attach_device(name, vbasedev, as, errp)) {
>>           object_unref(hiod);
>> -        ops->detach_device(vbasedev);
>> +        vbasedev->hiod = NULL;
>>           return false;
>>       }
>> -    vbasedev->hiod = hiod;
>>   
>>       return true;
>>   }
>> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>> index 10cb4b4320ac..9ccdb639ac84 100644
>> --- a/hw/vfio/container.c
>> +++ b/hw/vfio/container.c
>> @@ -914,6 +914,10 @@ static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev,
>>   
>>       trace_vfio_attach_device(vbasedev->name, groupid);
>>   
>> +    if (!vfio_device_hiod_realize(vbasedev, errp)) {
>> +        return false;
> don't you want to go to err_alloc_ioas instead?

hmm, the err_alloc_ioas label is in a different function iommufd_cdev_attach().

may be you meant the comment for routine iommufd_cdev_attach() and
label err_connect_bind ?


Thanks,

C.


>> +    }
>> +
>>       group = vfio_get_group(groupid, as, errp);
>>       if (!group) {
>>           return false;
>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>> index 7e23e9080c9d..ea15c79db0a3 100644
>> --- a/hw/vfio/helpers.c
>> +++ b/hw/vfio/helpers.c
>> @@ -689,3 +689,14 @@ bool vfio_device_is_mdev(VFIODevice *vbasedev)
>>       subsys = realpath(tmp, NULL);
>>       return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
>>   }
>> +
>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
>> +{
>> +    HostIOMMUDevice *hiod = vbasedev->hiod;
>> +
>> +    if (!hiod) {
>> +        return true;
>> +    }
>> +
>> +    return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp);
>> +}
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index 5e2fc1ce089d..2324bf892c56 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -403,6 +403,10 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
>>   
>>       space = vfio_get_address_space(as);
>>   
>> +    if (!vfio_device_hiod_realize(vbasedev, errp)) {
>> +        return false;
>> +    }
>> +
>>       /* try to attach to an existing container in this space */
>>       QLIST_FOREACH(bcontainer, &space->containers, next) {
>>           container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
> Eric
>
Joao Martins July 23, 2024, 7:53 a.m. UTC | #3
On 23/07/2024 08:38, Eric Auger wrote:
> Hi Joao,
> 
> On 7/22/24 23:13, Joao Martins wrote:
>> Move the HostIOMMUDevice::realize() to be invoked during the attach of the device
>> before we allocate IOMMUFD hardware pagetable objects (HWPT). This allows the use
>> of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially tell if the IOMMU
>> behind the device supports dirty tracking.
>>
>> Note: The HostIOMMUDevice data from legacy backend is static and doesn't
>> need any information from the (type1-iommu) backend to be initialized.
>> In contrast however, the IOMMUFD HostIOMMUDevice data requires the
>> iommufd FD to be connected and having a devid to be able to successfully
> Nit: maybe this comment shall be also added in iommufd.c before the call
> to vfio_device_hiod_realize() to avoid someone else to move that call
> earlier at some point
>> GET_HW_INFO. This means vfio_device_hiod_realize() is called in
>> different places within the backend .attach_device() implementation.
>>
>> Suggested-by: Cédric Le Goater <clg@redhat.cm>
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>  include/hw/vfio/vfio-common.h |  1 +
>>  hw/vfio/common.c              | 16 ++++++----------
>>  hw/vfio/container.c           |  4 ++++
>>  hw/vfio/helpers.c             | 11 +++++++++++
>>  hw/vfio/iommufd.c             |  4 ++++
>>  5 files changed, 26 insertions(+), 10 deletions(-)
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index 1a96678f8c38..4e44b26d3c45 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -242,6 +242,7 @@ void vfio_region_finalize(VFIORegion *region);
>>  void vfio_reset_handler(void *opaque);
>>  struct vfio_device_info *vfio_get_device_info(int fd);
>>  bool vfio_device_is_mdev(VFIODevice *vbasedev);
>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
>>  bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>                          AddressSpace *as, Error **errp);
>>  void vfio_detach_device(VFIODevice *vbasedev);
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 784e266e6aab..da12cbd56408 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -1537,7 +1537,7 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>  {
>>      const VFIOIOMMUClass *ops =
>>          VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
>> -    HostIOMMUDevice *hiod;
>> +    HostIOMMUDevice *hiod = NULL;
>>  
>>      if (vbasedev->iommufd) {
>>          ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
>> @@ -1545,21 +1545,17 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>  
>>      assert(ops);
>>  
>> -    if (!ops->attach_device(name, vbasedev, as, errp)) {
>> -        return false;
>> -    }
>>  
>> -    if (vbasedev->mdev) {
>> -        return true;
>> +    if (!vbasedev->mdev) {
>> +        hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>> +        vbasedev->hiod = hiod;
>>      }
>>  
>> -    hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>> -    if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp)) {
>> +    if (!ops->attach_device(name, vbasedev, as, errp)) {
>>          object_unref(hiod);
>> -        ops->detach_device(vbasedev);
>> +        vbasedev->hiod = NULL;
>>          return false;
>>      }
>> -    vbasedev->hiod = hiod;
>>  
>>      return true;
>>  }
>> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>> index 10cb4b4320ac..9ccdb639ac84 100644
>> --- a/hw/vfio/container.c
>> +++ b/hw/vfio/container.c
>> @@ -914,6 +914,10 @@ static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev,
>>  
>>      trace_vfio_attach_device(vbasedev->name, groupid);
>>  
>> +    if (!vfio_device_hiod_realize(vbasedev, errp)) {
>> +        return false;
> don't you want to go to err_alloc_ioas instead?

Oh, yes, I thought I was doing that, but I am not :( Thanks for catching that

Your comment is spot on but in the wrong place.

vfio_legacy_attach_device() can just return false as it's at the top of the
function but here (...)

>> +    }
>> +
>>      group = vfio_get_group(groupid, as, errp);
>>      if (!group) {
>>          return false;
>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>> index 7e23e9080c9d..ea15c79db0a3 100644
>> --- a/hw/vfio/helpers.c
>> +++ b/hw/vfio/helpers.c
>> @@ -689,3 +689,14 @@ bool vfio_device_is_mdev(VFIODevice *vbasedev)
>>      subsys = realpath(tmp, NULL);
>>      return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
>>  }
>> +
>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
>> +{
>> +    HostIOMMUDevice *hiod = vbasedev->hiod;
>> +
>> +    if (!hiod) {
>> +        return true;
>> +    }
>> +
>> +    return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp);
>> +}
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index 5e2fc1ce089d..2324bf892c56 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -403,6 +403,10 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
>>  
>>      space = vfio_get_address_space(as);
>>  
>> +    if (!vfio_device_hiod_realize(vbasedev, errp)) {
>> +        return false;
>> +    }
>> +

(...) we definitely need a goto err_alloc_ioas here.

Snip below:

@@ -482,7 +483,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice
*vbasedev,
     space = vfio_get_address_space(as);

     if (!vfio_device_hiod_realize(vbasedev, errp)) {
-        return false;
+        goto err_alloc_ioas;
     }

     /* try to attach to an existing container in this space */
Eric Auger July 23, 2024, 7:55 a.m. UTC | #4
On 7/23/24 09:44, Cédric Le Goater wrote:
> On 7/23/24 09:38, Eric Auger wrote:
>> Hi Joao,
>>
>> On 7/22/24 23:13, Joao Martins wrote:
>>> Move the HostIOMMUDevice::realize() to be invoked during the attach
>>> of the device
>>> before we allocate IOMMUFD hardware pagetable objects (HWPT). This
>>> allows the use
>>> of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially tell
>>> if the IOMMU
>>> behind the device supports dirty tracking.
>>>
>>> Note: The HostIOMMUDevice data from legacy backend is static and
>>> doesn't
>>> need any information from the (type1-iommu) backend to be initialized.
>>> In contrast however, the IOMMUFD HostIOMMUDevice data requires the
>>> iommufd FD to be connected and having a devid to be able to
>>> successfully
>> Nit: maybe this comment shall be also added in iommufd.c before the call
>> to vfio_device_hiod_realize() to avoid someone else to move that call
>> earlier at some point
>>> GET_HW_INFO. This means vfio_device_hiod_realize() is called in
>>> different places within the backend .attach_device() implementation.
>>>
>>> Suggested-by: Cédric Le Goater <clg@redhat.cm>
>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>>   include/hw/vfio/vfio-common.h |  1 +
>>>   hw/vfio/common.c              | 16 ++++++----------
>>>   hw/vfio/container.c           |  4 ++++
>>>   hw/vfio/helpers.c             | 11 +++++++++++
>>>   hw/vfio/iommufd.c             |  4 ++++
>>>   5 files changed, 26 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/include/hw/vfio/vfio-common.h
>>> b/include/hw/vfio/vfio-common.h
>>> index 1a96678f8c38..4e44b26d3c45 100644
>>> --- a/include/hw/vfio/vfio-common.h
>>> +++ b/include/hw/vfio/vfio-common.h
>>> @@ -242,6 +242,7 @@ void vfio_region_finalize(VFIORegion *region);
>>>   void vfio_reset_handler(void *opaque);
>>>   struct vfio_device_info *vfio_get_device_info(int fd);
>>>   bool vfio_device_is_mdev(VFIODevice *vbasedev);
>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
>>>   bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>                           AddressSpace *as, Error **errp);
>>>   void vfio_detach_device(VFIODevice *vbasedev);
>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>> index 784e266e6aab..da12cbd56408 100644
>>> --- a/hw/vfio/common.c
>>> +++ b/hw/vfio/common.c
>>> @@ -1537,7 +1537,7 @@ bool vfio_attach_device(char *name, VFIODevice
>>> *vbasedev,
>>>   {
>>>       const VFIOIOMMUClass *ops =
>>>          
>>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
>>> -    HostIOMMUDevice *hiod;
>>> +    HostIOMMUDevice *hiod = NULL;
>>>         if (vbasedev->iommufd) {
>>>           ops =
>>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
>>> @@ -1545,21 +1545,17 @@ bool vfio_attach_device(char *name,
>>> VFIODevice *vbasedev,
>>>         assert(ops);
>>>   -    if (!ops->attach_device(name, vbasedev, as, errp)) {
>>> -        return false;
>>> -    }
>>>   -    if (vbasedev->mdev) {
>>> -        return true;
>>> +    if (!vbasedev->mdev) {
>>> +        hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>> +        vbasedev->hiod = hiod;
>>>       }
>>>   -    hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>> -    if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev,
>>> errp)) {
>>> +    if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>           object_unref(hiod);
>>> -        ops->detach_device(vbasedev);
>>> +        vbasedev->hiod = NULL;
>>>           return false;
>>>       }
>>> -    vbasedev->hiod = hiod;
>>>         return true;
>>>   }
>>> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>>> index 10cb4b4320ac..9ccdb639ac84 100644
>>> --- a/hw/vfio/container.c
>>> +++ b/hw/vfio/container.c
>>> @@ -914,6 +914,10 @@ static bool vfio_legacy_attach_device(const
>>> char *name, VFIODevice *vbasedev,
>>>         trace_vfio_attach_device(vbasedev->name, groupid);
>>>   +    if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>> +        return false;
>> don't you want to go to err_alloc_ioas instead?
>
> hmm, the err_alloc_ioas label is in a different function
> iommufd_cdev_attach().
>
> may be you meant the comment for routine iommufd_cdev_attach() and
> label err_connect_bind ?
>
>
> Thanks,
>
> C.
>
>
>>> +    }
>>> +
>>>       group = vfio_get_group(groupid, as, errp);
>>>       if (!group) {
>>>           return false;
>>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>>> index 7e23e9080c9d..ea15c79db0a3 100644
>>> --- a/hw/vfio/helpers.c
>>> +++ b/hw/vfio/helpers.c
>>> @@ -689,3 +689,14 @@ bool vfio_device_is_mdev(VFIODevice *vbasedev)
>>>       subsys = realpath(tmp, NULL);
>>>       return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
>>>   }
>>> +
>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
>>> +{
>>> +    HostIOMMUDevice *hiod = vbasedev->hiod;
>>> +
>>> +    if (!hiod) {
>>> +        return true;
>>> +    }
>>> +
>>> +    return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod,
>>> vbasedev, errp);
>>> +}
>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>> index 5e2fc1ce089d..2324bf892c56 100644
>>> --- a/hw/vfio/iommufd.c
>>> +++ b/hw/vfio/iommufd.c
>>> @@ -403,6 +403,10 @@ static bool iommufd_cdev_attach(const char
>>> *name, VFIODevice *vbasedev,
>>>         space = vfio_get_address_space(as);
>>>   +    if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>> +        return false;
Hum sorry my previous comment was targetting that place. I think
unrolling is needed up to put_address_space

so effectively this does not match err_alloc_ioas but I guess we would
need another label

Eric
>>> +    }
>>> +
>>>       /* try to attach to an existing container in this space */
>>>       QLIST_FOREACH(bcontainer, &space->containers, next) {
>>>           container = container_of(bcontainer, VFIOIOMMUFDContainer,
>>> bcontainer);
>> Eric
>>
>
Cédric Le Goater July 23, 2024, 8 a.m. UTC | #5
On 7/23/24 09:53, Joao Martins wrote:
> On 23/07/2024 08:38, Eric Auger wrote:
>> Hi Joao,
>>
>> On 7/22/24 23:13, Joao Martins wrote:
>>> Move the HostIOMMUDevice::realize() to be invoked during the attach of the device
>>> before we allocate IOMMUFD hardware pagetable objects (HWPT). This allows the use
>>> of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially tell if the IOMMU
>>> behind the device supports dirty tracking.
>>>
>>> Note: The HostIOMMUDevice data from legacy backend is static and doesn't
>>> need any information from the (type1-iommu) backend to be initialized.
>>> In contrast however, the IOMMUFD HostIOMMUDevice data requires the
>>> iommufd FD to be connected and having a devid to be able to successfully
>> Nit: maybe this comment shall be also added in iommufd.c before the call
>> to vfio_device_hiod_realize() to avoid someone else to move that call
>> earlier at some point
>>> GET_HW_INFO. This means vfio_device_hiod_realize() is called in
>>> different places within the backend .attach_device() implementation.
>>>
>>> Suggested-by: Cédric Le Goater <clg@redhat.cm>
>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>>   include/hw/vfio/vfio-common.h |  1 +
>>>   hw/vfio/common.c              | 16 ++++++----------
>>>   hw/vfio/container.c           |  4 ++++
>>>   hw/vfio/helpers.c             | 11 +++++++++++
>>>   hw/vfio/iommufd.c             |  4 ++++
>>>   5 files changed, 26 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>> index 1a96678f8c38..4e44b26d3c45 100644
>>> --- a/include/hw/vfio/vfio-common.h
>>> +++ b/include/hw/vfio/vfio-common.h
>>> @@ -242,6 +242,7 @@ void vfio_region_finalize(VFIORegion *region);
>>>   void vfio_reset_handler(void *opaque);
>>>   struct vfio_device_info *vfio_get_device_info(int fd);
>>>   bool vfio_device_is_mdev(VFIODevice *vbasedev);
>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
>>>   bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>                           AddressSpace *as, Error **errp);
>>>   void vfio_detach_device(VFIODevice *vbasedev);
>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>> index 784e266e6aab..da12cbd56408 100644
>>> --- a/hw/vfio/common.c
>>> +++ b/hw/vfio/common.c
>>> @@ -1537,7 +1537,7 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>   {
>>>       const VFIOIOMMUClass *ops =
>>>           VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
>>> -    HostIOMMUDevice *hiod;
>>> +    HostIOMMUDevice *hiod = NULL;
>>>   
>>>       if (vbasedev->iommufd) {
>>>           ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
>>> @@ -1545,21 +1545,17 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>   
>>>       assert(ops);
>>>   
>>> -    if (!ops->attach_device(name, vbasedev, as, errp)) {
>>> -        return false;
>>> -    }
>>>   
>>> -    if (vbasedev->mdev) {
>>> -        return true;
>>> +    if (!vbasedev->mdev) {
>>> +        hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>> +        vbasedev->hiod = hiod;
>>>       }
>>>   
>>> -    hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>> -    if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp)) {
>>> +    if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>           object_unref(hiod);
>>> -        ops->detach_device(vbasedev);
>>> +        vbasedev->hiod = NULL;
>>>           return false;
>>>       }
>>> -    vbasedev->hiod = hiod;
>>>   
>>>       return true;
>>>   }
>>> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>>> index 10cb4b4320ac..9ccdb639ac84 100644
>>> --- a/hw/vfio/container.c
>>> +++ b/hw/vfio/container.c
>>> @@ -914,6 +914,10 @@ static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev,
>>>   
>>>       trace_vfio_attach_device(vbasedev->name, groupid);
>>>   
>>> +    if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>> +        return false;
>> don't you want to go to err_alloc_ioas instead?
> 
> Oh, yes, I thought I was doing that, but I am not :( Thanks for catching that
> 
> Your comment is spot on but in the wrong place.
> 
> vfio_legacy_attach_device() can just return false as it's at the top of the
> function but here (...)
> 
>>> +    }
>>> +
>>>       group = vfio_get_group(groupid, as, errp);
>>>       if (!group) {
>>>           return false;
>>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>>> index 7e23e9080c9d..ea15c79db0a3 100644
>>> --- a/hw/vfio/helpers.c
>>> +++ b/hw/vfio/helpers.c
>>> @@ -689,3 +689,14 @@ bool vfio_device_is_mdev(VFIODevice *vbasedev)
>>>       subsys = realpath(tmp, NULL);
>>>       return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
>>>   }
>>> +
>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
>>> +{
>>> +    HostIOMMUDevice *hiod = vbasedev->hiod;
>>> +
>>> +    if (!hiod) {
>>> +        return true;
>>> +    }
>>> +
>>> +    return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp);
>>> +}
>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>> index 5e2fc1ce089d..2324bf892c56 100644
>>> --- a/hw/vfio/iommufd.c
>>> +++ b/hw/vfio/iommufd.c
>>> @@ -403,6 +403,10 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
>>>   
>>>       space = vfio_get_address_space(as);
>>>   
>>> +    if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>> +        return false;
>>> +    }
>>> +
> 
> (...) we definitely need a goto err_alloc_ioas here.
> 
> Snip below:
> 
> @@ -482,7 +483,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice
> *vbasedev,
>       space = vfio_get_address_space(as);
> 
>       if (!vfio_device_hiod_realize(vbasedev, errp)) {
> -        return false;
> +        goto err_alloc_ioas;
>       }
> 
>       /* try to attach to an existing container in this space */
> 

ok. Applied the changes.


Thanks,

C.
Joao Martins July 23, 2024, 8:05 a.m. UTC | #6
On 23/07/2024 08:55, Eric Auger wrote:
> 
> 
> On 7/23/24 09:44, Cédric Le Goater wrote:
>> On 7/23/24 09:38, Eric Auger wrote:
>>> Hi Joao,
>>>
>>> On 7/22/24 23:13, Joao Martins wrote:
>>>> Move the HostIOMMUDevice::realize() to be invoked during the attach
>>>> of the device
>>>> before we allocate IOMMUFD hardware pagetable objects (HWPT). This
>>>> allows the use
>>>> of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially tell
>>>> if the IOMMU
>>>> behind the device supports dirty tracking.
>>>>
>>>> Note: The HostIOMMUDevice data from legacy backend is static and
>>>> doesn't
>>>> need any information from the (type1-iommu) backend to be initialized.
>>>> In contrast however, the IOMMUFD HostIOMMUDevice data requires the
>>>> iommufd FD to be connected and having a devid to be able to
>>>> successfully
>>> Nit: maybe this comment shall be also added in iommufd.c before the call
>>> to vfio_device_hiod_realize() to avoid someone else to move that call
>>> earlier at some point
>>>> GET_HW_INFO. This means vfio_device_hiod_realize() is called in
>>>> different places within the backend .attach_device() implementation.
>>>>
>>>> Suggested-by: Cédric Le Goater <clg@redhat.cm>
>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>> ---
>>>>   include/hw/vfio/vfio-common.h |  1 +
>>>>   hw/vfio/common.c              | 16 ++++++----------
>>>>   hw/vfio/container.c           |  4 ++++
>>>>   hw/vfio/helpers.c             | 11 +++++++++++
>>>>   hw/vfio/iommufd.c             |  4 ++++
>>>>   5 files changed, 26 insertions(+), 10 deletions(-)
>>>>
>>>> diff --git a/include/hw/vfio/vfio-common.h
>>>> b/include/hw/vfio/vfio-common.h
>>>> index 1a96678f8c38..4e44b26d3c45 100644
>>>> --- a/include/hw/vfio/vfio-common.h
>>>> +++ b/include/hw/vfio/vfio-common.h
>>>> @@ -242,6 +242,7 @@ void vfio_region_finalize(VFIORegion *region);
>>>>   void vfio_reset_handler(void *opaque);
>>>>   struct vfio_device_info *vfio_get_device_info(int fd);
>>>>   bool vfio_device_is_mdev(VFIODevice *vbasedev);
>>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
>>>>   bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>>                           AddressSpace *as, Error **errp);
>>>>   void vfio_detach_device(VFIODevice *vbasedev);
>>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>>> index 784e266e6aab..da12cbd56408 100644
>>>> --- a/hw/vfio/common.c
>>>> +++ b/hw/vfio/common.c
>>>> @@ -1537,7 +1537,7 @@ bool vfio_attach_device(char *name, VFIODevice
>>>> *vbasedev,
>>>>   {
>>>>       const VFIOIOMMUClass *ops =
>>>>          
>>>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
>>>> -    HostIOMMUDevice *hiod;
>>>> +    HostIOMMUDevice *hiod = NULL;
>>>>         if (vbasedev->iommufd) {
>>>>           ops =
>>>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
>>>> @@ -1545,21 +1545,17 @@ bool vfio_attach_device(char *name,
>>>> VFIODevice *vbasedev,
>>>>         assert(ops);
>>>>   -    if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>> -        return false;
>>>> -    }
>>>>   -    if (vbasedev->mdev) {
>>>> -        return true;
>>>> +    if (!vbasedev->mdev) {
>>>> +        hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>>> +        vbasedev->hiod = hiod;
>>>>       }
>>>>   -    hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>>> -    if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev,
>>>> errp)) {
>>>> +    if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>>           object_unref(hiod);
>>>> -        ops->detach_device(vbasedev);
>>>> +        vbasedev->hiod = NULL;
>>>>           return false;
>>>>       }
>>>> -    vbasedev->hiod = hiod;
>>>>         return true;
>>>>   }
>>>> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>>>> index 10cb4b4320ac..9ccdb639ac84 100644
>>>> --- a/hw/vfio/container.c
>>>> +++ b/hw/vfio/container.c
>>>> @@ -914,6 +914,10 @@ static bool vfio_legacy_attach_device(const
>>>> char *name, VFIODevice *vbasedev,
>>>>         trace_vfio_attach_device(vbasedev->name, groupid);
>>>>   +    if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>>> +        return false;
>>> don't you want to go to err_alloc_ioas instead?
>>
>> hmm, the err_alloc_ioas label is in a different function
>> iommufd_cdev_attach().
>>
>> may be you meant the comment for routine iommufd_cdev_attach() and
>> label err_connect_bind ?
>>
>>
>> Thanks,
>>
>> C.
>>
>>
>>>> +    }
>>>> +
>>>>       group = vfio_get_group(groupid, as, errp);
>>>>       if (!group) {
>>>>           return false;
>>>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>>>> index 7e23e9080c9d..ea15c79db0a3 100644
>>>> --- a/hw/vfio/helpers.c
>>>> +++ b/hw/vfio/helpers.c
>>>> @@ -689,3 +689,14 @@ bool vfio_device_is_mdev(VFIODevice *vbasedev)
>>>>       subsys = realpath(tmp, NULL);
>>>>       return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
>>>>   }
>>>> +
>>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
>>>> +{
>>>> +    HostIOMMUDevice *hiod = vbasedev->hiod;
>>>> +
>>>> +    if (!hiod) {
>>>> +        return true;
>>>> +    }
>>>> +
>>>> +    return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod,
>>>> vbasedev, errp);
>>>> +}
>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>> index 5e2fc1ce089d..2324bf892c56 100644
>>>> --- a/hw/vfio/iommufd.c
>>>> +++ b/hw/vfio/iommufd.c
>>>> @@ -403,6 +403,10 @@ static bool iommufd_cdev_attach(const char
>>>> *name, VFIODevice *vbasedev,
>>>>         space = vfio_get_address_space(as);
>>>>   +    if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>>> +        return false;
> Hum sorry my previous comment was targetting that place. I think
> unrolling is needed up to put_address_space
> 
> so effectively this does not match err_alloc_ioas but I guess we would
> need another label
> 

You're right. We haven't yet attached rthe device and that's what err_alloc_ioas
would do. Adding another label not sure would make things cleaner given the
ordering requirement. So maybe this instead?

@@ -482,7 +483,8 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice
*vbasedev,
     space = vfio_get_address_space(as);

     if (!vfio_device_hiod_realize(vbasedev, errp)) {
-        return false;
+        vfio_put_address_space(space);
+        goto err_connect_bind;
     }

     /* try to attach to an existing container in this space */
Cédric Le Goater July 23, 2024, 8:08 a.m. UTC | #7
On 7/23/24 10:05, Joao Martins wrote:
> On 23/07/2024 08:55, Eric Auger wrote:
>>
>>
>> On 7/23/24 09:44, Cédric Le Goater wrote:
>>> On 7/23/24 09:38, Eric Auger wrote:
>>>> Hi Joao,
>>>>
>>>> On 7/22/24 23:13, Joao Martins wrote:
>>>>> Move the HostIOMMUDevice::realize() to be invoked during the attach
>>>>> of the device
>>>>> before we allocate IOMMUFD hardware pagetable objects (HWPT). This
>>>>> allows the use
>>>>> of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially tell
>>>>> if the IOMMU
>>>>> behind the device supports dirty tracking.
>>>>>
>>>>> Note: The HostIOMMUDevice data from legacy backend is static and
>>>>> doesn't
>>>>> need any information from the (type1-iommu) backend to be initialized.
>>>>> In contrast however, the IOMMUFD HostIOMMUDevice data requires the
>>>>> iommufd FD to be connected and having a devid to be able to
>>>>> successfully
>>>> Nit: maybe this comment shall be also added in iommufd.c before the call
>>>> to vfio_device_hiod_realize() to avoid someone else to move that call
>>>> earlier at some point
>>>>> GET_HW_INFO. This means vfio_device_hiod_realize() is called in
>>>>> different places within the backend .attach_device() implementation.
>>>>>
>>>>> Suggested-by: Cédric Le Goater <clg@redhat.cm>
>>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>>> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>>> ---
>>>>>    include/hw/vfio/vfio-common.h |  1 +
>>>>>    hw/vfio/common.c              | 16 ++++++----------
>>>>>    hw/vfio/container.c           |  4 ++++
>>>>>    hw/vfio/helpers.c             | 11 +++++++++++
>>>>>    hw/vfio/iommufd.c             |  4 ++++
>>>>>    5 files changed, 26 insertions(+), 10 deletions(-)
>>>>>
>>>>> diff --git a/include/hw/vfio/vfio-common.h
>>>>> b/include/hw/vfio/vfio-common.h
>>>>> index 1a96678f8c38..4e44b26d3c45 100644
>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>> @@ -242,6 +242,7 @@ void vfio_region_finalize(VFIORegion *region);
>>>>>    void vfio_reset_handler(void *opaque);
>>>>>    struct vfio_device_info *vfio_get_device_info(int fd);
>>>>>    bool vfio_device_is_mdev(VFIODevice *vbasedev);
>>>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
>>>>>    bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>>>                            AddressSpace *as, Error **errp);
>>>>>    void vfio_detach_device(VFIODevice *vbasedev);
>>>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>>>> index 784e266e6aab..da12cbd56408 100644
>>>>> --- a/hw/vfio/common.c
>>>>> +++ b/hw/vfio/common.c
>>>>> @@ -1537,7 +1537,7 @@ bool vfio_attach_device(char *name, VFIODevice
>>>>> *vbasedev,
>>>>>    {
>>>>>        const VFIOIOMMUClass *ops =
>>>>>           
>>>>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
>>>>> -    HostIOMMUDevice *hiod;
>>>>> +    HostIOMMUDevice *hiod = NULL;
>>>>>          if (vbasedev->iommufd) {
>>>>>            ops =
>>>>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
>>>>> @@ -1545,21 +1545,17 @@ bool vfio_attach_device(char *name,
>>>>> VFIODevice *vbasedev,
>>>>>          assert(ops);
>>>>>    -    if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>>> -        return false;
>>>>> -    }
>>>>>    -    if (vbasedev->mdev) {
>>>>> -        return true;
>>>>> +    if (!vbasedev->mdev) {
>>>>> +        hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>>>> +        vbasedev->hiod = hiod;
>>>>>        }
>>>>>    -    hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>>>> -    if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev,
>>>>> errp)) {
>>>>> +    if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>>>            object_unref(hiod);
>>>>> -        ops->detach_device(vbasedev);
>>>>> +        vbasedev->hiod = NULL;
>>>>>            return false;
>>>>>        }
>>>>> -    vbasedev->hiod = hiod;
>>>>>          return true;
>>>>>    }
>>>>> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>>>>> index 10cb4b4320ac..9ccdb639ac84 100644
>>>>> --- a/hw/vfio/container.c
>>>>> +++ b/hw/vfio/container.c
>>>>> @@ -914,6 +914,10 @@ static bool vfio_legacy_attach_device(const
>>>>> char *name, VFIODevice *vbasedev,
>>>>>          trace_vfio_attach_device(vbasedev->name, groupid);
>>>>>    +    if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>>>> +        return false;
>>>> don't you want to go to err_alloc_ioas instead?
>>>
>>> hmm, the err_alloc_ioas label is in a different function
>>> iommufd_cdev_attach().
>>>
>>> may be you meant the comment for routine iommufd_cdev_attach() and
>>> label err_connect_bind ?
>>>
>>>
>>> Thanks,
>>>
>>> C.
>>>
>>>
>>>>> +    }
>>>>> +
>>>>>        group = vfio_get_group(groupid, as, errp);
>>>>>        if (!group) {
>>>>>            return false;
>>>>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>>>>> index 7e23e9080c9d..ea15c79db0a3 100644
>>>>> --- a/hw/vfio/helpers.c
>>>>> +++ b/hw/vfio/helpers.c
>>>>> @@ -689,3 +689,14 @@ bool vfio_device_is_mdev(VFIODevice *vbasedev)
>>>>>        subsys = realpath(tmp, NULL);
>>>>>        return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
>>>>>    }
>>>>> +
>>>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
>>>>> +{
>>>>> +    HostIOMMUDevice *hiod = vbasedev->hiod;
>>>>> +
>>>>> +    if (!hiod) {
>>>>> +        return true;
>>>>> +    }
>>>>> +
>>>>> +    return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod,
>>>>> vbasedev, errp);
>>>>> +}
>>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>>> index 5e2fc1ce089d..2324bf892c56 100644
>>>>> --- a/hw/vfio/iommufd.c
>>>>> +++ b/hw/vfio/iommufd.c
>>>>> @@ -403,6 +403,10 @@ static bool iommufd_cdev_attach(const char
>>>>> *name, VFIODevice *vbasedev,
>>>>>          space = vfio_get_address_space(as);
>>>>>    +    if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>>>> +        return false;
>> Hum sorry my previous comment was targetting that place. I think
>> unrolling is needed up to put_address_space
>>
>> so effectively this does not match err_alloc_ioas but I guess we would
>> need another label
>>
> 
> You're right. We haven't yet attached rthe device and that's what err_alloc_ioas
> would do. Adding another label not sure would make things cleaner given the
> ordering requirement. So maybe this instead?
> 
> @@ -482,7 +483,8 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice
> *vbasedev,
>       space = vfio_get_address_space(as);
> 
>       if (!vfio_device_hiod_realize(vbasedev, errp)) {
> -        return false;
> +        vfio_put_address_space(space);
> +        goto err_connect_bind;
>       }
> 
>       /* try to attach to an existing container in this space */
> 

LGTM.


Thanks,

C.
Eric Auger July 23, 2024, 8:10 a.m. UTC | #8
On 7/23/24 10:05, Joao Martins wrote:
> On 23/07/2024 08:55, Eric Auger wrote:
>>
>> On 7/23/24 09:44, Cédric Le Goater wrote:
>>> On 7/23/24 09:38, Eric Auger wrote:
>>>> Hi Joao,
>>>>
>>>> On 7/22/24 23:13, Joao Martins wrote:
>>>>> Move the HostIOMMUDevice::realize() to be invoked during the attach
>>>>> of the device
>>>>> before we allocate IOMMUFD hardware pagetable objects (HWPT). This
>>>>> allows the use
>>>>> of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially tell
>>>>> if the IOMMU
>>>>> behind the device supports dirty tracking.
>>>>>
>>>>> Note: The HostIOMMUDevice data from legacy backend is static and
>>>>> doesn't
>>>>> need any information from the (type1-iommu) backend to be initialized.
>>>>> In contrast however, the IOMMUFD HostIOMMUDevice data requires the
>>>>> iommufd FD to be connected and having a devid to be able to
>>>>> successfully
>>>> Nit: maybe this comment shall be also added in iommufd.c before the call
>>>> to vfio_device_hiod_realize() to avoid someone else to move that call
>>>> earlier at some point
>>>>> GET_HW_INFO. This means vfio_device_hiod_realize() is called in
>>>>> different places within the backend .attach_device() implementation.
>>>>>
>>>>> Suggested-by: Cédric Le Goater <clg@redhat.cm>
>>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>>> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>>> ---
>>>>>   include/hw/vfio/vfio-common.h |  1 +
>>>>>   hw/vfio/common.c              | 16 ++++++----------
>>>>>   hw/vfio/container.c           |  4 ++++
>>>>>   hw/vfio/helpers.c             | 11 +++++++++++
>>>>>   hw/vfio/iommufd.c             |  4 ++++
>>>>>   5 files changed, 26 insertions(+), 10 deletions(-)
>>>>>
>>>>> diff --git a/include/hw/vfio/vfio-common.h
>>>>> b/include/hw/vfio/vfio-common.h
>>>>> index 1a96678f8c38..4e44b26d3c45 100644
>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>> @@ -242,6 +242,7 @@ void vfio_region_finalize(VFIORegion *region);
>>>>>   void vfio_reset_handler(void *opaque);
>>>>>   struct vfio_device_info *vfio_get_device_info(int fd);
>>>>>   bool vfio_device_is_mdev(VFIODevice *vbasedev);
>>>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
>>>>>   bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>>>                           AddressSpace *as, Error **errp);
>>>>>   void vfio_detach_device(VFIODevice *vbasedev);
>>>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>>>> index 784e266e6aab..da12cbd56408 100644
>>>>> --- a/hw/vfio/common.c
>>>>> +++ b/hw/vfio/common.c
>>>>> @@ -1537,7 +1537,7 @@ bool vfio_attach_device(char *name, VFIODevice
>>>>> *vbasedev,
>>>>>   {
>>>>>       const VFIOIOMMUClass *ops =
>>>>>          
>>>>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
>>>>> -    HostIOMMUDevice *hiod;
>>>>> +    HostIOMMUDevice *hiod = NULL;
>>>>>         if (vbasedev->iommufd) {
>>>>>           ops =
>>>>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
>>>>> @@ -1545,21 +1545,17 @@ bool vfio_attach_device(char *name,
>>>>> VFIODevice *vbasedev,
>>>>>         assert(ops);
>>>>>   -    if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>>> -        return false;
>>>>> -    }
>>>>>   -    if (vbasedev->mdev) {
>>>>> -        return true;
>>>>> +    if (!vbasedev->mdev) {
>>>>> +        hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>>>> +        vbasedev->hiod = hiod;
>>>>>       }
>>>>>   -    hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>>>> -    if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev,
>>>>> errp)) {
>>>>> +    if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>>>           object_unref(hiod);
>>>>> -        ops->detach_device(vbasedev);
>>>>> +        vbasedev->hiod = NULL;
>>>>>           return false;
>>>>>       }
>>>>> -    vbasedev->hiod = hiod;
>>>>>         return true;
>>>>>   }
>>>>> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>>>>> index 10cb4b4320ac..9ccdb639ac84 100644
>>>>> --- a/hw/vfio/container.c
>>>>> +++ b/hw/vfio/container.c
>>>>> @@ -914,6 +914,10 @@ static bool vfio_legacy_attach_device(const
>>>>> char *name, VFIODevice *vbasedev,
>>>>>         trace_vfio_attach_device(vbasedev->name, groupid);
>>>>>   +    if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>>>> +        return false;
>>>> don't you want to go to err_alloc_ioas instead?
>>> hmm, the err_alloc_ioas label is in a different function
>>> iommufd_cdev_attach().
>>>
>>> may be you meant the comment for routine iommufd_cdev_attach() and
>>> label err_connect_bind ?
>>>
>>>
>>> Thanks,
>>>
>>> C.
>>>
>>>
>>>>> +    }
>>>>> +
>>>>>       group = vfio_get_group(groupid, as, errp);
>>>>>       if (!group) {
>>>>>           return false;
>>>>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>>>>> index 7e23e9080c9d..ea15c79db0a3 100644
>>>>> --- a/hw/vfio/helpers.c
>>>>> +++ b/hw/vfio/helpers.c
>>>>> @@ -689,3 +689,14 @@ bool vfio_device_is_mdev(VFIODevice *vbasedev)
>>>>>       subsys = realpath(tmp, NULL);
>>>>>       return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
>>>>>   }
>>>>> +
>>>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
>>>>> +{
>>>>> +    HostIOMMUDevice *hiod = vbasedev->hiod;
>>>>> +
>>>>> +    if (!hiod) {
>>>>> +        return true;
>>>>> +    }
>>>>> +
>>>>> +    return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod,
>>>>> vbasedev, errp);
>>>>> +}
>>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>>> index 5e2fc1ce089d..2324bf892c56 100644
>>>>> --- a/hw/vfio/iommufd.c
>>>>> +++ b/hw/vfio/iommufd.c
>>>>> @@ -403,6 +403,10 @@ static bool iommufd_cdev_attach(const char
>>>>> *name, VFIODevice *vbasedev,
>>>>>         space = vfio_get_address_space(as);
>>>>>   +    if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>>>> +        return false;
>> Hum sorry my previous comment was targetting that place. I think
>> unrolling is needed up to put_address_space
>>
>> so effectively this does not match err_alloc_ioas but I guess we would
>> need another label
>>
> You're right. We haven't yet attached rthe device and that's what err_alloc_ioas
> would do. Adding another label not sure would make things cleaner given the
> ordering requirement. So maybe this instead?
>
> @@ -482,7 +483,8 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice
> *vbasedev,
>      space = vfio_get_address_space(as);
>
>      if (!vfio_device_hiod_realize(vbasedev, errp)) {
> -        return false;
> +        vfio_put_address_space(space);
> +        goto err_connect_bind;
>      }
>
>      /* try to attach to an existing container in this space */
>
With that addition
Reviewed-by: Eric Auger <eric.auger@redhat.com>

Eric
Duan, Zhenzhong July 23, 2024, 8:20 a.m. UTC | #9
>-----Original Message-----
>From: Joao Martins <joao.m.martins@oracle.com>
>Subject: Re: [PATCH v6 4/9] vfio/{iommufd,container}: Invoke
>HostIOMMUDevice::realize() during attach_device()
>
>On 23/07/2024 08:55, Eric Auger wrote:
>>
>>
>> On 7/23/24 09:44, Cédric Le Goater wrote:
>>> On 7/23/24 09:38, Eric Auger wrote:
>>>> Hi Joao,
>>>>
>>>> On 7/22/24 23:13, Joao Martins wrote:
>>>>> Move the HostIOMMUDevice::realize() to be invoked during the attach
>>>>> of the device
>>>>> before we allocate IOMMUFD hardware pagetable objects (HWPT). This
>>>>> allows the use
>>>>> of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially
>tell
>>>>> if the IOMMU
>>>>> behind the device supports dirty tracking.
>>>>>
>>>>> Note: The HostIOMMUDevice data from legacy backend is static and
>>>>> doesn't
>>>>> need any information from the (type1-iommu) backend to be
>initialized.
>>>>> In contrast however, the IOMMUFD HostIOMMUDevice data requires
>the
>>>>> iommufd FD to be connected and having a devid to be able to
>>>>> successfully
>>>> Nit: maybe this comment shall be also added in iommufd.c before the
>call
>>>> to vfio_device_hiod_realize() to avoid someone else to move that call
>>>> earlier at some point
>>>>> GET_HW_INFO. This means vfio_device_hiod_realize() is called in
>>>>> different places within the backend .attach_device() implementation.
>>>>>
>>>>> Suggested-by: Cédric Le Goater <clg@redhat.cm>
>>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>>> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>>> ---
>>>>>   include/hw/vfio/vfio-common.h |  1 +
>>>>>   hw/vfio/common.c              | 16 ++++++----------
>>>>>   hw/vfio/container.c           |  4 ++++
>>>>>   hw/vfio/helpers.c             | 11 +++++++++++
>>>>>   hw/vfio/iommufd.c             |  4 ++++
>>>>>   5 files changed, 26 insertions(+), 10 deletions(-)
>>>>>
>>>>> diff --git a/include/hw/vfio/vfio-common.h
>>>>> b/include/hw/vfio/vfio-common.h
>>>>> index 1a96678f8c38..4e44b26d3c45 100644
>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>> @@ -242,6 +242,7 @@ void vfio_region_finalize(VFIORegion *region);
>>>>>   void vfio_reset_handler(void *opaque);
>>>>>   struct vfio_device_info *vfio_get_device_info(int fd);
>>>>>   bool vfio_device_is_mdev(VFIODevice *vbasedev);
>>>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
>>>>>   bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>>>                           AddressSpace *as, Error **errp);
>>>>>   void vfio_detach_device(VFIODevice *vbasedev);
>>>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>>>> index 784e266e6aab..da12cbd56408 100644
>>>>> --- a/hw/vfio/common.c
>>>>> +++ b/hw/vfio/common.c
>>>>> @@ -1537,7 +1537,7 @@ bool vfio_attach_device(char *name,
>VFIODevice
>>>>> *vbasedev,
>>>>>   {
>>>>>       const VFIOIOMMUClass *ops =
>>>>>
>>>>>
>VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
>>>>> -    HostIOMMUDevice *hiod;
>>>>> +    HostIOMMUDevice *hiod = NULL;
>>>>>         if (vbasedev->iommufd) {
>>>>>           ops =
>>>>>
>VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUF
>D));
>>>>> @@ -1545,21 +1545,17 @@ bool vfio_attach_device(char *name,
>>>>> VFIODevice *vbasedev,
>>>>>         assert(ops);
>>>>>   -    if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>>> -        return false;
>>>>> -    }
>>>>>   -    if (vbasedev->mdev) {
>>>>> -        return true;
>>>>> +    if (!vbasedev->mdev) {
>>>>> +        hiod = HOST_IOMMU_DEVICE(object_new(ops-
>>hiod_typename));
>>>>> +        vbasedev->hiod = hiod;
>>>>>       }
>>>>>   -    hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>>>> -    if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod,
>vbasedev,
>>>>> errp)) {
>>>>> +    if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>>>           object_unref(hiod);
>>>>> -        ops->detach_device(vbasedev);
>>>>> +        vbasedev->hiod = NULL;
>>>>>           return false;
>>>>>       }
>>>>> -    vbasedev->hiod = hiod;
>>>>>         return true;
>>>>>   }
>>>>> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>>>>> index 10cb4b4320ac..9ccdb639ac84 100644
>>>>> --- a/hw/vfio/container.c
>>>>> +++ b/hw/vfio/container.c
>>>>> @@ -914,6 +914,10 @@ static bool vfio_legacy_attach_device(const
>>>>> char *name, VFIODevice *vbasedev,
>>>>>         trace_vfio_attach_device(vbasedev->name, groupid);
>>>>>   +    if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>>>> +        return false;
>>>> don't you want to go to err_alloc_ioas instead?
>>>
>>> hmm, the err_alloc_ioas label is in a different function
>>> iommufd_cdev_attach().
>>>
>>> may be you meant the comment for routine iommufd_cdev_attach() and
>>> label err_connect_bind ?
>>>
>>>
>>> Thanks,
>>>
>>> C.
>>>
>>>
>>>>> +    }
>>>>> +
>>>>>       group = vfio_get_group(groupid, as, errp);
>>>>>       if (!group) {
>>>>>           return false;
>>>>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>>>>> index 7e23e9080c9d..ea15c79db0a3 100644
>>>>> --- a/hw/vfio/helpers.c
>>>>> +++ b/hw/vfio/helpers.c
>>>>> @@ -689,3 +689,14 @@ bool vfio_device_is_mdev(VFIODevice
>*vbasedev)
>>>>>       subsys = realpath(tmp, NULL);
>>>>>       return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
>>>>>   }
>>>>> +
>>>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
>>>>> +{
>>>>> +    HostIOMMUDevice *hiod = vbasedev->hiod;
>>>>> +
>>>>> +    if (!hiod) {
>>>>> +        return true;
>>>>> +    }
>>>>> +
>>>>> +    return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod,
>>>>> vbasedev, errp);
>>>>> +}
>>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>>> index 5e2fc1ce089d..2324bf892c56 100644
>>>>> --- a/hw/vfio/iommufd.c
>>>>> +++ b/hw/vfio/iommufd.c
>>>>> @@ -403,6 +403,10 @@ static bool iommufd_cdev_attach(const char
>>>>> *name, VFIODevice *vbasedev,
>>>>>         space = vfio_get_address_space(as);
>>>>>   +    if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>>>> +        return false;
>> Hum sorry my previous comment was targetting that place. I think
>> unrolling is needed up to put_address_space
>>
>> so effectively this does not match err_alloc_ioas but I guess we would
>> need another label
>>
>
>You're right. We haven't yet attached rthe device and that's what
>err_alloc_ioas
>would do. Adding another label not sure would make things cleaner given
>the
>ordering requirement. So maybe this instead?
>
>@@ -482,7 +483,8 @@ static bool iommufd_cdev_attach(const char *name,
>VFIODevice
>*vbasedev,
>     space = vfio_get_address_space(as);
>
>     if (!vfio_device_hiod_realize(vbasedev, errp)) {
>-        return false;
>+        vfio_put_address_space(space);
>+        goto err_connect_bind;
>     }
>
>     /* try to attach to an existing container in this space */

I was confused though Cedric and Eric both ACK this change. Don't we miss the iommufd_cdev_unbind_and_disconnect() call?
Eric Auger July 23, 2024, 8:24 a.m. UTC | #10
On 7/23/24 10:20, Duan, Zhenzhong wrote:
>
>> -----Original Message-----
>> From: Joao Martins <joao.m.martins@oracle.com>
>> Subject: Re: [PATCH v6 4/9] vfio/{iommufd,container}: Invoke
>> HostIOMMUDevice::realize() during attach_device()
>>
>> On 23/07/2024 08:55, Eric Auger wrote:
>>>
>>> On 7/23/24 09:44, Cédric Le Goater wrote:
>>>> On 7/23/24 09:38, Eric Auger wrote:
>>>>> Hi Joao,
>>>>>
>>>>> On 7/22/24 23:13, Joao Martins wrote:
>>>>>> Move the HostIOMMUDevice::realize() to be invoked during the attach
>>>>>> of the device
>>>>>> before we allocate IOMMUFD hardware pagetable objects (HWPT). This
>>>>>> allows the use
>>>>>> of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially
>> tell
>>>>>> if the IOMMU
>>>>>> behind the device supports dirty tracking.
>>>>>>
>>>>>> Note: The HostIOMMUDevice data from legacy backend is static and
>>>>>> doesn't
>>>>>> need any information from the (type1-iommu) backend to be
>> initialized.
>>>>>> In contrast however, the IOMMUFD HostIOMMUDevice data requires
>> the
>>>>>> iommufd FD to be connected and having a devid to be able to
>>>>>> successfully
>>>>> Nit: maybe this comment shall be also added in iommufd.c before the
>> call
>>>>> to vfio_device_hiod_realize() to avoid someone else to move that call
>>>>> earlier at some point
>>>>>> GET_HW_INFO. This means vfio_device_hiod_realize() is called in
>>>>>> different places within the backend .attach_device() implementation.
>>>>>>
>>>>>> Suggested-by: Cédric Le Goater <clg@redhat.cm>
>>>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>>>> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>>>> ---
>>>>>>   include/hw/vfio/vfio-common.h |  1 +
>>>>>>   hw/vfio/common.c              | 16 ++++++----------
>>>>>>   hw/vfio/container.c           |  4 ++++
>>>>>>   hw/vfio/helpers.c             | 11 +++++++++++
>>>>>>   hw/vfio/iommufd.c             |  4 ++++
>>>>>>   5 files changed, 26 insertions(+), 10 deletions(-)
>>>>>>
>>>>>> diff --git a/include/hw/vfio/vfio-common.h
>>>>>> b/include/hw/vfio/vfio-common.h
>>>>>> index 1a96678f8c38..4e44b26d3c45 100644
>>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>>> @@ -242,6 +242,7 @@ void vfio_region_finalize(VFIORegion *region);
>>>>>>   void vfio_reset_handler(void *opaque);
>>>>>>   struct vfio_device_info *vfio_get_device_info(int fd);
>>>>>>   bool vfio_device_is_mdev(VFIODevice *vbasedev);
>>>>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
>>>>>>   bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>>>>                           AddressSpace *as, Error **errp);
>>>>>>   void vfio_detach_device(VFIODevice *vbasedev);
>>>>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>>>>> index 784e266e6aab..da12cbd56408 100644
>>>>>> --- a/hw/vfio/common.c
>>>>>> +++ b/hw/vfio/common.c
>>>>>> @@ -1537,7 +1537,7 @@ bool vfio_attach_device(char *name,
>> VFIODevice
>>>>>> *vbasedev,
>>>>>>   {
>>>>>>       const VFIOIOMMUClass *ops =
>>>>>>
>>>>>>
>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
>>>>>> -    HostIOMMUDevice *hiod;
>>>>>> +    HostIOMMUDevice *hiod = NULL;
>>>>>>         if (vbasedev->iommufd) {
>>>>>>           ops =
>>>>>>
>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUF
>> D));
>>>>>> @@ -1545,21 +1545,17 @@ bool vfio_attach_device(char *name,
>>>>>> VFIODevice *vbasedev,
>>>>>>         assert(ops);
>>>>>>   -    if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>>>> -        return false;
>>>>>> -    }
>>>>>>   -    if (vbasedev->mdev) {
>>>>>> -        return true;
>>>>>> +    if (!vbasedev->mdev) {
>>>>>> +        hiod = HOST_IOMMU_DEVICE(object_new(ops-
>>> hiod_typename));
>>>>>> +        vbasedev->hiod = hiod;
>>>>>>       }
>>>>>>   -    hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>>>>> -    if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod,
>> vbasedev,
>>>>>> errp)) {
>>>>>> +    if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>>>>           object_unref(hiod);
>>>>>> -        ops->detach_device(vbasedev);
>>>>>> +        vbasedev->hiod = NULL;
>>>>>>           return false;
>>>>>>       }
>>>>>> -    vbasedev->hiod = hiod;
>>>>>>         return true;
>>>>>>   }
>>>>>> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>>>>>> index 10cb4b4320ac..9ccdb639ac84 100644
>>>>>> --- a/hw/vfio/container.c
>>>>>> +++ b/hw/vfio/container.c
>>>>>> @@ -914,6 +914,10 @@ static bool vfio_legacy_attach_device(const
>>>>>> char *name, VFIODevice *vbasedev,
>>>>>>         trace_vfio_attach_device(vbasedev->name, groupid);
>>>>>>   +    if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>>>>> +        return false;
>>>>> don't you want to go to err_alloc_ioas instead?
>>>> hmm, the err_alloc_ioas label is in a different function
>>>> iommufd_cdev_attach().
>>>>
>>>> may be you meant the comment for routine iommufd_cdev_attach() and
>>>> label err_connect_bind ?
>>>>
>>>>
>>>> Thanks,
>>>>
>>>> C.
>>>>
>>>>
>>>>>> +    }
>>>>>> +
>>>>>>       group = vfio_get_group(groupid, as, errp);
>>>>>>       if (!group) {
>>>>>>           return false;
>>>>>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>>>>>> index 7e23e9080c9d..ea15c79db0a3 100644
>>>>>> --- a/hw/vfio/helpers.c
>>>>>> +++ b/hw/vfio/helpers.c
>>>>>> @@ -689,3 +689,14 @@ bool vfio_device_is_mdev(VFIODevice
>> *vbasedev)
>>>>>>       subsys = realpath(tmp, NULL);
>>>>>>       return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
>>>>>>   }
>>>>>> +
>>>>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
>>>>>> +{
>>>>>> +    HostIOMMUDevice *hiod = vbasedev->hiod;
>>>>>> +
>>>>>> +    if (!hiod) {
>>>>>> +        return true;
>>>>>> +    }
>>>>>> +
>>>>>> +    return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod,
>>>>>> vbasedev, errp);
>>>>>> +}
>>>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>>>> index 5e2fc1ce089d..2324bf892c56 100644
>>>>>> --- a/hw/vfio/iommufd.c
>>>>>> +++ b/hw/vfio/iommufd.c
>>>>>> @@ -403,6 +403,10 @@ static bool iommufd_cdev_attach(const char
>>>>>> *name, VFIODevice *vbasedev,
>>>>>>         space = vfio_get_address_space(as);
>>>>>>   +    if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>>>>> +        return false;
>>> Hum sorry my previous comment was targetting that place. I think
>>> unrolling is needed up to put_address_space
>>>
>>> so effectively this does not match err_alloc_ioas but I guess we would
>>> need another label
>>>
>> You're right. We haven't yet attached rthe device and that's what
>> err_alloc_ioas
>> would do. Adding another label not sure would make things cleaner given
>> the
>> ordering requirement. So maybe this instead?
>>
>> @@ -482,7 +483,8 @@ static bool iommufd_cdev_attach(const char *name,
>> VFIODevice
>> *vbasedev,
>>     space = vfio_get_address_space(as);
>>
>>     if (!vfio_device_hiod_realize(vbasedev, errp)) {
>> -        return false;
>> +        vfio_put_address_space(space);
>> +        goto err_connect_bind;
>>     }
>>
>>     /* try to attach to an existing container in this space */
> I was confused though Cedric and Eric both ACK this change. Don't we miss the iommufd_cdev_unbind_and_disconnect() call?
Hum yes you're right. connect and bind was done. I thought this was done
later. so err_alloc_ioas label looks good

Eric
>
Joao Martins July 23, 2024, 8:26 a.m. UTC | #11
On 23/07/2024 09:24, Eric Auger wrote:
> 
> 
> On 7/23/24 10:20, Duan, Zhenzhong wrote:
>>
>>> -----Original Message-----
>>> From: Joao Martins <joao.m.martins@oracle.com>
>>> Subject: Re: [PATCH v6 4/9] vfio/{iommufd,container}: Invoke
>>> HostIOMMUDevice::realize() during attach_device()
>>>
>>> On 23/07/2024 08:55, Eric Auger wrote:
>>>>
>>>> On 7/23/24 09:44, Cédric Le Goater wrote:
>>>>> On 7/23/24 09:38, Eric Auger wrote:
>>>>>> Hi Joao,
>>>>>>
>>>>>> On 7/22/24 23:13, Joao Martins wrote:
>>>>>>> Move the HostIOMMUDevice::realize() to be invoked during the attach
>>>>>>> of the device
>>>>>>> before we allocate IOMMUFD hardware pagetable objects (HWPT). This
>>>>>>> allows the use
>>>>>>> of the hw_caps obtained by IOMMU_GET_HW_INFO that essentially
>>> tell
>>>>>>> if the IOMMU
>>>>>>> behind the device supports dirty tracking.
>>>>>>>
>>>>>>> Note: The HostIOMMUDevice data from legacy backend is static and
>>>>>>> doesn't
>>>>>>> need any information from the (type1-iommu) backend to be
>>> initialized.
>>>>>>> In contrast however, the IOMMUFD HostIOMMUDevice data requires
>>> the
>>>>>>> iommufd FD to be connected and having a devid to be able to
>>>>>>> successfully
>>>>>> Nit: maybe this comment shall be also added in iommufd.c before the
>>> call
>>>>>> to vfio_device_hiod_realize() to avoid someone else to move that call
>>>>>> earlier at some point
>>>>>>> GET_HW_INFO. This means vfio_device_hiod_realize() is called in
>>>>>>> different places within the backend .attach_device() implementation.
>>>>>>>
>>>>>>> Suggested-by: Cédric Le Goater <clg@redhat.cm>
>>>>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>>>>> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>>>>> ---
>>>>>>>   include/hw/vfio/vfio-common.h |  1 +
>>>>>>>   hw/vfio/common.c              | 16 ++++++----------
>>>>>>>   hw/vfio/container.c           |  4 ++++
>>>>>>>   hw/vfio/helpers.c             | 11 +++++++++++
>>>>>>>   hw/vfio/iommufd.c             |  4 ++++
>>>>>>>   5 files changed, 26 insertions(+), 10 deletions(-)
>>>>>>>
>>>>>>> diff --git a/include/hw/vfio/vfio-common.h
>>>>>>> b/include/hw/vfio/vfio-common.h
>>>>>>> index 1a96678f8c38..4e44b26d3c45 100644
>>>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>>>> @@ -242,6 +242,7 @@ void vfio_region_finalize(VFIORegion *region);
>>>>>>>   void vfio_reset_handler(void *opaque);
>>>>>>>   struct vfio_device_info *vfio_get_device_info(int fd);
>>>>>>>   bool vfio_device_is_mdev(VFIODevice *vbasedev);
>>>>>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
>>>>>>>   bool vfio_attach_device(char *name, VFIODevice *vbasedev,
>>>>>>>                           AddressSpace *as, Error **errp);
>>>>>>>   void vfio_detach_device(VFIODevice *vbasedev);
>>>>>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>>>>>> index 784e266e6aab..da12cbd56408 100644
>>>>>>> --- a/hw/vfio/common.c
>>>>>>> +++ b/hw/vfio/common.c
>>>>>>> @@ -1537,7 +1537,7 @@ bool vfio_attach_device(char *name,
>>> VFIODevice
>>>>>>> *vbasedev,
>>>>>>>   {
>>>>>>>       const VFIOIOMMUClass *ops =
>>>>>>>
>>>>>>>
>>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
>>>>>>> -    HostIOMMUDevice *hiod;
>>>>>>> +    HostIOMMUDevice *hiod = NULL;
>>>>>>>         if (vbasedev->iommufd) {
>>>>>>>           ops =
>>>>>>>
>>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUF
>>> D));
>>>>>>> @@ -1545,21 +1545,17 @@ bool vfio_attach_device(char *name,
>>>>>>> VFIODevice *vbasedev,
>>>>>>>         assert(ops);
>>>>>>>   -    if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>>>>> -        return false;
>>>>>>> -    }
>>>>>>>   -    if (vbasedev->mdev) {
>>>>>>> -        return true;
>>>>>>> +    if (!vbasedev->mdev) {
>>>>>>> +        hiod = HOST_IOMMU_DEVICE(object_new(ops-
>>>> hiod_typename));
>>>>>>> +        vbasedev->hiod = hiod;
>>>>>>>       }
>>>>>>>   -    hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
>>>>>>> -    if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod,
>>> vbasedev,
>>>>>>> errp)) {
>>>>>>> +    if (!ops->attach_device(name, vbasedev, as, errp)) {
>>>>>>>           object_unref(hiod);
>>>>>>> -        ops->detach_device(vbasedev);
>>>>>>> +        vbasedev->hiod = NULL;
>>>>>>>           return false;
>>>>>>>       }
>>>>>>> -    vbasedev->hiod = hiod;
>>>>>>>         return true;
>>>>>>>   }
>>>>>>> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>>>>>>> index 10cb4b4320ac..9ccdb639ac84 100644
>>>>>>> --- a/hw/vfio/container.c
>>>>>>> +++ b/hw/vfio/container.c
>>>>>>> @@ -914,6 +914,10 @@ static bool vfio_legacy_attach_device(const
>>>>>>> char *name, VFIODevice *vbasedev,
>>>>>>>         trace_vfio_attach_device(vbasedev->name, groupid);
>>>>>>>   +    if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>>>>>> +        return false;
>>>>>> don't you want to go to err_alloc_ioas instead?
>>>>> hmm, the err_alloc_ioas label is in a different function
>>>>> iommufd_cdev_attach().
>>>>>
>>>>> may be you meant the comment for routine iommufd_cdev_attach() and
>>>>> label err_connect_bind ?
>>>>>
>>>>>
>>>>> Thanks,
>>>>>
>>>>> C.
>>>>>
>>>>>
>>>>>>> +    }
>>>>>>> +
>>>>>>>       group = vfio_get_group(groupid, as, errp);
>>>>>>>       if (!group) {
>>>>>>>           return false;
>>>>>>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
>>>>>>> index 7e23e9080c9d..ea15c79db0a3 100644
>>>>>>> --- a/hw/vfio/helpers.c
>>>>>>> +++ b/hw/vfio/helpers.c
>>>>>>> @@ -689,3 +689,14 @@ bool vfio_device_is_mdev(VFIODevice
>>> *vbasedev)
>>>>>>>       subsys = realpath(tmp, NULL);
>>>>>>>       return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
>>>>>>>   }
>>>>>>> +
>>>>>>> +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
>>>>>>> +{
>>>>>>> +    HostIOMMUDevice *hiod = vbasedev->hiod;
>>>>>>> +
>>>>>>> +    if (!hiod) {
>>>>>>> +        return true;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod,
>>>>>>> vbasedev, errp);
>>>>>>> +}
>>>>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>>>>> index 5e2fc1ce089d..2324bf892c56 100644
>>>>>>> --- a/hw/vfio/iommufd.c
>>>>>>> +++ b/hw/vfio/iommufd.c
>>>>>>> @@ -403,6 +403,10 @@ static bool iommufd_cdev_attach(const char
>>>>>>> *name, VFIODevice *vbasedev,
>>>>>>>         space = vfio_get_address_space(as);
>>>>>>>   +    if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>>>>>> +        return false;
>>>> Hum sorry my previous comment was targetting that place. I think
>>>> unrolling is needed up to put_address_space
>>>>
>>>> so effectively this does not match err_alloc_ioas but I guess we would
>>>> need another label
>>>>
>>> You're right. We haven't yet attached rthe device and that's what
>>> err_alloc_ioas
>>> would do. Adding another label not sure would make things cleaner given
>>> the
>>> ordering requirement. So maybe this instead?
>>>
>>> @@ -482,7 +483,8 @@ static bool iommufd_cdev_attach(const char *name,
>>> VFIODevice
>>> *vbasedev,
>>>     space = vfio_get_address_space(as);
>>>
>>>     if (!vfio_device_hiod_realize(vbasedev, errp)) {
>>> -        return false;
>>> +        vfio_put_address_space(space);
>>> +        goto err_connect_bind;
>>>     }
>>>
>>>     /* try to attach to an existing container in this space */
>> I was confused though Cedric and Eric both ACK this change. Don't we miss the iommufd_cdev_unbind_and_disconnect() call?
> Hum yes you're right. connect and bind was done. I thought this was done
> later. so err_alloc_ioas label looks good
> 
It seems you were right the first time.

I definitely haven't got coffee yet.
diff mbox series

Patch

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 1a96678f8c38..4e44b26d3c45 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -242,6 +242,7 @@  void vfio_region_finalize(VFIORegion *region);
 void vfio_reset_handler(void *opaque);
 struct vfio_device_info *vfio_get_device_info(int fd);
 bool vfio_device_is_mdev(VFIODevice *vbasedev);
+bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
 bool vfio_attach_device(char *name, VFIODevice *vbasedev,
                         AddressSpace *as, Error **errp);
 void vfio_detach_device(VFIODevice *vbasedev);
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 784e266e6aab..da12cbd56408 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1537,7 +1537,7 @@  bool vfio_attach_device(char *name, VFIODevice *vbasedev,
 {
     const VFIOIOMMUClass *ops =
         VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
-    HostIOMMUDevice *hiod;
+    HostIOMMUDevice *hiod = NULL;
 
     if (vbasedev->iommufd) {
         ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
@@ -1545,21 +1545,17 @@  bool vfio_attach_device(char *name, VFIODevice *vbasedev,
 
     assert(ops);
 
-    if (!ops->attach_device(name, vbasedev, as, errp)) {
-        return false;
-    }
 
-    if (vbasedev->mdev) {
-        return true;
+    if (!vbasedev->mdev) {
+        hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
+        vbasedev->hiod = hiod;
     }
 
-    hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
-    if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp)) {
+    if (!ops->attach_device(name, vbasedev, as, errp)) {
         object_unref(hiod);
-        ops->detach_device(vbasedev);
+        vbasedev->hiod = NULL;
         return false;
     }
-    vbasedev->hiod = hiod;
 
     return true;
 }
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 10cb4b4320ac..9ccdb639ac84 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -914,6 +914,10 @@  static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev,
 
     trace_vfio_attach_device(vbasedev->name, groupid);
 
+    if (!vfio_device_hiod_realize(vbasedev, errp)) {
+        return false;
+    }
+
     group = vfio_get_group(groupid, as, errp);
     if (!group) {
         return false;
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index 7e23e9080c9d..ea15c79db0a3 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -689,3 +689,14 @@  bool vfio_device_is_mdev(VFIODevice *vbasedev)
     subsys = realpath(tmp, NULL);
     return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
 }
+
+bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
+{
+    HostIOMMUDevice *hiod = vbasedev->hiod;
+
+    if (!hiod) {
+        return true;
+    }
+
+    return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp);
+}
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 5e2fc1ce089d..2324bf892c56 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -403,6 +403,10 @@  static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
 
     space = vfio_get_address_space(as);
 
+    if (!vfio_device_hiod_realize(vbasedev, errp)) {
+        return false;
+    }
+
     /* try to attach to an existing container in this space */
     QLIST_FOREACH(bcontainer, &space->containers, next) {
         container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);