diff mbox series

[v5,12/13] vfio/migration: Don't block migration device dirty tracking is unsupported

Message ID 20240719120501.81279-13-joao.m.martins@oracle.com (mailing list archive)
State New, archived
Headers show
Series hw/iommufd: IOMMUFD Dirty Tracking | expand

Commit Message

Joao Martins July 19, 2024, 12:05 p.m. UTC
By default VFIO migration is set to auto, which will support live
migration if the migration capability is set *and* also dirty page
tracking is supported.

For testing purposes one can force enable without dirty page tracking
via enable-migration=on, but that option is generally left for testing
purposes.

So starting with IOMMU dirty tracking it can use to accomodate the lack of
VF dirty page tracking allowing us to minimize the VF requirements for
migration and thus enabling migration by default for those too.

While at it change the error messages to mention IOMMU dirty tracking as
well.

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
 include/hw/vfio/vfio-common.h |  1 +
 hw/vfio/iommufd.c             |  2 +-
 hw/vfio/migration.c           | 11 ++++++-----
 3 files changed, 8 insertions(+), 6 deletions(-)

Comments

Cédric Le Goater July 19, 2024, 2:17 p.m. UTC | #1
On 7/19/24 14:05, Joao Martins wrote:
> By default VFIO migration is set to auto, which will support live
> migration if the migration capability is set *and* also dirty page
> tracking is supported.
> 
> For testing purposes one can force enable without dirty page tracking
> via enable-migration=on, but that option is generally left for testing
> purposes.
> 
> So starting with IOMMU dirty tracking it can use to accomodate the lack of
> VF dirty page tracking allowing us to minimize the VF requirements for
> migration and thus enabling migration by default for those too.
> 
> While at it change the error messages to mention IOMMU dirty tracking as
> well.
> 
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> ---
>   include/hw/vfio/vfio-common.h |  1 +
>   hw/vfio/iommufd.c             |  2 +-
>   hw/vfio/migration.c           | 11 ++++++-----
>   3 files changed, 8 insertions(+), 6 deletions(-)
> 
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 7e530c7869dc..00b9e933449e 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -299,6 +299,7 @@ int vfio_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
>                   VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
>   int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
>                             uint64_t size, ram_addr_t ram_addr, Error **errp);
> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>   
>   /* Returns 0 on success, or a negative errno. */
>   bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 7dd5d43ce06a..a998e8578552 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -111,7 +111,7 @@ static void iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev)
>       iommufd_backend_disconnect(vbasedev->iommufd);
>   }
>   
> -static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>   {
>       return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>   }
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index 34d4be2ce1b1..63ffa46c9652 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -1036,16 +1036,17 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>           return !vfio_block_migration(vbasedev, err, errp);
>       }
>   
> -    if (!vbasedev->dirty_pages_supported) {
> +    if (!vbasedev->dirty_pages_supported &&
> +        !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)) {


Some platforms do not have IOMMUFD support and this call will need
some kind of abstract wrapper to reflect dirty tracking support in
the IOMMU backend.

Thanks,

C.



>           if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) {
>               error_setg(&err,
> -                       "%s: VFIO device doesn't support device dirty tracking",
> -                       vbasedev->name);
> +                       "%s: VFIO device doesn't support device and "
> +                       "IOMMU dirty tracking", vbasedev->name);
>               goto add_blocker;
>           }
>   
> -        warn_report("%s: VFIO device doesn't support device dirty tracking",
> -                    vbasedev->name);
> +        warn_report("%s: VFIO device doesn't support device and "
> +                    "IOMMU dirty tracking", vbasedev->name);
>       }
>   
>       ret = vfio_block_multiple_devices_migration(vbasedev, errp);
Joao Martins July 19, 2024, 2:24 p.m. UTC | #2
On 19/07/2024 15:17, Cédric Le Goater wrote:
> On 7/19/24 14:05, Joao Martins wrote:
>> By default VFIO migration is set to auto, which will support live
>> migration if the migration capability is set *and* also dirty page
>> tracking is supported.
>>
>> For testing purposes one can force enable without dirty page tracking
>> via enable-migration=on, but that option is generally left for testing
>> purposes.
>>
>> So starting with IOMMU dirty tracking it can use to accomodate the lack of
>> VF dirty page tracking allowing us to minimize the VF requirements for
>> migration and thus enabling migration by default for those too.
>>
>> While at it change the error messages to mention IOMMU dirty tracking as
>> well.
>>
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> ---
>>   include/hw/vfio/vfio-common.h |  1 +
>>   hw/vfio/iommufd.c             |  2 +-
>>   hw/vfio/migration.c           | 11 ++++++-----
>>   3 files changed, 8 insertions(+), 6 deletions(-)
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index 7e530c7869dc..00b9e933449e 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -299,6 +299,7 @@ int vfio_devices_query_dirty_bitmap(const
>> VFIOContainerBase *bcontainer,
>>                   VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
>>   int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
>>                             uint64_t size, ram_addr_t ram_addr, Error **errp);
>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>>     /* Returns 0 on success, or a negative errno. */
>>   bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index 7dd5d43ce06a..a998e8578552 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -111,7 +111,7 @@ static void iommufd_cdev_unbind_and_disconnect(VFIODevice
>> *vbasedev)
>>       iommufd_backend_disconnect(vbasedev->iommufd);
>>   }
>>   -static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>   {
>>       return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>   }
>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>> index 34d4be2ce1b1..63ffa46c9652 100644
>> --- a/hw/vfio/migration.c
>> +++ b/hw/vfio/migration.c
>> @@ -1036,16 +1036,17 @@ bool vfio_migration_realize(VFIODevice *vbasedev,
>> Error **errp)
>>           return !vfio_block_migration(vbasedev, err, errp);
>>       }
>>   -    if (!vbasedev->dirty_pages_supported) {
>> +    if (!vbasedev->dirty_pages_supported &&
>> +        !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)) {
> 
> 
> Some platforms do not have IOMMUFD support and this call will need
> some kind of abstract wrapper to reflect dirty tracking support in
> the IOMMU backend.
> 

This was actually on purpose because only IOMMUFD presents a view of hardware
whereas type1 supporting dirty page tracking is not used as means to 'migration
is supported'.

The hwpt is nil in type1 and the helper checks that, so it should return false.

> Thanks,
> 
> C.
> 
> 
> 
>>           if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) {
>>               error_setg(&err,
>> -                       "%s: VFIO device doesn't support device dirty tracking",
>> -                       vbasedev->name);
>> +                       "%s: VFIO device doesn't support device and "
>> +                       "IOMMU dirty tracking", vbasedev->name);
>>               goto add_blocker;
>>           }
>>   -        warn_report("%s: VFIO device doesn't support device dirty tracking",
>> -                    vbasedev->name);
>> +        warn_report("%s: VFIO device doesn't support device and "
>> +                    "IOMMU dirty tracking", vbasedev->name);
>>       }
>>         ret = vfio_block_multiple_devices_migration(vbasedev, errp);
>
Joao Martins July 19, 2024, 3:32 p.m. UTC | #3
On 19/07/2024 15:24, Joao Martins wrote:
> On 19/07/2024 15:17, Cédric Le Goater wrote:
>> On 7/19/24 14:05, Joao Martins wrote:
>>> By default VFIO migration is set to auto, which will support live
>>> migration if the migration capability is set *and* also dirty page
>>> tracking is supported.
>>>
>>> For testing purposes one can force enable without dirty page tracking
>>> via enable-migration=on, but that option is generally left for testing
>>> purposes.
>>>
>>> So starting with IOMMU dirty tracking it can use to accomodate the lack of
>>> VF dirty page tracking allowing us to minimize the VF requirements for
>>> migration and thus enabling migration by default for those too.
>>>
>>> While at it change the error messages to mention IOMMU dirty tracking as
>>> well.
>>>
>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>> ---
>>>   include/hw/vfio/vfio-common.h |  1 +
>>>   hw/vfio/iommufd.c             |  2 +-
>>>   hw/vfio/migration.c           | 11 ++++++-----
>>>   3 files changed, 8 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>> index 7e530c7869dc..00b9e933449e 100644
>>> --- a/include/hw/vfio/vfio-common.h
>>> +++ b/include/hw/vfio/vfio-common.h
>>> @@ -299,6 +299,7 @@ int vfio_devices_query_dirty_bitmap(const
>>> VFIOContainerBase *bcontainer,
>>>                   VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
>>>   int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
>>>                             uint64_t size, ram_addr_t ram_addr, Error **errp);
>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>>>     /* Returns 0 on success, or a negative errno. */
>>>   bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>> index 7dd5d43ce06a..a998e8578552 100644
>>> --- a/hw/vfio/iommufd.c
>>> +++ b/hw/vfio/iommufd.c
>>> @@ -111,7 +111,7 @@ static void iommufd_cdev_unbind_and_disconnect(VFIODevice
>>> *vbasedev)
>>>       iommufd_backend_disconnect(vbasedev->iommufd);
>>>   }
>>>   -static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>   {
>>>       return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>>   }
>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>> index 34d4be2ce1b1..63ffa46c9652 100644
>>> --- a/hw/vfio/migration.c
>>> +++ b/hw/vfio/migration.c
>>> @@ -1036,16 +1036,17 @@ bool vfio_migration_realize(VFIODevice *vbasedev,
>>> Error **errp)
>>>           return !vfio_block_migration(vbasedev, err, errp);
>>>       }
>>>   -    if (!vbasedev->dirty_pages_supported) {
>>> +    if (!vbasedev->dirty_pages_supported &&
>>> +        !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)) {
>>
>>
>> Some platforms do not have IOMMUFD support and this call will need
>> some kind of abstract wrapper to reflect dirty tracking support in
>> the IOMMU backend.
>>
> 
> This was actually on purpose because only IOMMUFD presents a view of hardware
> whereas type1 supporting dirty page tracking is not used as means to 'migration
> is supported'.
> 
> The hwpt is nil in type1 and the helper checks that, so it should return false.
> 

Unless of course I misunderstood you.

This check is IOMMUFD specific, because it's one the mirroring hw support and
can be used to unblock live migration. Since initial VFIO live migration support
that type1 dirty tracking wasn't included in the checks that allow live
migration to occur. Another way of saying this is that: with type1 even if
dirty_pages_supported is true, we always add a live migration blocker in case
device doesn't have dirty page tracking. The change above is just meant to use
IOMMUFD dirty tracking which is hardware dependent and not block live migration.

>> Thanks,
>>
>> C.
>>
>>
>>
>>>           if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) {
>>>               error_setg(&err,
>>> -                       "%s: VFIO device doesn't support device dirty tracking",
>>> -                       vbasedev->name);
>>> +                       "%s: VFIO device doesn't support device and "
>>> +                       "IOMMU dirty tracking", vbasedev->name);
>>>               goto add_blocker;
>>>           }
>>>   -        warn_report("%s: VFIO device doesn't support device dirty tracking",
>>> -                    vbasedev->name);
>>> +        warn_report("%s: VFIO device doesn't support device and "
>>> +                    "IOMMU dirty tracking", vbasedev->name);
>>>       }
>>>         ret = vfio_block_multiple_devices_migration(vbasedev, errp);
>>
>
Joao Martins July 19, 2024, 5:26 p.m. UTC | #4
On 19/07/2024 15:24, Joao Martins wrote:
> On 19/07/2024 15:17, Cédric Le Goater wrote:
>> On 7/19/24 14:05, Joao Martins wrote:
>>> By default VFIO migration is set to auto, which will support live
>>> migration if the migration capability is set *and* also dirty page
>>> tracking is supported.
>>>
>>> For testing purposes one can force enable without dirty page tracking
>>> via enable-migration=on, but that option is generally left for testing
>>> purposes.
>>>
>>> So starting with IOMMU dirty tracking it can use to accomodate the lack of
>>> VF dirty page tracking allowing us to minimize the VF requirements for
>>> migration and thus enabling migration by default for those too.
>>>
>>> While at it change the error messages to mention IOMMU dirty tracking as
>>> well.
>>>
>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>> ---
>>>   include/hw/vfio/vfio-common.h |  1 +
>>>   hw/vfio/iommufd.c             |  2 +-
>>>   hw/vfio/migration.c           | 11 ++++++-----
>>>   3 files changed, 8 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>> index 7e530c7869dc..00b9e933449e 100644
>>> --- a/include/hw/vfio/vfio-common.h
>>> +++ b/include/hw/vfio/vfio-common.h
>>> @@ -299,6 +299,7 @@ int vfio_devices_query_dirty_bitmap(const
>>> VFIOContainerBase *bcontainer,
>>>                   VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
>>>   int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
>>>                             uint64_t size, ram_addr_t ram_addr, Error **errp);
>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>>>     /* Returns 0 on success, or a negative errno. */
>>>   bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>> index 7dd5d43ce06a..a998e8578552 100644
>>> --- a/hw/vfio/iommufd.c
>>> +++ b/hw/vfio/iommufd.c
>>> @@ -111,7 +111,7 @@ static void iommufd_cdev_unbind_and_disconnect(VFIODevice
>>> *vbasedev)
>>>       iommufd_backend_disconnect(vbasedev->iommufd);
>>>   }
>>>   -static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>   {
>>>       return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>>   }
>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>> index 34d4be2ce1b1..63ffa46c9652 100644
>>> --- a/hw/vfio/migration.c
>>> +++ b/hw/vfio/migration.c
>>> @@ -1036,16 +1036,17 @@ bool vfio_migration_realize(VFIODevice *vbasedev,
>>> Error **errp)
>>>           return !vfio_block_migration(vbasedev, err, errp);
>>>       }
>>>   -    if (!vbasedev->dirty_pages_supported) {
>>> +    if (!vbasedev->dirty_pages_supported &&
>>> +        !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)) {
>>
>>
>> Some platforms do not have IOMMUFD support and this call will need
>> some kind of abstract wrapper to reflect dirty tracking support in
>> the IOMMU backend.
>>
> 
> This was actually on purpose because only IOMMUFD presents a view of hardware
> whereas type1 supporting dirty page tracking is not used as means to 'migration
> is supported'.
> 
> The hwpt is nil in type1 and the helper checks that, so it should return false.
> 

Oh wait, maybe you're talking about CONFIG_IOMMUFD=n which I totally didn't
consider. Maybe this would be a elegant way to address it? Looks to pass my
build with CONFIG_IOMMUFD=n

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 61dd48e79b71..422ad4a5bdd1 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -300,7 +300,14 @@ int vfio_devices_query_dirty_bitmap(const VFIOContainerBase
*bcontainer,
                 VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
 int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
                           uint64_t size, ram_addr_t ram_addr, Error **errp);
+#ifdef CONFIG_IOMMUFD
 bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
+#else
+static inline bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
+{
+    return false;
+}
+#endif

 /* Returns 0 on success, or a negative errno. */
 bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
Cédric Le Goater July 22, 2024, 2:53 p.m. UTC | #5
On 7/19/24 19:26, Joao Martins wrote:
> On 19/07/2024 15:24, Joao Martins wrote:
>> On 19/07/2024 15:17, Cédric Le Goater wrote:
>>> On 7/19/24 14:05, Joao Martins wrote:
>>>> By default VFIO migration is set to auto, which will support live
>>>> migration if the migration capability is set *and* also dirty page
>>>> tracking is supported.
>>>>
>>>> For testing purposes one can force enable without dirty page tracking
>>>> via enable-migration=on, but that option is generally left for testing
>>>> purposes.
>>>>
>>>> So starting with IOMMU dirty tracking it can use to accomodate the lack of
>>>> VF dirty page tracking allowing us to minimize the VF requirements for
>>>> migration and thus enabling migration by default for those too.
>>>>
>>>> While at it change the error messages to mention IOMMU dirty tracking as
>>>> well.
>>>>
>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>> ---
>>>>    include/hw/vfio/vfio-common.h |  1 +
>>>>    hw/vfio/iommufd.c             |  2 +-
>>>>    hw/vfio/migration.c           | 11 ++++++-----
>>>>    3 files changed, 8 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>>> index 7e530c7869dc..00b9e933449e 100644
>>>> --- a/include/hw/vfio/vfio-common.h
>>>> +++ b/include/hw/vfio/vfio-common.h
>>>> @@ -299,6 +299,7 @@ int vfio_devices_query_dirty_bitmap(const
>>>> VFIOContainerBase *bcontainer,
>>>>                    VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
>>>>    int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
>>>>                              uint64_t size, ram_addr_t ram_addr, Error **errp);
>>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>>>>      /* Returns 0 on success, or a negative errno. */
>>>>    bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>> index 7dd5d43ce06a..a998e8578552 100644
>>>> --- a/hw/vfio/iommufd.c
>>>> +++ b/hw/vfio/iommufd.c
>>>> @@ -111,7 +111,7 @@ static void iommufd_cdev_unbind_and_disconnect(VFIODevice
>>>> *vbasedev)
>>>>        iommufd_backend_disconnect(vbasedev->iommufd);
>>>>    }
>>>>    -static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>    {
>>>>        return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>>>    }
>>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>>> index 34d4be2ce1b1..63ffa46c9652 100644
>>>> --- a/hw/vfio/migration.c
>>>> +++ b/hw/vfio/migration.c
>>>> @@ -1036,16 +1036,17 @@ bool vfio_migration_realize(VFIODevice *vbasedev,
>>>> Error **errp)
>>>>            return !vfio_block_migration(vbasedev, err, errp);
>>>>        }
>>>>    -    if (!vbasedev->dirty_pages_supported) {
>>>> +    if (!vbasedev->dirty_pages_supported &&
>>>> +        !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)) {
>>>
>>>
>>> Some platforms do not have IOMMUFD support and this call will need
>>> some kind of abstract wrapper to reflect dirty tracking support in
>>> the IOMMU backend.
>>>
>>
>> This was actually on purpose because only IOMMUFD presents a view of hardware
>> whereas type1 supporting dirty page tracking is not used as means to 'migration
>> is supported'.
>>
>> The hwpt is nil in type1 and the helper checks that, so it should return false.
>>
> 
> Oh wait, maybe you're talking about CONFIG_IOMMUFD=n which I totally didn't
> consider. Maybe this would be a elegant way to address it? Looks to pass my
> build with CONFIG_IOMMUFD=n
> 
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 61dd48e79b71..422ad4a5bdd1 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -300,7 +300,14 @@ int vfio_devices_query_dirty_bitmap(const VFIOContainerBase
> *bcontainer,
>                   VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
>   int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
>                             uint64_t size, ram_addr_t ram_addr, Error **errp);
> +#ifdef CONFIG_IOMMUFD
>   bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
> +#else
> +static inline bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
> +{
> +    return false;
> +}
> +#endif
> 
>   /* Returns 0 on success, or a negative errno. */
>   bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
> 

hmm, no. You will need to introduce a new Host IOMMU device capability,
something like :

    HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING,

Then, introduce an helper routine to check the capability  :

    return hiodc->get_cap( ... HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING...)
  
and replace the iommufd_hwpt_dirty_tracking call with it.

Yeah I know, it's cumbersome but it's cleaner !

That's not a major problem in the series. I can address it at the end
to avoid a resend. First, let's get a R-b on all other patches.

Thanks,

C.
Joao Martins July 22, 2024, 3:01 p.m. UTC | #6
On 22/07/2024 15:53, Cédric Le Goater wrote:
> On 7/19/24 19:26, Joao Martins wrote:
>> On 19/07/2024 15:24, Joao Martins wrote:
>>> On 19/07/2024 15:17, Cédric Le Goater wrote:
>>>> On 7/19/24 14:05, Joao Martins wrote:
>>>>> By default VFIO migration is set to auto, which will support live
>>>>> migration if the migration capability is set *and* also dirty page
>>>>> tracking is supported.
>>>>>
>>>>> For testing purposes one can force enable without dirty page tracking
>>>>> via enable-migration=on, but that option is generally left for testing
>>>>> purposes.
>>>>>
>>>>> So starting with IOMMU dirty tracking it can use to accomodate the lack of
>>>>> VF dirty page tracking allowing us to minimize the VF requirements for
>>>>> migration and thus enabling migration by default for those too.
>>>>>
>>>>> While at it change the error messages to mention IOMMU dirty tracking as
>>>>> well.
>>>>>
>>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>>> ---
>>>>>    include/hw/vfio/vfio-common.h |  1 +
>>>>>    hw/vfio/iommufd.c             |  2 +-
>>>>>    hw/vfio/migration.c           | 11 ++++++-----
>>>>>    3 files changed, 8 insertions(+), 6 deletions(-)
>>>>>
>>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>>>> index 7e530c7869dc..00b9e933449e 100644
>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>> @@ -299,6 +299,7 @@ int vfio_devices_query_dirty_bitmap(const
>>>>> VFIOContainerBase *bcontainer,
>>>>>                    VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
>>>>>    int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t
>>>>> iova,
>>>>>                              uint64_t size, ram_addr_t ram_addr, Error
>>>>> **errp);
>>>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>>>>>      /* Returns 0 on success, or a negative errno. */
>>>>>    bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>>> index 7dd5d43ce06a..a998e8578552 100644
>>>>> --- a/hw/vfio/iommufd.c
>>>>> +++ b/hw/vfio/iommufd.c
>>>>> @@ -111,7 +111,7 @@ static void iommufd_cdev_unbind_and_disconnect(VFIODevice
>>>>> *vbasedev)
>>>>>        iommufd_backend_disconnect(vbasedev->iommufd);
>>>>>    }
>>>>>    -static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>    {
>>>>>        return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>>>>    }
>>>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>>>> index 34d4be2ce1b1..63ffa46c9652 100644
>>>>> --- a/hw/vfio/migration.c
>>>>> +++ b/hw/vfio/migration.c
>>>>> @@ -1036,16 +1036,17 @@ bool vfio_migration_realize(VFIODevice *vbasedev,
>>>>> Error **errp)
>>>>>            return !vfio_block_migration(vbasedev, err, errp);
>>>>>        }
>>>>>    -    if (!vbasedev->dirty_pages_supported) {
>>>>> +    if (!vbasedev->dirty_pages_supported &&
>>>>> +        !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)) {
>>>>
>>>>
>>>> Some platforms do not have IOMMUFD support and this call will need
>>>> some kind of abstract wrapper to reflect dirty tracking support in
>>>> the IOMMU backend.
>>>>
>>>
>>> This was actually on purpose because only IOMMUFD presents a view of hardware
>>> whereas type1 supporting dirty page tracking is not used as means to 'migration
>>> is supported'.
>>>
>>> The hwpt is nil in type1 and the helper checks that, so it should return false.
>>>
>>
>> Oh wait, maybe you're talking about CONFIG_IOMMUFD=n which I totally didn't
>> consider. Maybe this would be a elegant way to address it? Looks to pass my
>> build with CONFIG_IOMMUFD=n
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index 61dd48e79b71..422ad4a5bdd1 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -300,7 +300,14 @@ int vfio_devices_query_dirty_bitmap(const VFIOContainerBase
>> *bcontainer,
>>                   VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
>>   int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
>>                             uint64_t size, ram_addr_t ram_addr, Error **errp);
>> +#ifdef CONFIG_IOMMUFD
>>   bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>> +#else
>> +static inline bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>> +{
>> +    return false;
>> +}
>> +#endif
>>
>>   /* Returns 0 on success, or a negative errno. */
>>   bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>
> 
> hmm, no. You will need to introduce a new Host IOMMU device capability,
> something like :
> 
>    HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING,
> 
> Then, introduce an helper routine to check the capability  :
> 
>    return hiodc->get_cap( ... HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING...)
>  
> and replace the iommufd_hwpt_dirty_tracking call with it.
> 
> Yeah I know, it's cumbersome but it's cleaner !
> 

Funny you mention it, because that's what I did in v3:

https://lore.kernel.org/qemu-devel/20240708143420.16953-9-joao.m.martins@oracle.com/

But it was suggested to drop (I am assuming to avoid complexity)

> That's not a major problem in the series. I can address it at the end
> to avoid a resend. First, let's get a R-b on all other patches.
> 
> Thanks,
> 
> C.
> 
>
Cédric Le Goater July 22, 2024, 3:13 p.m. UTC | #7
On 7/22/24 17:01, Joao Martins wrote:
> On 22/07/2024 15:53, Cédric Le Goater wrote:
>> On 7/19/24 19:26, Joao Martins wrote:
>>> On 19/07/2024 15:24, Joao Martins wrote:
>>>> On 19/07/2024 15:17, Cédric Le Goater wrote:
>>>>> On 7/19/24 14:05, Joao Martins wrote:
>>>>>> By default VFIO migration is set to auto, which will support live
>>>>>> migration if the migration capability is set *and* also dirty page
>>>>>> tracking is supported.
>>>>>>
>>>>>> For testing purposes one can force enable without dirty page tracking
>>>>>> via enable-migration=on, but that option is generally left for testing
>>>>>> purposes.
>>>>>>
>>>>>> So starting with IOMMU dirty tracking it can use to accomodate the lack of
>>>>>> VF dirty page tracking allowing us to minimize the VF requirements for
>>>>>> migration and thus enabling migration by default for those too.
>>>>>>
>>>>>> While at it change the error messages to mention IOMMU dirty tracking as
>>>>>> well.
>>>>>>
>>>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>>>> ---
>>>>>>     include/hw/vfio/vfio-common.h |  1 +
>>>>>>     hw/vfio/iommufd.c             |  2 +-
>>>>>>     hw/vfio/migration.c           | 11 ++++++-----
>>>>>>     3 files changed, 8 insertions(+), 6 deletions(-)
>>>>>>
>>>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>>>>> index 7e530c7869dc..00b9e933449e 100644
>>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>>> @@ -299,6 +299,7 @@ int vfio_devices_query_dirty_bitmap(const
>>>>>> VFIOContainerBase *bcontainer,
>>>>>>                     VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
>>>>>>     int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t
>>>>>> iova,
>>>>>>                               uint64_t size, ram_addr_t ram_addr, Error
>>>>>> **errp);
>>>>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>>>>>>       /* Returns 0 on success, or a negative errno. */
>>>>>>     bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>>>> index 7dd5d43ce06a..a998e8578552 100644
>>>>>> --- a/hw/vfio/iommufd.c
>>>>>> +++ b/hw/vfio/iommufd.c
>>>>>> @@ -111,7 +111,7 @@ static void iommufd_cdev_unbind_and_disconnect(VFIODevice
>>>>>> *vbasedev)
>>>>>>         iommufd_backend_disconnect(vbasedev->iommufd);
>>>>>>     }
>>>>>>     -static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>>     {
>>>>>>         return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>>>>>     }
>>>>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>>>>> index 34d4be2ce1b1..63ffa46c9652 100644
>>>>>> --- a/hw/vfio/migration.c
>>>>>> +++ b/hw/vfio/migration.c
>>>>>> @@ -1036,16 +1036,17 @@ bool vfio_migration_realize(VFIODevice *vbasedev,
>>>>>> Error **errp)
>>>>>>             return !vfio_block_migration(vbasedev, err, errp);
>>>>>>         }
>>>>>>     -    if (!vbasedev->dirty_pages_supported) {
>>>>>> +    if (!vbasedev->dirty_pages_supported &&
>>>>>> +        !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)) {
>>>>>
>>>>>
>>>>> Some platforms do not have IOMMUFD support and this call will need
>>>>> some kind of abstract wrapper to reflect dirty tracking support in
>>>>> the IOMMU backend.
>>>>>
>>>>
>>>> This was actually on purpose because only IOMMUFD presents a view of hardware
>>>> whereas type1 supporting dirty page tracking is not used as means to 'migration
>>>> is supported'.
>>>>
>>>> The hwpt is nil in type1 and the helper checks that, so it should return false.
>>>>
>>>
>>> Oh wait, maybe you're talking about CONFIG_IOMMUFD=n which I totally didn't
>>> consider. Maybe this would be a elegant way to address it? Looks to pass my
>>> build with CONFIG_IOMMUFD=n
>>>
>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>> index 61dd48e79b71..422ad4a5bdd1 100644
>>> --- a/include/hw/vfio/vfio-common.h
>>> +++ b/include/hw/vfio/vfio-common.h
>>> @@ -300,7 +300,14 @@ int vfio_devices_query_dirty_bitmap(const VFIOContainerBase
>>> *bcontainer,
>>>                    VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
>>>    int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
>>>                              uint64_t size, ram_addr_t ram_addr, Error **errp);
>>> +#ifdef CONFIG_IOMMUFD
>>>    bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>>> +#else
>>> +static inline bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>> +{
>>> +    return false;
>>> +}
>>> +#endif
>>>
>>>    /* Returns 0 on success, or a negative errno. */
>>>    bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>>
>>
>> hmm, no. You will need to introduce a new Host IOMMU device capability,
>> something like :
>>
>>     HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING,
>>
>> Then, introduce an helper routine to check the capability  :
>>
>>     return hiodc->get_cap( ... HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING...)
>>   
>> and replace the iommufd_hwpt_dirty_tracking call with it.
>>
>> Yeah I know, it's cumbersome but it's cleaner !
>>
> 
> Funny you mention it, because that's what I did in v3:
> 
> https://lore.kernel.org/qemu-devel/20240708143420.16953-9-joao.m.martins@oracle.com/
> 
> But it was suggested to drop (I am assuming to avoid complexity)

my bad if I did :/

we will need an helper such as :

   bool vfio_device_dirty_tracking(VFIODevice *vbasedev)
   {
       HostIOMMUDevice *hiod = vbasedev->hiod ;
       HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);

       return hiodc->get_cap &&
           hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING, NULL) == 1;
   }

and something like,

   static int hiod_iommufd_vfio_get_cap(HostIOMMUDevice *hiod, int cap,
                                        Error **errp)
   {
       switch (cap) {
       case HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING:
           return !!(hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING);
       default:
           error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
           return -EINVAL;
       }
   }

Feel free to propose your own implementation,

Thanks,

C.



> 
>> That's not a major problem in the series. I can address it at the end
>> to avoid a resend. First, let's get a R-b on all other patches.
>>
>> Thanks,
>>
>> C.
>>
>>
>
Joao Martins July 22, 2024, 3:42 p.m. UTC | #8
On 22/07/2024 16:13, Cédric Le Goater wrote:
> On 7/22/24 17:01, Joao Martins wrote:
>> On 22/07/2024 15:53, Cédric Le Goater wrote:
>>> On 7/19/24 19:26, Joao Martins wrote:
>>>> On 19/07/2024 15:24, Joao Martins wrote:
>>>>> On 19/07/2024 15:17, Cédric Le Goater wrote:
>>>>>> On 7/19/24 14:05, Joao Martins wrote:
>>>>>>> By default VFIO migration is set to auto, which will support live
>>>>>>> migration if the migration capability is set *and* also dirty page
>>>>>>> tracking is supported.
>>>>>>>
>>>>>>> For testing purposes one can force enable without dirty page tracking
>>>>>>> via enable-migration=on, but that option is generally left for testing
>>>>>>> purposes.
>>>>>>>
>>>>>>> So starting with IOMMU dirty tracking it can use to accomodate the lack of
>>>>>>> VF dirty page tracking allowing us to minimize the VF requirements for
>>>>>>> migration and thus enabling migration by default for those too.
>>>>>>>
>>>>>>> While at it change the error messages to mention IOMMU dirty tracking as
>>>>>>> well.
>>>>>>>
>>>>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>>>>> ---
>>>>>>>     include/hw/vfio/vfio-common.h |  1 +
>>>>>>>     hw/vfio/iommufd.c             |  2 +-
>>>>>>>     hw/vfio/migration.c           | 11 ++++++-----
>>>>>>>     3 files changed, 8 insertions(+), 6 deletions(-)
>>>>>>>
>>>>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>>>>>> index 7e530c7869dc..00b9e933449e 100644
>>>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>>>> @@ -299,6 +299,7 @@ int vfio_devices_query_dirty_bitmap(const
>>>>>>> VFIOContainerBase *bcontainer,
>>>>>>>                     VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error
>>>>>>> **errp);
>>>>>>>     int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t
>>>>>>> iova,
>>>>>>>                               uint64_t size, ram_addr_t ram_addr, Error
>>>>>>> **errp);
>>>>>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>>>>>>>       /* Returns 0 on success, or a negative errno. */
>>>>>>>     bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>>>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>>>>> index 7dd5d43ce06a..a998e8578552 100644
>>>>>>> --- a/hw/vfio/iommufd.c
>>>>>>> +++ b/hw/vfio/iommufd.c
>>>>>>> @@ -111,7 +111,7 @@ static void
>>>>>>> iommufd_cdev_unbind_and_disconnect(VFIODevice
>>>>>>> *vbasedev)
>>>>>>>         iommufd_backend_disconnect(vbasedev->iommufd);
>>>>>>>     }
>>>>>>>     -static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>>>     {
>>>>>>>         return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>>>>>>     }
>>>>>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>>>>>> index 34d4be2ce1b1..63ffa46c9652 100644
>>>>>>> --- a/hw/vfio/migration.c
>>>>>>> +++ b/hw/vfio/migration.c
>>>>>>> @@ -1036,16 +1036,17 @@ bool vfio_migration_realize(VFIODevice *vbasedev,
>>>>>>> Error **errp)
>>>>>>>             return !vfio_block_migration(vbasedev, err, errp);
>>>>>>>         }
>>>>>>>     -    if (!vbasedev->dirty_pages_supported) {
>>>>>>> +    if (!vbasedev->dirty_pages_supported &&
>>>>>>> +        !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)) {
>>>>>>
>>>>>>
>>>>>> Some platforms do not have IOMMUFD support and this call will need
>>>>>> some kind of abstract wrapper to reflect dirty tracking support in
>>>>>> the IOMMU backend.
>>>>>>
>>>>>
>>>>> This was actually on purpose because only IOMMUFD presents a view of hardware
>>>>> whereas type1 supporting dirty page tracking is not used as means to
>>>>> 'migration
>>>>> is supported'.
>>>>>
>>>>> The hwpt is nil in type1 and the helper checks that, so it should return
>>>>> false.
>>>>>
>>>>
>>>> Oh wait, maybe you're talking about CONFIG_IOMMUFD=n which I totally didn't
>>>> consider. Maybe this would be a elegant way to address it? Looks to pass my
>>>> build with CONFIG_IOMMUFD=n
>>>>
>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>>> index 61dd48e79b71..422ad4a5bdd1 100644
>>>> --- a/include/hw/vfio/vfio-common.h
>>>> +++ b/include/hw/vfio/vfio-common.h
>>>> @@ -300,7 +300,14 @@ int vfio_devices_query_dirty_bitmap(const
>>>> VFIOContainerBase
>>>> *bcontainer,
>>>>                    VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
>>>>    int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t
>>>> iova,
>>>>                              uint64_t size, ram_addr_t ram_addr, Error **errp);
>>>> +#ifdef CONFIG_IOMMUFD
>>>>    bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>>>> +#else
>>>> +static inline bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>> +{
>>>> +    return false;
>>>> +}
>>>> +#endif
>>>>
>>>>    /* Returns 0 on success, or a negative errno. */
>>>>    bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>>>
>>>
>>> hmm, no. You will need to introduce a new Host IOMMU device capability,
>>> something like :
>>>
>>>     HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING,
>>>
>>> Then, introduce an helper routine to check the capability  :
>>>
>>>     return hiodc->get_cap( ... HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING...)
>>>   and replace the iommufd_hwpt_dirty_tracking call with it.
>>>
>>> Yeah I know, it's cumbersome but it's cleaner !
>>>
>>
>> Funny you mention it, because that's what I did in v3:
>>
>> https://lore.kernel.org/qemu-devel/20240708143420.16953-9-joao.m.martins@oracle.com/
>>
>> But it was suggested to drop (I am assuming to avoid complexity)
> 
> my bad if I did :/
> 

No worries it is all part of review -- I think Zhenzhong proposed with good
intentions, and I probably didn't think too hard about the consequences on
layering with the HIOD.

> we will need an helper such as :
> 
>   bool vfio_device_dirty_tracking(VFIODevice *vbasedev)
>   {
>       HostIOMMUDevice *hiod = vbasedev->hiod ;
>       HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
> 
>       return hiodc->get_cap &&
>           hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING, NULL) == 1;
>   }
> 
> and something like,
> 
>   static int hiod_iommufd_vfio_get_cap(HostIOMMUDevice *hiod, int cap,
>                                        Error **errp)
>   {
>       switch (cap) {
>       case HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING:
>           return !!(hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING);
>       default:
>           error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
>           return -EINVAL;
>       }
>   }
> 
> Feel free to propose your own implementation,
> 

Actually it's close to what I had in v3 link, except the new helper (the name
vfio_device_dirty_tracking is a bit misleading I would call it
vfio_device_iommu_dirty_tracking)

I can follow-up with this improvement in case this gets merged as is, or include
it in the next version if you prefer to adjourn this series into 9.2 (given the
lack of time to get everything right).

	Joao
Cédric Le Goater July 22, 2024, 3:58 p.m. UTC | #9
On 7/22/24 17:42, Joao Martins wrote:
> On 22/07/2024 16:13, Cédric Le Goater wrote:
>> On 7/22/24 17:01, Joao Martins wrote:
>>> On 22/07/2024 15:53, Cédric Le Goater wrote:
>>>> On 7/19/24 19:26, Joao Martins wrote:
>>>>> On 19/07/2024 15:24, Joao Martins wrote:
>>>>>> On 19/07/2024 15:17, Cédric Le Goater wrote:
>>>>>>> On 7/19/24 14:05, Joao Martins wrote:
>>>>>>>> By default VFIO migration is set to auto, which will support live
>>>>>>>> migration if the migration capability is set *and* also dirty page
>>>>>>>> tracking is supported.
>>>>>>>>
>>>>>>>> For testing purposes one can force enable without dirty page tracking
>>>>>>>> via enable-migration=on, but that option is generally left for testing
>>>>>>>> purposes.
>>>>>>>>
>>>>>>>> So starting with IOMMU dirty tracking it can use to accomodate the lack of
>>>>>>>> VF dirty page tracking allowing us to minimize the VF requirements for
>>>>>>>> migration and thus enabling migration by default for those too.
>>>>>>>>
>>>>>>>> While at it change the error messages to mention IOMMU dirty tracking as
>>>>>>>> well.
>>>>>>>>
>>>>>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>>>>>> ---
>>>>>>>>      include/hw/vfio/vfio-common.h |  1 +
>>>>>>>>      hw/vfio/iommufd.c             |  2 +-
>>>>>>>>      hw/vfio/migration.c           | 11 ++++++-----
>>>>>>>>      3 files changed, 8 insertions(+), 6 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>>>>>>> index 7e530c7869dc..00b9e933449e 100644
>>>>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>>>>> @@ -299,6 +299,7 @@ int vfio_devices_query_dirty_bitmap(const
>>>>>>>> VFIOContainerBase *bcontainer,
>>>>>>>>                      VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error
>>>>>>>> **errp);
>>>>>>>>      int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t
>>>>>>>> iova,
>>>>>>>>                                uint64_t size, ram_addr_t ram_addr, Error
>>>>>>>> **errp);
>>>>>>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>>>>>>>>        /* Returns 0 on success, or a negative errno. */
>>>>>>>>      bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>>>>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>>>>>> index 7dd5d43ce06a..a998e8578552 100644
>>>>>>>> --- a/hw/vfio/iommufd.c
>>>>>>>> +++ b/hw/vfio/iommufd.c
>>>>>>>> @@ -111,7 +111,7 @@ static void
>>>>>>>> iommufd_cdev_unbind_and_disconnect(VFIODevice
>>>>>>>> *vbasedev)
>>>>>>>>          iommufd_backend_disconnect(vbasedev->iommufd);
>>>>>>>>      }
>>>>>>>>      -static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>>>>      {
>>>>>>>>          return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>>>>>>>      }
>>>>>>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>>>>>>> index 34d4be2ce1b1..63ffa46c9652 100644
>>>>>>>> --- a/hw/vfio/migration.c
>>>>>>>> +++ b/hw/vfio/migration.c
>>>>>>>> @@ -1036,16 +1036,17 @@ bool vfio_migration_realize(VFIODevice *vbasedev,
>>>>>>>> Error **errp)
>>>>>>>>              return !vfio_block_migration(vbasedev, err, errp);
>>>>>>>>          }
>>>>>>>>      -    if (!vbasedev->dirty_pages_supported) {
>>>>>>>> +    if (!vbasedev->dirty_pages_supported &&
>>>>>>>> +        !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)) {
>>>>>>>
>>>>>>>
>>>>>>> Some platforms do not have IOMMUFD support and this call will need
>>>>>>> some kind of abstract wrapper to reflect dirty tracking support in
>>>>>>> the IOMMU backend.
>>>>>>>
>>>>>>
>>>>>> This was actually on purpose because only IOMMUFD presents a view of hardware
>>>>>> whereas type1 supporting dirty page tracking is not used as means to
>>>>>> 'migration
>>>>>> is supported'.
>>>>>>
>>>>>> The hwpt is nil in type1 and the helper checks that, so it should return
>>>>>> false.
>>>>>>
>>>>>
>>>>> Oh wait, maybe you're talking about CONFIG_IOMMUFD=n which I totally didn't
>>>>> consider. Maybe this would be a elegant way to address it? Looks to pass my
>>>>> build with CONFIG_IOMMUFD=n
>>>>>
>>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>>>> index 61dd48e79b71..422ad4a5bdd1 100644
>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>> @@ -300,7 +300,14 @@ int vfio_devices_query_dirty_bitmap(const
>>>>> VFIOContainerBase
>>>>> *bcontainer,
>>>>>                     VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
>>>>>     int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t
>>>>> iova,
>>>>>                               uint64_t size, ram_addr_t ram_addr, Error **errp);
>>>>> +#ifdef CONFIG_IOMMUFD
>>>>>     bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>>>>> +#else
>>>>> +static inline bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>> +{
>>>>> +    return false;
>>>>> +}
>>>>> +#endif
>>>>>
>>>>>     /* Returns 0 on success, or a negative errno. */
>>>>>     bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>>>>
>>>>
>>>> hmm, no. You will need to introduce a new Host IOMMU device capability,
>>>> something like :
>>>>
>>>>      HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING,
>>>>
>>>> Then, introduce an helper routine to check the capability  :
>>>>
>>>>      return hiodc->get_cap( ... HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING...)
>>>>    and replace the iommufd_hwpt_dirty_tracking call with it.
>>>>
>>>> Yeah I know, it's cumbersome but it's cleaner !
>>>>
>>>
>>> Funny you mention it, because that's what I did in v3:
>>>
>>> https://lore.kernel.org/qemu-devel/20240708143420.16953-9-joao.m.martins@oracle.com/
>>>
>>> But it was suggested to drop (I am assuming to avoid complexity)
>>
>> my bad if I did :/
>>
> 
> No worries it is all part of review -- I think Zhenzhong proposed with good
> intentions, and I probably didn't think too hard about the consequences on
> layering with the HIOD.
> 
>> we will need an helper such as :
>>
>>    bool vfio_device_dirty_tracking(VFIODevice *vbasedev)
>>    {
>>        HostIOMMUDevice *hiod = vbasedev->hiod ;
>>        HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
>>
>>        return hiodc->get_cap &&
>>            hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING, NULL) == 1;
>>    }
>>
>> and something like,
>>
>>    static int hiod_iommufd_vfio_get_cap(HostIOMMUDevice *hiod, int cap,
>>                                         Error **errp)
>>    {
>>        switch (cap) {
>>        case HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING:
>>            return !!(hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING);
>>        default:
>>            error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
>>            return -EINVAL;
>>        }
>>    }
>>
>> Feel free to propose your own implementation,
>>
> 
> Actually it's close to what I had in v3 link, except the new helper (the name
> vfio_device_dirty_tracking is a bit misleading I would call it
> vfio_device_iommu_dirty_tracking)

Let's call it vfio_device_iommu_dirty_tracking.

> I can follow-up with this improvement in case this gets merged as is, 

I can't merge as is since it break compiles (I am excluding the v5.1 patch).
Which means I would prefer a v6 please.

> or include
> it in the next version if you prefer to adjourn this series into 9.2 (given the
> lack of time to get everything right).

There aren't many open questions left.

* PATCH 5 lacks a R-b. I would feel more confortable if ZhenZhong or
   Eric acked the changes.
* PATCH 9 is slightly hacky with the use of vfio_device_get_aw_bits().
   I think it's minor. I would also feel more confortable if ZhenZhong
   acked the changes.
* PATCH 12 needs the fix we have been talking about.
* PATCH 13 is for dev/debug.


What's important is to avoid introducing regressions in the current behavior,
that is when not using IOMMUFD. It looks fine on that aspect AFAICT.

Thanks,

C.
Joao Martins July 22, 2024, 4:29 p.m. UTC | #10
On 22/07/2024 16:58, Cédric Le Goater wrote:
> On 7/22/24 17:42, Joao Martins wrote:
>> On 22/07/2024 16:13, Cédric Le Goater wrote:
>>> On 7/22/24 17:01, Joao Martins wrote:
>>>> On 22/07/2024 15:53, Cédric Le Goater wrote:
>>>>> On 7/19/24 19:26, Joao Martins wrote:
>>>>>> On 19/07/2024 15:24, Joao Martins wrote:
>>>>>>> On 19/07/2024 15:17, Cédric Le Goater wrote:
>>>>>>>> On 7/19/24 14:05, Joao Martins wrote:
>>>>>>>>> By default VFIO migration is set to auto, which will support live
>>>>>>>>> migration if the migration capability is set *and* also dirty page
>>>>>>>>> tracking is supported.
>>>>>>>>>
>>>>>>>>> For testing purposes one can force enable without dirty page tracking
>>>>>>>>> via enable-migration=on, but that option is generally left for testing
>>>>>>>>> purposes.
>>>>>>>>>
>>>>>>>>> So starting with IOMMU dirty tracking it can use to accomodate the lack of
>>>>>>>>> VF dirty page tracking allowing us to minimize the VF requirements for
>>>>>>>>> migration and thus enabling migration by default for those too.
>>>>>>>>>
>>>>>>>>> While at it change the error messages to mention IOMMU dirty tracking as
>>>>>>>>> well.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>>>>>>> ---
>>>>>>>>>      include/hw/vfio/vfio-common.h |  1 +
>>>>>>>>>      hw/vfio/iommufd.c             |  2 +-
>>>>>>>>>      hw/vfio/migration.c           | 11 ++++++-----
>>>>>>>>>      3 files changed, 8 insertions(+), 6 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>>>>>>>> index 7e530c7869dc..00b9e933449e 100644
>>>>>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>>>>>> @@ -299,6 +299,7 @@ int vfio_devices_query_dirty_bitmap(const
>>>>>>>>> VFIOContainerBase *bcontainer,
>>>>>>>>>                      VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error
>>>>>>>>> **errp);
>>>>>>>>>      int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer,
>>>>>>>>> uint64_t
>>>>>>>>> iova,
>>>>>>>>>                                uint64_t size, ram_addr_t ram_addr, Error
>>>>>>>>> **errp);
>>>>>>>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>>>>>>>>>        /* Returns 0 on success, or a negative errno. */
>>>>>>>>>      bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>>>>>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>>>>>>> index 7dd5d43ce06a..a998e8578552 100644
>>>>>>>>> --- a/hw/vfio/iommufd.c
>>>>>>>>> +++ b/hw/vfio/iommufd.c
>>>>>>>>> @@ -111,7 +111,7 @@ static void
>>>>>>>>> iommufd_cdev_unbind_and_disconnect(VFIODevice
>>>>>>>>> *vbasedev)
>>>>>>>>>          iommufd_backend_disconnect(vbasedev->iommufd);
>>>>>>>>>      }
>>>>>>>>>      -static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>>>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>>>>>      {
>>>>>>>>>          return hwpt && hwpt->hwpt_flags &
>>>>>>>>> IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>>>>>>>>      }
>>>>>>>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>>>>>>>> index 34d4be2ce1b1..63ffa46c9652 100644
>>>>>>>>> --- a/hw/vfio/migration.c
>>>>>>>>> +++ b/hw/vfio/migration.c
>>>>>>>>> @@ -1036,16 +1036,17 @@ bool vfio_migration_realize(VFIODevice *vbasedev,
>>>>>>>>> Error **errp)
>>>>>>>>>              return !vfio_block_migration(vbasedev, err, errp);
>>>>>>>>>          }
>>>>>>>>>      -    if (!vbasedev->dirty_pages_supported) {
>>>>>>>>> +    if (!vbasedev->dirty_pages_supported &&
>>>>>>>>> +        !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)) {
>>>>>>>>
>>>>>>>>
>>>>>>>> Some platforms do not have IOMMUFD support and this call will need
>>>>>>>> some kind of abstract wrapper to reflect dirty tracking support in
>>>>>>>> the IOMMU backend.
>>>>>>>>
>>>>>>>
>>>>>>> This was actually on purpose because only IOMMUFD presents a view of
>>>>>>> hardware
>>>>>>> whereas type1 supporting dirty page tracking is not used as means to
>>>>>>> 'migration
>>>>>>> is supported'.
>>>>>>>
>>>>>>> The hwpt is nil in type1 and the helper checks that, so it should return
>>>>>>> false.
>>>>>>>
>>>>>>
>>>>>> Oh wait, maybe you're talking about CONFIG_IOMMUFD=n which I totally didn't
>>>>>> consider. Maybe this would be a elegant way to address it? Looks to pass my
>>>>>> build with CONFIG_IOMMUFD=n
>>>>>>
>>>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>>>>> index 61dd48e79b71..422ad4a5bdd1 100644
>>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>>> @@ -300,7 +300,14 @@ int vfio_devices_query_dirty_bitmap(const
>>>>>> VFIOContainerBase
>>>>>> *bcontainer,
>>>>>>                     VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error
>>>>>> **errp);
>>>>>>     int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t
>>>>>> iova,
>>>>>>                               uint64_t size, ram_addr_t ram_addr, Error
>>>>>> **errp);
>>>>>> +#ifdef CONFIG_IOMMUFD
>>>>>>     bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>>>>>> +#else
>>>>>> +static inline bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>> +{
>>>>>> +    return false;
>>>>>> +}
>>>>>> +#endif
>>>>>>
>>>>>>     /* Returns 0 on success, or a negative errno. */
>>>>>>     bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>>>>>
>>>>>
>>>>> hmm, no. You will need to introduce a new Host IOMMU device capability,
>>>>> something like :
>>>>>
>>>>>      HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING,
>>>>>
>>>>> Then, introduce an helper routine to check the capability  :
>>>>>
>>>>>      return hiodc->get_cap( ... HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING...)
>>>>>    and replace the iommufd_hwpt_dirty_tracking call with it.
>>>>>
>>>>> Yeah I know, it's cumbersome but it's cleaner !
>>>>>
>>>>
>>>> Funny you mention it, because that's what I did in v3:
>>>>
>>>> https://lore.kernel.org/qemu-devel/20240708143420.16953-9-joao.m.martins@oracle.com/
>>>>
>>>> But it was suggested to drop (I am assuming to avoid complexity)
>>>
>>> my bad if I did :/
>>>
>>
>> No worries it is all part of review -- I think Zhenzhong proposed with good
>> intentions, and I probably didn't think too hard about the consequences on
>> layering with the HIOD.
>>
>>> we will need an helper such as :
>>>
>>>    bool vfio_device_dirty_tracking(VFIODevice *vbasedev)
>>>    {
>>>        HostIOMMUDevice *hiod = vbasedev->hiod ;
>>>        HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
>>>
>>>        return hiodc->get_cap &&
>>>            hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING, NULL)
>>> == 1;
>>>    }
>>>
>>> and something like,
>>>
>>>    static int hiod_iommufd_vfio_get_cap(HostIOMMUDevice *hiod, int cap,
>>>                                         Error **errp)
>>>    {
>>>        switch (cap) {
>>>        case HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING:
>>>            return !!(hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING);
>>>        default:
>>>            error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
>>>            return -EINVAL;
>>>        }
>>>    }
>>>
>>> Feel free to propose your own implementation,
>>>
>>
>> Actually it's close to what I had in v3 link, except the new helper (the name
>> vfio_device_dirty_tracking is a bit misleading I would call it
>> vfio_device_iommu_dirty_tracking)
> 
> Let's call it vfio_device_iommu_dirty_tracking.
> 

I thinking about this and I am not that sure it makes sense. That is the
.get_cap() stuff.

Using the hw_caps is only useful when choosing hwpt_flags, then the only thing
that matters for patch 12 is after the device is attached ... hence we gotta
look at hwpt_flags. That ultimately is what tells if dirty tracking can be done
in the device pagetable.

I can expand hiod_iommufd_vfio_get_cap() to return the hwpt flags, but it feels
just as hacky given that I am testing its enablement of the hardware pagetable
(HWPT), and not asking a HIOD capability.

e.g. hiod_iommufd_vfio_get_cap would make more sense in patch 9 for the
attach_device() flow[*], but not for vfio_migration_realize() flow.

[*] though feels unneeded as we only have a local callsite, not external user so
far.

Which would technically make v5.1 patch a more correct right check, perhaps with
better layering/naming.

>> I can follow-up with this improvement in case this gets merged as is, 
> 
> I can't merge as is since it break compiles (I am excluding the v5.1 patch).
> Which means I would prefer a v6 please.
> 

Ah OK -- I thought this discussion assumed v5.1 to be in which does fix the
compilation issue and all that remained were acks.

>> or include
>> it in the next version if you prefer to adjourn this series into 9.2 (given the
>> lack of time to get everything right).
> 
> There aren't many open questions left.
> 
> * PATCH 5 lacks a R-b. I would feel more confortable if ZhenZhong or
>   Eric acked the changes.
> * PATCH 9 is slightly hacky with the use of vfio_device_get_aw_bits().
>   I think it's minor. I would also feel more confortable if ZhenZhong
>   acked the changes.

I guess you meant patch 6 and not 9.

> * PATCH 12 needs the fix we have been talking about.
> * PATCH 13 is for dev/debug.
>
> 
> What's important is to avoid introducing regressions in the current behavior,
> that is when not using IOMMUFD. It looks fine on that aspect AFAICT.

OK
Cédric Le Goater July 22, 2024, 5:04 p.m. UTC | #11
On 7/22/24 18:29, Joao Martins wrote:
> On 22/07/2024 16:58, Cédric Le Goater wrote:
>> On 7/22/24 17:42, Joao Martins wrote:
>>> On 22/07/2024 16:13, Cédric Le Goater wrote:
>>>> On 7/22/24 17:01, Joao Martins wrote:
>>>>> On 22/07/2024 15:53, Cédric Le Goater wrote:
>>>>>> On 7/19/24 19:26, Joao Martins wrote:
>>>>>>> On 19/07/2024 15:24, Joao Martins wrote:
>>>>>>>> On 19/07/2024 15:17, Cédric Le Goater wrote:
>>>>>>>>> On 7/19/24 14:05, Joao Martins wrote:
>>>>>>>>>> By default VFIO migration is set to auto, which will support live
>>>>>>>>>> migration if the migration capability is set *and* also dirty page
>>>>>>>>>> tracking is supported.
>>>>>>>>>>
>>>>>>>>>> For testing purposes one can force enable without dirty page tracking
>>>>>>>>>> via enable-migration=on, but that option is generally left for testing
>>>>>>>>>> purposes.
>>>>>>>>>>
>>>>>>>>>> So starting with IOMMU dirty tracking it can use to accomodate the lack of
>>>>>>>>>> VF dirty page tracking allowing us to minimize the VF requirements for
>>>>>>>>>> migration and thus enabling migration by default for those too.
>>>>>>>>>>
>>>>>>>>>> While at it change the error messages to mention IOMMU dirty tracking as
>>>>>>>>>> well.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>>>>>>>> ---
>>>>>>>>>>       include/hw/vfio/vfio-common.h |  1 +
>>>>>>>>>>       hw/vfio/iommufd.c             |  2 +-
>>>>>>>>>>       hw/vfio/migration.c           | 11 ++++++-----
>>>>>>>>>>       3 files changed, 8 insertions(+), 6 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>>>>>>>>> index 7e530c7869dc..00b9e933449e 100644
>>>>>>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>>>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>>>>>>> @@ -299,6 +299,7 @@ int vfio_devices_query_dirty_bitmap(const
>>>>>>>>>> VFIOContainerBase *bcontainer,
>>>>>>>>>>                       VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error
>>>>>>>>>> **errp);
>>>>>>>>>>       int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer,
>>>>>>>>>> uint64_t
>>>>>>>>>> iova,
>>>>>>>>>>                                 uint64_t size, ram_addr_t ram_addr, Error
>>>>>>>>>> **errp);
>>>>>>>>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>>>>>>>>>>         /* Returns 0 on success, or a negative errno. */
>>>>>>>>>>       bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>>>>>>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>>>>>>>> index 7dd5d43ce06a..a998e8578552 100644
>>>>>>>>>> --- a/hw/vfio/iommufd.c
>>>>>>>>>> +++ b/hw/vfio/iommufd.c
>>>>>>>>>> @@ -111,7 +111,7 @@ static void
>>>>>>>>>> iommufd_cdev_unbind_and_disconnect(VFIODevice
>>>>>>>>>> *vbasedev)
>>>>>>>>>>           iommufd_backend_disconnect(vbasedev->iommufd);
>>>>>>>>>>       }
>>>>>>>>>>       -static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>>>>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>>>>>>       {
>>>>>>>>>>           return hwpt && hwpt->hwpt_flags &
>>>>>>>>>> IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>>>>>>>>>       }
>>>>>>>>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>>>>>>>>> index 34d4be2ce1b1..63ffa46c9652 100644
>>>>>>>>>> --- a/hw/vfio/migration.c
>>>>>>>>>> +++ b/hw/vfio/migration.c
>>>>>>>>>> @@ -1036,16 +1036,17 @@ bool vfio_migration_realize(VFIODevice *vbasedev,
>>>>>>>>>> Error **errp)
>>>>>>>>>>               return !vfio_block_migration(vbasedev, err, errp);
>>>>>>>>>>           }
>>>>>>>>>>       -    if (!vbasedev->dirty_pages_supported) {
>>>>>>>>>> +    if (!vbasedev->dirty_pages_supported &&
>>>>>>>>>> +        !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)) {
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Some platforms do not have IOMMUFD support and this call will need
>>>>>>>>> some kind of abstract wrapper to reflect dirty tracking support in
>>>>>>>>> the IOMMU backend.
>>>>>>>>>
>>>>>>>>
>>>>>>>> This was actually on purpose because only IOMMUFD presents a view of
>>>>>>>> hardware
>>>>>>>> whereas type1 supporting dirty page tracking is not used as means to
>>>>>>>> 'migration
>>>>>>>> is supported'.
>>>>>>>>
>>>>>>>> The hwpt is nil in type1 and the helper checks that, so it should return
>>>>>>>> false.
>>>>>>>>
>>>>>>>
>>>>>>> Oh wait, maybe you're talking about CONFIG_IOMMUFD=n which I totally didn't
>>>>>>> consider. Maybe this would be a elegant way to address it? Looks to pass my
>>>>>>> build with CONFIG_IOMMUFD=n
>>>>>>>
>>>>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>>>>>> index 61dd48e79b71..422ad4a5bdd1 100644
>>>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>>>> @@ -300,7 +300,14 @@ int vfio_devices_query_dirty_bitmap(const
>>>>>>> VFIOContainerBase
>>>>>>> *bcontainer,
>>>>>>>                      VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error
>>>>>>> **errp);
>>>>>>>      int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t
>>>>>>> iova,
>>>>>>>                                uint64_t size, ram_addr_t ram_addr, Error
>>>>>>> **errp);
>>>>>>> +#ifdef CONFIG_IOMMUFD
>>>>>>>      bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>>>>>>> +#else
>>>>>>> +static inline bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>>> +{
>>>>>>> +    return false;
>>>>>>> +}
>>>>>>> +#endif
>>>>>>>
>>>>>>>      /* Returns 0 on success, or a negative errno. */
>>>>>>>      bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>>>>>>
>>>>>>
>>>>>> hmm, no. You will need to introduce a new Host IOMMU device capability,
>>>>>> something like :
>>>>>>
>>>>>>       HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING,
>>>>>>
>>>>>> Then, introduce an helper routine to check the capability  :
>>>>>>
>>>>>>       return hiodc->get_cap( ... HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING...)
>>>>>>     and replace the iommufd_hwpt_dirty_tracking call with it.
>>>>>>
>>>>>> Yeah I know, it's cumbersome but it's cleaner !
>>>>>>
>>>>>
>>>>> Funny you mention it, because that's what I did in v3:
>>>>>
>>>>> https://lore.kernel.org/qemu-devel/20240708143420.16953-9-joao.m.martins@oracle.com/
>>>>>
>>>>> But it was suggested to drop (I am assuming to avoid complexity)
>>>>
>>>> my bad if I did :/
>>>>
>>>
>>> No worries it is all part of review -- I think Zhenzhong proposed with good
>>> intentions, and I probably didn't think too hard about the consequences on
>>> layering with the HIOD.
>>>
>>>> we will need an helper such as :
>>>>
>>>>     bool vfio_device_dirty_tracking(VFIODevice *vbasedev)
>>>>     {
>>>>         HostIOMMUDevice *hiod = vbasedev->hiod ;
>>>>         HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
>>>>
>>>>         return hiodc->get_cap &&
>>>>             hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING, NULL)
>>>> == 1;
>>>>     }
>>>>
>>>> and something like,
>>>>
>>>>     static int hiod_iommufd_vfio_get_cap(HostIOMMUDevice *hiod, int cap,
>>>>                                          Error **errp)
>>>>     {
>>>>         switch (cap) {
>>>>         case HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING:
>>>>             return !!(hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING);
>>>>         default:
>>>>             error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
>>>>             return -EINVAL;
>>>>         }
>>>>     }
>>>>
>>>> Feel free to propose your own implementation,
>>>>
>>>
>>> Actually it's close to what I had in v3 link, except the new helper (the name
>>> vfio_device_dirty_tracking is a bit misleading I would call it
>>> vfio_device_iommu_dirty_tracking)
>>
>> Let's call it vfio_device_iommu_dirty_tracking.
>>
> 
> I thinking about this and I am not that sure it makes sense. That is the
> .get_cap() stuff.
> 
> Using the hw_caps is only useful when choosing hwpt_flags, then the only thing
> that matters for patch 12 is after the device is attached ... hence we gotta
> look at hwpt_flags. That ultimately is what tells if dirty tracking can be done
> in the device pagetable.
> 
> I can expand hiod_iommufd_vfio_get_cap() to return the hwpt flags, but it feels
> just as hacky given that I am testing its enablement of the hardware pagetable
> (HWPT), and not asking a HIOD capability.

arf. yes.

> e.g. hiod_iommufd_vfio_get_cap would make more sense in patch 9 for the
> attach_device() flow[*], but not for vfio_migration_realize() flow.
> 
> [*] though feels unneeded as we only have a local callsite, not external user so
> far.
> 
> Which would technically make v5.1 patch a more correct right check, perhaps with
> better layering/naming.

The quick fix (plan B if needed) would be :

@@ -1038,8 +1038,11 @@ bool vfio_migration_realize(VFIODevice *
      }
  
      if ((!vbasedev->dirty_pages_supported ||
-         vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) &&
-        !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)) {
+         vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF)
+#ifdef CONFIG_IOMMUFD
+        && !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)
+#endif
+        ) {
          if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) {
              error_setg(&err,
                         "%s: VFIO device doesn't support device and "

I would prefer to avoid the common component to reference IOMMUFD
directly. The only exception today is the use of the vbasedev->iommufd
pointer which is treated as opaque.

I guess a simple approach would be to store the result of
iommufd_hwpt_dirty_tracking(hwpt) under a 'dirty_tracking' attribute
of vbasedev and return the value in vfio_device_iommu_dirty_tracking() ?

if not, let's merge v5 (with more acks) and the fix of plan B.


>>> I can follow-up with this improvement in case this gets merged as is,
>>
>> I can't merge as is since it break compiles (I am excluding the v5.1 patch).
>> Which means I would prefer a v6 please.
>>
> 
> Ah OK -- I thought this discussion assumed v5.1 to be in which does fix the
> compilation issue and all that remained were acks.

v5.1 proposes a CONFIG_IOMMUFD in a header file which is error prone.
  
>>> or include
>>> it in the next version if you prefer to adjourn this series into 9.2 (given the
>>> lack of time to get everything right).
>>
>> There aren't many open questions left.
>>
>> * PATCH 5 lacks a R-b. I would feel more confortable if ZhenZhong or
>>    Eric acked the changes.
>> * PATCH 9 is slightly hacky with the use of vfio_device_get_aw_bits().
>>    I think it's minor. I would also feel more confortable if ZhenZhong
>>    acked the changes.
> 
> I guess you meant patch 6 and not 9.

yes.

Thanks,

C.



> 
>> * PATCH 12 needs the fix we have been talking about.
>> * PATCH 13 is for dev/debug.
>>
>>
>> What's important is to avoid introducing regressions in the current behavior,
>> that is when not using IOMMUFD. It looks fine on that aspect AFAICT.
> 
> OK
>
Cédric Le Goater July 22, 2024, 5:15 p.m. UTC | #12
On 7/22/24 19:04, Cédric Le Goater wrote:
> On 7/22/24 18:29, Joao Martins wrote:
>> On 22/07/2024 16:58, Cédric Le Goater wrote:
>>> On 7/22/24 17:42, Joao Martins wrote:
>>>> On 22/07/2024 16:13, Cédric Le Goater wrote:
>>>>> On 7/22/24 17:01, Joao Martins wrote:
>>>>>> On 22/07/2024 15:53, Cédric Le Goater wrote:
>>>>>>> On 7/19/24 19:26, Joao Martins wrote:
>>>>>>>> On 19/07/2024 15:24, Joao Martins wrote:
>>>>>>>>> On 19/07/2024 15:17, Cédric Le Goater wrote:
>>>>>>>>>> On 7/19/24 14:05, Joao Martins wrote:
>>>>>>>>>>> By default VFIO migration is set to auto, which will support live
>>>>>>>>>>> migration if the migration capability is set *and* also dirty page
>>>>>>>>>>> tracking is supported.
>>>>>>>>>>>
>>>>>>>>>>> For testing purposes one can force enable without dirty page tracking
>>>>>>>>>>> via enable-migration=on, but that option is generally left for testing
>>>>>>>>>>> purposes.
>>>>>>>>>>>
>>>>>>>>>>> So starting with IOMMU dirty tracking it can use to accomodate the lack of
>>>>>>>>>>> VF dirty page tracking allowing us to minimize the VF requirements for
>>>>>>>>>>> migration and thus enabling migration by default for those too.
>>>>>>>>>>>
>>>>>>>>>>> While at it change the error messages to mention IOMMU dirty tracking as
>>>>>>>>>>> well.
>>>>>>>>>>>
>>>>>>>>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>>>>>>>>> ---
>>>>>>>>>>>       include/hw/vfio/vfio-common.h |  1 +
>>>>>>>>>>>       hw/vfio/iommufd.c             |  2 +-
>>>>>>>>>>>       hw/vfio/migration.c           | 11 ++++++-----
>>>>>>>>>>>       3 files changed, 8 insertions(+), 6 deletions(-)
>>>>>>>>>>>
>>>>>>>>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>>>>>>>>>> index 7e530c7869dc..00b9e933449e 100644
>>>>>>>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>>>>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>>>>>>>> @@ -299,6 +299,7 @@ int vfio_devices_query_dirty_bitmap(const
>>>>>>>>>>> VFIOContainerBase *bcontainer,
>>>>>>>>>>>                       VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error
>>>>>>>>>>> **errp);
>>>>>>>>>>>       int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer,
>>>>>>>>>>> uint64_t
>>>>>>>>>>> iova,
>>>>>>>>>>>                                 uint64_t size, ram_addr_t ram_addr, Error
>>>>>>>>>>> **errp);
>>>>>>>>>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>>>>>>>>>>>         /* Returns 0 on success, or a negative errno. */
>>>>>>>>>>>       bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>>>>>>>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>>>>>>>>> index 7dd5d43ce06a..a998e8578552 100644
>>>>>>>>>>> --- a/hw/vfio/iommufd.c
>>>>>>>>>>> +++ b/hw/vfio/iommufd.c
>>>>>>>>>>> @@ -111,7 +111,7 @@ static void
>>>>>>>>>>> iommufd_cdev_unbind_and_disconnect(VFIODevice
>>>>>>>>>>> *vbasedev)
>>>>>>>>>>>           iommufd_backend_disconnect(vbasedev->iommufd);
>>>>>>>>>>>       }
>>>>>>>>>>>       -static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>>>>>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>>>>>>>       {
>>>>>>>>>>>           return hwpt && hwpt->hwpt_flags &
>>>>>>>>>>> IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>>>>>>>>>>       }
>>>>>>>>>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>>>>>>>>>> index 34d4be2ce1b1..63ffa46c9652 100644
>>>>>>>>>>> --- a/hw/vfio/migration.c
>>>>>>>>>>> +++ b/hw/vfio/migration.c
>>>>>>>>>>> @@ -1036,16 +1036,17 @@ bool vfio_migration_realize(VFIODevice *vbasedev,
>>>>>>>>>>> Error **errp)
>>>>>>>>>>>               return !vfio_block_migration(vbasedev, err, errp);
>>>>>>>>>>>           }
>>>>>>>>>>>       -    if (!vbasedev->dirty_pages_supported) {
>>>>>>>>>>> +    if (!vbasedev->dirty_pages_supported &&
>>>>>>>>>>> +        !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)) {
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Some platforms do not have IOMMUFD support and this call will need
>>>>>>>>>> some kind of abstract wrapper to reflect dirty tracking support in
>>>>>>>>>> the IOMMU backend.
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> This was actually on purpose because only IOMMUFD presents a view of
>>>>>>>>> hardware
>>>>>>>>> whereas type1 supporting dirty page tracking is not used as means to
>>>>>>>>> 'migration
>>>>>>>>> is supported'.
>>>>>>>>>
>>>>>>>>> The hwpt is nil in type1 and the helper checks that, so it should return
>>>>>>>>> false.
>>>>>>>>>
>>>>>>>>
>>>>>>>> Oh wait, maybe you're talking about CONFIG_IOMMUFD=n which I totally didn't
>>>>>>>> consider. Maybe this would be a elegant way to address it? Looks to pass my
>>>>>>>> build with CONFIG_IOMMUFD=n
>>>>>>>>
>>>>>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>>>>>>> index 61dd48e79b71..422ad4a5bdd1 100644
>>>>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>>>>> @@ -300,7 +300,14 @@ int vfio_devices_query_dirty_bitmap(const
>>>>>>>> VFIOContainerBase
>>>>>>>> *bcontainer,
>>>>>>>>                      VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error
>>>>>>>> **errp);
>>>>>>>>      int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t
>>>>>>>> iova,
>>>>>>>>                                uint64_t size, ram_addr_t ram_addr, Error
>>>>>>>> **errp);
>>>>>>>> +#ifdef CONFIG_IOMMUFD
>>>>>>>>      bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>>>>>>>> +#else
>>>>>>>> +static inline bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>>>> +{
>>>>>>>> +    return false;
>>>>>>>> +}
>>>>>>>> +#endif
>>>>>>>>
>>>>>>>>      /* Returns 0 on success, or a negative errno. */
>>>>>>>>      bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>>>>>>>
>>>>>>>
>>>>>>> hmm, no. You will need to introduce a new Host IOMMU device capability,
>>>>>>> something like :
>>>>>>>
>>>>>>>       HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING,
>>>>>>>
>>>>>>> Then, introduce an helper routine to check the capability  :
>>>>>>>
>>>>>>>       return hiodc->get_cap( ... HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING...)
>>>>>>>     and replace the iommufd_hwpt_dirty_tracking call with it.
>>>>>>>
>>>>>>> Yeah I know, it's cumbersome but it's cleaner !
>>>>>>>
>>>>>>
>>>>>> Funny you mention it, because that's what I did in v3:
>>>>>>
>>>>>> https://lore.kernel.org/qemu-devel/20240708143420.16953-9-joao.m.martins@oracle.com/
>>>>>>
>>>>>> But it was suggested to drop (I am assuming to avoid complexity)
>>>>>
>>>>> my bad if I did :/
>>>>>
>>>>
>>>> No worries it is all part of review -- I think Zhenzhong proposed with good
>>>> intentions, and I probably didn't think too hard about the consequences on
>>>> layering with the HIOD.
>>>>
>>>>> we will need an helper such as :
>>>>>
>>>>>     bool vfio_device_dirty_tracking(VFIODevice *vbasedev)
>>>>>     {
>>>>>         HostIOMMUDevice *hiod = vbasedev->hiod ;
>>>>>         HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
>>>>>
>>>>>         return hiodc->get_cap &&
>>>>>             hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING, NULL)
>>>>> == 1;
>>>>>     }
>>>>>
>>>>> and something like,
>>>>>
>>>>>     static int hiod_iommufd_vfio_get_cap(HostIOMMUDevice *hiod, int cap,
>>>>>                                          Error **errp)
>>>>>     {
>>>>>         switch (cap) {
>>>>>         case HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING:
>>>>>             return !!(hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING);
>>>>>         default:
>>>>>             error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
>>>>>             return -EINVAL;
>>>>>         }
>>>>>     }
>>>>>
>>>>> Feel free to propose your own implementation,
>>>>>
>>>>
>>>> Actually it's close to what I had in v3 link, except the new helper (the name
>>>> vfio_device_dirty_tracking is a bit misleading I would call it
>>>> vfio_device_iommu_dirty_tracking)
>>>
>>> Let's call it vfio_device_iommu_dirty_tracking.
>>>
>>
>> I thinking about this and I am not that sure it makes sense. That is the
>> .get_cap() stuff.
>>
>> Using the hw_caps is only useful when choosing hwpt_flags, then the only thing
>> that matters for patch 12 is after the device is attached ... hence we gotta
>> look at hwpt_flags. That ultimately is what tells if dirty tracking can be done
>> in the device pagetable.
>>
>> I can expand hiod_iommufd_vfio_get_cap() to return the hwpt flags, but it feels
>> just as hacky given that I am testing its enablement of the hardware pagetable
>> (HWPT), and not asking a HIOD capability.
> 
> arf. yes.
> 
>> e.g. hiod_iommufd_vfio_get_cap would make more sense in patch 9 for the
>> attach_device() flow[*], but not for vfio_migration_realize() flow.
>>
>> [*] though feels unneeded as we only have a local callsite, not external user so
>> far.
>>
>> Which would technically make v5.1 patch a more correct right check, perhaps with
>> better layering/naming.
> 
> The quick fix (plan B if needed) would be :
> 
> @@ -1038,8 +1038,11 @@ bool vfio_migration_realize(VFIODevice *
>       }
> 
>       if ((!vbasedev->dirty_pages_supported ||
> -         vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) &&
> -        !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)) {
> +         vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF)
> +#ifdef CONFIG_IOMMUFD
> +        && !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)
> +#endif
> +        ) {
>           if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) {
>               error_setg(&err,
>                          "%s: VFIO device doesn't support device and "
> 
> I would prefer to avoid the common component to reference IOMMUFD
> directly. The only exception today is the use of the vbasedev->iommufd
> pointer which is treated as opaque.
> 
> I guess a simple approach would be to store the result of
> iommufd_hwpt_dirty_tracking(hwpt) under a 'dirty_tracking' attribute

'iommu_dirty_tracking' may be. 'dirty_tracking' is already used to
track ongoing logging.




> of vbasedev and return the value in vfio_device_iommu_dirty_tracking() ?
> 
> if not, let's merge v5 (with more acks) and the fix of plan B.
> 
> 
>>>> I can follow-up with this improvement in case this gets merged as is,
>>>
>>> I can't merge as is since it break compiles (I am excluding the v5.1 patch).
>>> Which means I would prefer a v6 please.
>>>
>>
>> Ah OK -- I thought this discussion assumed v5.1 to be in which does fix the
>> compilation issue and all that remained were acks.
> 
> v5.1 proposes a CONFIG_IOMMUFD in a header file which is error prone.
> 
>>>> or include
>>>> it in the next version if you prefer to adjourn this series into 9.2 (given the
>>>> lack of time to get everything right).
>>>
>>> There aren't many open questions left.
>>>
>>> * PATCH 5 lacks a R-b. I would feel more confortable if ZhenZhong or
>>>    Eric acked the changes.
>>> * PATCH 9 is slightly hacky with the use of vfio_device_get_aw_bits().
>>>    I think it's minor. I would also feel more confortable if ZhenZhong
>>>    acked the changes.
>>
>> I guess you meant patch 6 and not 9.
> 
> yes.
> 
> Thanks,
> 
> C.
> 
> 
> 
>>
>>> * PATCH 12 needs the fix we have been talking about.
>>> * PATCH 13 is for dev/debug.
>>>
>>>
>>> What's important is to avoid introducing regressions in the current behavior,
>>> that is when not using IOMMUFD. It looks fine on that aspect AFAICT.
>>
>> OK
>>
>
Joao Martins July 22, 2024, 6:01 p.m. UTC | #13
On 22/07/2024 18:04, Cédric Le Goater wrote:
> On 7/22/24 18:29, Joao Martins wrote:
>> On 22/07/2024 16:58, Cédric Le Goater wrote:
>>> On 7/22/24 17:42, Joao Martins wrote:
>>>> On 22/07/2024 16:13, Cédric Le Goater wrote:
>>>>> On 7/22/24 17:01, Joao Martins wrote:
>>>>>> On 22/07/2024 15:53, Cédric Le Goater wrote:
>>>>>>> On 7/19/24 19:26, Joao Martins wrote:
>>>>>>>> On 19/07/2024 15:24, Joao Martins wrote:
>>>>>>>>> On 19/07/2024 15:17, Cédric Le Goater wrote:
>>>>>>>>>> On 7/19/24 14:05, Joao Martins wrote:
>>>>>>>>>>> By default VFIO migration is set to auto, which will support live
>>>>>>>>>>> migration if the migration capability is set *and* also dirty page
>>>>>>>>>>> tracking is supported.
>>>>>>>>>>>
>>>>>>>>>>> For testing purposes one can force enable without dirty page tracking
>>>>>>>>>>> via enable-migration=on, but that option is generally left for testing
>>>>>>>>>>> purposes.
>>>>>>>>>>>
>>>>>>>>>>> So starting with IOMMU dirty tracking it can use to accomodate the
>>>>>>>>>>> lack of
>>>>>>>>>>> VF dirty page tracking allowing us to minimize the VF requirements for
>>>>>>>>>>> migration and thus enabling migration by default for those too.
>>>>>>>>>>>
>>>>>>>>>>> While at it change the error messages to mention IOMMU dirty tracking as
>>>>>>>>>>> well.
>>>>>>>>>>>
>>>>>>>>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>>>>>>>>> ---
>>>>>>>>>>>       include/hw/vfio/vfio-common.h |  1 +
>>>>>>>>>>>       hw/vfio/iommufd.c             |  2 +-
>>>>>>>>>>>       hw/vfio/migration.c           | 11 ++++++-----
>>>>>>>>>>>       3 files changed, 8 insertions(+), 6 deletions(-)
>>>>>>>>>>>
>>>>>>>>>>> diff --git a/include/hw/vfio/vfio-common.h
>>>>>>>>>>> b/include/hw/vfio/vfio-common.h
>>>>>>>>>>> index 7e530c7869dc..00b9e933449e 100644
>>>>>>>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>>>>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>>>>>>>> @@ -299,6 +299,7 @@ int vfio_devices_query_dirty_bitmap(const
>>>>>>>>>>> VFIOContainerBase *bcontainer,
>>>>>>>>>>>                       VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error
>>>>>>>>>>> **errp);
>>>>>>>>>>>       int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer,
>>>>>>>>>>> uint64_t
>>>>>>>>>>> iova,
>>>>>>>>>>>                                 uint64_t size, ram_addr_t ram_addr,
>>>>>>>>>>> Error
>>>>>>>>>>> **errp);
>>>>>>>>>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>>>>>>>>>>>         /* Returns 0 on success, or a negative errno. */
>>>>>>>>>>>       bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>>>>>>>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>>>>>>>>> index 7dd5d43ce06a..a998e8578552 100644
>>>>>>>>>>> --- a/hw/vfio/iommufd.c
>>>>>>>>>>> +++ b/hw/vfio/iommufd.c
>>>>>>>>>>> @@ -111,7 +111,7 @@ static void
>>>>>>>>>>> iommufd_cdev_unbind_and_disconnect(VFIODevice
>>>>>>>>>>> *vbasedev)
>>>>>>>>>>>           iommufd_backend_disconnect(vbasedev->iommufd);
>>>>>>>>>>>       }
>>>>>>>>>>>       -static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>>>>>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>>>>>>>       {
>>>>>>>>>>>           return hwpt && hwpt->hwpt_flags &
>>>>>>>>>>> IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>>>>>>>>>>       }
>>>>>>>>>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>>>>>>>>>> index 34d4be2ce1b1..63ffa46c9652 100644
>>>>>>>>>>> --- a/hw/vfio/migration.c
>>>>>>>>>>> +++ b/hw/vfio/migration.c
>>>>>>>>>>> @@ -1036,16 +1036,17 @@ bool vfio_migration_realize(VFIODevice
>>>>>>>>>>> *vbasedev,
>>>>>>>>>>> Error **errp)
>>>>>>>>>>>               return !vfio_block_migration(vbasedev, err, errp);
>>>>>>>>>>>           }
>>>>>>>>>>>       -    if (!vbasedev->dirty_pages_supported) {
>>>>>>>>>>> +    if (!vbasedev->dirty_pages_supported &&
>>>>>>>>>>> +        !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)) {
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Some platforms do not have IOMMUFD support and this call will need
>>>>>>>>>> some kind of abstract wrapper to reflect dirty tracking support in
>>>>>>>>>> the IOMMU backend.
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> This was actually on purpose because only IOMMUFD presents a view of
>>>>>>>>> hardware
>>>>>>>>> whereas type1 supporting dirty page tracking is not used as means to
>>>>>>>>> 'migration
>>>>>>>>> is supported'.
>>>>>>>>>
>>>>>>>>> The hwpt is nil in type1 and the helper checks that, so it should return
>>>>>>>>> false.
>>>>>>>>>
>>>>>>>>
>>>>>>>> Oh wait, maybe you're talking about CONFIG_IOMMUFD=n which I totally didn't
>>>>>>>> consider. Maybe this would be a elegant way to address it? Looks to pass my
>>>>>>>> build with CONFIG_IOMMUFD=n
>>>>>>>>
>>>>>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>>>>>>> index 61dd48e79b71..422ad4a5bdd1 100644
>>>>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>>>>> @@ -300,7 +300,14 @@ int vfio_devices_query_dirty_bitmap(const
>>>>>>>> VFIOContainerBase
>>>>>>>> *bcontainer,
>>>>>>>>                      VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error
>>>>>>>> **errp);
>>>>>>>>      int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer,
>>>>>>>> uint64_t
>>>>>>>> iova,
>>>>>>>>                                uint64_t size, ram_addr_t ram_addr, Error
>>>>>>>> **errp);
>>>>>>>> +#ifdef CONFIG_IOMMUFD
>>>>>>>>      bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>>>>>>>> +#else
>>>>>>>> +static inline bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>>>> +{
>>>>>>>> +    return false;
>>>>>>>> +}
>>>>>>>> +#endif
>>>>>>>>
>>>>>>>>      /* Returns 0 on success, or a negative errno. */
>>>>>>>>      bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>>>>>>>
>>>>>>>
>>>>>>> hmm, no. You will need to introduce a new Host IOMMU device capability,
>>>>>>> something like :
>>>>>>>
>>>>>>>       HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING,
>>>>>>>
>>>>>>> Then, introduce an helper routine to check the capability  :
>>>>>>>
>>>>>>>       return hiodc->get_cap( ... HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING...)
>>>>>>>     and replace the iommufd_hwpt_dirty_tracking call with it.
>>>>>>>
>>>>>>> Yeah I know, it's cumbersome but it's cleaner !
>>>>>>>
>>>>>>
>>>>>> Funny you mention it, because that's what I did in v3:
>>>>>>
>>>>>> https://lore.kernel.org/qemu-devel/20240708143420.16953-9-joao.m.martins@oracle.com/
>>>>>>
>>>>>> But it was suggested to drop (I am assuming to avoid complexity)
>>>>>
>>>>> my bad if I did :/
>>>>>
>>>>
>>>> No worries it is all part of review -- I think Zhenzhong proposed with good
>>>> intentions, and I probably didn't think too hard about the consequences on
>>>> layering with the HIOD.
>>>>
>>>>> we will need an helper such as :
>>>>>
>>>>>     bool vfio_device_dirty_tracking(VFIODevice *vbasedev)
>>>>>     {
>>>>>         HostIOMMUDevice *hiod = vbasedev->hiod ;
>>>>>         HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
>>>>>
>>>>>         return hiodc->get_cap &&
>>>>>             hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING, NULL)
>>>>> == 1;
>>>>>     }
>>>>>
>>>>> and something like,
>>>>>
>>>>>     static int hiod_iommufd_vfio_get_cap(HostIOMMUDevice *hiod, int cap,
>>>>>                                          Error **errp)
>>>>>     {
>>>>>         switch (cap) {
>>>>>         case HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING:
>>>>>             return !!(hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING);
>>>>>         default:
>>>>>             error_setg(errp, "%s: unsupported capability %x", hiod->name,
>>>>> cap);
>>>>>             return -EINVAL;
>>>>>         }
>>>>>     }
>>>>>
>>>>> Feel free to propose your own implementation,
>>>>>
>>>>
>>>> Actually it's close to what I had in v3 link, except the new helper (the name
>>>> vfio_device_dirty_tracking is a bit misleading I would call it
>>>> vfio_device_iommu_dirty_tracking)
>>>
>>> Let's call it vfio_device_iommu_dirty_tracking.
>>>
>>
>> I thinking about this and I am not that sure it makes sense. That is the
>> .get_cap() stuff.
>>
>> Using the hw_caps is only useful when choosing hwpt_flags, then the only thing
>> that matters for patch 12 is after the device is attached ... hence we gotta
>> look at hwpt_flags. That ultimately is what tells if dirty tracking can be done
>> in the device pagetable.
>>
>> I can expand hiod_iommufd_vfio_get_cap() to return the hwpt flags, but it feels
>> just as hacky given that I am testing its enablement of the hardware pagetable
>> (HWPT), and not asking a HIOD capability.
> 
> arf. yes.
> 
>> e.g. hiod_iommufd_vfio_get_cap would make more sense in patch 9 for the
>> attach_device() flow[*], but not for vfio_migration_realize() flow.
>>
>> [*] though feels unneeded as we only have a local callsite, not external user so
>> far.
>>
>> Which would technically make v5.1 patch a more correct right check, perhaps with
>> better layering/naming.
> 
> The quick fix (plan B if needed) would be :
> 
> @@ -1038,8 +1038,11 @@ bool vfio_migration_realize(VFIODevice *
>      }
>  
>      if ((!vbasedev->dirty_pages_supported ||
> -         vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) &&
> -        !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)) {
> +         vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF)
> +#ifdef CONFIG_IOMMUFD
> +        && !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)
> +#endif
> +        ) {
>          if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) {
>              error_setg(&err,
>                         "%s: VFIO device doesn't support device and "
> 
> I would prefer to avoid the common component to reference IOMMUFD
> directly. The only exception today is the use of the vbasedev->iommufd
> pointer which is treated as opaque.
> 
> I guess a simple approach would be to store the result of
> iommufd_hwpt_dirty_tracking(hwpt) under a 'dirty_tracking' attribute
> of vbasedev and return the value in vfio_device_iommu_dirty_tracking() ?
> 
> if not, let's merge v5 (with more acks) and the fix of plan B.
> 
> 
>>>> I can follow-up with this improvement in case this gets merged as is,
>>>
>>> I can't merge as is since it break compiles (I am excluding the v5.1 patch).
>>> Which means I would prefer a v6 please.
>>>
>>
>> Ah OK -- I thought this discussion assumed v5.1 to be in which does fix the
>> compilation issue and all that remained were acks.
> 
> v5.1 proposes a CONFIG_IOMMUFD in a header file which is error prone.
>  

hmmm, ok, that's strage. It does look quite common in Qemu? e.g. We even have
CONFIG_LINUX in the vfio-common.h header file.

>>>> or include
>>>> it in the next version if you prefer to adjourn this series into 9.2 (given the
>>>> lack of time to get everything right).
>>>
>>> There aren't many open questions left.
>>>
>>> * PATCH 5 lacks a R-b. I would feel more confortable if ZhenZhong or
>>>    Eric acked the changes.
>>> * PATCH 9 is slightly hacky with the use of vfio_device_get_aw_bits().
>>>    I think it's minor. I would also feel more confortable if ZhenZhong
>>>    acked the changes.
>>
>> I guess you meant patch 6 and not 9.
> 
> yes.
> 
> Thanks,
> 
> C.
> 
> 
> 
>>
>>> * PATCH 12 needs the fix we have been talking about.
>>> * PATCH 13 is for dev/debug.
>>>
>>>
>>> What's important is to avoid introducing regressions in the current behavior,
>>> that is when not using IOMMUFD. It looks fine on that aspect AFAICT.
>>
>> OK
>>
>
Joao Martins July 22, 2024, 6:08 p.m. UTC | #14
On 22/07/2024 18:15, Cédric Le Goater wrote:
> On 7/22/24 19:04, Cédric Le Goater wrote:
>> On 7/22/24 18:29, Joao Martins wrote:
>>> On 22/07/2024 16:58, Cédric Le Goater wrote:
>>>> On 7/22/24 17:42, Joao Martins wrote:
>>>>> On 22/07/2024 16:13, Cédric Le Goater wrote:
>>>>>> On 7/22/24 17:01, Joao Martins wrote:
>>>>>>> On 22/07/2024 15:53, Cédric Le Goater wrote:
>>>>>>>> On 7/19/24 19:26, Joao Martins wrote:
>>>>>>>>> On 19/07/2024 15:24, Joao Martins wrote:
>>>>>>>>>> On 19/07/2024 15:17, Cédric Le Goater wrote:
>>>>>>>>>>> On 7/19/24 14:05, Joao Martins wrote:
>>>>>>>>>>>> By default VFIO migration is set to auto, which will support live
>>>>>>>>>>>> migration if the migration capability is set *and* also dirty page
>>>>>>>>>>>> tracking is supported.
>>>>>>>>>>>>
>>>>>>>>>>>> For testing purposes one can force enable without dirty page tracking
>>>>>>>>>>>> via enable-migration=on, but that option is generally left for testing
>>>>>>>>>>>> purposes.
>>>>>>>>>>>>
>>>>>>>>>>>> So starting with IOMMU dirty tracking it can use to accomodate the
>>>>>>>>>>>> lack of
>>>>>>>>>>>> VF dirty page tracking allowing us to minimize the VF requirements for
>>>>>>>>>>>> migration and thus enabling migration by default for those too.
>>>>>>>>>>>>
>>>>>>>>>>>> While at it change the error messages to mention IOMMU dirty
>>>>>>>>>>>> tracking as
>>>>>>>>>>>> well.
>>>>>>>>>>>>
>>>>>>>>>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>>>>>>>>>> ---
>>>>>>>>>>>>       include/hw/vfio/vfio-common.h |  1 +
>>>>>>>>>>>>       hw/vfio/iommufd.c             |  2 +-
>>>>>>>>>>>>       hw/vfio/migration.c           | 11 ++++++-----
>>>>>>>>>>>>       3 files changed, 8 insertions(+), 6 deletions(-)
>>>>>>>>>>>>
>>>>>>>>>>>> diff --git a/include/hw/vfio/vfio-common.h
>>>>>>>>>>>> b/include/hw/vfio/vfio-common.h
>>>>>>>>>>>> index 7e530c7869dc..00b9e933449e 100644
>>>>>>>>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>>>>>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>>>>>>>>> @@ -299,6 +299,7 @@ int vfio_devices_query_dirty_bitmap(const
>>>>>>>>>>>> VFIOContainerBase *bcontainer,
>>>>>>>>>>>>                       VFIOBitmap *vbmap, hwaddr iova, hwaddr size,
>>>>>>>>>>>> Error
>>>>>>>>>>>> **errp);
>>>>>>>>>>>>       int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer,
>>>>>>>>>>>> uint64_t
>>>>>>>>>>>> iova,
>>>>>>>>>>>>                                 uint64_t size, ram_addr_t ram_addr,
>>>>>>>>>>>> Error
>>>>>>>>>>>> **errp);
>>>>>>>>>>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>>>>>>>>>>>>         /* Returns 0 on success, or a negative errno. */
>>>>>>>>>>>>       bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>>>>>>>>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>>>>>>>>>> index 7dd5d43ce06a..a998e8578552 100644
>>>>>>>>>>>> --- a/hw/vfio/iommufd.c
>>>>>>>>>>>> +++ b/hw/vfio/iommufd.c
>>>>>>>>>>>> @@ -111,7 +111,7 @@ static void
>>>>>>>>>>>> iommufd_cdev_unbind_and_disconnect(VFIODevice
>>>>>>>>>>>> *vbasedev)
>>>>>>>>>>>>           iommufd_backend_disconnect(vbasedev->iommufd);
>>>>>>>>>>>>       }
>>>>>>>>>>>>       -static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>>>>>>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>>>>>>>>       {
>>>>>>>>>>>>           return hwpt && hwpt->hwpt_flags &
>>>>>>>>>>>> IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>>>>>>>>>>>       }
>>>>>>>>>>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>>>>>>>>>>> index 34d4be2ce1b1..63ffa46c9652 100644
>>>>>>>>>>>> --- a/hw/vfio/migration.c
>>>>>>>>>>>> +++ b/hw/vfio/migration.c
>>>>>>>>>>>> @@ -1036,16 +1036,17 @@ bool vfio_migration_realize(VFIODevice
>>>>>>>>>>>> *vbasedev,
>>>>>>>>>>>> Error **errp)
>>>>>>>>>>>>               return !vfio_block_migration(vbasedev, err, errp);
>>>>>>>>>>>>           }
>>>>>>>>>>>>       -    if (!vbasedev->dirty_pages_supported) {
>>>>>>>>>>>> +    if (!vbasedev->dirty_pages_supported &&
>>>>>>>>>>>> +        !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)) {
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> Some platforms do not have IOMMUFD support and this call will need
>>>>>>>>>>> some kind of abstract wrapper to reflect dirty tracking support in
>>>>>>>>>>> the IOMMU backend.
>>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> This was actually on purpose because only IOMMUFD presents a view of
>>>>>>>>>> hardware
>>>>>>>>>> whereas type1 supporting dirty page tracking is not used as means to
>>>>>>>>>> 'migration
>>>>>>>>>> is supported'.
>>>>>>>>>>
>>>>>>>>>> The hwpt is nil in type1 and the helper checks that, so it should return
>>>>>>>>>> false.
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Oh wait, maybe you're talking about CONFIG_IOMMUFD=n which I totally
>>>>>>>>> didn't
>>>>>>>>> consider. Maybe this would be a elegant way to address it? Looks to
>>>>>>>>> pass my
>>>>>>>>> build with CONFIG_IOMMUFD=n
>>>>>>>>>
>>>>>>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>>>>>>>> index 61dd48e79b71..422ad4a5bdd1 100644
>>>>>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>>>>>> @@ -300,7 +300,14 @@ int vfio_devices_query_dirty_bitmap(const
>>>>>>>>> VFIOContainerBase
>>>>>>>>> *bcontainer,
>>>>>>>>>                      VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error
>>>>>>>>> **errp);
>>>>>>>>>      int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer,
>>>>>>>>> uint64_t
>>>>>>>>> iova,
>>>>>>>>>                                uint64_t size, ram_addr_t ram_addr, Error
>>>>>>>>> **errp);
>>>>>>>>> +#ifdef CONFIG_IOMMUFD
>>>>>>>>>      bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>>>>>>>>> +#else
>>>>>>>>> +static inline bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>>>>> +{
>>>>>>>>> +    return false;
>>>>>>>>> +}
>>>>>>>>> +#endif
>>>>>>>>>
>>>>>>>>>      /* Returns 0 on success, or a negative errno. */
>>>>>>>>>      bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>>>>>>>>
>>>>>>>>
>>>>>>>> hmm, no. You will need to introduce a new Host IOMMU device capability,
>>>>>>>> something like :
>>>>>>>>
>>>>>>>>       HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING,
>>>>>>>>
>>>>>>>> Then, introduce an helper routine to check the capability  :
>>>>>>>>
>>>>>>>>       return hiodc->get_cap( ... HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING...)
>>>>>>>>     and replace the iommufd_hwpt_dirty_tracking call with it.
>>>>>>>>
>>>>>>>> Yeah I know, it's cumbersome but it's cleaner !
>>>>>>>>
>>>>>>>
>>>>>>> Funny you mention it, because that's what I did in v3:
>>>>>>>
>>>>>>> https://lore.kernel.org/qemu-devel/20240708143420.16953-9-joao.m.martins@oracle.com/
>>>>>>>
>>>>>>> But it was suggested to drop (I am assuming to avoid complexity)
>>>>>>
>>>>>> my bad if I did :/
>>>>>>
>>>>>
>>>>> No worries it is all part of review -- I think Zhenzhong proposed with good
>>>>> intentions, and I probably didn't think too hard about the consequences on
>>>>> layering with the HIOD.
>>>>>
>>>>>> we will need an helper such as :
>>>>>>
>>>>>>     bool vfio_device_dirty_tracking(VFIODevice *vbasedev)
>>>>>>     {
>>>>>>         HostIOMMUDevice *hiod = vbasedev->hiod ;
>>>>>>         HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
>>>>>>
>>>>>>         return hiodc->get_cap &&
>>>>>>             hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING, NULL)
>>>>>> == 1;
>>>>>>     }
>>>>>>
>>>>>> and something like,
>>>>>>
>>>>>>     static int hiod_iommufd_vfio_get_cap(HostIOMMUDevice *hiod, int cap,
>>>>>>                                          Error **errp)
>>>>>>     {
>>>>>>         switch (cap) {
>>>>>>         case HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING:
>>>>>>             return !!(hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING);
>>>>>>         default:
>>>>>>             error_setg(errp, "%s: unsupported capability %x", hiod->name,
>>>>>> cap);
>>>>>>             return -EINVAL;
>>>>>>         }
>>>>>>     }
>>>>>>
>>>>>> Feel free to propose your own implementation,
>>>>>>
>>>>>
>>>>> Actually it's close to what I had in v3 link, except the new helper (the name
>>>>> vfio_device_dirty_tracking is a bit misleading I would call it
>>>>> vfio_device_iommu_dirty_tracking)
>>>>
>>>> Let's call it vfio_device_iommu_dirty_tracking.
>>>>
>>>
>>> I thinking about this and I am not that sure it makes sense. That is the
>>> .get_cap() stuff.
>>>
>>> Using the hw_caps is only useful when choosing hwpt_flags, then the only thing
>>> that matters for patch 12 is after the device is attached ... hence we gotta
>>> look at hwpt_flags. That ultimately is what tells if dirty tracking can be done
>>> in the device pagetable.
>>>
>>> I can expand hiod_iommufd_vfio_get_cap() to return the hwpt flags, but it feels
>>> just as hacky given that I am testing its enablement of the hardware pagetable
>>> (HWPT), and not asking a HIOD capability.
>>
>> arf. yes.
>>
>>> e.g. hiod_iommufd_vfio_get_cap would make more sense in patch 9 for the
>>> attach_device() flow[*], but not for vfio_migration_realize() flow.
>>>
>>> [*] though feels unneeded as we only have a local callsite, not external user so
>>> far.
>>>
>>> Which would technically make v5.1 patch a more correct right check, perhaps with
>>> better layering/naming.
>>
>> The quick fix (plan B if needed) would be :
>>
>> @@ -1038,8 +1038,11 @@ bool vfio_migration_realize(VFIODevice *
>>       }
>>
>>       if ((!vbasedev->dirty_pages_supported ||
>> -         vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) &&
>> -        !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)) {
>> +         vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF)
>> +#ifdef CONFIG_IOMMUFD
>> +        && !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)
>> +#endif
>> +        ) {
>>           if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) {
>>               error_setg(&err,
>>                          "%s: VFIO device doesn't support device and "
>>
>> I would prefer to avoid the common component to reference IOMMUFD
>> directly. The only exception today is the use of the vbasedev->iommufd
>> pointer which is treated as opaque.
>>
>> I guess a simple approach would be to store the result of
>> iommufd_hwpt_dirty_tracking(hwpt) under a 'dirty_tracking' attribute
> 
> 'iommu_dirty_tracking' may be. 'dirty_tracking' is already used to
> track ongoing logging.
> 

I can consolidate all that into a new VFIODevice attribute, and drop the
hwpt_flags it that helps.

I'll try to restructure and try to submit a new version before Zhenzhong wakes up.

> 
> 
> 
>> of vbasedev and return the value in vfio_device_iommu_dirty_tracking() ?
>>
>> if not, let's merge v5 (with more acks) and the fix of plan B.
>>
>>
>>>>> I can follow-up with this improvement in case this gets merged as is,
>>>>
>>>> I can't merge as is since it break compiles (I am excluding the v5.1 patch).
>>>> Which means I would prefer a v6 please.
>>>>
>>>
>>> Ah OK -- I thought this discussion assumed v5.1 to be in which does fix the
>>> compilation issue and all that remained were acks.
>>
>> v5.1 proposes a CONFIG_IOMMUFD in a header file which is error prone.
>>
>>>>> or include
>>>>> it in the next version if you prefer to adjourn this series into 9.2 (given
>>>>> the
>>>>> lack of time to get everything right).
>>>>
>>>> There aren't many open questions left.
>>>>
>>>> * PATCH 5 lacks a R-b. I would feel more confortable if ZhenZhong or
>>>>    Eric acked the changes.
>>>> * PATCH 9 is slightly hacky with the use of vfio_device_get_aw_bits().
>>>>    I think it's minor. I would also feel more confortable if ZhenZhong
>>>>    acked the changes.
>>>
>>> I guess you meant patch 6 and not 9.
>>
>> yes.
>>
>> Thanks,
>>
>> C.
>>
>>
>>
>>>
>>>> * PATCH 12 needs the fix we have been talking about.
>>>> * PATCH 13 is for dev/debug.
>>>>
>>>>
>>>> What's important is to avoid introducing regressions in the current behavior,
>>>> that is when not using IOMMUFD. It looks fine on that aspect AFAICT.
>>>
>>> OK
>>>
>>
>
Cédric Le Goater July 23, 2024, 6:38 a.m. UTC | #15
On 7/22/24 20:01, Joao Martins wrote:
> On 22/07/2024 18:04, Cédric Le Goater wrote:
>> On 7/22/24 18:29, Joao Martins wrote:
>>> On 22/07/2024 16:58, Cédric Le Goater wrote:
>>>> On 7/22/24 17:42, Joao Martins wrote:
>>>>> On 22/07/2024 16:13, Cédric Le Goater wrote:
>>>>>> On 7/22/24 17:01, Joao Martins wrote:
>>>>>>> On 22/07/2024 15:53, Cédric Le Goater wrote:
>>>>>>>> On 7/19/24 19:26, Joao Martins wrote:
>>>>>>>>> On 19/07/2024 15:24, Joao Martins wrote:
>>>>>>>>>> On 19/07/2024 15:17, Cédric Le Goater wrote:
>>>>>>>>>>> On 7/19/24 14:05, Joao Martins wrote:
>>>>>>>>>>>> By default VFIO migration is set to auto, which will support live
>>>>>>>>>>>> migration if the migration capability is set *and* also dirty page
>>>>>>>>>>>> tracking is supported.
>>>>>>>>>>>>
>>>>>>>>>>>> For testing purposes one can force enable without dirty page tracking
>>>>>>>>>>>> via enable-migration=on, but that option is generally left for testing
>>>>>>>>>>>> purposes.
>>>>>>>>>>>>
>>>>>>>>>>>> So starting with IOMMU dirty tracking it can use to accomodate the
>>>>>>>>>>>> lack of
>>>>>>>>>>>> VF dirty page tracking allowing us to minimize the VF requirements for
>>>>>>>>>>>> migration and thus enabling migration by default for those too.
>>>>>>>>>>>>
>>>>>>>>>>>> While at it change the error messages to mention IOMMU dirty tracking as
>>>>>>>>>>>> well.
>>>>>>>>>>>>
>>>>>>>>>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>>>>>>>>>> ---
>>>>>>>>>>>>        include/hw/vfio/vfio-common.h |  1 +
>>>>>>>>>>>>        hw/vfio/iommufd.c             |  2 +-
>>>>>>>>>>>>        hw/vfio/migration.c           | 11 ++++++-----
>>>>>>>>>>>>        3 files changed, 8 insertions(+), 6 deletions(-)
>>>>>>>>>>>>
>>>>>>>>>>>> diff --git a/include/hw/vfio/vfio-common.h
>>>>>>>>>>>> b/include/hw/vfio/vfio-common.h
>>>>>>>>>>>> index 7e530c7869dc..00b9e933449e 100644
>>>>>>>>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>>>>>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>>>>>>>>> @@ -299,6 +299,7 @@ int vfio_devices_query_dirty_bitmap(const
>>>>>>>>>>>> VFIOContainerBase *bcontainer,
>>>>>>>>>>>>                        VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error
>>>>>>>>>>>> **errp);
>>>>>>>>>>>>        int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer,
>>>>>>>>>>>> uint64_t
>>>>>>>>>>>> iova,
>>>>>>>>>>>>                                  uint64_t size, ram_addr_t ram_addr,
>>>>>>>>>>>> Error
>>>>>>>>>>>> **errp);
>>>>>>>>>>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>>>>>>>>>>>>          /* Returns 0 on success, or a negative errno. */
>>>>>>>>>>>>        bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>>>>>>>>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>>>>>>>>>> index 7dd5d43ce06a..a998e8578552 100644
>>>>>>>>>>>> --- a/hw/vfio/iommufd.c
>>>>>>>>>>>> +++ b/hw/vfio/iommufd.c
>>>>>>>>>>>> @@ -111,7 +111,7 @@ static void
>>>>>>>>>>>> iommufd_cdev_unbind_and_disconnect(VFIODevice
>>>>>>>>>>>> *vbasedev)
>>>>>>>>>>>>            iommufd_backend_disconnect(vbasedev->iommufd);
>>>>>>>>>>>>        }
>>>>>>>>>>>>        -static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>>>>>>>> +bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>>>>>>>>        {
>>>>>>>>>>>>            return hwpt && hwpt->hwpt_flags &
>>>>>>>>>>>> IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>>>>>>>>>>>        }
>>>>>>>>>>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>>>>>>>>>>> index 34d4be2ce1b1..63ffa46c9652 100644
>>>>>>>>>>>> --- a/hw/vfio/migration.c
>>>>>>>>>>>> +++ b/hw/vfio/migration.c
>>>>>>>>>>>> @@ -1036,16 +1036,17 @@ bool vfio_migration_realize(VFIODevice
>>>>>>>>>>>> *vbasedev,
>>>>>>>>>>>> Error **errp)
>>>>>>>>>>>>                return !vfio_block_migration(vbasedev, err, errp);
>>>>>>>>>>>>            }
>>>>>>>>>>>>        -    if (!vbasedev->dirty_pages_supported) {
>>>>>>>>>>>> +    if (!vbasedev->dirty_pages_supported &&
>>>>>>>>>>>> +        !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)) {
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> Some platforms do not have IOMMUFD support and this call will need
>>>>>>>>>>> some kind of abstract wrapper to reflect dirty tracking support in
>>>>>>>>>>> the IOMMU backend.
>>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> This was actually on purpose because only IOMMUFD presents a view of
>>>>>>>>>> hardware
>>>>>>>>>> whereas type1 supporting dirty page tracking is not used as means to
>>>>>>>>>> 'migration
>>>>>>>>>> is supported'.
>>>>>>>>>>
>>>>>>>>>> The hwpt is nil in type1 and the helper checks that, so it should return
>>>>>>>>>> false.
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Oh wait, maybe you're talking about CONFIG_IOMMUFD=n which I totally didn't
>>>>>>>>> consider. Maybe this would be a elegant way to address it? Looks to pass my
>>>>>>>>> build with CONFIG_IOMMUFD=n
>>>>>>>>>
>>>>>>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>>>>>>>> index 61dd48e79b71..422ad4a5bdd1 100644
>>>>>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>>>>>> @@ -300,7 +300,14 @@ int vfio_devices_query_dirty_bitmap(const
>>>>>>>>> VFIOContainerBase
>>>>>>>>> *bcontainer,
>>>>>>>>>                       VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error
>>>>>>>>> **errp);
>>>>>>>>>       int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer,
>>>>>>>>> uint64_t
>>>>>>>>> iova,
>>>>>>>>>                                 uint64_t size, ram_addr_t ram_addr, Error
>>>>>>>>> **errp);
>>>>>>>>> +#ifdef CONFIG_IOMMUFD
>>>>>>>>>       bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
>>>>>>>>> +#else
>>>>>>>>> +static inline bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
>>>>>>>>> +{
>>>>>>>>> +    return false;
>>>>>>>>> +}
>>>>>>>>> +#endif
>>>>>>>>>
>>>>>>>>>       /* Returns 0 on success, or a negative errno. */
>>>>>>>>>       bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>>>>>>>>
>>>>>>>>
>>>>>>>> hmm, no. You will need to introduce a new Host IOMMU device capability,
>>>>>>>> something like :
>>>>>>>>
>>>>>>>>        HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING,
>>>>>>>>
>>>>>>>> Then, introduce an helper routine to check the capability  :
>>>>>>>>
>>>>>>>>        return hiodc->get_cap( ... HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING...)
>>>>>>>>      and replace the iommufd_hwpt_dirty_tracking call with it.
>>>>>>>>
>>>>>>>> Yeah I know, it's cumbersome but it's cleaner !
>>>>>>>>
>>>>>>>
>>>>>>> Funny you mention it, because that's what I did in v3:
>>>>>>>
>>>>>>> https://lore.kernel.org/qemu-devel/20240708143420.16953-9-joao.m.martins@oracle.com/
>>>>>>>
>>>>>>> But it was suggested to drop (I am assuming to avoid complexity)
>>>>>>
>>>>>> my bad if I did :/
>>>>>>
>>>>>
>>>>> No worries it is all part of review -- I think Zhenzhong proposed with good
>>>>> intentions, and I probably didn't think too hard about the consequences on
>>>>> layering with the HIOD.
>>>>>
>>>>>> we will need an helper such as :
>>>>>>
>>>>>>      bool vfio_device_dirty_tracking(VFIODevice *vbasedev)
>>>>>>      {
>>>>>>          HostIOMMUDevice *hiod = vbasedev->hiod ;
>>>>>>          HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
>>>>>>
>>>>>>          return hiodc->get_cap &&
>>>>>>              hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING, NULL)
>>>>>> == 1;
>>>>>>      }
>>>>>>
>>>>>> and something like,
>>>>>>
>>>>>>      static int hiod_iommufd_vfio_get_cap(HostIOMMUDevice *hiod, int cap,
>>>>>>                                           Error **errp)
>>>>>>      {
>>>>>>          switch (cap) {
>>>>>>          case HOST_IOMMU_DEVICE_CAP_DIRTY_TRACKING:
>>>>>>              return !!(hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING);
>>>>>>          default:
>>>>>>              error_setg(errp, "%s: unsupported capability %x", hiod->name,
>>>>>> cap);
>>>>>>              return -EINVAL;
>>>>>>          }
>>>>>>      }
>>>>>>
>>>>>> Feel free to propose your own implementation,
>>>>>>
>>>>>
>>>>> Actually it's close to what I had in v3 link, except the new helper (the name
>>>>> vfio_device_dirty_tracking is a bit misleading I would call it
>>>>> vfio_device_iommu_dirty_tracking)
>>>>
>>>> Let's call it vfio_device_iommu_dirty_tracking.
>>>>
>>>
>>> I thinking about this and I am not that sure it makes sense. That is the
>>> .get_cap() stuff.
>>>
>>> Using the hw_caps is only useful when choosing hwpt_flags, then the only thing
>>> that matters for patch 12 is after the device is attached ... hence we gotta
>>> look at hwpt_flags. That ultimately is what tells if dirty tracking can be done
>>> in the device pagetable.
>>>
>>> I can expand hiod_iommufd_vfio_get_cap() to return the hwpt flags, but it feels
>>> just as hacky given that I am testing its enablement of the hardware pagetable
>>> (HWPT), and not asking a HIOD capability.
>>
>> arf. yes.
>>
>>> e.g. hiod_iommufd_vfio_get_cap would make more sense in patch 9 for the
>>> attach_device() flow[*], but not for vfio_migration_realize() flow.
>>>
>>> [*] though feels unneeded as we only have a local callsite, not external user so
>>> far.
>>>
>>> Which would technically make v5.1 patch a more correct right check, perhaps with
>>> better layering/naming.
>>
>> The quick fix (plan B if needed) would be :
>>
>> @@ -1038,8 +1038,11 @@ bool vfio_migration_realize(VFIODevice *
>>       }
>>   
>>       if ((!vbasedev->dirty_pages_supported ||
>> -         vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) &&
>> -        !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)) {
>> +         vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF)
>> +#ifdef CONFIG_IOMMUFD
>> +        && !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)
>> +#endif
>> +        ) {
>>           if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) {
>>               error_setg(&err,
>>                          "%s: VFIO device doesn't support device and "
>>
>> I would prefer to avoid the common component to reference IOMMUFD
>> directly. The only exception today is the use of the vbasedev->iommufd
>> pointer which is treated as opaque.
>>
>> I guess a simple approach would be to store the result of
>> iommufd_hwpt_dirty_tracking(hwpt) under a 'dirty_tracking' attribute
>> of vbasedev and return the value in vfio_device_iommu_dirty_tracking() ?
>>
>> if not, let's merge v5 (with more acks) and the fix of plan B.
>>
>>
>>>>> I can follow-up with this improvement in case this gets merged as is,
>>>>
>>>> I can't merge as is since it break compiles (I am excluding the v5.1 patch).
>>>> Which means I would prefer a v6 please.
>>>>
>>>
>>> Ah OK -- I thought this discussion assumed v5.1 to be in which does fix the
>>> compilation issue and all that remained were acks.
>>
>> v5.1 proposes a CONFIG_IOMMUFD in a header file which is error prone.
>>   
> 
> hmmm, ok, that's strage. It does look quite common in Qemu? e.g. We even have
> CONFIG_LINUX in the vfio-common.h header file.

Yes. there are some high level CONFIG options like LINUX, TCG, KVM,
etc in header files. It's different for device CONFIG options, you
first need to include CONFIG_DEVICES.

Thanks,

C.



> 
>>>>> or include
>>>>> it in the next version if you prefer to adjourn this series into 9.2 (given the
>>>>> lack of time to get everything right).
>>>>
>>>> There aren't many open questions left.
>>>>
>>>> * PATCH 5 lacks a R-b. I would feel more confortable if ZhenZhong or
>>>>     Eric acked the changes.
>>>> * PATCH 9 is slightly hacky with the use of vfio_device_get_aw_bits().
>>>>     I think it's minor. I would also feel more confortable if ZhenZhong
>>>>     acked the changes.
>>>
>>> I guess you meant patch 6 and not 9.
>>
>> yes.
>>
>> Thanks,
>>
>> C.
>>
>>
>>
>>>
>>>> * PATCH 12 needs the fix we have been talking about.
>>>> * PATCH 13 is for dev/debug.
>>>>
>>>>
>>>> What's important is to avoid introducing regressions in the current behavior,
>>>> that is when not using IOMMUFD. It looks fine on that aspect AFAICT.
>>>
>>> OK
>>>
>>
>
diff mbox series

Patch

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 7e530c7869dc..00b9e933449e 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -299,6 +299,7 @@  int vfio_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
                 VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
 int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
                           uint64_t size, ram_addr_t ram_addr, Error **errp);
+bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt);
 
 /* Returns 0 on success, or a negative errno. */
 bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 7dd5d43ce06a..a998e8578552 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -111,7 +111,7 @@  static void iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev)
     iommufd_backend_disconnect(vbasedev->iommufd);
 }
 
-static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
+bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
 {
     return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
 }
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 34d4be2ce1b1..63ffa46c9652 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -1036,16 +1036,17 @@  bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
         return !vfio_block_migration(vbasedev, err, errp);
     }
 
-    if (!vbasedev->dirty_pages_supported) {
+    if (!vbasedev->dirty_pages_supported &&
+        !iommufd_hwpt_dirty_tracking(vbasedev->hwpt)) {
         if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) {
             error_setg(&err,
-                       "%s: VFIO device doesn't support device dirty tracking",
-                       vbasedev->name);
+                       "%s: VFIO device doesn't support device and "
+                       "IOMMU dirty tracking", vbasedev->name);
             goto add_blocker;
         }
 
-        warn_report("%s: VFIO device doesn't support device dirty tracking",
-                    vbasedev->name);
+        warn_report("%s: VFIO device doesn't support device and "
+                    "IOMMU dirty tracking", vbasedev->name);
     }
 
     ret = vfio_block_multiple_devices_migration(vbasedev, errp);