diff mbox series

[v2] vfio/migration: Refactor and fix print of "Migration disabled"

Message ID 20230619084446.399059-1-zhenzhong.duan@intel.com (mailing list archive)
State New, archived
Headers show
Series [v2] vfio/migration: Refactor and fix print of "Migration disabled" | expand

Commit Message

Duan, Zhenzhong June 19, 2023, 8:44 a.m. UTC
This patch refactors vfio_migration_realize() and its dependend code
as follows:

1. Change vfio_block_*_migration() to be only a checker.
2. It's redundant in vfio_migration_realize() to registers multiple blockers,
   refactor it to register one migration blocker per device.
3. Remove useless vfio_unblock_*_migration() and *_migration_blocker,
   remove empty vfio_migration_finalize().
4. Change vfio_migration_realize() and dependent vfio_block_*_migration() to
   return bool type.
5. Change to print "Migration disabled" only after adding blocker succeed.

migrate_add_blocker() returns 0 when successfully adding the migration blocker.
However, the caller of vfio_migration_realize() considers that migration was
blocked when the latter returned an error. What matters for migration is that
the blocker is added in core migration, so this cleans up usability such that
user sees "Migrate disabled" when any of the vfio migration blockers are active.

Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
v2: Based on suggestions from Joao and Cedric, refactored vfio migration
    blocker related code.
    Use Joao's words to update description.

Tested vfio hotplug/unplug with vfio migration supported and unsupported cases.

 hw/vfio/common.c              | 66 +++--------------------------------
 hw/vfio/migration.c           | 36 +++++++++----------
 hw/vfio/pci.c                 |  6 +---
 include/hw/vfio/vfio-common.h |  8 ++---
 4 files changed, 25 insertions(+), 91 deletions(-)

Comments

Avihai Horon June 19, 2023, 11:13 a.m. UTC | #1
Hi Zhenzhong,

On 19/06/2023 11:44, Zhenzhong Duan wrote:
> External email: Use caution opening links or attachments
>
>
> This patch refactors vfio_migration_realize() and its dependend code
> as follows:
>
> 1. Change vfio_block_*_migration() to be only a checker.
> 2. It's redundant in vfio_migration_realize() to registers multiple blockers,
>     refactor it to register one migration blocker per device.
> 3. Remove useless vfio_unblock_*_migration() and *_migration_blocker,
>     remove empty vfio_migration_finalize().
> 4. Change vfio_migration_realize() and dependent vfio_block_*_migration() to
>     return bool type.
> 5. Change to print "Migration disabled" only after adding blocker succeed.
>
> migrate_add_blocker() returns 0 when successfully adding the migration blocker.
> However, the caller of vfio_migration_realize() considers that migration was
> blocked when the latter returned an error. What matters for migration is that
> the blocker is added in core migration, so this cleans up usability such that
> user sees "Migrate disabled" when any of the vfio migration blockers are active.
>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> v2: Based on suggestions from Joao and Cedric, refactored vfio migration
>      blocker related code.
>      Use Joao's words to update description.
>
> Tested vfio hotplug/unplug with vfio migration supported and unsupported cases.
>
>   hw/vfio/common.c              | 66 +++--------------------------------
>   hw/vfio/migration.c           | 36 +++++++++----------
>   hw/vfio/pci.c                 |  6 +---
>   include/hw/vfio/vfio-common.h |  8 ++---
>   4 files changed, 25 insertions(+), 91 deletions(-)
>
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index fa8fd949b1cf..606e23a66514 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -361,9 +361,6 @@ bool vfio_mig_active(void)
>       return true;
>   }
>
> -static Error *multiple_devices_migration_blocker;
> -static Error *giommu_migration_blocker;
> -
>   static unsigned int vfio_migratable_device_num(void)
>   {
>       VFIOGroup *group;
> @@ -381,37 +378,9 @@ static unsigned int vfio_migratable_device_num(void)
>       return device_num;
>   }
>
> -int vfio_block_multiple_devices_migration(Error **errp)
> -{
> -    int ret;
> -
> -    if (multiple_devices_migration_blocker ||
> -        vfio_migratable_device_num() <= 1) {
> -        return 0;
> -    }
> -
> -    error_setg(&multiple_devices_migration_blocker,
> -               "Migration is currently not supported with multiple "
> -               "VFIO devices");
> -    ret = migrate_add_blocker(multiple_devices_migration_blocker, errp);
> -    if (ret < 0) {
> -        error_free(multiple_devices_migration_blocker);
> -        multiple_devices_migration_blocker = NULL;
> -    }
> -
> -    return ret;
> -}
> -
> -void vfio_unblock_multiple_devices_migration(void)
> +bool vfio_block_multiple_devices_migration(Error **errp)
>   {
> -    if (!multiple_devices_migration_blocker ||
> -        vfio_migratable_device_num() > 1) {
> -        return;
> -    }
> -
> -    migrate_del_blocker(multiple_devices_migration_blocker);
> -    error_free(multiple_devices_migration_blocker);
> -    multiple_devices_migration_blocker = NULL;
> +    return vfio_migratable_device_num() > 1;
>   }
>
>   static bool vfio_viommu_preset(void)
> @@ -427,36 +396,9 @@ static bool vfio_viommu_preset(void)
>       return false;
>   }
>
> -int vfio_block_giommu_migration(Error **errp)
> +bool vfio_block_giommu_migration(Error **errp)
>   {
> -    int ret;
> -
> -    if (giommu_migration_blocker ||
> -        !vfio_viommu_preset()) {
> -        return 0;
> -    }
> -
> -    error_setg(&giommu_migration_blocker,
> -               "Migration is currently not supported with vIOMMU enabled");
> -    ret = migrate_add_blocker(giommu_migration_blocker, errp);
> -    if (ret < 0) {
> -        error_free(giommu_migration_blocker);
> -        giommu_migration_blocker = NULL;
> -    }
> -
> -    return ret;
> -}
> -
> -void vfio_migration_finalize(void)
> -{
> -    if (!giommu_migration_blocker ||
> -        vfio_viommu_preset()) {
> -        return;
> -    }
> -
> -    migrate_del_blocker(giommu_migration_blocker);
> -    error_free(giommu_migration_blocker);
> -    giommu_migration_blocker = NULL;
> +    return vfio_viommu_preset();
>   }
>
>   static void vfio_set_migration_error(int err)
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index 6b58dddb8859..bc51aa765cb8 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -632,42 +632,41 @@ int64_t vfio_mig_bytes_transferred(void)
>       return bytes_transferred;
>   }
>
> -int vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
> +bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>   {
> -    int ret = -ENOTSUP;
> +    int ret;
>
> -    if (!vbasedev->enable_migration) {
> +    if (!vbasedev->enable_migration || vfio_migration_init(vbasedev)) {
> +        error_setg(&vbasedev->migration_blocker,
> +                   "VFIO device doesn't support migration");
>           goto add_blocker;
>       }
>
> -    ret = vfio_migration_init(vbasedev);
> -    if (ret) {
> +    if (vfio_block_multiple_devices_migration(errp)) {
> +        error_setg(&vbasedev->migration_blocker,
> +                   "Migration is currently not supported with multiple "
> +                   "VFIO devices");
>           goto add_blocker;
>       }

Here you are tying the multiple devices blocker to a specific device.
This could be problematic:
If you add vfio device #1 and then device #2 then the blocker will be 
added to device #2. If you then remove device #1, migration will still 
be blocked although it shouldn't.

I think we should keep it as a global blocker and not a per-device blocker.

Thanks.

>
> -    ret = vfio_block_multiple_devices_migration(errp);
> -    if (ret) {
> -        return ret;
> -    }
> -
> -    ret = vfio_block_giommu_migration(errp);
> -    if (ret) {
> -        return ret;
> +    if (vfio_block_giommu_migration(errp)) {
> +        error_setg(&vbasedev->migration_blocker,
> +                   "Migration is currently not supported with vIOMMU enabled");
> +        goto add_blocker;
>       }
>
>       trace_vfio_migration_probe(vbasedev->name);
> -    return 0;
> +    return true;
>
>   add_blocker:
> -    error_setg(&vbasedev->migration_blocker,
> -               "VFIO device doesn't support migration");
> -
>       ret = migrate_add_blocker(vbasedev->migration_blocker, errp);
>       if (ret < 0) {
>           error_free(vbasedev->migration_blocker);
>           vbasedev->migration_blocker = NULL;
> +    } else {
> +        error_report("%s: Migration disabled", vbasedev->name);
>       }
> -    return ret;
> +    return false;
>   }
>
>   void vfio_migration_exit(VFIODevice *vbasedev)
> @@ -679,7 +678,6 @@ void vfio_migration_exit(VFIODevice *vbasedev)
>           qemu_del_vm_change_state_handler(migration->vm_state);
>           unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev);
>           vfio_migration_free(vbasedev);
> -        vfio_unblock_multiple_devices_migration();
>       }
>
>       if (vbasedev->migration_blocker) {
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 73874a94de12..12b82b3667ef 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -3207,10 +3207,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>       }
>
>       if (!pdev->failover_pair_id) {
> -        ret = vfio_migration_realize(vbasedev, errp);
> -        if (ret) {
> -            error_report("%s: Migration disabled", vbasedev->name);
> -        }
> +        vfio_migration_realize(vbasedev, errp);
>       }
>
>       vfio_register_err_notifier(vdev);
> @@ -3247,7 +3244,6 @@ static void vfio_instance_finalize(Object *obj)
>        */
>       vfio_put_device(vdev);
>       vfio_put_group(group);
> -    vfio_migration_finalize();
>   }
>
>   static void vfio_exitfn(PCIDevice *pdev)
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index eed244f25f34..23ba3654acda 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -220,9 +220,8 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
>   extern VFIOGroupList vfio_group_list;
>
>   bool vfio_mig_active(void);
> -int vfio_block_multiple_devices_migration(Error **errp);
> -void vfio_unblock_multiple_devices_migration(void);
> -int vfio_block_giommu_migration(Error **errp);
> +bool vfio_block_multiple_devices_migration(Error **errp);
> +bool vfio_block_giommu_migration(Error **errp);
>   int64_t vfio_mig_bytes_transferred(void);
>
>   #ifdef CONFIG_LINUX
> @@ -246,8 +245,7 @@ int vfio_spapr_create_window(VFIOContainer *container,
>   int vfio_spapr_remove_window(VFIOContainer *container,
>                                hwaddr offset_within_address_space);
>
> -int vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
> +bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
>   void vfio_migration_exit(VFIODevice *vbasedev);
> -void vfio_migration_finalize(void);
>
>   #endif /* HW_VFIO_VFIO_COMMON_H */
> --
> 2.34.1
>
>
Duan, Zhenzhong June 20, 2023, 3:04 a.m. UTC | #2
>-----Original Message-----
>From: Avihai Horon <avihaih@nvidia.com>
>Sent: Monday, June 19, 2023 7:14 PM
...
>> a/hw/vfio/migration.c b/hw/vfio/migration.c index
>> 6b58dddb8859..bc51aa765cb8 100644
>> --- a/hw/vfio/migration.c
>> +++ b/hw/vfio/migration.c
>> @@ -632,42 +632,41 @@ int64_t vfio_mig_bytes_transferred(void)
>>       return bytes_transferred;
>>   }
>>
>> -int vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>> +bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>>   {
>> -    int ret = -ENOTSUP;
>> +    int ret;
>>
>> -    if (!vbasedev->enable_migration) {
>> +    if (!vbasedev->enable_migration || vfio_migration_init(vbasedev)) {
>> +        error_setg(&vbasedev->migration_blocker,
>> +                   "VFIO device doesn't support migration");
>>           goto add_blocker;
>>       }
>>
>> -    ret = vfio_migration_init(vbasedev);
>> -    if (ret) {
>> +    if (vfio_block_multiple_devices_migration(errp)) {
>> +        error_setg(&vbasedev->migration_blocker,
>> +                   "Migration is currently not supported with multiple "
>> +                   "VFIO devices");
>>           goto add_blocker;
>>       }
>
>Here you are tying the multiple devices blocker to a specific device.
>This could be problematic:
>If you add vfio device #1 and then device #2 then the blocker will be added to
>device #2. If you then remove device #1, migration will still be blocked
>although it shouldn't.
>
>I think we should keep it as a global blocker and not a per-device blocker.

Thanks for point out, you are right, seems I need to restore the multiple devices part code.

Regards
Zhenzhong
Joao Martins June 20, 2023, 8:23 a.m. UTC | #3
On 20/06/2023 04:04, Duan, Zhenzhong wrote:
>> -----Original Message-----
>> From: Avihai Horon <avihaih@nvidia.com>
>> Sent: Monday, June 19, 2023 7:14 PM
> ...
>>> a/hw/vfio/migration.c b/hw/vfio/migration.c index
>>> 6b58dddb8859..bc51aa765cb8 100644
>>> --- a/hw/vfio/migration.c
>>> +++ b/hw/vfio/migration.c
>>> @@ -632,42 +632,41 @@ int64_t vfio_mig_bytes_transferred(void)
>>>       return bytes_transferred;
>>>   }
>>>
>>> -int vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>>> +bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>>>   {
>>> -    int ret = -ENOTSUP;
>>> +    int ret;
>>>
>>> -    if (!vbasedev->enable_migration) {
>>> +    if (!vbasedev->enable_migration || vfio_migration_init(vbasedev)) {
>>> +        error_setg(&vbasedev->migration_blocker,
>>> +                   "VFIO device doesn't support migration");
>>>           goto add_blocker;
>>>       }
>>>
>>> -    ret = vfio_migration_init(vbasedev);
>>> -    if (ret) {
>>> +    if (vfio_block_multiple_devices_migration(errp)) {
>>> +        error_setg(&vbasedev->migration_blocker,
>>> +                   "Migration is currently not supported with multiple "
>>> +                   "VFIO devices");
>>>           goto add_blocker;
>>>       }
>>
>> Here you are tying the multiple devices blocker to a specific device.
>> This could be problematic:
>> If you add vfio device #1 and then device #2 then the blocker will be added to
>> device #2. If you then remove device #1, migration will still be blocked
>> although it shouldn't.
>>
>> I think we should keep it as a global blocker and not a per-device blocker.
> 
> Thanks for point out, you are right, seems I need to restore the multiple devices part code.

It's the same for vIOMMU migration blocker. You could have a machine with
default_bus_bypass_iommu=on and add device #1 with bypass_iommu=off attribute in
pxb PCI port, and then add device #2 with bypass_iommu=on. The blocker is added
because of device #1 but then it will remain blocked if you remove it.
Duan, Zhenzhong June 20, 2023, 8:55 a.m. UTC | #4
>-----Original Message-----
>From: Joao Martins <joao.m.martins@oracle.com>
>Sent: Tuesday, June 20, 2023 4:23 PM
>To: Duan, Zhenzhong <zhenzhong.duan@intel.com>; Avihai Horon
><avihaih@nvidia.com>; qemu-devel@nongnu.org
>Cc: alex.williamson@redhat.com; clg@redhat.com; Peng, Chao P
><chao.p.peng@intel.com>
>Subject: Re: [PATCH v2] vfio/migration: Refactor and fix print of "Migration
>disabled"
>
>On 20/06/2023 04:04, Duan, Zhenzhong wrote:
>>> -----Original Message-----
>>> From: Avihai Horon <avihaih@nvidia.com>
>>> Sent: Monday, June 19, 2023 7:14 PM
>> ...
>>>> a/hw/vfio/migration.c b/hw/vfio/migration.c index
>>>> 6b58dddb8859..bc51aa765cb8 100644
>>>> --- a/hw/vfio/migration.c
>>>> +++ b/hw/vfio/migration.c
>>>> @@ -632,42 +632,41 @@ int64_t vfio_mig_bytes_transferred(void)
>>>>       return bytes_transferred;
>>>>   }
>>>>
>>>> -int vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>>>> +bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>>>>   {
>>>> -    int ret = -ENOTSUP;
>>>> +    int ret;
>>>>
>>>> -    if (!vbasedev->enable_migration) {
>>>> +    if (!vbasedev->enable_migration || vfio_migration_init(vbasedev)) {
>>>> +        error_setg(&vbasedev->migration_blocker,
>>>> +                   "VFIO device doesn't support migration");
>>>>           goto add_blocker;
>>>>       }
>>>>
>>>> -    ret = vfio_migration_init(vbasedev);
>>>> -    if (ret) {
>>>> +    if (vfio_block_multiple_devices_migration(errp)) {
>>>> +        error_setg(&vbasedev->migration_blocker,
>>>> +                   "Migration is currently not supported with multiple "
>>>> +                   "VFIO devices");
>>>>           goto add_blocker;
>>>>       }
>>>
>>> Here you are tying the multiple devices blocker to a specific device.
>>> This could be problematic:
>>> If you add vfio device #1 and then device #2 then the blocker will be
>>> added to device #2. If you then remove device #1, migration will
>>> still be blocked although it shouldn't.
>>>
>>> I think we should keep it as a global blocker and not a per-device blocker.
>>
>> Thanks for point out, you are right, seems I need to restore the multiple
>devices part code.
>
>It's the same for vIOMMU migration blocker. You could have a machine with
>default_bus_bypass_iommu=on and add device #1 with bypass_iommu=off
>attribute in pxb PCI port, and then add device #2 with bypass_iommu=on. The
>blocker is added because of device #1 but then it will remain blocked if you
>remove it.

Right, thanks for point out, I'm thinking about changing vfio_viommu_preset()
to check corresponding device's address space rather than all vfio devices'.

Let me know if you prefer to restore vIOMMU blocker as global too, then I'll not
try with my idea furtherly.

Regards
Zhenzhong
Joao Martins June 20, 2023, 9:28 a.m. UTC | #5
On 20/06/2023 09:55, Duan, Zhenzhong wrote:
>> -----Original Message-----
>> From: Joao Martins <joao.m.martins@oracle.com>
>> Sent: Tuesday, June 20, 2023 4:23 PM
>> To: Duan, Zhenzhong <zhenzhong.duan@intel.com>; Avihai Horon
>> <avihaih@nvidia.com>; qemu-devel@nongnu.org
>> Cc: alex.williamson@redhat.com; clg@redhat.com; Peng, Chao P
>> <chao.p.peng@intel.com>
>> Subject: Re: [PATCH v2] vfio/migration: Refactor and fix print of "Migration
>> disabled"
>>
>> On 20/06/2023 04:04, Duan, Zhenzhong wrote:
>>>> -----Original Message-----
>>>> From: Avihai Horon <avihaih@nvidia.com>
>>>> Sent: Monday, June 19, 2023 7:14 PM
>>> ...
>>>>> a/hw/vfio/migration.c b/hw/vfio/migration.c index
>>>>> 6b58dddb8859..bc51aa765cb8 100644
>>>>> --- a/hw/vfio/migration.c
>>>>> +++ b/hw/vfio/migration.c
>>>>> @@ -632,42 +632,41 @@ int64_t vfio_mig_bytes_transferred(void)
>>>>>       return bytes_transferred;
>>>>>   }
>>>>>
>>>>> -int vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>>>>> +bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>>>>>   {
>>>>> -    int ret = -ENOTSUP;
>>>>> +    int ret;
>>>>>
>>>>> -    if (!vbasedev->enable_migration) {
>>>>> +    if (!vbasedev->enable_migration || vfio_migration_init(vbasedev)) {
>>>>> +        error_setg(&vbasedev->migration_blocker,
>>>>> +                   "VFIO device doesn't support migration");
>>>>>           goto add_blocker;
>>>>>       }
>>>>>
>>>>> -    ret = vfio_migration_init(vbasedev);
>>>>> -    if (ret) {
>>>>> +    if (vfio_block_multiple_devices_migration(errp)) {
>>>>> +        error_setg(&vbasedev->migration_blocker,
>>>>> +                   "Migration is currently not supported with multiple "
>>>>> +                   "VFIO devices");
>>>>>           goto add_blocker;
>>>>>       }
>>>>
>>>> Here you are tying the multiple devices blocker to a specific device.
>>>> This could be problematic:
>>>> If you add vfio device #1 and then device #2 then the blocker will be
>>>> added to device #2. If you then remove device #1, migration will
>>>> still be blocked although it shouldn't.
>>>>
>>>> I think we should keep it as a global blocker and not a per-device blocker.
>>>
>>> Thanks for point out, you are right, seems I need to restore the multiple
>> devices part code.
>>
>> It's the same for vIOMMU migration blocker. You could have a machine with
>> default_bus_bypass_iommu=on and add device #1 with bypass_iommu=off
>> attribute in pxb PCI port, and then add device #2 with bypass_iommu=on. The
>> blocker is added because of device #1 but then it will remain blocked if you
>> remove it.
> 
> Right, thanks for point out, I'm thinking about changing vfio_viommu_preset()
> to check corresponding device's address space rather than all vfio devices'.
> 
> Let me know if you prefer to restore vIOMMU blocker as global too, then I'll not
> try with my idea furtherly.

The vIOMMU migration blocker doesn't need to be global, true, as it doesn't care
about others address space -- if each device has a blocker as long as the one
device blocker is removed it should become make VM migratable again (but atm we
will be blocked by the multi device blocker anyway). This should consolidate
things into a single migration blocker and avoid the special path. I am not
enterily sure if the refactor will give *that* much gain but that's probably
because I haven't seen the final result.

IIUC the problem with this patch is that you remove what unblocks the migration,
and I guess that need to stay there for the global case.
Duan, Zhenzhong June 21, 2023, 1:16 a.m. UTC | #6
>-----Original Message-----
>From: Joao Martins <joao.m.martins@oracle.com>
>Sent: Tuesday, June 20, 2023 5:28 PM
>To: Duan, Zhenzhong <zhenzhong.duan@intel.com>; Avihai Horon
><avihaih@nvidia.com>; qemu-devel@nongnu.org
>Cc: alex.williamson@redhat.com; clg@redhat.com; Peng, Chao P
><chao.p.peng@intel.com>
>Subject: Re: [PATCH v2] vfio/migration: Refactor and fix print of "Migration
>disabled"
>
>On 20/06/2023 09:55, Duan, Zhenzhong wrote:
>>> -----Original Message-----
>>> From: Joao Martins <joao.m.martins@oracle.com>
>>> Sent: Tuesday, June 20, 2023 4:23 PM
>>> To: Duan, Zhenzhong <zhenzhong.duan@intel.com>; Avihai Horon
>>> <avihaih@nvidia.com>; qemu-devel@nongnu.org
>>> Cc: alex.williamson@redhat.com; clg@redhat.com; Peng, Chao P
>>> <chao.p.peng@intel.com>
>>> Subject: Re: [PATCH v2] vfio/migration: Refactor and fix print of
>>> "Migration disabled"
>>>
>>> On 20/06/2023 04:04, Duan, Zhenzhong wrote:
>>>>> -----Original Message-----
>>>>> From: Avihai Horon <avihaih@nvidia.com>
>>>>> Sent: Monday, June 19, 2023 7:14 PM
>>>> ...
>>>>>> a/hw/vfio/migration.c b/hw/vfio/migration.c index
>>>>>> 6b58dddb8859..bc51aa765cb8 100644
>>>>>> --- a/hw/vfio/migration.c
>>>>>> +++ b/hw/vfio/migration.c
>>>>>> @@ -632,42 +632,41 @@ int64_t vfio_mig_bytes_transferred(void)
>>>>>>       return bytes_transferred;
>>>>>>   }
>>>>>>
>>>>>> -int vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>>>>>> +bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>>>>>>   {
>>>>>> -    int ret = -ENOTSUP;
>>>>>> +    int ret;
>>>>>>
>>>>>> -    if (!vbasedev->enable_migration) {
>>>>>> +    if (!vbasedev->enable_migration || vfio_migration_init(vbasedev)) {
>>>>>> +        error_setg(&vbasedev->migration_blocker,
>>>>>> +                   "VFIO device doesn't support migration");
>>>>>>           goto add_blocker;
>>>>>>       }
>>>>>>
>>>>>> -    ret = vfio_migration_init(vbasedev);
>>>>>> -    if (ret) {
>>>>>> +    if (vfio_block_multiple_devices_migration(errp)) {
>>>>>> +        error_setg(&vbasedev->migration_blocker,
>>>>>> +                   "Migration is currently not supported with multiple "
>>>>>> +                   "VFIO devices");
>>>>>>           goto add_blocker;
>>>>>>       }
>>>>>
>>>>> Here you are tying the multiple devices blocker to a specific device.
>>>>> This could be problematic:
>>>>> If you add vfio device #1 and then device #2 then the blocker will
>>>>> be added to device #2. If you then remove device #1, migration will
>>>>> still be blocked although it shouldn't.
>>>>>
>>>>> I think we should keep it as a global blocker and not a per-device blocker.
>>>>
>>>> Thanks for point out, you are right, seems I need to restore the
>>>> multiple
>>> devices part code.
>>>
>>> It's the same for vIOMMU migration blocker. You could have a machine
>>> with default_bus_bypass_iommu=on and add device #1 with
>>> bypass_iommu=off attribute in pxb PCI port, and then add device #2
>>> with bypass_iommu=on. The blocker is added because of device #1 but
>>> then it will remain blocked if you remove it.
>>
>> Right, thanks for point out, I'm thinking about changing
>> vfio_viommu_preset() to check corresponding device's address space rather
>than all vfio devices'.
>>
>> Let me know if you prefer to restore vIOMMU blocker as global too,
>> then I'll not try with my idea furtherly.
>
>The vIOMMU migration blocker doesn't need to be global, true, as it doesn't
>care about others address space -- if each device has a blocker as long as the
>one device blocker is removed it should become make VM migratable again
>(but atm we will be blocked by the multi device blocker anyway). This should
>consolidate things into a single migration blocker and avoid the special path. I
>am not enterily sure if the refactor will give *that* much gain but that's
>probably because I haven't seen the final result.

OK, let me write one for discuss, having per device vIOMMU blocker, global multiple devices blocker, etc.
>
>IIUC the problem with this patch is that you remove what unblocks the
>migration, and I guess that need to stay there for the global case.
Yes.

Thanks
Zhenzhong
diff mbox series

Patch

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index fa8fd949b1cf..606e23a66514 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -361,9 +361,6 @@  bool vfio_mig_active(void)
     return true;
 }
 
-static Error *multiple_devices_migration_blocker;
-static Error *giommu_migration_blocker;
-
 static unsigned int vfio_migratable_device_num(void)
 {
     VFIOGroup *group;
@@ -381,37 +378,9 @@  static unsigned int vfio_migratable_device_num(void)
     return device_num;
 }
 
-int vfio_block_multiple_devices_migration(Error **errp)
-{
-    int ret;
-
-    if (multiple_devices_migration_blocker ||
-        vfio_migratable_device_num() <= 1) {
-        return 0;
-    }
-
-    error_setg(&multiple_devices_migration_blocker,
-               "Migration is currently not supported with multiple "
-               "VFIO devices");
-    ret = migrate_add_blocker(multiple_devices_migration_blocker, errp);
-    if (ret < 0) {
-        error_free(multiple_devices_migration_blocker);
-        multiple_devices_migration_blocker = NULL;
-    }
-
-    return ret;
-}
-
-void vfio_unblock_multiple_devices_migration(void)
+bool vfio_block_multiple_devices_migration(Error **errp)
 {
-    if (!multiple_devices_migration_blocker ||
-        vfio_migratable_device_num() > 1) {
-        return;
-    }
-
-    migrate_del_blocker(multiple_devices_migration_blocker);
-    error_free(multiple_devices_migration_blocker);
-    multiple_devices_migration_blocker = NULL;
+    return vfio_migratable_device_num() > 1;
 }
 
 static bool vfio_viommu_preset(void)
@@ -427,36 +396,9 @@  static bool vfio_viommu_preset(void)
     return false;
 }
 
-int vfio_block_giommu_migration(Error **errp)
+bool vfio_block_giommu_migration(Error **errp)
 {
-    int ret;
-
-    if (giommu_migration_blocker ||
-        !vfio_viommu_preset()) {
-        return 0;
-    }
-
-    error_setg(&giommu_migration_blocker,
-               "Migration is currently not supported with vIOMMU enabled");
-    ret = migrate_add_blocker(giommu_migration_blocker, errp);
-    if (ret < 0) {
-        error_free(giommu_migration_blocker);
-        giommu_migration_blocker = NULL;
-    }
-
-    return ret;
-}
-
-void vfio_migration_finalize(void)
-{
-    if (!giommu_migration_blocker ||
-        vfio_viommu_preset()) {
-        return;
-    }
-
-    migrate_del_blocker(giommu_migration_blocker);
-    error_free(giommu_migration_blocker);
-    giommu_migration_blocker = NULL;
+    return vfio_viommu_preset();
 }
 
 static void vfio_set_migration_error(int err)
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 6b58dddb8859..bc51aa765cb8 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -632,42 +632,41 @@  int64_t vfio_mig_bytes_transferred(void)
     return bytes_transferred;
 }
 
-int vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
+bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
 {
-    int ret = -ENOTSUP;
+    int ret;
 
-    if (!vbasedev->enable_migration) {
+    if (!vbasedev->enable_migration || vfio_migration_init(vbasedev)) {
+        error_setg(&vbasedev->migration_blocker,
+                   "VFIO device doesn't support migration");
         goto add_blocker;
     }
 
-    ret = vfio_migration_init(vbasedev);
-    if (ret) {
+    if (vfio_block_multiple_devices_migration(errp)) {
+        error_setg(&vbasedev->migration_blocker,
+                   "Migration is currently not supported with multiple "
+                   "VFIO devices");
         goto add_blocker;
     }
 
-    ret = vfio_block_multiple_devices_migration(errp);
-    if (ret) {
-        return ret;
-    }
-
-    ret = vfio_block_giommu_migration(errp);
-    if (ret) {
-        return ret;
+    if (vfio_block_giommu_migration(errp)) {
+        error_setg(&vbasedev->migration_blocker,
+                   "Migration is currently not supported with vIOMMU enabled");
+        goto add_blocker;
     }
 
     trace_vfio_migration_probe(vbasedev->name);
-    return 0;
+    return true;
 
 add_blocker:
-    error_setg(&vbasedev->migration_blocker,
-               "VFIO device doesn't support migration");
-
     ret = migrate_add_blocker(vbasedev->migration_blocker, errp);
     if (ret < 0) {
         error_free(vbasedev->migration_blocker);
         vbasedev->migration_blocker = NULL;
+    } else {
+        error_report("%s: Migration disabled", vbasedev->name);
     }
-    return ret;
+    return false;
 }
 
 void vfio_migration_exit(VFIODevice *vbasedev)
@@ -679,7 +678,6 @@  void vfio_migration_exit(VFIODevice *vbasedev)
         qemu_del_vm_change_state_handler(migration->vm_state);
         unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev);
         vfio_migration_free(vbasedev);
-        vfio_unblock_multiple_devices_migration();
     }
 
     if (vbasedev->migration_blocker) {
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 73874a94de12..12b82b3667ef 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3207,10 +3207,7 @@  static void vfio_realize(PCIDevice *pdev, Error **errp)
     }
 
     if (!pdev->failover_pair_id) {
-        ret = vfio_migration_realize(vbasedev, errp);
-        if (ret) {
-            error_report("%s: Migration disabled", vbasedev->name);
-        }
+        vfio_migration_realize(vbasedev, errp);
     }
 
     vfio_register_err_notifier(vdev);
@@ -3247,7 +3244,6 @@  static void vfio_instance_finalize(Object *obj)
      */
     vfio_put_device(vdev);
     vfio_put_group(group);
-    vfio_migration_finalize();
 }
 
 static void vfio_exitfn(PCIDevice *pdev)
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index eed244f25f34..23ba3654acda 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -220,9 +220,8 @@  typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
 extern VFIOGroupList vfio_group_list;
 
 bool vfio_mig_active(void);
-int vfio_block_multiple_devices_migration(Error **errp);
-void vfio_unblock_multiple_devices_migration(void);
-int vfio_block_giommu_migration(Error **errp);
+bool vfio_block_multiple_devices_migration(Error **errp);
+bool vfio_block_giommu_migration(Error **errp);
 int64_t vfio_mig_bytes_transferred(void);
 
 #ifdef CONFIG_LINUX
@@ -246,8 +245,7 @@  int vfio_spapr_create_window(VFIOContainer *container,
 int vfio_spapr_remove_window(VFIOContainer *container,
                              hwaddr offset_within_address_space);
 
-int vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
+bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
 void vfio_migration_exit(VFIODevice *vbasedev);
-void vfio_migration_finalize(void);
 
 #endif /* HW_VFIO_VFIO_COMMON_H */