diff mbox series

[2/3] vfio/migration: Emit VFIO device migration state change QAPI event

Message ID 20240430051621.19597-3-avihaih@nvidia.com (mailing list archive)
State New, archived
Headers show
Series qapi/vfio: Add VFIO device migration state change QAPI event | expand

Commit Message

Avihai Horon April 30, 2024, 5:16 a.m. UTC
Emit VFIO device migration state change QAPI event when a VFIO device
changes its migration state. This can be used by management applications
to get updates on the current state of the VFIO device for their own
purposes.

A new per VFIO device capability, "migration-events", is added so events
can be enabled only for the required devices. It is disabled by default.

Signed-off-by: Avihai Horon <avihaih@nvidia.com>
---
 include/hw/vfio/vfio-common.h |  1 +
 hw/vfio/migration.c           | 44 +++++++++++++++++++++++++++++++++++
 hw/vfio/pci.c                 |  2 ++
 3 files changed, 47 insertions(+)

Comments

Joao Martins May 1, 2024, 11:50 a.m. UTC | #1
On 30/04/2024 06:16, Avihai Horon wrote:
> Emit VFIO device migration state change QAPI event when a VFIO device
> changes its migration state. This can be used by management applications
> to get updates on the current state of the VFIO device for their own
> purposes.
> 
> A new per VFIO device capability, "migration-events", is added so events
> can be enabled only for the required devices. It is disabled by default.
> 
> Signed-off-by: Avihai Horon <avihaih@nvidia.com>
> ---
>  include/hw/vfio/vfio-common.h |  1 +
>  hw/vfio/migration.c           | 44 +++++++++++++++++++++++++++++++++++
>  hw/vfio/pci.c                 |  2 ++
>  3 files changed, 47 insertions(+)
> 
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index b9da6c08ef..3ec5f2425e 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -115,6 +115,7 @@ typedef struct VFIODevice {
>      bool no_mmap;
>      bool ram_block_discard_allowed;
>      OnOffAuto enable_migration;
> +    bool migration_events;
>      VFIODeviceOps *ops;
>      unsigned int num_irqs;
>      unsigned int num_regions;
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index 06ae40969b..6bbccf6545 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -24,6 +24,7 @@
>  #include "migration/register.h"
>  #include "migration/blocker.h"
>  #include "qapi/error.h"
> +#include "qapi/qapi-events-vfio.h"
>  #include "exec/ramlist.h"
>  #include "exec/ram_addr.h"
>  #include "pci.h"
> @@ -80,6 +81,46 @@ static const char *mig_state_to_str(enum vfio_device_mig_state state)
>      }
>  }
>  
> +static VFIODeviceMigState
> +mig_state_to_qapi_state(enum vfio_device_mig_state state)
> +{
> +    switch (state) {
> +    case VFIO_DEVICE_STATE_STOP:
> +        return QAPI_VFIO_DEVICE_MIG_STATE_STOP;
> +    case VFIO_DEVICE_STATE_RUNNING:
> +        return QAPI_VFIO_DEVICE_MIG_STATE_RUNNING;
> +    case VFIO_DEVICE_STATE_STOP_COPY:
> +        return QAPI_VFIO_DEVICE_MIG_STATE_STOP_COPY;
> +    case VFIO_DEVICE_STATE_RESUMING:
> +        return QAPI_VFIO_DEVICE_MIG_STATE_RESUMING;
> +    case VFIO_DEVICE_STATE_RUNNING_P2P:
> +        return QAPI_VFIO_DEVICE_MIG_STATE_RUNNING_P2P;
> +    case VFIO_DEVICE_STATE_PRE_COPY:
> +        return QAPI_VFIO_DEVICE_MIG_STATE_PRE_COPY;
> +    case VFIO_DEVICE_STATE_PRE_COPY_P2P:
> +        return QAPI_VFIO_DEVICE_MIG_STATE_PRE_COPY_P2P;
> +    default:
> +        g_assert_not_reached();
> +    }
> +}
> +
> +static void vfio_migration_send_state_change_event(VFIODevice *vbasedev)
> +{
> +    VFIOMigration *migration = vbasedev->migration;
> +    const char *id;
> +    Object *obj;
> +
> +    if (!vbasedev->migration_events) {
> +        return;
> +    }
> +

Shouldn't this leap frog migrate_events() capability instead of introducing its
vfio equivalent i.e.

	if (!migrate_events()) {
	    return;
	}

?

Applications that don't understand the event string (migration related or not)
will just discard it (AIUI)

> +    obj = vbasedev->ops->vfio_get_object(vbasedev);
> +    id = object_get_canonical_path_component(obj);
> +
> +    qapi_event_send_vfio_device_mig_state_changed(
> +        id, mig_state_to_qapi_state(migration->device_state));
> +}
> +
>  static int vfio_migration_set_state(VFIODevice *vbasedev,
>                                      enum vfio_device_mig_state new_state,
>                                      enum vfio_device_mig_state recover_state)
> @@ -126,11 +167,13 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
>          }
>  
>          migration->device_state = recover_state;
> +        vfio_migration_send_state_change_event(vbasedev);
>  
>          return ret;
>      }
>  
>      migration->device_state = new_state;
> +    vfio_migration_send_state_change_event(vbasedev);
>      if (mig_state->data_fd != -1) {
>          if (migration->data_fd != -1) {
>              /*
> @@ -157,6 +200,7 @@ reset_device:
>      }
>  
>      migration->device_state = VFIO_DEVICE_STATE_RUNNING;
> +    vfio_migration_send_state_change_event(vbasedev);
>  
>      return ret;
>  }
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 64780d1b79..8840602c50 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -3362,6 +3362,8 @@ static Property vfio_pci_dev_properties[] = {
>                      VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
>      DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice,
>                              vbasedev.enable_migration, ON_OFF_AUTO_AUTO),
> +    DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice,
> +                     vbasedev.migration_events, false),
>      DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
>      DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice,
>                       vbasedev.ram_block_discard_allowed, false),
Avihai Horon May 1, 2024, 12:28 p.m. UTC | #2
On 01/05/2024 14:50, Joao Martins wrote:
> External email: Use caution opening links or attachments
>
>
> On 30/04/2024 06:16, Avihai Horon wrote:
>> Emit VFIO device migration state change QAPI event when a VFIO device
>> changes its migration state. This can be used by management applications
>> to get updates on the current state of the VFIO device for their own
>> purposes.
>>
>> A new per VFIO device capability, "migration-events", is added so events
>> can be enabled only for the required devices. It is disabled by default.
>>
>> Signed-off-by: Avihai Horon <avihaih@nvidia.com>
>> ---
>>   include/hw/vfio/vfio-common.h |  1 +
>>   hw/vfio/migration.c           | 44 +++++++++++++++++++++++++++++++++++
>>   hw/vfio/pci.c                 |  2 ++
>>   3 files changed, 47 insertions(+)
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index b9da6c08ef..3ec5f2425e 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -115,6 +115,7 @@ typedef struct VFIODevice {
>>       bool no_mmap;
>>       bool ram_block_discard_allowed;
>>       OnOffAuto enable_migration;
>> +    bool migration_events;
>>       VFIODeviceOps *ops;
>>       unsigned int num_irqs;
>>       unsigned int num_regions;
>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>> index 06ae40969b..6bbccf6545 100644
>> --- a/hw/vfio/migration.c
>> +++ b/hw/vfio/migration.c
>> @@ -24,6 +24,7 @@
>>   #include "migration/register.h"
>>   #include "migration/blocker.h"
>>   #include "qapi/error.h"
>> +#include "qapi/qapi-events-vfio.h"
>>   #include "exec/ramlist.h"
>>   #include "exec/ram_addr.h"
>>   #include "pci.h"
>> @@ -80,6 +81,46 @@ static const char *mig_state_to_str(enum vfio_device_mig_state state)
>>       }
>>   }
>>
>> +static VFIODeviceMigState
>> +mig_state_to_qapi_state(enum vfio_device_mig_state state)
>> +{
>> +    switch (state) {
>> +    case VFIO_DEVICE_STATE_STOP:
>> +        return QAPI_VFIO_DEVICE_MIG_STATE_STOP;
>> +    case VFIO_DEVICE_STATE_RUNNING:
>> +        return QAPI_VFIO_DEVICE_MIG_STATE_RUNNING;
>> +    case VFIO_DEVICE_STATE_STOP_COPY:
>> +        return QAPI_VFIO_DEVICE_MIG_STATE_STOP_COPY;
>> +    case VFIO_DEVICE_STATE_RESUMING:
>> +        return QAPI_VFIO_DEVICE_MIG_STATE_RESUMING;
>> +    case VFIO_DEVICE_STATE_RUNNING_P2P:
>> +        return QAPI_VFIO_DEVICE_MIG_STATE_RUNNING_P2P;
>> +    case VFIO_DEVICE_STATE_PRE_COPY:
>> +        return QAPI_VFIO_DEVICE_MIG_STATE_PRE_COPY;
>> +    case VFIO_DEVICE_STATE_PRE_COPY_P2P:
>> +        return QAPI_VFIO_DEVICE_MIG_STATE_PRE_COPY_P2P;
>> +    default:
>> +        g_assert_not_reached();
>> +    }
>> +}
>> +
>> +static void vfio_migration_send_state_change_event(VFIODevice *vbasedev)
>> +{
>> +    VFIOMigration *migration = vbasedev->migration;
>> +    const char *id;
>> +    Object *obj;
>> +
>> +    if (!vbasedev->migration_events) {
>> +        return;
>> +    }
>> +
> Shouldn't this leap frog migrate_events() capability instead of introducing its
> vfio equivalent i.e.
>
>          if (!migrate_events()) {
>              return;
>          }
>
> ?

I used a per VFIO device cap so the events can be fine tuned for each 
device (maybe one device should send events while the other not).
This gives the most flexibility and I don't think it complicates the 
configuration (one downside, though, is that it can't be 
enabled/disabled dynamically during runtime).

I don't think events add much overhead, so if you prefer a global cap, I 
can change it.
However, I'm not sure overloading the existing migrate_events() is valid?

>
> Applications that don't understand the event string (migration related or not)
> will just discard it (AIUI)
>
>> +    obj = vbasedev->ops->vfio_get_object(vbasedev);
>> +    id = object_get_canonical_path_component(obj);
>> +
>> +    qapi_event_send_vfio_device_mig_state_changed(
>> +        id, mig_state_to_qapi_state(migration->device_state));
>> +}
>> +
>>   static int vfio_migration_set_state(VFIODevice *vbasedev,
>>                                       enum vfio_device_mig_state new_state,
>>                                       enum vfio_device_mig_state recover_state)
>> @@ -126,11 +167,13 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
>>           }
>>
>>           migration->device_state = recover_state;
>> +        vfio_migration_send_state_change_event(vbasedev);
>>
>>           return ret;
>>       }
>>
>>       migration->device_state = new_state;
>> +    vfio_migration_send_state_change_event(vbasedev);
>>       if (mig_state->data_fd != -1) {
>>           if (migration->data_fd != -1) {
>>               /*
>> @@ -157,6 +200,7 @@ reset_device:
>>       }
>>
>>       migration->device_state = VFIO_DEVICE_STATE_RUNNING;
>> +    vfio_migration_send_state_change_event(vbasedev);
>>
>>       return ret;
>>   }
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 64780d1b79..8840602c50 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -3362,6 +3362,8 @@ static Property vfio_pci_dev_properties[] = {
>>                       VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
>>       DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice,
>>                               vbasedev.enable_migration, ON_OFF_AUTO_AUTO),
>> +    DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice,
>> +                     vbasedev.migration_events, false),
>>       DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
>>       DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice,
>>                        vbasedev.ram_block_discard_allowed, false),
Joao Martins May 2, 2024, 10:22 a.m. UTC | #3
On 01/05/2024 13:28, Avihai Horon wrote:
> 
> On 01/05/2024 14:50, Joao Martins wrote:
>> External email: Use caution opening links or attachments
>>
>>
>> On 30/04/2024 06:16, Avihai Horon wrote:
>>> Emit VFIO device migration state change QAPI event when a VFIO device
>>> changes its migration state. This can be used by management applications
>>> to get updates on the current state of the VFIO device for their own
>>> purposes.
>>>
>>> A new per VFIO device capability, "migration-events", is added so events
>>> can be enabled only for the required devices. It is disabled by default.
>>>
>>> Signed-off-by: Avihai Horon <avihaih@nvidia.com>
>>> ---
>>>   include/hw/vfio/vfio-common.h |  1 +
>>>   hw/vfio/migration.c           | 44 +++++++++++++++++++++++++++++++++++
>>>   hw/vfio/pci.c                 |  2 ++
>>>   3 files changed, 47 insertions(+)
>>>
>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>> index b9da6c08ef..3ec5f2425e 100644
>>> --- a/include/hw/vfio/vfio-common.h
>>> +++ b/include/hw/vfio/vfio-common.h
>>> @@ -115,6 +115,7 @@ typedef struct VFIODevice {
>>>       bool no_mmap;
>>>       bool ram_block_discard_allowed;
>>>       OnOffAuto enable_migration;
>>> +    bool migration_events;
>>>       VFIODeviceOps *ops;
>>>       unsigned int num_irqs;
>>>       unsigned int num_regions;
>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>> index 06ae40969b..6bbccf6545 100644
>>> --- a/hw/vfio/migration.c
>>> +++ b/hw/vfio/migration.c
>>> @@ -24,6 +24,7 @@
>>>   #include "migration/register.h"
>>>   #include "migration/blocker.h"
>>>   #include "qapi/error.h"
>>> +#include "qapi/qapi-events-vfio.h"
>>>   #include "exec/ramlist.h"
>>>   #include "exec/ram_addr.h"
>>>   #include "pci.h"
>>> @@ -80,6 +81,46 @@ static const char *mig_state_to_str(enum
>>> vfio_device_mig_state state)
>>>       }
>>>   }
>>>
>>> +static VFIODeviceMigState
>>> +mig_state_to_qapi_state(enum vfio_device_mig_state state)
>>> +{
>>> +    switch (state) {
>>> +    case VFIO_DEVICE_STATE_STOP:
>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_STOP;
>>> +    case VFIO_DEVICE_STATE_RUNNING:
>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_RUNNING;
>>> +    case VFIO_DEVICE_STATE_STOP_COPY:
>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_STOP_COPY;
>>> +    case VFIO_DEVICE_STATE_RESUMING:
>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_RESUMING;
>>> +    case VFIO_DEVICE_STATE_RUNNING_P2P:
>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_RUNNING_P2P;
>>> +    case VFIO_DEVICE_STATE_PRE_COPY:
>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_PRE_COPY;
>>> +    case VFIO_DEVICE_STATE_PRE_COPY_P2P:
>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_PRE_COPY_P2P;
>>> +    default:
>>> +        g_assert_not_reached();
>>> +    }
>>> +}
>>> +
>>> +static void vfio_migration_send_state_change_event(VFIODevice *vbasedev)
>>> +{
>>> +    VFIOMigration *migration = vbasedev->migration;
>>> +    const char *id;
>>> +    Object *obj;
>>> +
>>> +    if (!vbasedev->migration_events) {
>>> +        return;
>>> +    }
>>> +
>> Shouldn't this leap frog migrate_events() capability instead of introducing its
>> vfio equivalent i.e.
>>
>>          if (!migrate_events()) {
>>              return;
>>          }
>>
>> ?
> 
> I used a per VFIO device cap so the events can be fine tuned for each device
> (maybe one device should send events while the other not).
> This gives the most flexibility and I don't think it complicates the
> configuration (one downside, though, is that it can't be enabled/disabled
> dynamically during runtime).
> 
Right.

> I don't think events add much overhead, so if you prefer a global cap, I can
> change it.
> However, I'm not sure overloading the existing migrate_events() is valid?
> 

migration 'events' capability just means we will have some migration events
emited via QAPI monitor for: 1) general global status and 2) for each migration
pass (both with different event names=. So the suggestion was just what feels a
natural extension of that (...)

>>
>> Applications that don't understand the event string (migration related or not)
>> will just discard it (AIUI)
>>

(...) specially because of this as all these events have a different name.

But overloading might not make sense for others IDK ... it was just a suggestion
:) not a strong preference

>>> +    obj = vbasedev->ops->vfio_get_object(vbasedev);
>>> +    id = object_get_canonical_path_component(obj);
>>> +
>>> +    qapi_event_send_vfio_device_mig_state_changed(
>>> +        id, mig_state_to_qapi_state(migration->device_state));
>>> +}
>>> +
>>>   static int vfio_migration_set_state(VFIODevice *vbasedev,
>>>                                       enum vfio_device_mig_state new_state,
>>>                                       enum vfio_device_mig_state recover_state)
>>> @@ -126,11 +167,13 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
>>>           }
>>>
>>>           migration->device_state = recover_state;
>>> +        vfio_migration_send_state_change_event(vbasedev);
>>>
>>>           return ret;
>>>       }
>>>
>>>       migration->device_state = new_state;
>>> +    vfio_migration_send_state_change_event(vbasedev);
>>>       if (mig_state->data_fd != -1) {
>>>           if (migration->data_fd != -1) {
>>>               /*
>>> @@ -157,6 +200,7 @@ reset_device:
>>>       }
>>>
>>>       migration->device_state = VFIO_DEVICE_STATE_RUNNING;
>>> +    vfio_migration_send_state_change_event(vbasedev);
>>>
>>>       return ret;
>>>   }
>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>> index 64780d1b79..8840602c50 100644
>>> --- a/hw/vfio/pci.c
>>> +++ b/hw/vfio/pci.c
>>> @@ -3362,6 +3362,8 @@ static Property vfio_pci_dev_properties[] = {
>>>                       VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
>>>       DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice,
>>>                               vbasedev.enable_migration, ON_OFF_AUTO_AUTO),
>>> +    DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice,
>>> +                     vbasedev.migration_events, false),
>>>       DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
>>>       DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice,
>>>                        vbasedev.ram_block_discard_allowed, false),
Avihai Horon May 5, 2024, 7:28 a.m. UTC | #4
On 02/05/2024 13:22, Joao Martins wrote:
> External email: Use caution opening links or attachments
>
>
> On 01/05/2024 13:28, Avihai Horon wrote:
>> On 01/05/2024 14:50, Joao Martins wrote:
>>> External email: Use caution opening links or attachments
>>>
>>>
>>> On 30/04/2024 06:16, Avihai Horon wrote:
>>>> Emit VFIO device migration state change QAPI event when a VFIO device
>>>> changes its migration state. This can be used by management applications
>>>> to get updates on the current state of the VFIO device for their own
>>>> purposes.
>>>>
>>>> A new per VFIO device capability, "migration-events", is added so events
>>>> can be enabled only for the required devices. It is disabled by default.
>>>>
>>>> Signed-off-by: Avihai Horon<avihaih@nvidia.com>
>>>> ---
>>>>    include/hw/vfio/vfio-common.h |  1 +
>>>>    hw/vfio/migration.c           | 44 +++++++++++++++++++++++++++++++++++
>>>>    hw/vfio/pci.c                 |  2 ++
>>>>    3 files changed, 47 insertions(+)
>>>>
>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>>> index b9da6c08ef..3ec5f2425e 100644
>>>> --- a/include/hw/vfio/vfio-common.h
>>>> +++ b/include/hw/vfio/vfio-common.h
>>>> @@ -115,6 +115,7 @@ typedef struct VFIODevice {
>>>>        bool no_mmap;
>>>>        bool ram_block_discard_allowed;
>>>>        OnOffAuto enable_migration;
>>>> +    bool migration_events;
>>>>        VFIODeviceOps *ops;
>>>>        unsigned int num_irqs;
>>>>        unsigned int num_regions;
>>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>>> index 06ae40969b..6bbccf6545 100644
>>>> --- a/hw/vfio/migration.c
>>>> +++ b/hw/vfio/migration.c
>>>> @@ -24,6 +24,7 @@
>>>>    #include "migration/register.h"
>>>>    #include "migration/blocker.h"
>>>>    #include "qapi/error.h"
>>>> +#include "qapi/qapi-events-vfio.h"
>>>>    #include "exec/ramlist.h"
>>>>    #include "exec/ram_addr.h"
>>>>    #include "pci.h"
>>>> @@ -80,6 +81,46 @@ static const char *mig_state_to_str(enum
>>>> vfio_device_mig_state state)
>>>>        }
>>>>    }
>>>>
>>>> +static VFIODeviceMigState
>>>> +mig_state_to_qapi_state(enum vfio_device_mig_state state)
>>>> +{
>>>> +    switch (state) {
>>>> +    case VFIO_DEVICE_STATE_STOP:
>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_STOP;
>>>> +    case VFIO_DEVICE_STATE_RUNNING:
>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_RUNNING;
>>>> +    case VFIO_DEVICE_STATE_STOP_COPY:
>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_STOP_COPY;
>>>> +    case VFIO_DEVICE_STATE_RESUMING:
>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_RESUMING;
>>>> +    case VFIO_DEVICE_STATE_RUNNING_P2P:
>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_RUNNING_P2P;
>>>> +    case VFIO_DEVICE_STATE_PRE_COPY:
>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_PRE_COPY;
>>>> +    case VFIO_DEVICE_STATE_PRE_COPY_P2P:
>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_PRE_COPY_P2P;
>>>> +    default:
>>>> +        g_assert_not_reached();
>>>> +    }
>>>> +}
>>>> +
>>>> +static void vfio_migration_send_state_change_event(VFIODevice *vbasedev)
>>>> +{
>>>> +    VFIOMigration *migration = vbasedev->migration;
>>>> +    const char *id;
>>>> +    Object *obj;
>>>> +
>>>> +    if (!vbasedev->migration_events) {
>>>> +        return;
>>>> +    }
>>>> +
>>> Shouldn't this leap frog migrate_events() capability instead of introducing its
>>> vfio equivalent i.e.
>>>
>>>           if (!migrate_events()) {
>>>               return;
>>>           }
>>>
>>> ?
>> I used a per VFIO device cap so the events can be fine tuned for each device
>> (maybe one device should send events while the other not).
>> This gives the most flexibility and I don't think it complicates the
>> configuration (one downside, though, is that it can't be enabled/disabled
>> dynamically during runtime).
>>
> Right.
>
>> I don't think events add much overhead, so if you prefer a global cap, I can
>> change it.
>> However, I'm not sure overloading the existing migrate_events() is valid?
>>
> migration 'events' capability just means we will have some migration events
> emited via QAPI monitor for: 1) general global status and 2) for each migration
> pass (both with different event names=.

Yes, it's already overloaded.

In migration QAPI it says: "@events: generate events for each migration 
state change (since 2.4)".
This only refers to the MIGRATION event AFAIU.

Later on (in QEMU 2.6), MIGRATION_PASS event was added and the events 
cap was overloaded for the first time (without changing @events 
description).

Now we want to add yet another use for events capability, the VFIO 
migration state change events.

I think what bothers me is the @events description, which is not accurate.
Maybe it should be changed to "@events: generate migration related 
events (since 2.4)"? However, I'm not sure if it's OK to do this.

>   So the suggestion was just what feels a
> natural extension of that (...)
>
>>> Applications that don't understand the event string (migration related or not)
>>> will just discard it (AIUI)
>>>
> (...) specially because of this as all these events have a different name.
>
> But overloading might not make sense for others IDK ... it was just a suggestion
> :) not a strong preference

Yes, I get your rationale.
I don't have a strong opinion either, so maybe let's see what other 
people think.

Thanks.

>>>> +    obj = vbasedev->ops->vfio_get_object(vbasedev);
>>>> +    id = object_get_canonical_path_component(obj);
>>>> +
>>>> +    qapi_event_send_vfio_device_mig_state_changed(
>>>> +        id, mig_state_to_qapi_state(migration->device_state));
>>>> +}
>>>> +
>>>>    static int vfio_migration_set_state(VFIODevice *vbasedev,
>>>>                                        enum vfio_device_mig_state new_state,
>>>>                                        enum vfio_device_mig_state recover_state)
>>>> @@ -126,11 +167,13 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
>>>>            }
>>>>
>>>>            migration->device_state = recover_state;
>>>> +        vfio_migration_send_state_change_event(vbasedev);
>>>>
>>>>            return ret;
>>>>        }
>>>>
>>>>        migration->device_state = new_state;
>>>> +    vfio_migration_send_state_change_event(vbasedev);
>>>>        if (mig_state->data_fd != -1) {
>>>>            if (migration->data_fd != -1) {
>>>>                /*
>>>> @@ -157,6 +200,7 @@ reset_device:
>>>>        }
>>>>
>>>>        migration->device_state = VFIO_DEVICE_STATE_RUNNING;
>>>> +    vfio_migration_send_state_change_event(vbasedev);
>>>>
>>>>        return ret;
>>>>    }
>>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>>> index 64780d1b79..8840602c50 100644
>>>> --- a/hw/vfio/pci.c
>>>> +++ b/hw/vfio/pci.c
>>>> @@ -3362,6 +3362,8 @@ static Property vfio_pci_dev_properties[] = {
>>>>                        VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
>>>>        DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice,
>>>>                                vbasedev.enable_migration, ON_OFF_AUTO_AUTO),
>>>> +    DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice,
>>>> +                     vbasedev.migration_events, false),
>>>>        DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
>>>>        DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice,
>>>>                         vbasedev.ram_block_discard_allowed, false),
Markus Armbruster May 6, 2024, 4:56 a.m. UTC | #5
Peter, Fabiano, I'd like to hear your opinion on the issue discussed
below.

Avihai Horon <avihaih@nvidia.com> writes:

> On 02/05/2024 13:22, Joao Martins wrote:
>> External email: Use caution opening links or attachments
>>
>>
>> On 01/05/2024 13:28, Avihai Horon wrote:
>>> On 01/05/2024 14:50, Joao Martins wrote:
>>>> External email: Use caution opening links or attachments
>>>>
>>>>
>>>> On 30/04/2024 06:16, Avihai Horon wrote:
>>>>> Emit VFIO device migration state change QAPI event when a VFIO device
>>>>> changes its migration state. This can be used by management applications
>>>>> to get updates on the current state of the VFIO device for their own
>>>>> purposes.
>>>>>
>>>>> A new per VFIO device capability, "migration-events", is added so events
>>>>> can be enabled only for the required devices. It is disabled by default.
>>>>>
>>>>> Signed-off-by: Avihai Horon<avihaih@nvidia.com>
>>>>> ---
>>>>>    include/hw/vfio/vfio-common.h |  1 +
>>>>>    hw/vfio/migration.c           | 44 +++++++++++++++++++++++++++++++++++
>>>>>    hw/vfio/pci.c                 |  2 ++
>>>>>    3 files changed, 47 insertions(+)
>>>>>
>>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>>>> index b9da6c08ef..3ec5f2425e 100644
>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>> @@ -115,6 +115,7 @@ typedef struct VFIODevice {
>>>>>        bool no_mmap;
>>>>>        bool ram_block_discard_allowed;
>>>>>        OnOffAuto enable_migration;
>>>>> +    bool migration_events;
>>>>>        VFIODeviceOps *ops;
>>>>>        unsigned int num_irqs;
>>>>>        unsigned int num_regions;
>>>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>>>> index 06ae40969b..6bbccf6545 100644
>>>>> --- a/hw/vfio/migration.c
>>>>> +++ b/hw/vfio/migration.c
>>>>> @@ -24,6 +24,7 @@
>>>>>    #include "migration/register.h"
>>>>>    #include "migration/blocker.h"
>>>>>    #include "qapi/error.h"
>>>>> +#include "qapi/qapi-events-vfio.h"
>>>>>    #include "exec/ramlist.h"
>>>>>    #include "exec/ram_addr.h"
>>>>>    #include "pci.h"
>>>>> @@ -80,6 +81,46 @@ static const char *mig_state_to_str(enum
>>>>> vfio_device_mig_state state)
>>>>>        }
>>>>>    }
>>>>>
>>>>> +static VFIODeviceMigState
>>>>> +mig_state_to_qapi_state(enum vfio_device_mig_state state)
>>>>> +{
>>>>> +    switch (state) {
>>>>> +    case VFIO_DEVICE_STATE_STOP:
>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_STOP;
>>>>> +    case VFIO_DEVICE_STATE_RUNNING:
>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_RUNNING;
>>>>> +    case VFIO_DEVICE_STATE_STOP_COPY:
>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_STOP_COPY;
>>>>> +    case VFIO_DEVICE_STATE_RESUMING:
>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_RESUMING;
>>>>> +    case VFIO_DEVICE_STATE_RUNNING_P2P:
>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_RUNNING_P2P;
>>>>> +    case VFIO_DEVICE_STATE_PRE_COPY:
>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_PRE_COPY;
>>>>> +    case VFIO_DEVICE_STATE_PRE_COPY_P2P:
>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_PRE_COPY_P2P;
>>>>> +    default:
>>>>> +        g_assert_not_reached();
>>>>> +    }
>>>>> +}
>>>>> +
>>>>> +static void vfio_migration_send_state_change_event(VFIODevice *vbasedev)
>>>>> +{
>>>>> +    VFIOMigration *migration = vbasedev->migration;
>>>>> +    const char *id;
>>>>> +    Object *obj;
>>>>> +
>>>>> +    if (!vbasedev->migration_events) {
>>>>> +        return;
>>>>> +    }
>>>>> +
>>>> Shouldn't this leap frog migrate_events() capability instead of introducing its
>>>> vfio equivalent i.e.
>>>>
>>>>           if (!migrate_events()) {
>>>>               return;
>>>>           }
>>>>
>>>> ?
>>>
>>> I used a per VFIO device cap so the events can be fine tuned for each device
>>> (maybe one device should send events while the other not).
>>> This gives the most flexibility and I don't think it complicates the
>>> configuration (one downside, though, is that it can't be enabled/disabled
>>> dynamically during runtime).
>>>
>> Right.
>>
>>> I don't think events add much overhead, so if you prefer a global cap, I can
>>> change it.
>>> However, I'm not sure overloading the existing migrate_events() is valid?
>>>
>> migration 'events' capability just means we will have some migration events
>> emited via QAPI monitor for: 1) general global status and 2) for each migration
>> pass (both with different event names=.
>
> Yes, it's already overloaded.
>
> In migration QAPI it says: "@events: generate events for each migration state change (since 2.4)".
> This only refers to the MIGRATION event AFAIU.
>
> Later on (in QEMU 2.6), MIGRATION_PASS event was added and the events cap was overloaded for the first time (without changing @events description).
>
> Now we want to add yet another use for events capability, the VFIO migration state change events.
>
> I think what bothers me is the @events description, which is not accurate.
> Maybe it should be changed to "@events: generate migration related events (since 2.4)"? However, I'm not sure if it's OK to do this.
>
>>   So the suggestion was just what feels a
>> natural extension of that (...)
>>
>>>> Applications that don't understand the event string (migration related or not)
>>>> will just discard it (AIUI)
>>
>> (...) specially because of this as all these events have a different name.
>>
>> But overloading might not make sense for others IDK ... it was just a suggestion
>> :) not a strong preference
>
> Yes, I get your rationale.
> I don't have a strong opinion either, so maybe let's see what other people think.
>
> Thanks.

[...]
Fabiano Rosas May 6, 2024, 2:38 p.m. UTC | #6
Markus Armbruster <armbru@redhat.com> writes:

> Peter, Fabiano, I'd like to hear your opinion on the issue discussed
> below.
>
> Avihai Horon <avihaih@nvidia.com> writes:
>
>> On 02/05/2024 13:22, Joao Martins wrote:
>>> External email: Use caution opening links or attachments
>>>
>>>
>>> On 01/05/2024 13:28, Avihai Horon wrote:
>>>> On 01/05/2024 14:50, Joao Martins wrote:
>>>>> External email: Use caution opening links or attachments
>>>>>
>>>>>
>>>>> On 30/04/2024 06:16, Avihai Horon wrote:
>>>>>> Emit VFIO device migration state change QAPI event when a VFIO device
>>>>>> changes its migration state. This can be used by management applications
>>>>>> to get updates on the current state of the VFIO device for their own
>>>>>> purposes.
>>>>>>
>>>>>> A new per VFIO device capability, "migration-events", is added so events
>>>>>> can be enabled only for the required devices. It is disabled by default.
>>>>>>
>>>>>> Signed-off-by: Avihai Horon<avihaih@nvidia.com>
>>>>>> ---
>>>>>>    include/hw/vfio/vfio-common.h |  1 +
>>>>>>    hw/vfio/migration.c           | 44 +++++++++++++++++++++++++++++++++++
>>>>>>    hw/vfio/pci.c                 |  2 ++
>>>>>>    3 files changed, 47 insertions(+)
>>>>>>
>>>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>>>>> index b9da6c08ef..3ec5f2425e 100644
>>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>>> @@ -115,6 +115,7 @@ typedef struct VFIODevice {
>>>>>>        bool no_mmap;
>>>>>>        bool ram_block_discard_allowed;
>>>>>>        OnOffAuto enable_migration;
>>>>>> +    bool migration_events;
>>>>>>        VFIODeviceOps *ops;
>>>>>>        unsigned int num_irqs;
>>>>>>        unsigned int num_regions;
>>>>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>>>>> index 06ae40969b..6bbccf6545 100644
>>>>>> --- a/hw/vfio/migration.c
>>>>>> +++ b/hw/vfio/migration.c
>>>>>> @@ -24,6 +24,7 @@
>>>>>>    #include "migration/register.h"
>>>>>>    #include "migration/blocker.h"
>>>>>>    #include "qapi/error.h"
>>>>>> +#include "qapi/qapi-events-vfio.h"
>>>>>>    #include "exec/ramlist.h"
>>>>>>    #include "exec/ram_addr.h"
>>>>>>    #include "pci.h"
>>>>>> @@ -80,6 +81,46 @@ static const char *mig_state_to_str(enum
>>>>>> vfio_device_mig_state state)
>>>>>>        }
>>>>>>    }
>>>>>>
>>>>>> +static VFIODeviceMigState
>>>>>> +mig_state_to_qapi_state(enum vfio_device_mig_state state)
>>>>>> +{
>>>>>> +    switch (state) {
>>>>>> +    case VFIO_DEVICE_STATE_STOP:
>>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_STOP;
>>>>>> +    case VFIO_DEVICE_STATE_RUNNING:
>>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_RUNNING;
>>>>>> +    case VFIO_DEVICE_STATE_STOP_COPY:
>>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_STOP_COPY;
>>>>>> +    case VFIO_DEVICE_STATE_RESUMING:
>>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_RESUMING;
>>>>>> +    case VFIO_DEVICE_STATE_RUNNING_P2P:
>>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_RUNNING_P2P;
>>>>>> +    case VFIO_DEVICE_STATE_PRE_COPY:
>>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_PRE_COPY;
>>>>>> +    case VFIO_DEVICE_STATE_PRE_COPY_P2P:
>>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_PRE_COPY_P2P;
>>>>>> +    default:
>>>>>> +        g_assert_not_reached();
>>>>>> +    }
>>>>>> +}
>>>>>> +
>>>>>> +static void vfio_migration_send_state_change_event(VFIODevice *vbasedev)
>>>>>> +{
>>>>>> +    VFIOMigration *migration = vbasedev->migration;
>>>>>> +    const char *id;
>>>>>> +    Object *obj;
>>>>>> +
>>>>>> +    if (!vbasedev->migration_events) {
>>>>>> +        return;
>>>>>> +    }
>>>>>> +
>>>>> Shouldn't this leap frog migrate_events() capability instead of introducing its
>>>>> vfio equivalent i.e.
>>>>>
>>>>>           if (!migrate_events()) {
>>>>>               return;
>>>>>           }
>>>>>
>>>>> ?
>>>>
>>>> I used a per VFIO device cap so the events can be fine tuned for each device
>>>> (maybe one device should send events while the other not).
>>>> This gives the most flexibility and I don't think it complicates the
>>>> configuration (one downside, though, is that it can't be enabled/disabled
>>>> dynamically during runtime).
>>>>
>>> Right.
>>>
>>>> I don't think events add much overhead, so if you prefer a global cap, I can
>>>> change it.
>>>> However, I'm not sure overloading the existing migrate_events() is valid?
>>>>
>>> migration 'events' capability just means we will have some migration events
>>> emited via QAPI monitor for: 1) general global status and 2) for each migration
>>> pass (both with different event names=.
>>
>> Yes, it's already overloaded.
>>
>> In migration QAPI it says: "@events: generate events for each migration state change (since 2.4)".
>> This only refers to the MIGRATION event AFAIU.
>>
>> Later on (in QEMU 2.6), MIGRATION_PASS event was added and the events cap was overloaded for the first time (without changing @events description).
>>
>> Now we want to add yet another use for events capability, the VFIO migration state change events.
>>
>> I think what bothers me is the @events description, which is not accurate.
>> Maybe it should be changed to "@events: generate migration related events (since 2.4)"? However, I'm not sure if it's OK to do this.
>>
>>>   So the suggestion was just what feels a
>>> natural extension of that (...)
>>>
>>>>> Applications that don't understand the event string (migration related or not)
>>>>> will just discard it (AIUI)
>>>
>>> (...) specially because of this as all these events have a different name.
>>>
>>> But overloading might not make sense for others IDK ... it was just a suggestion
>>> :) not a strong preference
>>
>> Yes, I get your rationale.
>> I don't have a strong opinion either, so maybe let's see what other people think.

I don't see the need to tie this to the migration events
capability. Although there's overlap in the terms used, this seems more
like exposing a device feature via QEMU, then a migration event
per-se. The state changes also happen during moments unrelated to
migration (cover letter mentions start/stopping guest), so I assume we'd
like to keep those even if the management layer doesn't want to see
migration events.
Peter Xu May 6, 2024, 3:07 p.m. UTC | #7
On Mon, May 06, 2024 at 11:38:00AM -0300, Fabiano Rosas wrote:
> Markus Armbruster <armbru@redhat.com> writes:
> 
> > Peter, Fabiano, I'd like to hear your opinion on the issue discussed
> > below.
> >
> > Avihai Horon <avihaih@nvidia.com> writes:
> >
> >> On 02/05/2024 13:22, Joao Martins wrote:
> >>> External email: Use caution opening links or attachments
> >>>
> >>>
> >>> On 01/05/2024 13:28, Avihai Horon wrote:
> >>>> On 01/05/2024 14:50, Joao Martins wrote:
> >>>>> External email: Use caution opening links or attachments
> >>>>>
> >>>>>
> >>>>> On 30/04/2024 06:16, Avihai Horon wrote:
> >>>>>> Emit VFIO device migration state change QAPI event when a VFIO device
> >>>>>> changes its migration state. This can be used by management applications
> >>>>>> to get updates on the current state of the VFIO device for their own
> >>>>>> purposes.
> >>>>>>
> >>>>>> A new per VFIO device capability, "migration-events", is added so events
> >>>>>> can be enabled only for the required devices. It is disabled by default.
> >>>>>>
> >>>>>> Signed-off-by: Avihai Horon<avihaih@nvidia.com>
> >>>>>> ---
> >>>>>>    include/hw/vfio/vfio-common.h |  1 +
> >>>>>>    hw/vfio/migration.c           | 44 +++++++++++++++++++++++++++++++++++
> >>>>>>    hw/vfio/pci.c                 |  2 ++
> >>>>>>    3 files changed, 47 insertions(+)
> >>>>>>
> >>>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> >>>>>> index b9da6c08ef..3ec5f2425e 100644
> >>>>>> --- a/include/hw/vfio/vfio-common.h
> >>>>>> +++ b/include/hw/vfio/vfio-common.h
> >>>>>> @@ -115,6 +115,7 @@ typedef struct VFIODevice {
> >>>>>>        bool no_mmap;
> >>>>>>        bool ram_block_discard_allowed;
> >>>>>>        OnOffAuto enable_migration;
> >>>>>> +    bool migration_events;
> >>>>>>        VFIODeviceOps *ops;
> >>>>>>        unsigned int num_irqs;
> >>>>>>        unsigned int num_regions;
> >>>>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> >>>>>> index 06ae40969b..6bbccf6545 100644
> >>>>>> --- a/hw/vfio/migration.c
> >>>>>> +++ b/hw/vfio/migration.c
> >>>>>> @@ -24,6 +24,7 @@
> >>>>>>    #include "migration/register.h"
> >>>>>>    #include "migration/blocker.h"
> >>>>>>    #include "qapi/error.h"
> >>>>>> +#include "qapi/qapi-events-vfio.h"
> >>>>>>    #include "exec/ramlist.h"
> >>>>>>    #include "exec/ram_addr.h"
> >>>>>>    #include "pci.h"
> >>>>>> @@ -80,6 +81,46 @@ static const char *mig_state_to_str(enum
> >>>>>> vfio_device_mig_state state)
> >>>>>>        }
> >>>>>>    }
> >>>>>>
> >>>>>> +static VFIODeviceMigState
> >>>>>> +mig_state_to_qapi_state(enum vfio_device_mig_state state)
> >>>>>> +{
> >>>>>> +    switch (state) {
> >>>>>> +    case VFIO_DEVICE_STATE_STOP:
> >>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_STOP;
> >>>>>> +    case VFIO_DEVICE_STATE_RUNNING:
> >>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_RUNNING;
> >>>>>> +    case VFIO_DEVICE_STATE_STOP_COPY:
> >>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_STOP_COPY;
> >>>>>> +    case VFIO_DEVICE_STATE_RESUMING:
> >>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_RESUMING;
> >>>>>> +    case VFIO_DEVICE_STATE_RUNNING_P2P:
> >>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_RUNNING_P2P;
> >>>>>> +    case VFIO_DEVICE_STATE_PRE_COPY:
> >>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_PRE_COPY;
> >>>>>> +    case VFIO_DEVICE_STATE_PRE_COPY_P2P:
> >>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_PRE_COPY_P2P;
> >>>>>> +    default:
> >>>>>> +        g_assert_not_reached();
> >>>>>> +    }
> >>>>>> +}
> >>>>>> +
> >>>>>> +static void vfio_migration_send_state_change_event(VFIODevice *vbasedev)
> >>>>>> +{
> >>>>>> +    VFIOMigration *migration = vbasedev->migration;
> >>>>>> +    const char *id;
> >>>>>> +    Object *obj;
> >>>>>> +
> >>>>>> +    if (!vbasedev->migration_events) {
> >>>>>> +        return;
> >>>>>> +    }
> >>>>>> +
> >>>>> Shouldn't this leap frog migrate_events() capability instead of introducing its
> >>>>> vfio equivalent i.e.
> >>>>>
> >>>>>           if (!migrate_events()) {
> >>>>>               return;
> >>>>>           }
> >>>>>
> >>>>> ?
> >>>>
> >>>> I used a per VFIO device cap so the events can be fine tuned for each device
> >>>> (maybe one device should send events while the other not).
> >>>> This gives the most flexibility and I don't think it complicates the
> >>>> configuration (one downside, though, is that it can't be enabled/disabled
> >>>> dynamically during runtime).
> >>>>
> >>> Right.
> >>>
> >>>> I don't think events add much overhead, so if you prefer a global cap, I can
> >>>> change it.
> >>>> However, I'm not sure overloading the existing migrate_events() is valid?
> >>>>
> >>> migration 'events' capability just means we will have some migration events
> >>> emited via QAPI monitor for: 1) general global status and 2) for each migration
> >>> pass (both with different event names=.
> >>
> >> Yes, it's already overloaded.
> >>
> >> In migration QAPI it says: "@events: generate events for each migration state change (since 2.4)".
> >> This only refers to the MIGRATION event AFAIU.
> >>
> >> Later on (in QEMU 2.6), MIGRATION_PASS event was added and the events cap was overloaded for the first time (without changing @events description).
> >>
> >> Now we want to add yet another use for events capability, the VFIO migration state change events.
> >>
> >> I think what bothers me is the @events description, which is not accurate.
> >> Maybe it should be changed to "@events: generate migration related events (since 2.4)"? However, I'm not sure if it's OK to do this.
> >>
> >>>   So the suggestion was just what feels a
> >>> natural extension of that (...)
> >>>
> >>>>> Applications that don't understand the event string (migration related or not)
> >>>>> will just discard it (AIUI)
> >>>
> >>> (...) specially because of this as all these events have a different name.
> >>>
> >>> But overloading might not make sense for others IDK ... it was just a suggestion
> >>> :) not a strong preference
> >>
> >> Yes, I get your rationale.
> >> I don't have a strong opinion either, so maybe let's see what other people think.
> 
> I don't see the need to tie this to the migration events
> capability. Although there's overlap in the terms used, this seems more
> like exposing a device feature via QEMU, then a migration event
> per-se. The state changes also happen during moments unrelated to
> migration (cover letter mentions start/stopping guest), so I assume we'd
> like to keep those even if the management layer doesn't want to see
> migration events.

Yeh makes sense to me to keep that per-device flag, as it's not a generic
migration state change.

  @events: generate events for each migration state change (since 2.4)

The other option is to emit event only if "migrate_events() &&
vfio_dev_send_event", but that sounds like an overkill to overload "events"
cap, and doesn't look like bring anything better than just keeping them
separate.

While at it, another trivial comment is maybe it's nice to have a helper to
both update the vfio migration state, plus emitting events when necessary.

Thanks,
Avihai Horon May 7, 2024, 7:47 a.m. UTC | #8
On 06/05/2024 18:07, Peter Xu wrote:
> External email: Use caution opening links or attachments
>
>
> On Mon, May 06, 2024 at 11:38:00AM -0300, Fabiano Rosas wrote:
>> Markus Armbruster <armbru@redhat.com> writes:
>>
>>> Peter, Fabiano, I'd like to hear your opinion on the issue discussed
>>> below.
>>>
>>> Avihai Horon <avihaih@nvidia.com> writes:
>>>
>>>> On 02/05/2024 13:22, Joao Martins wrote:
>>>>> External email: Use caution opening links or attachments
>>>>>
>>>>>
>>>>> On 01/05/2024 13:28, Avihai Horon wrote:
>>>>>> On 01/05/2024 14:50, Joao Martins wrote:
>>>>>>> External email: Use caution opening links or attachments
>>>>>>>
>>>>>>>
>>>>>>> On 30/04/2024 06:16, Avihai Horon wrote:
>>>>>>>> Emit VFIO device migration state change QAPI event when a VFIO device
>>>>>>>> changes its migration state. This can be used by management applications
>>>>>>>> to get updates on the current state of the VFIO device for their own
>>>>>>>> purposes.
>>>>>>>>
>>>>>>>> A new per VFIO device capability, "migration-events", is added so events
>>>>>>>> can be enabled only for the required devices. It is disabled by default.
>>>>>>>>
>>>>>>>> Signed-off-by: Avihai Horon<avihaih@nvidia.com>
>>>>>>>> ---
>>>>>>>>     include/hw/vfio/vfio-common.h |  1 +
>>>>>>>>     hw/vfio/migration.c           | 44 +++++++++++++++++++++++++++++++++++
>>>>>>>>     hw/vfio/pci.c                 |  2 ++
>>>>>>>>     3 files changed, 47 insertions(+)
>>>>>>>>
>>>>>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>>>>>>> index b9da6c08ef..3ec5f2425e 100644
>>>>>>>> --- a/include/hw/vfio/vfio-common.h
>>>>>>>> +++ b/include/hw/vfio/vfio-common.h
>>>>>>>> @@ -115,6 +115,7 @@ typedef struct VFIODevice {
>>>>>>>>         bool no_mmap;
>>>>>>>>         bool ram_block_discard_allowed;
>>>>>>>>         OnOffAuto enable_migration;
>>>>>>>> +    bool migration_events;
>>>>>>>>         VFIODeviceOps *ops;
>>>>>>>>         unsigned int num_irqs;
>>>>>>>>         unsigned int num_regions;
>>>>>>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>>>>>>> index 06ae40969b..6bbccf6545 100644
>>>>>>>> --- a/hw/vfio/migration.c
>>>>>>>> +++ b/hw/vfio/migration.c
>>>>>>>> @@ -24,6 +24,7 @@
>>>>>>>>     #include "migration/register.h"
>>>>>>>>     #include "migration/blocker.h"
>>>>>>>>     #include "qapi/error.h"
>>>>>>>> +#include "qapi/qapi-events-vfio.h"
>>>>>>>>     #include "exec/ramlist.h"
>>>>>>>>     #include "exec/ram_addr.h"
>>>>>>>>     #include "pci.h"
>>>>>>>> @@ -80,6 +81,46 @@ static const char *mig_state_to_str(enum
>>>>>>>> vfio_device_mig_state state)
>>>>>>>>         }
>>>>>>>>     }
>>>>>>>>
>>>>>>>> +static VFIODeviceMigState
>>>>>>>> +mig_state_to_qapi_state(enum vfio_device_mig_state state)
>>>>>>>> +{
>>>>>>>> +    switch (state) {
>>>>>>>> +    case VFIO_DEVICE_STATE_STOP:
>>>>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_STOP;
>>>>>>>> +    case VFIO_DEVICE_STATE_RUNNING:
>>>>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_RUNNING;
>>>>>>>> +    case VFIO_DEVICE_STATE_STOP_COPY:
>>>>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_STOP_COPY;
>>>>>>>> +    case VFIO_DEVICE_STATE_RESUMING:
>>>>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_RESUMING;
>>>>>>>> +    case VFIO_DEVICE_STATE_RUNNING_P2P:
>>>>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_RUNNING_P2P;
>>>>>>>> +    case VFIO_DEVICE_STATE_PRE_COPY:
>>>>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_PRE_COPY;
>>>>>>>> +    case VFIO_DEVICE_STATE_PRE_COPY_P2P:
>>>>>>>> +        return QAPI_VFIO_DEVICE_MIG_STATE_PRE_COPY_P2P;
>>>>>>>> +    default:
>>>>>>>> +        g_assert_not_reached();
>>>>>>>> +    }
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static void vfio_migration_send_state_change_event(VFIODevice *vbasedev)
>>>>>>>> +{
>>>>>>>> +    VFIOMigration *migration = vbasedev->migration;
>>>>>>>> +    const char *id;
>>>>>>>> +    Object *obj;
>>>>>>>> +
>>>>>>>> +    if (!vbasedev->migration_events) {
>>>>>>>> +        return;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>> Shouldn't this leap frog migrate_events() capability instead of introducing its
>>>>>>> vfio equivalent i.e.
>>>>>>>
>>>>>>>            if (!migrate_events()) {
>>>>>>>                return;
>>>>>>>            }
>>>>>>>
>>>>>>> ?
>>>>>> I used a per VFIO device cap so the events can be fine tuned for each device
>>>>>> (maybe one device should send events while the other not).
>>>>>> This gives the most flexibility and I don't think it complicates the
>>>>>> configuration (one downside, though, is that it can't be enabled/disabled
>>>>>> dynamically during runtime).
>>>>>>
>>>>> Right.
>>>>>
>>>>>> I don't think events add much overhead, so if you prefer a global cap, I can
>>>>>> change it.
>>>>>> However, I'm not sure overloading the existing migrate_events() is valid?
>>>>>>
>>>>> migration 'events' capability just means we will have some migration events
>>>>> emited via QAPI monitor for: 1) general global status and 2) for each migration
>>>>> pass (both with different event names=.
>>>> Yes, it's already overloaded.
>>>>
>>>> In migration QAPI it says: "@events: generate events for each migration state change (since 2.4)".
>>>> This only refers to the MIGRATION event AFAIU.
>>>>
>>>> Later on (in QEMU 2.6), MIGRATION_PASS event was added and the events cap was overloaded for the first time (without changing @events description).
>>>>
>>>> Now we want to add yet another use for events capability, the VFIO migration state change events.
>>>>
>>>> I think what bothers me is the @events description, which is not accurate.
>>>> Maybe it should be changed to "@events: generate migration related events (since 2.4)"? However, I'm not sure if it's OK to do this.
>>>>
>>>>>    So the suggestion was just what feels a
>>>>> natural extension of that (...)
>>>>>
>>>>>>> Applications that don't understand the event string (migration related or not)
>>>>>>> will just discard it (AIUI)
>>>>> (...) specially because of this as all these events have a different name.
>>>>>
>>>>> But overloading might not make sense for others IDK ... it was just a suggestion
>>>>> :) not a strong preference
>>>> Yes, I get your rationale.
>>>> I don't have a strong opinion either, so maybe let's see what other people think.
>> I don't see the need to tie this to the migration events
>> capability. Although there's overlap in the terms used, this seems more
>> like exposing a device feature via QEMU, then a migration event
>> per-se. The state changes also happen during moments unrelated to
>> migration (cover letter mentions start/stopping guest), so I assume we'd
>> like to keep those even if the management layer doesn't want to see
>> migration events.
> Yeh makes sense to me to keep that per-device flag, as it's not a generic
> migration state change.
>
>    @events: generate events for each migration state change (since 2.4)
>
> The other option is to emit event only if "migrate_events() &&
> vfio_dev_send_event", but that sounds like an overkill to overload "events"
> cap, and doesn't look like bring anything better than just keeping them
> separate.

I agree, so I will keep it a per-device flag.

> While at it, another trivial comment is maybe it's nice to have a helper to
> both update the vfio migration state, plus emitting events when necessary.

I think vfio_migration_set_state() does exactly that, no?

Thanks.
Peter Xu May 7, 2024, 3:51 p.m. UTC | #9
On Tue, May 07, 2024 at 10:47:13AM +0300, Avihai Horon wrote:
> > While at it, another trivial comment is maybe it's nice to have a helper to
> > both update the vfio migration state, plus emitting events when necessary.
> 
> I think vfio_migration_set_state() does exactly that, no?

Ah yes, looks so.  It's just that I saw some common patterns, like:

===8<===
@@ -126,11 +167,13 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
         }
 
         migration->device_state = recover_state;
+        vfio_migration_send_state_change_event(vbasedev);
 
         return ret;
     }
 
     migration->device_state = new_state;
+    vfio_migration_send_state_change_event(vbasedev);
     if (mig_state->data_fd != -1) {
         if (migration->data_fd != -1) {
             /*
@@ -157,6 +200,7 @@ reset_device:
     }
 
     migration->device_state = VFIO_DEVICE_STATE_RUNNING;
+    vfio_migration_send_state_change_event(vbasedev);
 
     return ret;
 }
===8<===

So maybe some more internal helpers?  It doesn't look like to cover all
updates to device_state, but I really didn't read into it.  Not a huge deal
really, feel free to keep it as-is if maintainers are happy.

Thanks,
Avihai Horon May 7, 2024, 4:21 p.m. UTC | #10
On 07/05/2024 18:51, Peter Xu wrote:
> External email: Use caution opening links or attachments
>
>
> On Tue, May 07, 2024 at 10:47:13AM +0300, Avihai Horon wrote:
>>> While at it, another trivial comment is maybe it's nice to have a helper to
>>> both update the vfio migration state, plus emitting events when necessary.
>> I think vfio_migration_set_state() does exactly that, no?
> Ah yes, looks so.  It's just that I saw some common patterns, like:
>
> ===8<===
> @@ -126,11 +167,13 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
>           }
>
>           migration->device_state = recover_state;
> +        vfio_migration_send_state_change_event(vbasedev);
>
>           return ret;
>       }
>
>       migration->device_state = new_state;
> +    vfio_migration_send_state_change_event(vbasedev);
>       if (mig_state->data_fd != -1) {
>           if (migration->data_fd != -1) {
>               /*
> @@ -157,6 +200,7 @@ reset_device:
>       }
>
>       migration->device_state = VFIO_DEVICE_STATE_RUNNING;
> +    vfio_migration_send_state_change_event(vbasedev);
>
>       return ret;
>   }
> ===8<===
>
> So maybe some more internal helpers?  It doesn't look like to cover all
> updates to device_state, but I really didn't read into it.  Not a huge deal
> really, feel free to keep it as-is if maintainers are happy.

Oh, I see what you mean.

Cedric, would you like me to add the following?

===8<===

--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -122,6 +122,14 @@ static void vfio_migration_send_event(VFIODevice 
*vbasedev)
          dev->id, qom_path, 
mig_state_to_qapi_state(migration->device_state));
  }

+static void set_state(VFIODevice *vbasedev, enum vfio_device_mig_state 
state)
+{
+    VFIOMigration *migration = vbasedev->migration;
+
+    migration->device_state = state;
+    vfio_migration_send_event(vbasedev);
+}
+
  static int vfio_migration_set_state(VFIODevice *vbasedev,
                                      enum vfio_device_mig_state new_state,
                                      enum vfio_device_mig_state 
recover_state)
@@ -171,14 +179,12 @@ static int vfio_migration_set_state(VFIODevice 
*vbasedev,
              goto reset_device;
          }

-        migration->device_state = recover_state;
-        vfio_migration_send_event(vbasedev);
+        set_state(vbasedev, recover_state);

          return ret;
      }

-    migration->device_state = new_state;
-    vfio_migration_send_event(vbasedev);
+    set_state(vbasedev, new_state);
      if (mig_state->data_fd != -1) {
          if (migration->data_fd != -1) {
              /*
@@ -204,8 +210,7 @@ reset_device:
                   strerror(errno));
      }

-    migration->device_state = VFIO_DEVICE_STATE_RUNNING;
-    vfio_migration_send_event(vbasedev);
+    set_state(vbasedev, VFIO_DEVICE_STATE_RUNNING);

      return ret;
  }

===8<===
diff mbox series

Patch

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index b9da6c08ef..3ec5f2425e 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -115,6 +115,7 @@  typedef struct VFIODevice {
     bool no_mmap;
     bool ram_block_discard_allowed;
     OnOffAuto enable_migration;
+    bool migration_events;
     VFIODeviceOps *ops;
     unsigned int num_irqs;
     unsigned int num_regions;
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 06ae40969b..6bbccf6545 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -24,6 +24,7 @@ 
 #include "migration/register.h"
 #include "migration/blocker.h"
 #include "qapi/error.h"
+#include "qapi/qapi-events-vfio.h"
 #include "exec/ramlist.h"
 #include "exec/ram_addr.h"
 #include "pci.h"
@@ -80,6 +81,46 @@  static const char *mig_state_to_str(enum vfio_device_mig_state state)
     }
 }
 
+static VFIODeviceMigState
+mig_state_to_qapi_state(enum vfio_device_mig_state state)
+{
+    switch (state) {
+    case VFIO_DEVICE_STATE_STOP:
+        return QAPI_VFIO_DEVICE_MIG_STATE_STOP;
+    case VFIO_DEVICE_STATE_RUNNING:
+        return QAPI_VFIO_DEVICE_MIG_STATE_RUNNING;
+    case VFIO_DEVICE_STATE_STOP_COPY:
+        return QAPI_VFIO_DEVICE_MIG_STATE_STOP_COPY;
+    case VFIO_DEVICE_STATE_RESUMING:
+        return QAPI_VFIO_DEVICE_MIG_STATE_RESUMING;
+    case VFIO_DEVICE_STATE_RUNNING_P2P:
+        return QAPI_VFIO_DEVICE_MIG_STATE_RUNNING_P2P;
+    case VFIO_DEVICE_STATE_PRE_COPY:
+        return QAPI_VFIO_DEVICE_MIG_STATE_PRE_COPY;
+    case VFIO_DEVICE_STATE_PRE_COPY_P2P:
+        return QAPI_VFIO_DEVICE_MIG_STATE_PRE_COPY_P2P;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void vfio_migration_send_state_change_event(VFIODevice *vbasedev)
+{
+    VFIOMigration *migration = vbasedev->migration;
+    const char *id;
+    Object *obj;
+
+    if (!vbasedev->migration_events) {
+        return;
+    }
+
+    obj = vbasedev->ops->vfio_get_object(vbasedev);
+    id = object_get_canonical_path_component(obj);
+
+    qapi_event_send_vfio_device_mig_state_changed(
+        id, mig_state_to_qapi_state(migration->device_state));
+}
+
 static int vfio_migration_set_state(VFIODevice *vbasedev,
                                     enum vfio_device_mig_state new_state,
                                     enum vfio_device_mig_state recover_state)
@@ -126,11 +167,13 @@  static int vfio_migration_set_state(VFIODevice *vbasedev,
         }
 
         migration->device_state = recover_state;
+        vfio_migration_send_state_change_event(vbasedev);
 
         return ret;
     }
 
     migration->device_state = new_state;
+    vfio_migration_send_state_change_event(vbasedev);
     if (mig_state->data_fd != -1) {
         if (migration->data_fd != -1) {
             /*
@@ -157,6 +200,7 @@  reset_device:
     }
 
     migration->device_state = VFIO_DEVICE_STATE_RUNNING;
+    vfio_migration_send_state_change_event(vbasedev);
 
     return ret;
 }
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 64780d1b79..8840602c50 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3362,6 +3362,8 @@  static Property vfio_pci_dev_properties[] = {
                     VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
     DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice,
                             vbasedev.enable_migration, ON_OFF_AUTO_AUTO),
+    DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice,
+                     vbasedev.migration_events, false),
     DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
     DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice,
                      vbasedev.ram_block_discard_allowed, false),