Message ID | 1585084154-29461-9-git-send-email-kwankhede@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add migration support for VFIO devices | expand |
On Wed, 25 Mar 2020 02:39:06 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > Define flags to be used as delimeter in migration file stream. > Added .save_setup and .save_cleanup functions. Mapped & unmapped migration > region from these functions at source during saving or pre-copy phase. > Set VFIO device state depending on VM's state. During live migration, VM is > running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO > device. During save-restore, VM is paused, _SAVING state is set for VFIO device. > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > Reviewed-by: Neo Jia <cjia@nvidia.com> > --- > hw/vfio/migration.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++ > hw/vfio/trace-events | 2 ++ > 2 files changed, 78 insertions(+) > > diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c > index 22ded9d28cf3..033f76526e49 100644 > --- a/hw/vfio/migration.c > +++ b/hw/vfio/migration.c > @@ -8,6 +8,7 @@ > */ > > #include "qemu/osdep.h" > +#include "qemu/main-loop.h" > #include <linux/vfio.h> > > #include "sysemu/runstate.h" > @@ -24,6 +25,17 @@ > #include "pci.h" > #include "trace.h" > > +/* > + * Flags used as delimiter: > + * 0xffffffff => MSB 32-bit all 1s > + * 0xef10 => emulated (virtual) function IO > + * 0x0000 => 16-bits reserved for flags > + */ > +#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) > +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) > +#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) > +#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) > + > static void vfio_migration_region_exit(VFIODevice *vbasedev) > { > VFIOMigration *migration = vbasedev->migration; > @@ -126,6 +138,69 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, > return 0; > } > > +/* ---------------------------------------------------------------------- */ > + > +static int vfio_save_setup(QEMUFile *f, void *opaque) > +{ > + VFIODevice *vbasedev = opaque; > + VFIOMigration *migration = vbasedev->migration; > + int ret; > + > + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); > + > + if (migration->region.mmaps) { > + qemu_mutex_lock_iothread(); > + ret = vfio_region_mmap(&migration->region); > + qemu_mutex_unlock_iothread(); > + if (ret) { > + error_report("%s: Failed to mmap VFIO migration region %d: %s", > + vbasedev->name, migration->region.index, > + strerror(-ret)); > + return ret; > + } > + } > + > + ret = vfio_migration_set_state(vbasedev, ~0, VFIO_DEVICE_STATE_SAVING); > + if (ret) { > + error_report("%s: Failed to set state SAVING", vbasedev->name); > + return ret; > + } > + > + /* > + * Save migration region size. This is used to verify migration region size > + * is greater than or equal to migration region size at destination > + */ > + qemu_put_be64(f, migration->region.size); Is this requirement supported by the uapi? The vendor driver operates within the migration region, but it has no requirement to use the full extent of the region. Shouldn't we instead insert the version string from versioning API Yan proposed? Is this were we might choose to use an interface via the vfio API rather than sysfs if we had one? > + > + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); > + > + ret = qemu_file_get_error(f); > + if (ret) { > + return ret; > + } > + > + trace_vfio_save_setup(vbasedev->name); > + return 0; > +} > + > +static void vfio_save_cleanup(void *opaque) > +{ > + VFIODevice *vbasedev = opaque; > + VFIOMigration *migration = vbasedev->migration; > + > + if (migration->region.mmaps) { > + vfio_region_unmap(&migration->region); > + } > + trace_vfio_save_cleanup(vbasedev->name); > +} > + > +static SaveVMHandlers savevm_vfio_handlers = { > + .save_setup = vfio_save_setup, > + .save_cleanup = vfio_save_cleanup, > +}; > + > +/* ---------------------------------------------------------------------- */ > + > static void vfio_vmstate_change(void *opaque, int running, RunState state) > { > VFIODevice *vbasedev = opaque; > @@ -191,6 +266,7 @@ static int vfio_migration_init(VFIODevice *vbasedev, > return ret; > } > > + register_savevm_live("vfio", -1, 1, &savevm_vfio_handlers, vbasedev); > vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, > vbasedev); > > diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > index 69503228f20e..4bb43f18f315 100644 > --- a/hw/vfio/trace-events > +++ b/hw/vfio/trace-events > @@ -149,3 +149,5 @@ vfio_migration_probe(char *name, uint32_t index) " (%s) Region %d" > vfio_migration_set_state(char *name, uint32_t state) " (%s) state %d" > vfio_vmstate_change(char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" > vfio_migration_state_notifier(char *name, int state) " (%s) state %d" > +vfio_save_setup(char *name) " (%s)" > +vfio_save_cleanup(char *name) " (%s)"
* Kirti Wankhede (kwankhede@nvidia.com) wrote: > Define flags to be used as delimeter in migration file stream. > Added .save_setup and .save_cleanup functions. Mapped & unmapped migration > region from these functions at source during saving or pre-copy phase. > Set VFIO device state depending on VM's state. During live migration, VM is > running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO > device. During save-restore, VM is paused, _SAVING state is set for VFIO device. > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > Reviewed-by: Neo Jia <cjia@nvidia.com> > --- > hw/vfio/migration.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++ > hw/vfio/trace-events | 2 ++ > 2 files changed, 78 insertions(+) > > diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c > index 22ded9d28cf3..033f76526e49 100644 > --- a/hw/vfio/migration.c > +++ b/hw/vfio/migration.c > @@ -8,6 +8,7 @@ > */ > > #include "qemu/osdep.h" > +#include "qemu/main-loop.h" > #include <linux/vfio.h> > > #include "sysemu/runstate.h" > @@ -24,6 +25,17 @@ > #include "pci.h" > #include "trace.h" > > +/* > + * Flags used as delimiter: > + * 0xffffffff => MSB 32-bit all 1s > + * 0xef10 => emulated (virtual) function IO > + * 0x0000 => 16-bits reserved for flags > + */ > +#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) > +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) > +#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) > +#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) > + > static void vfio_migration_region_exit(VFIODevice *vbasedev) > { > VFIOMigration *migration = vbasedev->migration; > @@ -126,6 +138,69 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, > return 0; > } > > +/* ---------------------------------------------------------------------- */ > + > +static int vfio_save_setup(QEMUFile *f, void *opaque) > +{ > + VFIODevice *vbasedev = opaque; > + VFIOMigration *migration = vbasedev->migration; > + int ret; > + > + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); > + > + if (migration->region.mmaps) { > + qemu_mutex_lock_iothread(); > + ret = vfio_region_mmap(&migration->region); > + qemu_mutex_unlock_iothread(); > + if (ret) { > + error_report("%s: Failed to mmap VFIO migration region %d: %s", > + vbasedev->name, migration->region.index, > + strerror(-ret)); > + return ret; > + } > + } > + > + ret = vfio_migration_set_state(vbasedev, ~0, VFIO_DEVICE_STATE_SAVING); > + if (ret) { > + error_report("%s: Failed to set state SAVING", vbasedev->name); > + return ret; > + } > + > + /* > + * Save migration region size. This is used to verify migration region size > + * is greater than or equal to migration region size at destination > + */ > + qemu_put_be64(f, migration->region.size); > + > + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); OK, good, so now we can change that to something else if you want to migrate something extra in the future. > + ret = qemu_file_get_error(f); > + if (ret) { > + return ret; > + } > + > + trace_vfio_save_setup(vbasedev->name); I'd put that trace at the start of the function. > + return 0; > +} > + > +static void vfio_save_cleanup(void *opaque) > +{ > + VFIODevice *vbasedev = opaque; > + VFIOMigration *migration = vbasedev->migration; > + > + if (migration->region.mmaps) { > + vfio_region_unmap(&migration->region); > + } > + trace_vfio_save_cleanup(vbasedev->name); > +} > + > +static SaveVMHandlers savevm_vfio_handlers = { > + .save_setup = vfio_save_setup, > + .save_cleanup = vfio_save_cleanup, > +}; > + > +/* ---------------------------------------------------------------------- */ > + > static void vfio_vmstate_change(void *opaque, int running, RunState state) > { > VFIODevice *vbasedev = opaque; > @@ -191,6 +266,7 @@ static int vfio_migration_init(VFIODevice *vbasedev, > return ret; > } > > + register_savevm_live("vfio", -1, 1, &savevm_vfio_handlers, vbasedev); That doesn't look right to me; firstly the -1 should now be VMSTATE_INSTANCE_ID_ANY - after the recent change in commit 1df2c9a Have you tried this with two vfio devices? This is quite rare - it's an iterative device that can have multiple instances; if you look at 'ram' for example, all the RAM instances are handled inside the save_setup/save for the one instance of 'ram'. I think here you're trying to register an individual vfio device, so if you had multiple devices you'd see this called twice. So either you need to make vfio_save_* do all of the devices in a loop - which feels like a bad idea; or replace "vfio" in that call by a unique device name; as long as your device has a bus path then you should be able to use the same trick vmstate_register_with_alias_id does, and use I think, vmstate_if_get_id(VMSTAETE_IF(vbasedev)). but it might take some experimentation since this is an odd use. > vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, > vbasedev); > > diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > index 69503228f20e..4bb43f18f315 100644 > --- a/hw/vfio/trace-events > +++ b/hw/vfio/trace-events > @@ -149,3 +149,5 @@ vfio_migration_probe(char *name, uint32_t index) " (%s) Region %d" > vfio_migration_set_state(char *name, uint32_t state) " (%s) state %d" > vfio_vmstate_change(char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" > vfio_migration_state_notifier(char *name, int state) " (%s) state %d" > +vfio_save_setup(char *name) " (%s)" > +vfio_save_cleanup(char *name) " (%s)" > -- > 2.7.0 > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On 3/26/2020 2:32 AM, Alex Williamson wrote: > On Wed, 25 Mar 2020 02:39:06 +0530 > Kirti Wankhede <kwankhede@nvidia.com> wrote: > >> Define flags to be used as delimeter in migration file stream. >> Added .save_setup and .save_cleanup functions. Mapped & unmapped migration >> region from these functions at source during saving or pre-copy phase. >> Set VFIO device state depending on VM's state. During live migration, VM is >> running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO >> device. During save-restore, VM is paused, _SAVING state is set for VFIO device. >> >> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> >> Reviewed-by: Neo Jia <cjia@nvidia.com> >> --- >> hw/vfio/migration.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++ >> hw/vfio/trace-events | 2 ++ >> 2 files changed, 78 insertions(+) >> >> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c >> index 22ded9d28cf3..033f76526e49 100644 >> --- a/hw/vfio/migration.c >> +++ b/hw/vfio/migration.c >> @@ -8,6 +8,7 @@ >> */ >> >> #include "qemu/osdep.h" >> +#include "qemu/main-loop.h" >> #include <linux/vfio.h> >> >> #include "sysemu/runstate.h" >> @@ -24,6 +25,17 @@ >> #include "pci.h" >> #include "trace.h" >> >> +/* >> + * Flags used as delimiter: >> + * 0xffffffff => MSB 32-bit all 1s >> + * 0xef10 => emulated (virtual) function IO >> + * 0x0000 => 16-bits reserved for flags >> + */ >> +#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) >> +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) >> +#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) >> +#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) >> + >> static void vfio_migration_region_exit(VFIODevice *vbasedev) >> { >> VFIOMigration *migration = vbasedev->migration; >> @@ -126,6 +138,69 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, >> return 0; >> } >> >> +/* ---------------------------------------------------------------------- */ >> + >> +static int vfio_save_setup(QEMUFile *f, void *opaque) >> +{ >> + VFIODevice *vbasedev = opaque; >> + VFIOMigration *migration = vbasedev->migration; >> + int ret; >> + >> + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); >> + >> + if (migration->region.mmaps) { >> + qemu_mutex_lock_iothread(); >> + ret = vfio_region_mmap(&migration->region); >> + qemu_mutex_unlock_iothread(); >> + if (ret) { >> + error_report("%s: Failed to mmap VFIO migration region %d: %s", >> + vbasedev->name, migration->region.index, >> + strerror(-ret)); >> + return ret; >> + } >> + } >> + >> + ret = vfio_migration_set_state(vbasedev, ~0, VFIO_DEVICE_STATE_SAVING); >> + if (ret) { >> + error_report("%s: Failed to set state SAVING", vbasedev->name); >> + return ret; >> + } >> + >> + /* >> + * Save migration region size. This is used to verify migration region size >> + * is greater than or equal to migration region size at destination >> + */ >> + qemu_put_be64(f, migration->region.size); > > Is this requirement supported by the uapi? Yes, on UAPI thread we discussed this: * For the user application, data is opaque. The user application should write * data in the same order as the data is received and the data should be of * same transaction size at the source. data should be same transaction size, so migration region size should be greater than or equal to the size at source when verifying at destination. > The vendor driver operates > within the migration region, but it has no requirement to use the full > extent of the region. Shouldn't we instead insert the version string > from versioning API Yan proposed? Is this were we might choose to use > an interface via the vfio API rather than sysfs if we had one? > VFIO API cannot be used by libvirt or management tool stack. We need sysfs as Yan proposed to be used by libvirt or management tool stack. Thanks, Kirti >> + >> + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); >> + >> + ret = qemu_file_get_error(f); >> + if (ret) { >> + return ret; >> + } >> + >> + trace_vfio_save_setup(vbasedev->name); >> + return 0; >> +} >> + >> +static void vfio_save_cleanup(void *opaque) >> +{ >> + VFIODevice *vbasedev = opaque; >> + VFIOMigration *migration = vbasedev->migration; >> + >> + if (migration->region.mmaps) { >> + vfio_region_unmap(&migration->region); >> + } >> + trace_vfio_save_cleanup(vbasedev->name); >> +} >> + >> +static SaveVMHandlers savevm_vfio_handlers = { >> + .save_setup = vfio_save_setup, >> + .save_cleanup = vfio_save_cleanup, >> +}; >> + >> +/* ---------------------------------------------------------------------- */ >> + >> static void vfio_vmstate_change(void *opaque, int running, RunState state) >> { >> VFIODevice *vbasedev = opaque; >> @@ -191,6 +266,7 @@ static int vfio_migration_init(VFIODevice *vbasedev, >> return ret; >> } >> >> + register_savevm_live("vfio", -1, 1, &savevm_vfio_handlers, vbasedev); >> vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, >> vbasedev); >> >> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events >> index 69503228f20e..4bb43f18f315 100644 >> --- a/hw/vfio/trace-events >> +++ b/hw/vfio/trace-events >> @@ -149,3 +149,5 @@ vfio_migration_probe(char *name, uint32_t index) " (%s) Region %d" >> vfio_migration_set_state(char *name, uint32_t state) " (%s) state %d" >> vfio_vmstate_change(char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" >> vfio_migration_state_notifier(char *name, int state) " (%s) state %d" >> +vfio_save_setup(char *name) " (%s)" >> +vfio_save_cleanup(char *name) " (%s)" >
On 4/1/2020 11:06 PM, Dr. David Alan Gilbert wrote: > * Kirti Wankhede (kwankhede@nvidia.com) wrote: >> Define flags to be used as delimeter in migration file stream. >> Added .save_setup and .save_cleanup functions. Mapped & unmapped migration >> region from these functions at source during saving or pre-copy phase. >> Set VFIO device state depending on VM's state. During live migration, VM is >> running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO >> device. During save-restore, VM is paused, _SAVING state is set for VFIO device. >> >> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> >> Reviewed-by: Neo Jia <cjia@nvidia.com> >> --- >> hw/vfio/migration.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++ >> hw/vfio/trace-events | 2 ++ >> 2 files changed, 78 insertions(+) >> >> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c >> index 22ded9d28cf3..033f76526e49 100644 >> --- a/hw/vfio/migration.c >> +++ b/hw/vfio/migration.c >> @@ -8,6 +8,7 @@ >> */ >> >> #include "qemu/osdep.h" >> +#include "qemu/main-loop.h" >> #include <linux/vfio.h> >> >> #include "sysemu/runstate.h" >> @@ -24,6 +25,17 @@ >> #include "pci.h" >> #include "trace.h" >> >> +/* >> + * Flags used as delimiter: >> + * 0xffffffff => MSB 32-bit all 1s >> + * 0xef10 => emulated (virtual) function IO >> + * 0x0000 => 16-bits reserved for flags >> + */ >> +#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) >> +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) >> +#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) >> +#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) >> + >> static void vfio_migration_region_exit(VFIODevice *vbasedev) >> { >> VFIOMigration *migration = vbasedev->migration; >> @@ -126,6 +138,69 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, >> return 0; >> } >> >> +/* ---------------------------------------------------------------------- */ >> + >> +static int vfio_save_setup(QEMUFile *f, void *opaque) >> +{ >> + VFIODevice *vbasedev = opaque; >> + VFIOMigration *migration = vbasedev->migration; >> + int ret; >> + >> + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); >> + >> + if (migration->region.mmaps) { >> + qemu_mutex_lock_iothread(); >> + ret = vfio_region_mmap(&migration->region); >> + qemu_mutex_unlock_iothread(); >> + if (ret) { >> + error_report("%s: Failed to mmap VFIO migration region %d: %s", >> + vbasedev->name, migration->region.index, >> + strerror(-ret)); >> + return ret; >> + } >> + } >> + >> + ret = vfio_migration_set_state(vbasedev, ~0, VFIO_DEVICE_STATE_SAVING); >> + if (ret) { >> + error_report("%s: Failed to set state SAVING", vbasedev->name); >> + return ret; >> + } >> + >> + /* >> + * Save migration region size. This is used to verify migration region size >> + * is greater than or equal to migration region size at destination >> + */ >> + qemu_put_be64(f, migration->region.size); >> + >> + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); > > OK, good, so now we can change that to something else if you want to > migrate something extra in the future. > >> + ret = qemu_file_get_error(f); >> + if (ret) { >> + return ret; >> + } >> + >> + trace_vfio_save_setup(vbasedev->name); > > I'd put that trace at the start of the function. > >> + return 0; >> +} >> + >> +static void vfio_save_cleanup(void *opaque) >> +{ >> + VFIODevice *vbasedev = opaque; >> + VFIOMigration *migration = vbasedev->migration; >> + >> + if (migration->region.mmaps) { >> + vfio_region_unmap(&migration->region); >> + } >> + trace_vfio_save_cleanup(vbasedev->name); >> +} >> + >> +static SaveVMHandlers savevm_vfio_handlers = { >> + .save_setup = vfio_save_setup, >> + .save_cleanup = vfio_save_cleanup, >> +}; >> + >> +/* ---------------------------------------------------------------------- */ >> + >> static void vfio_vmstate_change(void *opaque, int running, RunState state) >> { >> VFIODevice *vbasedev = opaque; >> @@ -191,6 +266,7 @@ static int vfio_migration_init(VFIODevice *vbasedev, >> return ret; >> } >> >> + register_savevm_live("vfio", -1, 1, &savevm_vfio_handlers, vbasedev); > > That doesn't look right to me; firstly the -1 should now be > VMSTATE_INSTANCE_ID_ANY - after the recent change in commit 1df2c9a > > Have you tried this with two vfio devices? Yes. And it works with multiple vfio devices. Thanks, Kirti > This is quite rare - it's an iterative device that can have > multiple instances; if you look at 'ram' for example, all the RAM > instances are handled inside the save_setup/save for the one instance of > 'ram'. I think here you're trying to register an individual vfio > device, so if you had multiple devices you'd see this called twice. > > So either you need to make vfio_save_* do all of the devices in a loop - > which feels like a bad idea; or replace "vfio" in that call by a unique > device name; as long as your device has a bus path then you should be > able to use the same trick vmstate_register_with_alias_id does, and use > I think, vmstate_if_get_id(VMSTAETE_IF(vbasedev)). > > but it might take some experimentation since this is an odd use. > >> vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, >> vbasedev); >> >> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events >> index 69503228f20e..4bb43f18f315 100644 >> --- a/hw/vfio/trace-events >> +++ b/hw/vfio/trace-events >> @@ -149,3 +149,5 @@ vfio_migration_probe(char *name, uint32_t index) " (%s) Region %d" >> vfio_migration_set_state(char *name, uint32_t state) " (%s) state %d" >> vfio_vmstate_change(char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" >> vfio_migration_state_notifier(char *name, int state) " (%s) state %d" >> +vfio_save_setup(char *name) " (%s)" >> +vfio_save_cleanup(char *name) " (%s)" >> -- >> 2.7.0 >> > -- > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK >
On Tue, 5 May 2020 04:49:10 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > On 3/26/2020 2:32 AM, Alex Williamson wrote: > > On Wed, 25 Mar 2020 02:39:06 +0530 > > Kirti Wankhede <kwankhede@nvidia.com> wrote: > > > >> Define flags to be used as delimeter in migration file stream. > >> Added .save_setup and .save_cleanup functions. Mapped & unmapped migration > >> region from these functions at source during saving or pre-copy phase. > >> Set VFIO device state depending on VM's state. During live migration, VM is > >> running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO > >> device. During save-restore, VM is paused, _SAVING state is set for VFIO device. > >> > >> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > >> Reviewed-by: Neo Jia <cjia@nvidia.com> > >> --- > >> hw/vfio/migration.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++ > >> hw/vfio/trace-events | 2 ++ > >> 2 files changed, 78 insertions(+) > >> > >> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c > >> index 22ded9d28cf3..033f76526e49 100644 > >> --- a/hw/vfio/migration.c > >> +++ b/hw/vfio/migration.c > >> @@ -8,6 +8,7 @@ > >> */ > >> > >> #include "qemu/osdep.h" > >> +#include "qemu/main-loop.h" > >> #include <linux/vfio.h> > >> > >> #include "sysemu/runstate.h" > >> @@ -24,6 +25,17 @@ > >> #include "pci.h" > >> #include "trace.h" > >> > >> +/* > >> + * Flags used as delimiter: > >> + * 0xffffffff => MSB 32-bit all 1s > >> + * 0xef10 => emulated (virtual) function IO > >> + * 0x0000 => 16-bits reserved for flags > >> + */ > >> +#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) > >> +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) > >> +#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) > >> +#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) > >> + > >> static void vfio_migration_region_exit(VFIODevice *vbasedev) > >> { > >> VFIOMigration *migration = vbasedev->migration; > >> @@ -126,6 +138,69 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, > >> return 0; > >> } > >> > >> +/* ---------------------------------------------------------------------- */ > >> + > >> +static int vfio_save_setup(QEMUFile *f, void *opaque) > >> +{ > >> + VFIODevice *vbasedev = opaque; > >> + VFIOMigration *migration = vbasedev->migration; > >> + int ret; > >> + > >> + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); > >> + > >> + if (migration->region.mmaps) { > >> + qemu_mutex_lock_iothread(); > >> + ret = vfio_region_mmap(&migration->region); > >> + qemu_mutex_unlock_iothread(); > >> + if (ret) { > >> + error_report("%s: Failed to mmap VFIO migration region %d: %s", > >> + vbasedev->name, migration->region.index, > >> + strerror(-ret)); > >> + return ret; > >> + } > >> + } > >> + > >> + ret = vfio_migration_set_state(vbasedev, ~0, VFIO_DEVICE_STATE_SAVING); > >> + if (ret) { > >> + error_report("%s: Failed to set state SAVING", vbasedev->name); > >> + return ret; > >> + } > >> + > >> + /* > >> + * Save migration region size. This is used to verify migration region size > >> + * is greater than or equal to migration region size at destination > >> + */ > >> + qemu_put_be64(f, migration->region.size); > > > > Is this requirement supported by the uapi? > > Yes, on UAPI thread we discussed this: > > * For the user application, data is opaque. The user application > should write > * data in the same order as the data is received and the data should be of > * same transaction size at the source. > > data should be same transaction size, so migration region size should be > greater than or equal to the size at source when verifying at destination. We are that user application for which the data is opaque, therefore we should make no assumptions about how the vendor driver makes use of their region. If we get a transaction that exceeds the end of the region, I agree, that would be an error. But we have no business predicting that such a transaction might occur if the vendor driver indicates it can support the migration. > > The vendor driver operates > > within the migration region, but it has no requirement to use the full > > extent of the region. Shouldn't we instead insert the version string > > from versioning API Yan proposed? Is this were we might choose to use > > an interface via the vfio API rather than sysfs if we had one? > > > > VFIO API cannot be used by libvirt or management tool stack. We need > sysfs as Yan proposed to be used by libvirt or management tool stack. It's been a long time, but that doesn't seem like what I was asking. The sysfs version checking is used to select a target that is likely to succeed, but the migration stream is still generated by a user and the vendor driver is still ultimately responsible for validating that stream. I would hope that a vendor migration stream therefore starts with information similar to that found in the sysfs interface, allowing the receiving vendor driver to validate the source device and vendor software version, such that we can fail an incoming migration that the vendor driver deems incompatible. Ideally the vendor driver might also include consistency and sequence checking throughout the stream to prevent a malicious user from exploiting the internal operation of the vendor driver. Thanks, Alex
On Tue, May 05, 2020 at 12:37:26PM +0800, Alex Williamson wrote: > On Tue, 5 May 2020 04:49:10 +0530 > Kirti Wankhede <kwankhede@nvidia.com> wrote: > > > On 3/26/2020 2:32 AM, Alex Williamson wrote: > > > On Wed, 25 Mar 2020 02:39:06 +0530 > > > Kirti Wankhede <kwankhede@nvidia.com> wrote: > > > > > >> Define flags to be used as delimeter in migration file stream. > > >> Added .save_setup and .save_cleanup functions. Mapped & unmapped migration > > >> region from these functions at source during saving or pre-copy phase. > > >> Set VFIO device state depending on VM's state. During live migration, VM is > > >> running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO > > >> device. During save-restore, VM is paused, _SAVING state is set for VFIO device. > > >> > > >> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > > >> Reviewed-by: Neo Jia <cjia@nvidia.com> > > >> --- > > >> hw/vfio/migration.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++ > > >> hw/vfio/trace-events | 2 ++ > > >> 2 files changed, 78 insertions(+) > > >> > > >> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c > > >> index 22ded9d28cf3..033f76526e49 100644 > > >> --- a/hw/vfio/migration.c > > >> +++ b/hw/vfio/migration.c > > >> @@ -8,6 +8,7 @@ > > >> */ > > >> > > >> #include "qemu/osdep.h" > > >> +#include "qemu/main-loop.h" > > >> #include <linux/vfio.h> > > >> > > >> #include "sysemu/runstate.h" > > >> @@ -24,6 +25,17 @@ > > >> #include "pci.h" > > >> #include "trace.h" > > >> > > >> +/* > > >> + * Flags used as delimiter: > > >> + * 0xffffffff => MSB 32-bit all 1s > > >> + * 0xef10 => emulated (virtual) function IO > > >> + * 0x0000 => 16-bits reserved for flags > > >> + */ > > >> +#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) > > >> +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) > > >> +#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) > > >> +#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) > > >> + > > >> static void vfio_migration_region_exit(VFIODevice *vbasedev) > > >> { > > >> VFIOMigration *migration = vbasedev->migration; > > >> @@ -126,6 +138,69 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, > > >> return 0; > > >> } > > >> > > >> +/* ---------------------------------------------------------------------- */ > > >> + > > >> +static int vfio_save_setup(QEMUFile *f, void *opaque) > > >> +{ > > >> + VFIODevice *vbasedev = opaque; > > >> + VFIOMigration *migration = vbasedev->migration; > > >> + int ret; > > >> + > > >> + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); > > >> + > > >> + if (migration->region.mmaps) { > > >> + qemu_mutex_lock_iothread(); > > >> + ret = vfio_region_mmap(&migration->region); > > >> + qemu_mutex_unlock_iothread(); > > >> + if (ret) { > > >> + error_report("%s: Failed to mmap VFIO migration region %d: %s", > > >> + vbasedev->name, migration->region.index, > > >> + strerror(-ret)); > > >> + return ret; > > >> + } > > >> + } > > >> + > > >> + ret = vfio_migration_set_state(vbasedev, ~0, VFIO_DEVICE_STATE_SAVING); > > >> + if (ret) { > > >> + error_report("%s: Failed to set state SAVING", vbasedev->name); > > >> + return ret; > > >> + } > > >> + > > >> + /* > > >> + * Save migration region size. This is used to verify migration region size > > >> + * is greater than or equal to migration region size at destination > > >> + */ > > >> + qemu_put_be64(f, migration->region.size); > > > > > > Is this requirement supported by the uapi? > > > > Yes, on UAPI thread we discussed this: > > > > * For the user application, data is opaque. The user application > > should write > > * data in the same order as the data is received and the data should be of > > * same transaction size at the source. > > > > data should be same transaction size, so migration region size should be > > greater than or equal to the size at source when verifying at destination. > > We are that user application for which the data is opaque, therefore we > should make no assumptions about how the vendor driver makes use of > their region. If we get a transaction that exceeds the end of the > region, I agree, that would be an error. But we have no business > predicting that such a transaction might occur if the vendor driver > indicates it can support the migration. > > > > The vendor driver operates > > > within the migration region, but it has no requirement to use the full > > > extent of the region. Shouldn't we instead insert the version string > > > from versioning API Yan proposed? Is this were we might choose to use > > > an interface via the vfio API rather than sysfs if we had one? > > > > > > > VFIO API cannot be used by libvirt or management tool stack. We need > > sysfs as Yan proposed to be used by libvirt or management tool stack. > > It's been a long time, but that doesn't seem like what I was asking. > The sysfs version checking is used to select a target that is likely to > succeed, but the migration stream is still generated by a user and the > vendor driver is still ultimately responsible for validating that > stream. I would hope that a vendor migration stream therefore starts > with information similar to that found in the sysfs interface, allowing > the receiving vendor driver to validate the source device and vendor > software version, such that we can fail an incoming migration that the > vendor driver deems incompatible. Ideally the vendor driver might also > include consistency and sequence checking throughout the stream to > prevent a malicious user from exploiting the internal operation of the > vendor driver. Thanks, > maybe we can add a rw field migration_version in struct vfio_device_migration_info besides sysfs interface ? when reading it in src, it gets the same string as that from sysfs; when writing it in target, it returns success or not to check compatibility and fails the migration early in setup phase. Thanks Yan.
On Wed, 6 May 2020 02:38:46 -0400 Yan Zhao <yan.y.zhao@intel.com> wrote: > On Tue, May 05, 2020 at 12:37:26PM +0800, Alex Williamson wrote: > > It's been a long time, but that doesn't seem like what I was asking. > > The sysfs version checking is used to select a target that is likely to > > succeed, but the migration stream is still generated by a user and the > > vendor driver is still ultimately responsible for validating that > > stream. I would hope that a vendor migration stream therefore starts > > with information similar to that found in the sysfs interface, allowing > > the receiving vendor driver to validate the source device and vendor > > software version, such that we can fail an incoming migration that the > > vendor driver deems incompatible. Ideally the vendor driver might also > > include consistency and sequence checking throughout the stream to > > prevent a malicious user from exploiting the internal operation of the > > vendor driver. Thanks, Some kind of somewhat standardized marker for driver/version seems like a good idea. Further checking is also a good idea, but I think the details of that need to be left to the individual drivers. > > > maybe we can add a rw field migration_version in > struct vfio_device_migration_info besides sysfs interface ? > > when reading it in src, it gets the same string as that from sysfs; > when writing it in target, it returns success or not to check > compatibility and fails the migration early in setup phase. Getting both populated from the same source seems like a good idea. Not sure if a string is the best value to put into a migration stream; maybe the sysfs interface can derive a human-readable string from a more compact value to be put into the migration region (and ultimately the stream)? Might be overengineering, just thinking out aloud here.
* Cornelia Huck (cohuck@redhat.com) wrote: > On Wed, 6 May 2020 02:38:46 -0400 > Yan Zhao <yan.y.zhao@intel.com> wrote: > > > On Tue, May 05, 2020 at 12:37:26PM +0800, Alex Williamson wrote: > > > It's been a long time, but that doesn't seem like what I was asking. > > > The sysfs version checking is used to select a target that is likely to > > > succeed, but the migration stream is still generated by a user and the > > > vendor driver is still ultimately responsible for validating that > > > stream. I would hope that a vendor migration stream therefore starts > > > with information similar to that found in the sysfs interface, allowing > > > the receiving vendor driver to validate the source device and vendor > > > software version, such that we can fail an incoming migration that the > > > vendor driver deems incompatible. Ideally the vendor driver might also > > > include consistency and sequence checking throughout the stream to > > > prevent a malicious user from exploiting the internal operation of the > > > vendor driver. Thanks, > > Some kind of somewhat standardized marker for driver/version seems like > a good idea. Further checking is also a good idea, but I think the > details of that need to be left to the individual drivers. Standardised markers like that would be useful; although the rules of how to compare them might be a bit vendor specific; but still - it would be good for us to be able to dump something out when it all goes wrong. > > > > > maybe we can add a rw field migration_version in > > struct vfio_device_migration_info besides sysfs interface ? > > > > when reading it in src, it gets the same string as that from sysfs; > > when writing it in target, it returns success or not to check > > compatibility and fails the migration early in setup phase. > > Getting both populated from the same source seems like a good idea. > > Not sure if a string is the best value to put into a migration stream; > maybe the sysfs interface can derive a human-readable string from a > more compact value to be put into the migration region (and ultimately > the stream)? Might be overengineering, just thinking out aloud here. A string might be OK fi you specify a little about it. Dave -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On 5/6/2020 10:23 PM, Dr. David Alan Gilbert wrote: > * Cornelia Huck (cohuck@redhat.com) wrote: >> On Wed, 6 May 2020 02:38:46 -0400 >> Yan Zhao <yan.y.zhao@intel.com> wrote: >> >>> On Tue, May 05, 2020 at 12:37:26PM +0800, Alex Williamson wrote: >>>> It's been a long time, but that doesn't seem like what I was asking. >>>> The sysfs version checking is used to select a target that is likely to >>>> succeed, but the migration stream is still generated by a user and the >>>> vendor driver is still ultimately responsible for validating that >>>> stream. I would hope that a vendor migration stream therefore starts >>>> with information similar to that found in the sysfs interface, allowing >>>> the receiving vendor driver to validate the source device and vendor >>>> software version, such that we can fail an incoming migration that the >>>> vendor driver deems incompatible. Ideally the vendor driver might also >>>> include consistency and sequence checking throughout the stream to >>>> prevent a malicious user from exploiting the internal operation of the >>>> vendor driver. Thanks, >> >> Some kind of somewhat standardized marker for driver/version seems like >> a good idea. Further checking is also a good idea, but I think the >> details of that need to be left to the individual drivers. > > Standardised markers like that would be useful; although the rules of > how to compare them might be a bit vendor specific; but still - it would > be good for us to be able to dump something out when it all goes wrong. > Such checking should already there in vendor driver. Vendor driver might also support across version migration. I think checking in QEMU again would be redundant. Let vendor driver handle version checks. Thanks, Kirti >>>> >>> maybe we can add a rw field migration_version in >>> struct vfio_device_migration_info besides sysfs interface ? >>> >>> when reading it in src, it gets the same string as that from sysfs; >>> when writing it in target, it returns success or not to check >>> compatibility and fails the migration early in setup phase. >> >> Getting both populated from the same source seems like a good idea. >> >> Not sure if a string is the best value to put into a migration stream; >> maybe the sysfs interface can derive a human-readable string from a >> more compact value to be put into the migration region (and ultimately >> the stream)? Might be overengineering, just thinking out aloud here. > > A string might be OK fi you specify a little about it. > > Dave > > -- > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK >
On Thu, 7 May 2020 01:00:05 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > On 5/6/2020 10:23 PM, Dr. David Alan Gilbert wrote: > > * Cornelia Huck (cohuck@redhat.com) wrote: > >> On Wed, 6 May 2020 02:38:46 -0400 > >> Yan Zhao <yan.y.zhao@intel.com> wrote: > >> > >>> On Tue, May 05, 2020 at 12:37:26PM +0800, Alex Williamson wrote: > >>>> It's been a long time, but that doesn't seem like what I was asking. > >>>> The sysfs version checking is used to select a target that is likely to > >>>> succeed, but the migration stream is still generated by a user and the > >>>> vendor driver is still ultimately responsible for validating that > >>>> stream. I would hope that a vendor migration stream therefore starts > >>>> with information similar to that found in the sysfs interface, allowing > >>>> the receiving vendor driver to validate the source device and vendor > >>>> software version, such that we can fail an incoming migration that the > >>>> vendor driver deems incompatible. Ideally the vendor driver might also > >>>> include consistency and sequence checking throughout the stream to > >>>> prevent a malicious user from exploiting the internal operation of the > >>>> vendor driver. Thanks, > >> > >> Some kind of somewhat standardized marker for driver/version seems like > >> a good idea. Further checking is also a good idea, but I think the > >> details of that need to be left to the individual drivers. > > > > Standardised markers like that would be useful; although the rules of > > how to compare them might be a bit vendor specific; but still - it would > > be good for us to be able to dump something out when it all goes wrong. > > > > Such checking should already there in vendor driver. Vendor driver might > also support across version migration. I think checking in QEMU again > would be redundant. Let vendor driver handle version checks. Of course the actual rules of what is supported and what not are vendor driver specific -- but we can still benefit from some standardization. It ensures that this checking is not forgotten, and it can help with figuring out what went wrong.
On Thu, 7 May 2020 01:00:05 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > On 5/6/2020 10:23 PM, Dr. David Alan Gilbert wrote: > > * Cornelia Huck (cohuck@redhat.com) wrote: > >> On Wed, 6 May 2020 02:38:46 -0400 > >> Yan Zhao <yan.y.zhao@intel.com> wrote: > >> > >>> On Tue, May 05, 2020 at 12:37:26PM +0800, Alex Williamson wrote: > >>>> It's been a long time, but that doesn't seem like what I was asking. > >>>> The sysfs version checking is used to select a target that is likely to > >>>> succeed, but the migration stream is still generated by a user and the > >>>> vendor driver is still ultimately responsible for validating that > >>>> stream. I would hope that a vendor migration stream therefore starts > >>>> with information similar to that found in the sysfs interface, allowing > >>>> the receiving vendor driver to validate the source device and vendor > >>>> software version, such that we can fail an incoming migration that the > >>>> vendor driver deems incompatible. Ideally the vendor driver might also > >>>> include consistency and sequence checking throughout the stream to > >>>> prevent a malicious user from exploiting the internal operation of the > >>>> vendor driver. Thanks, > >> > >> Some kind of somewhat standardized marker for driver/version seems like > >> a good idea. Further checking is also a good idea, but I think the > >> details of that need to be left to the individual drivers. > > > > Standardised markers like that would be useful; although the rules of > > how to compare them might be a bit vendor specific; but still - it would > > be good for us to be able to dump something out when it all goes wrong. > > > > Such checking should already there in vendor driver. Vendor driver might > also support across version migration. I think checking in QEMU again > would be redundant. Let vendor driver handle version checks. > > >>>> > >>> maybe we can add a rw field migration_version in > >>> struct vfio_device_migration_info besides sysfs interface ? > >>> > >>> when reading it in src, it gets the same string as that from sysfs; > >>> when writing it in target, it returns success or not to check > >>> compatibility and fails the migration early in setup phase. > >> > >> Getting both populated from the same source seems like a good idea. > >> > >> Not sure if a string is the best value to put into a migration stream; > >> maybe the sysfs interface can derive a human-readable string from a > >> more compact value to be put into the migration region (and ultimately > >> the stream)? Might be overengineering, just thinking out aloud here. > > > > A string might be OK fi you specify a little about it. I think we've already hashed through that the version is represented by a string, but interpretation of that string is reserved for the vendor driver. I believe this particular thread started out as a question of whether QEMU is right to validate target compatibility by comparing the migration region size versus the source, which I see as an overstep of leaving the compatibility testing to the vendor driver. A write exceeding the migration region is clearly a protocol violation, but unless we're going to scan the entire migration stream to look for that violation, it's the vendor driver's business where and how it exposes data within the region. IOW, different migration region sizes might suggest to be suspicious, but nothing in our specification requires that the target region is at least as big as the source. If we had a mechanism to report and test the migration version through this migration API, using similar semantics to the sysfs interface, what would we actually do with it? The vendor driver's processing of an incoming migration stream cannot rely on the user. I initially struggled with Kirti's use of "should" rather than "must" in describing this checking, but I think that might actually be correct. If a user chooses to ignore the sysfs interface for compatibility testing, or otherwise chooses to allow the data stream to be corrupted or manipulated, I think the only requirement of the vendor driver is to contain the damage to the user's device. So, I think we're really looking at whether it's a benefit to the user to be able to retrieve the version and test it on the target through the migration API. IOW, is it sufficient for QEMU to presume that a well informed agent, that has already tested the source and target device compatibility, has setup this migration and that a well supported mdev vendor driver should fail the migration gracefully if the versions are incompatible, or contain the error within the user's device otherwise, or is there value to be gained if QEMU performs a separate compatibility test? Thanks, Alex
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index 22ded9d28cf3..033f76526e49 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -8,6 +8,7 @@ */ #include "qemu/osdep.h" +#include "qemu/main-loop.h" #include <linux/vfio.h> #include "sysemu/runstate.h" @@ -24,6 +25,17 @@ #include "pci.h" #include "trace.h" +/* + * Flags used as delimiter: + * 0xffffffff => MSB 32-bit all 1s + * 0xef10 => emulated (virtual) function IO + * 0x0000 => 16-bits reserved for flags + */ +#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) +#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) +#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) + static void vfio_migration_region_exit(VFIODevice *vbasedev) { VFIOMigration *migration = vbasedev->migration; @@ -126,6 +138,69 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, return 0; } +/* ---------------------------------------------------------------------- */ + +static int vfio_save_setup(QEMUFile *f, void *opaque) +{ + VFIODevice *vbasedev = opaque; + VFIOMigration *migration = vbasedev->migration; + int ret; + + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); + + if (migration->region.mmaps) { + qemu_mutex_lock_iothread(); + ret = vfio_region_mmap(&migration->region); + qemu_mutex_unlock_iothread(); + if (ret) { + error_report("%s: Failed to mmap VFIO migration region %d: %s", + vbasedev->name, migration->region.index, + strerror(-ret)); + return ret; + } + } + + ret = vfio_migration_set_state(vbasedev, ~0, VFIO_DEVICE_STATE_SAVING); + if (ret) { + error_report("%s: Failed to set state SAVING", vbasedev->name); + return ret; + } + + /* + * Save migration region size. This is used to verify migration region size + * is greater than or equal to migration region size at destination + */ + qemu_put_be64(f, migration->region.size); + + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); + + ret = qemu_file_get_error(f); + if (ret) { + return ret; + } + + trace_vfio_save_setup(vbasedev->name); + return 0; +} + +static void vfio_save_cleanup(void *opaque) +{ + VFIODevice *vbasedev = opaque; + VFIOMigration *migration = vbasedev->migration; + + if (migration->region.mmaps) { + vfio_region_unmap(&migration->region); + } + trace_vfio_save_cleanup(vbasedev->name); +} + +static SaveVMHandlers savevm_vfio_handlers = { + .save_setup = vfio_save_setup, + .save_cleanup = vfio_save_cleanup, +}; + +/* ---------------------------------------------------------------------- */ + static void vfio_vmstate_change(void *opaque, int running, RunState state) { VFIODevice *vbasedev = opaque; @@ -191,6 +266,7 @@ static int vfio_migration_init(VFIODevice *vbasedev, return ret; } + register_savevm_live("vfio", -1, 1, &savevm_vfio_handlers, vbasedev); vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, vbasedev); diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index 69503228f20e..4bb43f18f315 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -149,3 +149,5 @@ vfio_migration_probe(char *name, uint32_t index) " (%s) Region %d" vfio_migration_set_state(char *name, uint32_t state) " (%s) state %d" vfio_vmstate_change(char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" vfio_migration_state_notifier(char *name, int state) " (%s) state %d" +vfio_save_setup(char *name) " (%s)" +vfio_save_cleanup(char *name) " (%s)"