diff mbox series

[QEMU,v25,05/17] vfio: Add VM state change handler to know state of VM

Message ID 1592684486-18511-6-git-send-email-kwankhede@nvidia.com (mailing list archive)
State New, archived
Headers show
Series Add migration support for VFIO devices | expand

Commit Message

Kirti Wankhede June 20, 2020, 8:21 p.m. UTC
VM state change handler gets called on change in VM's state. This is used to set
VFIO device state to _RUNNING.

Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Neo Jia <cjia@nvidia.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 hw/vfio/migration.c           | 87 +++++++++++++++++++++++++++++++++++++++++++
 hw/vfio/trace-events          |  2 +
 include/hw/vfio/vfio-common.h |  4 ++
 3 files changed, 93 insertions(+)

Comments

Alex Williamson June 22, 2020, 10:50 p.m. UTC | #1
On Sun, 21 Jun 2020 01:51:14 +0530
Kirti Wankhede <kwankhede@nvidia.com> wrote:

> VM state change handler gets called on change in VM's state. This is used to set
> VFIO device state to _RUNNING.
> 
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Reviewed-by: Neo Jia <cjia@nvidia.com>
> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> ---
>  hw/vfio/migration.c           | 87 +++++++++++++++++++++++++++++++++++++++++++
>  hw/vfio/trace-events          |  2 +
>  include/hw/vfio/vfio-common.h |  4 ++
>  3 files changed, 93 insertions(+)
> 
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index 48ac385d80a7..fcecc0bb0874 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -10,6 +10,7 @@
>  #include "qemu/osdep.h"
>  #include <linux/vfio.h>
>  
> +#include "sysemu/runstate.h"
>  #include "hw/vfio/vfio-common.h"
>  #include "cpu.h"
>  #include "migration/migration.h"
> @@ -74,6 +75,85 @@ err:
>      return ret;
>  }
>  
> +static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
> +                                    uint32_t value)
> +{
> +    VFIOMigration *migration = vbasedev->migration;
> +    VFIORegion *region = &migration->region;
> +    uint32_t device_state;
> +    int ret;
> +
> +    ret = pread(vbasedev->fd, &device_state, sizeof(device_state),
> +                region->fd_offset + offsetof(struct vfio_device_migration_info,
> +                                              device_state));
> +    if (ret < 0) {
> +        error_report("%s: Failed to read device state %d %s",
> +                     vbasedev->name, ret, strerror(errno));
> +        return ret;
> +    }
> +
> +    device_state = (device_state & mask) | value;
> +
> +    if (!VFIO_DEVICE_STATE_VALID(device_state)) {
> +        return -EINVAL;
> +    }
> +
> +    ret = pwrite(vbasedev->fd, &device_state, sizeof(device_state),
> +                 region->fd_offset + offsetof(struct vfio_device_migration_info,
> +                                              device_state));
> +    if (ret < 0) {
> +        error_report("%s: Failed to set device state %d %s",
> +                     vbasedev->name, ret, strerror(errno));
> +
> +        ret = pread(vbasedev->fd, &device_state, sizeof(device_state),
> +                region->fd_offset + offsetof(struct vfio_device_migration_info,
> +                device_state));
> +        if (ret < 0) {
> +            error_report("%s: On failure, failed to read device state %d %s",
> +                    vbasedev->name, ret, strerror(errno));
> +            return ret;
> +        }
> +
> +        if (VFIO_DEVICE_STATE_IS_ERROR(device_state)) {
> +            error_report("%s: Device is in error state 0x%x",
> +                         vbasedev->name, device_state);
> +            return -EFAULT;
> +        }
> +    }
> +
> +    vbasedev->device_state = device_state;
> +    trace_vfio_migration_set_state(vbasedev->name, device_state);
> +    return 0;
> +}
> +
> +static void vfio_vmstate_change(void *opaque, int running, RunState state)
> +{
> +    VFIODevice *vbasedev = opaque;
> +
> +    if ((vbasedev->vm_running != running)) {
> +        int ret;
> +        uint32_t value = 0, mask = 0;
> +
> +        if (running) {
> +            value = VFIO_DEVICE_STATE_RUNNING;
> +            if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) {
> +                mask = ~VFIO_DEVICE_STATE_RESUMING;
> +            }
> +        } else {
> +            mask = ~VFIO_DEVICE_STATE_RUNNING;
> +        }
> +
> +        ret = vfio_migration_set_state(vbasedev, mask, value);
> +        if (ret) {
> +            error_report("%s: Failed to set device state 0x%x",
> +                         vbasedev->name, value & mask);


Is there nothing more we should do here?  It seems like in either the
case of an outbound migration where we can't stop the device or an
inbound migration where we can't start the device, we'd want this to
trigger an abort of the migration.  Should there at least be a TODO
comment if the reason is that QEMU migration doesn't yet support failure
here?  Thanks,

Alex

> +        }
> +        vbasedev->vm_running = running;
> +        trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),
> +                                  value & mask);
> +    }
> +}
> +
>  static int vfio_migration_init(VFIODevice *vbasedev,
>                                 struct vfio_region_info *info)
>  {
> @@ -87,8 +167,11 @@ static int vfio_migration_init(VFIODevice *vbasedev,
>                       vbasedev->name);
>          g_free(vbasedev->migration);
>          vbasedev->migration = NULL;
> +        return ret;
>      }
>  
> +    vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change,
> +                                                          vbasedev);
>      return ret;
>  }
>  
> @@ -131,6 +214,10 @@ add_blocker:
>  
>  void vfio_migration_finalize(VFIODevice *vbasedev)
>  {
> +    if (vbasedev->vm_state) {
> +        qemu_del_vm_change_state_handler(vbasedev->vm_state);
> +    }
> +
>      if (vbasedev->migration_blocker) {
>          migrate_del_blocker(vbasedev->migration_blocker);
>          error_free(vbasedev->migration_blocker);
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index fd034ac53684..14b0a86c0035 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -146,3 +146,5 @@ vfio_display_edid_write_error(void) ""
>  
>  # migration.c
>  vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d"
> +vfio_migration_set_state(char *name, uint32_t state) " (%s) state %d"
> +vfio_vmstate_change(char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d"
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index d4b268641173..3d18eb146b33 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -29,6 +29,7 @@
>  #ifdef CONFIG_LINUX
>  #include <linux/vfio.h>
>  #endif
> +#include "sysemu/sysemu.h"
>  
>  #define VFIO_MSG_PREFIX "vfio %s: "
>  
> @@ -119,6 +120,9 @@ typedef struct VFIODevice {
>      unsigned int flags;
>      VFIOMigration *migration;
>      Error *migration_blocker;
> +    VMChangeStateEntry *vm_state;
> +    uint32_t device_state;
> +    int vm_running;
>  } VFIODevice;
>  
>  struct VFIODeviceOps {
Cornelia Huck June 23, 2020, 8:07 a.m. UTC | #2
On Sun, 21 Jun 2020 01:51:14 +0530
Kirti Wankhede <kwankhede@nvidia.com> wrote:

> VM state change handler gets called on change in VM's state. This is used to set
> VFIO device state to _RUNNING.
> 
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Reviewed-by: Neo Jia <cjia@nvidia.com>
> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> ---
>  hw/vfio/migration.c           | 87 +++++++++++++++++++++++++++++++++++++++++++
>  hw/vfio/trace-events          |  2 +
>  include/hw/vfio/vfio-common.h |  4 ++
>  3 files changed, 93 insertions(+)
> 
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index 48ac385d80a7..fcecc0bb0874 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -10,6 +10,7 @@
>  #include "qemu/osdep.h"
>  #include <linux/vfio.h>
>  
> +#include "sysemu/runstate.h"
>  #include "hw/vfio/vfio-common.h"
>  #include "cpu.h"
>  #include "migration/migration.h"
> @@ -74,6 +75,85 @@ err:
>      return ret;
>  }
>  
> +static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
> +                                    uint32_t value)

I commented on this interface already in
https://lore.kernel.org/qemu-devel/20200505120459.62bd0b16.cohuck@redhat.com/,
but I did not see any reply... I guess my comments still apply (here
and below).

> +{
> +    VFIOMigration *migration = vbasedev->migration;
> +    VFIORegion *region = &migration->region;
> +    uint32_t device_state;
> +    int ret;
> +
> +    ret = pread(vbasedev->fd, &device_state, sizeof(device_state),
> +                region->fd_offset + offsetof(struct vfio_device_migration_info,
> +                                              device_state));
> +    if (ret < 0) {
> +        error_report("%s: Failed to read device state %d %s",
> +                     vbasedev->name, ret, strerror(errno));
> +        return ret;
> +    }
> +
> +    device_state = (device_state & mask) | value;
> +
> +    if (!VFIO_DEVICE_STATE_VALID(device_state)) {
> +        return -EINVAL;
> +    }
> +
> +    ret = pwrite(vbasedev->fd, &device_state, sizeof(device_state),
> +                 region->fd_offset + offsetof(struct vfio_device_migration_info,
> +                                              device_state));
> +    if (ret < 0) {
> +        error_report("%s: Failed to set device state %d %s",
> +                     vbasedev->name, ret, strerror(errno));
> +
> +        ret = pread(vbasedev->fd, &device_state, sizeof(device_state),
> +                region->fd_offset + offsetof(struct vfio_device_migration_info,
> +                device_state));
> +        if (ret < 0) {
> +            error_report("%s: On failure, failed to read device state %d %s",
> +                    vbasedev->name, ret, strerror(errno));
> +            return ret;
> +        }
> +
> +        if (VFIO_DEVICE_STATE_IS_ERROR(device_state)) {
> +            error_report("%s: Device is in error state 0x%x",
> +                         vbasedev->name, device_state);
> +            return -EFAULT;

E.g., why -EFAULT here?

> +        }
> +    }
> +
> +    vbasedev->device_state = device_state;
> +    trace_vfio_migration_set_state(vbasedev->name, device_state);
> +    return 0;
> +}
> +
> +static void vfio_vmstate_change(void *opaque, int running, RunState state)
> +{
> +    VFIODevice *vbasedev = opaque;
> +
> +    if ((vbasedev->vm_running != running)) {
> +        int ret;
> +        uint32_t value = 0, mask = 0;
> +
> +        if (running) {
> +            value = VFIO_DEVICE_STATE_RUNNING;
> +            if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) {
> +                mask = ~VFIO_DEVICE_STATE_RESUMING;
> +            }
> +        } else {
> +            mask = ~VFIO_DEVICE_STATE_RUNNING;
> +        }
> +
> +        ret = vfio_migration_set_state(vbasedev, mask, value);
> +        if (ret) {
> +            error_report("%s: Failed to set device state 0x%x",
> +                         vbasedev->name, value & mask);
> +        }
> +        vbasedev->vm_running = running;
> +        trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),
> +                                  value & mask);
> +    }
> +}
> +
>  static int vfio_migration_init(VFIODevice *vbasedev,
>                                 struct vfio_region_info *info)
>  {
Kirti Wankhede June 23, 2020, 6:55 p.m. UTC | #3
On 6/23/2020 4:20 AM, Alex Williamson wrote:
> On Sun, 21 Jun 2020 01:51:14 +0530
> Kirti Wankhede <kwankhede@nvidia.com> wrote:
> 
>> VM state change handler gets called on change in VM's state. This is used to set
>> VFIO device state to _RUNNING.
>>
>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
>> Reviewed-by: Neo Jia <cjia@nvidia.com>
>> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>> ---
>>   hw/vfio/migration.c           | 87 +++++++++++++++++++++++++++++++++++++++++++
>>   hw/vfio/trace-events          |  2 +
>>   include/hw/vfio/vfio-common.h |  4 ++
>>   3 files changed, 93 insertions(+)
>>
>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>> index 48ac385d80a7..fcecc0bb0874 100644
>> --- a/hw/vfio/migration.c
>> +++ b/hw/vfio/migration.c
>> @@ -10,6 +10,7 @@
>>   #include "qemu/osdep.h"
>>   #include <linux/vfio.h>
>>   
>> +#include "sysemu/runstate.h"
>>   #include "hw/vfio/vfio-common.h"
>>   #include "cpu.h"
>>   #include "migration/migration.h"
>> @@ -74,6 +75,85 @@ err:
>>       return ret;
>>   }
>>   
>> +static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
>> +                                    uint32_t value)
>> +{
>> +    VFIOMigration *migration = vbasedev->migration;
>> +    VFIORegion *region = &migration->region;
>> +    uint32_t device_state;
>> +    int ret;
>> +
>> +    ret = pread(vbasedev->fd, &device_state, sizeof(device_state),
>> +                region->fd_offset + offsetof(struct vfio_device_migration_info,
>> +                                              device_state));
>> +    if (ret < 0) {
>> +        error_report("%s: Failed to read device state %d %s",
>> +                     vbasedev->name, ret, strerror(errno));
>> +        return ret;
>> +    }
>> +
>> +    device_state = (device_state & mask) | value;
>> +
>> +    if (!VFIO_DEVICE_STATE_VALID(device_state)) {
>> +        return -EINVAL;
>> +    }
>> +
>> +    ret = pwrite(vbasedev->fd, &device_state, sizeof(device_state),
>> +                 region->fd_offset + offsetof(struct vfio_device_migration_info,
>> +                                              device_state));
>> +    if (ret < 0) {
>> +        error_report("%s: Failed to set device state %d %s",
>> +                     vbasedev->name, ret, strerror(errno));
>> +
>> +        ret = pread(vbasedev->fd, &device_state, sizeof(device_state),
>> +                region->fd_offset + offsetof(struct vfio_device_migration_info,
>> +                device_state));
>> +        if (ret < 0) {
>> +            error_report("%s: On failure, failed to read device state %d %s",
>> +                    vbasedev->name, ret, strerror(errno));
>> +            return ret;
>> +        }
>> +
>> +        if (VFIO_DEVICE_STATE_IS_ERROR(device_state)) {
>> +            error_report("%s: Device is in error state 0x%x",
>> +                         vbasedev->name, device_state);
>> +            return -EFAULT;
>> +        }
>> +    }
>> +
>> +    vbasedev->device_state = device_state;
>> +    trace_vfio_migration_set_state(vbasedev->name, device_state);
>> +    return 0;
>> +}
>> +
>> +static void vfio_vmstate_change(void *opaque, int running, RunState state)
>> +{
>> +    VFIODevice *vbasedev = opaque;
>> +
>> +    if ((vbasedev->vm_running != running)) {
>> +        int ret;
>> +        uint32_t value = 0, mask = 0;
>> +
>> +        if (running) {
>> +            value = VFIO_DEVICE_STATE_RUNNING;
>> +            if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) {
>> +                mask = ~VFIO_DEVICE_STATE_RESUMING;
>> +            }
>> +        } else {
>> +            mask = ~VFIO_DEVICE_STATE_RUNNING;
>> +        }
>> +
>> +        ret = vfio_migration_set_state(vbasedev, mask, value);
>> +        if (ret) {
>> +            error_report("%s: Failed to set device state 0x%x",
>> +                         vbasedev->name, value & mask);
> 
> 
> Is there nothing more we should do here?  It seems like in either the
> case of an outbound migration where we can't stop the device or an
> inbound migration where we can't start the device, we'd want this to
> trigger an abort of the migration.  Should there at least be a TODO
> comment if the reason is that QEMU migration doesn't yet support failure
> here?  Thanks,
> 

Checked other modules in QEMU, at some places error message is reported 
as above while at some places abort() is called (for example 
kvmclock_vm_state_change() in hw/i386/kvm/clock.c). Abort will abort 
QEMU process, that is VM crash. Should we abort here on error case? 
Anyways VM will not recover properly on migration if there is such error.

Thanks,
Kirti
Dr. David Alan Gilbert June 26, 2020, 2:51 p.m. UTC | #4
* Kirti Wankhede (kwankhede@nvidia.com) wrote:
> 
> 
> On 6/23/2020 4:20 AM, Alex Williamson wrote:
> > On Sun, 21 Jun 2020 01:51:14 +0530
> > Kirti Wankhede <kwankhede@nvidia.com> wrote:
> > 
> > > VM state change handler gets called on change in VM's state. This is used to set
> > > VFIO device state to _RUNNING.
> > > 
> > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> > > Reviewed-by: Neo Jia <cjia@nvidia.com>
> > > Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > > ---
> > >   hw/vfio/migration.c           | 87 +++++++++++++++++++++++++++++++++++++++++++
> > >   hw/vfio/trace-events          |  2 +
> > >   include/hw/vfio/vfio-common.h |  4 ++
> > >   3 files changed, 93 insertions(+)
> > > 
> > > diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> > > index 48ac385d80a7..fcecc0bb0874 100644
> > > --- a/hw/vfio/migration.c
> > > +++ b/hw/vfio/migration.c
> > > @@ -10,6 +10,7 @@
> > >   #include "qemu/osdep.h"
> > >   #include <linux/vfio.h>
> > > +#include "sysemu/runstate.h"
> > >   #include "hw/vfio/vfio-common.h"
> > >   #include "cpu.h"
> > >   #include "migration/migration.h"
> > > @@ -74,6 +75,85 @@ err:
> > >       return ret;
> > >   }
> > > +static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
> > > +                                    uint32_t value)
> > > +{
> > > +    VFIOMigration *migration = vbasedev->migration;
> > > +    VFIORegion *region = &migration->region;
> > > +    uint32_t device_state;
> > > +    int ret;
> > > +
> > > +    ret = pread(vbasedev->fd, &device_state, sizeof(device_state),
> > > +                region->fd_offset + offsetof(struct vfio_device_migration_info,
> > > +                                              device_state));
> > > +    if (ret < 0) {
> > > +        error_report("%s: Failed to read device state %d %s",
> > > +                     vbasedev->name, ret, strerror(errno));
> > > +        return ret;
> > > +    }
> > > +
> > > +    device_state = (device_state & mask) | value;
> > > +
> > > +    if (!VFIO_DEVICE_STATE_VALID(device_state)) {
> > > +        return -EINVAL;
> > > +    }
> > > +
> > > +    ret = pwrite(vbasedev->fd, &device_state, sizeof(device_state),
> > > +                 region->fd_offset + offsetof(struct vfio_device_migration_info,
> > > +                                              device_state));
> > > +    if (ret < 0) {
> > > +        error_report("%s: Failed to set device state %d %s",
> > > +                     vbasedev->name, ret, strerror(errno));
> > > +
> > > +        ret = pread(vbasedev->fd, &device_state, sizeof(device_state),
> > > +                region->fd_offset + offsetof(struct vfio_device_migration_info,
> > > +                device_state));
> > > +        if (ret < 0) {
> > > +            error_report("%s: On failure, failed to read device state %d %s",
> > > +                    vbasedev->name, ret, strerror(errno));
> > > +            return ret;
> > > +        }
> > > +
> > > +        if (VFIO_DEVICE_STATE_IS_ERROR(device_state)) {
> > > +            error_report("%s: Device is in error state 0x%x",
> > > +                         vbasedev->name, device_state);
> > > +            return -EFAULT;
> > > +        }
> > > +    }
> > > +
> > > +    vbasedev->device_state = device_state;
> > > +    trace_vfio_migration_set_state(vbasedev->name, device_state);
> > > +    return 0;
> > > +}
> > > +
> > > +static void vfio_vmstate_change(void *opaque, int running, RunState state)
> > > +{
> > > +    VFIODevice *vbasedev = opaque;
> > > +
> > > +    if ((vbasedev->vm_running != running)) {
> > > +        int ret;
> > > +        uint32_t value = 0, mask = 0;
> > > +
> > > +        if (running) {
> > > +            value = VFIO_DEVICE_STATE_RUNNING;
> > > +            if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) {
> > > +                mask = ~VFIO_DEVICE_STATE_RESUMING;
> > > +            }
> > > +        } else {
> > > +            mask = ~VFIO_DEVICE_STATE_RUNNING;
> > > +        }
> > > +
> > > +        ret = vfio_migration_set_state(vbasedev, mask, value);
> > > +        if (ret) {
> > > +            error_report("%s: Failed to set device state 0x%x",
> > > +                         vbasedev->name, value & mask);
> > 
> > 
> > Is there nothing more we should do here?  It seems like in either the
> > case of an outbound migration where we can't stop the device or an
> > inbound migration where we can't start the device, we'd want this to
> > trigger an abort of the migration.  Should there at least be a TODO
> > comment if the reason is that QEMU migration doesn't yet support failure
> > here?  Thanks,
> > 
> 
> Checked other modules in QEMU, at some places error message is reported as
> above while at some places abort() is called (for example
> kvmclock_vm_state_change() in hw/i386/kvm/clock.c). Abort will abort QEMU
> process, that is VM crash. Should we abort here on error case? Anyways VM
> will not recover properly on migration if there is such error.

I prefer to avoid aborts on migration if possible; unless the VM is
realyl dead.
Failing the migration is preferable.

Dave

> Thanks,
> Kirti
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff mbox series

Patch

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 48ac385d80a7..fcecc0bb0874 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -10,6 +10,7 @@ 
 #include "qemu/osdep.h"
 #include <linux/vfio.h>
 
+#include "sysemu/runstate.h"
 #include "hw/vfio/vfio-common.h"
 #include "cpu.h"
 #include "migration/migration.h"
@@ -74,6 +75,85 @@  err:
     return ret;
 }
 
+static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
+                                    uint32_t value)
+{
+    VFIOMigration *migration = vbasedev->migration;
+    VFIORegion *region = &migration->region;
+    uint32_t device_state;
+    int ret;
+
+    ret = pread(vbasedev->fd, &device_state, sizeof(device_state),
+                region->fd_offset + offsetof(struct vfio_device_migration_info,
+                                              device_state));
+    if (ret < 0) {
+        error_report("%s: Failed to read device state %d %s",
+                     vbasedev->name, ret, strerror(errno));
+        return ret;
+    }
+
+    device_state = (device_state & mask) | value;
+
+    if (!VFIO_DEVICE_STATE_VALID(device_state)) {
+        return -EINVAL;
+    }
+
+    ret = pwrite(vbasedev->fd, &device_state, sizeof(device_state),
+                 region->fd_offset + offsetof(struct vfio_device_migration_info,
+                                              device_state));
+    if (ret < 0) {
+        error_report("%s: Failed to set device state %d %s",
+                     vbasedev->name, ret, strerror(errno));
+
+        ret = pread(vbasedev->fd, &device_state, sizeof(device_state),
+                region->fd_offset + offsetof(struct vfio_device_migration_info,
+                device_state));
+        if (ret < 0) {
+            error_report("%s: On failure, failed to read device state %d %s",
+                    vbasedev->name, ret, strerror(errno));
+            return ret;
+        }
+
+        if (VFIO_DEVICE_STATE_IS_ERROR(device_state)) {
+            error_report("%s: Device is in error state 0x%x",
+                         vbasedev->name, device_state);
+            return -EFAULT;
+        }
+    }
+
+    vbasedev->device_state = device_state;
+    trace_vfio_migration_set_state(vbasedev->name, device_state);
+    return 0;
+}
+
+static void vfio_vmstate_change(void *opaque, int running, RunState state)
+{
+    VFIODevice *vbasedev = opaque;
+
+    if ((vbasedev->vm_running != running)) {
+        int ret;
+        uint32_t value = 0, mask = 0;
+
+        if (running) {
+            value = VFIO_DEVICE_STATE_RUNNING;
+            if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) {
+                mask = ~VFIO_DEVICE_STATE_RESUMING;
+            }
+        } else {
+            mask = ~VFIO_DEVICE_STATE_RUNNING;
+        }
+
+        ret = vfio_migration_set_state(vbasedev, mask, value);
+        if (ret) {
+            error_report("%s: Failed to set device state 0x%x",
+                         vbasedev->name, value & mask);
+        }
+        vbasedev->vm_running = running;
+        trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),
+                                  value & mask);
+    }
+}
+
 static int vfio_migration_init(VFIODevice *vbasedev,
                                struct vfio_region_info *info)
 {
@@ -87,8 +167,11 @@  static int vfio_migration_init(VFIODevice *vbasedev,
                      vbasedev->name);
         g_free(vbasedev->migration);
         vbasedev->migration = NULL;
+        return ret;
     }
 
+    vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change,
+                                                          vbasedev);
     return ret;
 }
 
@@ -131,6 +214,10 @@  add_blocker:
 
 void vfio_migration_finalize(VFIODevice *vbasedev)
 {
+    if (vbasedev->vm_state) {
+        qemu_del_vm_change_state_handler(vbasedev->vm_state);
+    }
+
     if (vbasedev->migration_blocker) {
         migrate_del_blocker(vbasedev->migration_blocker);
         error_free(vbasedev->migration_blocker);
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index fd034ac53684..14b0a86c0035 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -146,3 +146,5 @@  vfio_display_edid_write_error(void) ""
 
 # migration.c
 vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d"
+vfio_migration_set_state(char *name, uint32_t state) " (%s) state %d"
+vfio_vmstate_change(char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d"
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index d4b268641173..3d18eb146b33 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -29,6 +29,7 @@ 
 #ifdef CONFIG_LINUX
 #include <linux/vfio.h>
 #endif
+#include "sysemu/sysemu.h"
 
 #define VFIO_MSG_PREFIX "vfio %s: "
 
@@ -119,6 +120,9 @@  typedef struct VFIODevice {
     unsigned int flags;
     VFIOMigration *migration;
     Error *migration_blocker;
+    VMChangeStateEntry *vm_state;
+    uint32_t device_state;
+    int vm_running;
 } VFIODevice;
 
 struct VFIODeviceOps {