Message ID | 1600817059-26721-6-git-send-email-kwankhede@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add migration support for VFIO devices | expand |
On Wed, 23 Sep 2020 04:54:07 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > VM state change handler gets called on change in VM's state. This is used to set > VFIO device state to _RUNNING. > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > Reviewed-by: Neo Jia <cjia@nvidia.com> > Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > --- > hw/vfio/migration.c | 136 ++++++++++++++++++++++++++++++++++++++++++ > hw/vfio/trace-events | 3 +- > include/hw/vfio/vfio-common.h | 4 ++ > 3 files changed, 142 insertions(+), 1 deletion(-) > (...) > +static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, > + uint32_t value) I think I've mentioned that before, but this function could really benefit from a comment what mask and value mean. > +{ > + VFIOMigration *migration = vbasedev->migration; > + VFIORegion *region = &migration->region; > + off_t dev_state_off = region->fd_offset + > + offsetof(struct vfio_device_migration_info, device_state); > + uint32_t device_state; > + int ret; > + > + ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), > + dev_state_off); > + if (ret < 0) { > + return ret; > + } > + > + device_state = (device_state & mask) | value; > + > + if (!VFIO_DEVICE_STATE_VALID(device_state)) { > + return -EINVAL; > + } > + > + ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state), > + dev_state_off); > + if (ret < 0) { > + ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), > + dev_state_off); > + if (ret < 0) { > + return ret; > + } > + > + if (VFIO_DEVICE_STATE_IS_ERROR(device_state)) { > + hw_error("%s: Device is in error state 0x%x", > + vbasedev->name, device_state); > + return -EFAULT; Is -EFAULT a good return value here? Maybe -EIO? > + } > + } > + > + vbasedev->device_state = device_state; > + trace_vfio_migration_set_state(vbasedev->name, device_state); > + return 0; > +} > + > +static void vfio_vmstate_change(void *opaque, int running, RunState state) > +{ > + VFIODevice *vbasedev = opaque; > + > + if ((vbasedev->vm_running != running)) { > + int ret; > + uint32_t value = 0, mask = 0; > + > + if (running) { > + value = VFIO_DEVICE_STATE_RUNNING; > + if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) { > + mask = ~VFIO_DEVICE_STATE_RESUMING; I've been staring at this for some time and I think that the desired result is - set _RUNNING - if _RESUMING was set, clear it, but leave the other bits intact - if _RESUMING was not set, clear everything previously set This would really benefit from a comment (or am I the only one struggling here?) > + } > + } else { > + mask = ~VFIO_DEVICE_STATE_RUNNING; > + } > + > + ret = vfio_migration_set_state(vbasedev, mask, value); > + if (ret) { > + /* > + * vm_state_notify() doesn't support reporting failure. If such > + * error reporting support added in furure, migration should be > + * aborted. "We should abort the migration in this case, but vm_state_notify() currently does not support reporting failures." ? Can/should we mark the failing device in some way? > + */ > + error_report("%s: Failed to set device state 0x%x", > + vbasedev->name, value & mask); > + } > + vbasedev->vm_running = running; > + trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), > + value & mask); > + } > +} > + > static int vfio_migration_init(VFIODevice *vbasedev, > struct vfio_region_info *info) > { (...)
On Wed, 23 Sep 2020 04:54:07 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > VM state change handler gets called on change in VM's state. This is used to set > VFIO device state to _RUNNING. > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > Reviewed-by: Neo Jia <cjia@nvidia.com> > Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > --- > hw/vfio/migration.c | 136 ++++++++++++++++++++++++++++++++++++++++++ > hw/vfio/trace-events | 3 +- > include/hw/vfio/vfio-common.h | 4 ++ > 3 files changed, 142 insertions(+), 1 deletion(-) > > diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c > index 2f760f1f9c47..a30d628ba963 100644 > --- a/hw/vfio/migration.c > +++ b/hw/vfio/migration.c > @@ -10,6 +10,7 @@ > #include "qemu/osdep.h" > #include <linux/vfio.h> > > +#include "sysemu/runstate.h" > #include "hw/vfio/vfio-common.h" > #include "cpu.h" > #include "migration/migration.h" > @@ -22,6 +23,58 @@ > #include "exec/ram_addr.h" > #include "pci.h" > #include "trace.h" > +#include "hw/hw.h" > + > +static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, > + off_t off, bool iswrite) > +{ > + int ret; > + > + ret = iswrite ? pwrite(vbasedev->fd, val, count, off) : > + pread(vbasedev->fd, val, count, off); > + if (ret < count) { > + error_report("vfio_mig_%s%d %s: failed at offset 0x%lx, err: %s", > + iswrite ? "write" : "read", count * 8, > + vbasedev->name, off, strerror(errno)); This would suggest from the log that there's, for example, a vfio_mig_read8 function, which doesn't exist. > + return (ret < 0) ? ret : -EINVAL; > + } > + return 0; > +} > + > +static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count, > + off_t off, bool iswrite) > +{ > + int ret, done = 0; > + __u8 *tbuf = buf; > + > + while (count) { > + int bytes = 0; > + > + if (count >= 8 && !(off % 8)) { > + bytes = 8; > + } else if (count >= 4 && !(off % 4)) { > + bytes = 4; > + } else if (count >= 2 && !(off % 2)) { > + bytes = 2; > + } else { > + bytes = 1; > + } > + > + ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite); > + if (ret) { > + return ret; > + } > + > + count -= bytes; > + done += bytes; > + off += bytes; > + tbuf += bytes; > + } > + return done; > +} > + > +#define vfio_mig_read(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, false) > +#define vfio_mig_write(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, true) > > static void vfio_migration_region_exit(VFIODevice *vbasedev) > { > @@ -70,6 +123,82 @@ err: > return ret; > } > > +static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, > + uint32_t value) > +{ > + VFIOMigration *migration = vbasedev->migration; > + VFIORegion *region = &migration->region; > + off_t dev_state_off = region->fd_offset + > + offsetof(struct vfio_device_migration_info, device_state); > + uint32_t device_state; > + int ret; > + > + ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), > + dev_state_off); > + if (ret < 0) { > + return ret; > + } > + > + device_state = (device_state & mask) | value; Agree with Connie that mask and value args are not immediately obvious how they're used. I don't have a naming convention that would be more clear and the names do make some sense once they're understood, but a comment to indicate mask bits are preserved, value bits are set, remaining bits are cleared would probably help the reader. > + > + if (!VFIO_DEVICE_STATE_VALID(device_state)) { > + return -EINVAL; > + } > + > + ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state), > + dev_state_off); > + if (ret < 0) { > + ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), > + dev_state_off); > + if (ret < 0) { > + return ret; Seems like we're in pretty bad shape here, should this be combined with below to trigger a hw_error? > + } > + > + if (VFIO_DEVICE_STATE_IS_ERROR(device_state)) { > + hw_error("%s: Device is in error state 0x%x", > + vbasedev->name, device_state); > + return -EFAULT; > + } > + } > + > + vbasedev->device_state = device_state; > + trace_vfio_migration_set_state(vbasedev->name, device_state); > + return 0; So we return success even if we failed to write the desired state as long as we were able to read back any non-error state? vbasedev->device_state remains correct, but it seems confusing form a caller perspective that a set-state can succeed but it's then necessary to check the state. > +} > + > +static void vfio_vmstate_change(void *opaque, int running, RunState state) > +{ > + VFIODevice *vbasedev = opaque; > + > + if ((vbasedev->vm_running != running)) { > + int ret; > + uint32_t value = 0, mask = 0; > + > + if (running) { > + value = VFIO_DEVICE_STATE_RUNNING; > + if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) { > + mask = ~VFIO_DEVICE_STATE_RESUMING; > + } > + } else { > + mask = ~VFIO_DEVICE_STATE_RUNNING; > + } > + > + ret = vfio_migration_set_state(vbasedev, mask, value); > + if (ret) { > + /* > + * vm_state_notify() doesn't support reporting failure. If such > + * error reporting support added in furure, migration should be > + * aborted. > + */ > + error_report("%s: Failed to set device state 0x%x", > + vbasedev->name, value & mask); > + } Here for instance we assume that success means the device is now in the desired state, but we'd actually need to evaluate vbasedev->device_state to determine that. > + vbasedev->vm_running = running; > + trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), > + value & mask); > + } > +} > + > static int vfio_migration_init(VFIODevice *vbasedev, > struct vfio_region_info *info) > { > @@ -87,8 +216,11 @@ static int vfio_migration_init(VFIODevice *vbasedev, > vbasedev->name); > g_free(vbasedev->migration); > vbasedev->migration = NULL; > + return ret; > } > > + vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, > + vbasedev); > return ret; > } > > @@ -131,6 +263,10 @@ add_blocker: > > void vfio_migration_finalize(VFIODevice *vbasedev) > { > + if (vbasedev->vm_state) { > + qemu_del_vm_change_state_handler(vbasedev->vm_state); > + } > + > if (vbasedev->migration_blocker) { > migrate_del_blocker(vbasedev->migration_blocker); > error_free(vbasedev->migration_blocker); > diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > index 8fe913175d85..6524734bf7b4 100644 > --- a/hw/vfio/trace-events > +++ b/hw/vfio/trace-events > @@ -149,4 +149,5 @@ vfio_display_edid_write_error(void) "" > > # migration.c > vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" > - > +vfio_migration_set_state(char *name, uint32_t state) " (%s) state %d" > +vfio_vmstate_change(char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > index 8275c4c68f45..25e3b1a3b90a 100644 > --- a/include/hw/vfio/vfio-common.h > +++ b/include/hw/vfio/vfio-common.h > @@ -29,6 +29,7 @@ > #ifdef CONFIG_LINUX > #include <linux/vfio.h> > #endif > +#include "sysemu/sysemu.h" > > #define VFIO_MSG_PREFIX "vfio %s: " > > @@ -119,6 +120,9 @@ typedef struct VFIODevice { > unsigned int flags; > VFIOMigration *migration; > Error *migration_blocker; > + VMChangeStateEntry *vm_state; > + uint32_t device_state; > + int vm_running; Could these be placed in VFIOMigration? Thanks, Alex > } VFIODevice; > > struct VFIODeviceOps {
* Cornelia Huck (cohuck@redhat.com) wrote: > On Wed, 23 Sep 2020 04:54:07 +0530 > Kirti Wankhede <kwankhede@nvidia.com> wrote: > > > VM state change handler gets called on change in VM's state. This is used to set > > VFIO device state to _RUNNING. > > > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > > Reviewed-by: Neo Jia <cjia@nvidia.com> > > Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > > --- > > hw/vfio/migration.c | 136 ++++++++++++++++++++++++++++++++++++++++++ > > hw/vfio/trace-events | 3 +- > > include/hw/vfio/vfio-common.h | 4 ++ > > 3 files changed, 142 insertions(+), 1 deletion(-) > > > > (...) > > > +static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, > > + uint32_t value) > > I think I've mentioned that before, but this function could really > benefit from a comment what mask and value mean. > > > +{ > > + VFIOMigration *migration = vbasedev->migration; > > + VFIORegion *region = &migration->region; > > + off_t dev_state_off = region->fd_offset + > > + offsetof(struct vfio_device_migration_info, device_state); > > + uint32_t device_state; > > + int ret; > > + > > + ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), > > + dev_state_off); > > + if (ret < 0) { > > + return ret; > > + } > > + > > + device_state = (device_state & mask) | value; > > + > > + if (!VFIO_DEVICE_STATE_VALID(device_state)) { > > + return -EINVAL; > > + } > > + > > + ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state), > > + dev_state_off); > > + if (ret < 0) { > > + ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), > > + dev_state_off); > > + if (ret < 0) { > > + return ret; > > + } > > + > > + if (VFIO_DEVICE_STATE_IS_ERROR(device_state)) { > > + hw_error("%s: Device is in error state 0x%x", > > + vbasedev->name, device_state); > > + return -EFAULT; > > Is -EFAULT a good return value here? Maybe -EIO? > > > + } > > + } > > + > > + vbasedev->device_state = device_state; > > + trace_vfio_migration_set_state(vbasedev->name, device_state); > > + return 0; > > +} > > + > > +static void vfio_vmstate_change(void *opaque, int running, RunState state) > > +{ > > + VFIODevice *vbasedev = opaque; > > + > > + if ((vbasedev->vm_running != running)) { > > + int ret; > > + uint32_t value = 0, mask = 0; > > + > > + if (running) { > > + value = VFIO_DEVICE_STATE_RUNNING; > > + if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) { > > + mask = ~VFIO_DEVICE_STATE_RESUMING; > > I've been staring at this for some time and I think that the desired > result is > - set _RUNNING > - if _RESUMING was set, clear it, but leave the other bits intact > - if _RESUMING was not set, clear everything previously set > This would really benefit from a comment (or am I the only one > struggling here?) > > > + } > > + } else { > > + mask = ~VFIO_DEVICE_STATE_RUNNING; > > + } > > + > > + ret = vfio_migration_set_state(vbasedev, mask, value); > > + if (ret) { > > + /* > > + * vm_state_notify() doesn't support reporting failure. If such > > + * error reporting support added in furure, migration should be > > + * aborted. > > > "We should abort the migration in this case, but vm_state_notify() > currently does not support reporting failures." > > ? > > Can/should we mark the failing device in some way? I think you can call qemu_file_set_error on the migration stream to force an error. Dave > > + */ > > + error_report("%s: Failed to set device state 0x%x", > > + vbasedev->name, value & mask); > > + } > > + vbasedev->vm_running = running; > > + trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), > > + value & mask); > > + } > > +} > > + > > static int vfio_migration_init(VFIODevice *vbasedev, > > struct vfio_region_info *info) > > { > > (...)
On 9/29/2020 4:33 PM, Dr. David Alan Gilbert wrote: > * Cornelia Huck (cohuck@redhat.com) wrote: >> On Wed, 23 Sep 2020 04:54:07 +0530 >> Kirti Wankhede <kwankhede@nvidia.com> wrote: >> >>> VM state change handler gets called on change in VM's state. This is used to set >>> VFIO device state to _RUNNING. >>> >>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> >>> Reviewed-by: Neo Jia <cjia@nvidia.com> >>> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> >>> --- >>> hw/vfio/migration.c | 136 ++++++++++++++++++++++++++++++++++++++++++ >>> hw/vfio/trace-events | 3 +- >>> include/hw/vfio/vfio-common.h | 4 ++ >>> 3 files changed, 142 insertions(+), 1 deletion(-) >>> >> >> (...) >> >>> +static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, >>> + uint32_t value) >> >> I think I've mentioned that before, but this function could really >> benefit from a comment what mask and value mean. >> Adding a comment as: /* * Write device_state field to inform the vendor driver about the device state * to be transitioned to. * vbasedev: VFIO device * mask : bits set in the mask are preserved in device_state * value: bits set in the value are set in device_state * Remaining bits in device_state are cleared. */ >>> +{ >>> + VFIOMigration *migration = vbasedev->migration; >>> + VFIORegion *region = &migration->region; >>> + off_t dev_state_off = region->fd_offset + >>> + offsetof(struct vfio_device_migration_info, device_state); >>> + uint32_t device_state; >>> + int ret; >>> + >>> + ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), >>> + dev_state_off); >>> + if (ret < 0) { >>> + return ret; >>> + } >>> + >>> + device_state = (device_state & mask) | value; >>> + >>> + if (!VFIO_DEVICE_STATE_VALID(device_state)) { >>> + return -EINVAL; >>> + } >>> + >>> + ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state), >>> + dev_state_off); >>> + if (ret < 0) { >>> + ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), >>> + dev_state_off); >>> + if (ret < 0) { >>> + return ret; >>> + } >>> + >>> + if (VFIO_DEVICE_STATE_IS_ERROR(device_state)) { >>> + hw_error("%s: Device is in error state 0x%x", >>> + vbasedev->name, device_state); >>> + return -EFAULT; >> >> Is -EFAULT a good return value here? Maybe -EIO? >> Ok. Changing to -EIO. >>> + } >>> + } >>> + >>> + vbasedev->device_state = device_state; >>> + trace_vfio_migration_set_state(vbasedev->name, device_state); >>> + return 0; >>> +} >>> + >>> +static void vfio_vmstate_change(void *opaque, int running, RunState state) >>> +{ >>> + VFIODevice *vbasedev = opaque; >>> + >>> + if ((vbasedev->vm_running != running)) { >>> + int ret; >>> + uint32_t value = 0, mask = 0; >>> + >>> + if (running) { >>> + value = VFIO_DEVICE_STATE_RUNNING; >>> + if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) { >>> + mask = ~VFIO_DEVICE_STATE_RESUMING; >> >> I've been staring at this for some time and I think that the desired >> result is >> - set _RUNNING >> - if _RESUMING was set, clear it, but leave the other bits intact Upto here, you're correct. >> - if _RESUMING was not set, clear everything previously set >> This would really benefit from a comment (or am I the only one >> struggling here?) >> Here mask should be ~0. Correcting it. >>> + } >>> + } else { >>> + mask = ~VFIO_DEVICE_STATE_RUNNING; >>> + } >>> + >>> + ret = vfio_migration_set_state(vbasedev, mask, value); >>> + if (ret) { >>> + /* >>> + * vm_state_notify() doesn't support reporting failure. If such >>> + * error reporting support added in furure, migration should be >>> + * aborted. >> >> >> "We should abort the migration in this case, but vm_state_notify() >> currently does not support reporting failures." >> >> ? >> Ok. Updating comment as suggested here. >> Can/should we mark the failing device in some way? > > I think you can call qemu_file_set_error on the migration stream to > force an error. > It should be as below, right? qemu_file_set_error(migrate_get_current()->to_dst_file, ret); Thanks, Kirti > Dave > >>> + */ >>> + error_report("%s: Failed to set device state 0x%x", >>> + vbasedev->name, value & mask); >>> + } >>> + vbasedev->vm_running = running; >>> + trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), >>> + value & mask); >>> + } >>> +} >>> + >>> static int vfio_migration_init(VFIODevice *vbasedev, >>> struct vfio_region_info *info) >>> { >> >> (...)
On 9/26/2020 1:50 AM, Alex Williamson wrote: > On Wed, 23 Sep 2020 04:54:07 +0530 > Kirti Wankhede <kwankhede@nvidia.com> wrote: > >> VM state change handler gets called on change in VM's state. This is used to set >> VFIO device state to _RUNNING. >> >> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> >> Reviewed-by: Neo Jia <cjia@nvidia.com> >> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> >> --- >> hw/vfio/migration.c | 136 ++++++++++++++++++++++++++++++++++++++++++ >> hw/vfio/trace-events | 3 +- >> include/hw/vfio/vfio-common.h | 4 ++ >> 3 files changed, 142 insertions(+), 1 deletion(-) >> >> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c >> index 2f760f1f9c47..a30d628ba963 100644 >> --- a/hw/vfio/migration.c >> +++ b/hw/vfio/migration.c >> @@ -10,6 +10,7 @@ >> #include "qemu/osdep.h" >> #include <linux/vfio.h> >> >> +#include "sysemu/runstate.h" >> #include "hw/vfio/vfio-common.h" >> #include "cpu.h" >> #include "migration/migration.h" >> @@ -22,6 +23,58 @@ >> #include "exec/ram_addr.h" >> #include "pci.h" >> #include "trace.h" >> +#include "hw/hw.h" >> + >> +static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, >> + off_t off, bool iswrite) >> +{ >> + int ret; >> + >> + ret = iswrite ? pwrite(vbasedev->fd, val, count, off) : >> + pread(vbasedev->fd, val, count, off); >> + if (ret < count) { >> + error_report("vfio_mig_%s%d %s: failed at offset 0x%lx, err: %s", >> + iswrite ? "write" : "read", count * 8, >> + vbasedev->name, off, strerror(errno)); > > This would suggest from the log that there's, for example, a > vfio_mig_read8 function, which doesn't exist. > Changing to: error_report("vfio_mig_%s %d byte %s: failed at offset 0x%lx, err: %s", iswrite ? "write" : "read", count, vbasedev->name, off, strerror(errno)); Hope this address your concern. >> + return (ret < 0) ? ret : -EINVAL; >> + } >> + return 0; >> +} >> + >> +static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count, >> + off_t off, bool iswrite) >> +{ >> + int ret, done = 0; >> + __u8 *tbuf = buf; >> + >> + while (count) { >> + int bytes = 0; >> + >> + if (count >= 8 && !(off % 8)) { >> + bytes = 8; >> + } else if (count >= 4 && !(off % 4)) { >> + bytes = 4; >> + } else if (count >= 2 && !(off % 2)) { >> + bytes = 2; >> + } else { >> + bytes = 1; >> + } >> + >> + ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite); >> + if (ret) { >> + return ret; >> + } >> + >> + count -= bytes; >> + done += bytes; >> + off += bytes; >> + tbuf += bytes; >> + } >> + return done; >> +} >> + >> +#define vfio_mig_read(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, false) >> +#define vfio_mig_write(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, true) >> >> static void vfio_migration_region_exit(VFIODevice *vbasedev) >> { >> @@ -70,6 +123,82 @@ err: >> return ret; >> } >> >> +static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, >> + uint32_t value) >> +{ >> + VFIOMigration *migration = vbasedev->migration; >> + VFIORegion *region = &migration->region; >> + off_t dev_state_off = region->fd_offset + >> + offsetof(struct vfio_device_migration_info, device_state); >> + uint32_t device_state; >> + int ret; >> + >> + ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), >> + dev_state_off); >> + if (ret < 0) { >> + return ret; >> + } >> + >> + device_state = (device_state & mask) | value; > > Agree with Connie that mask and value args are not immediately obvious > how they're used. I don't have a naming convention that would be more > clear and the names do make some sense once they're understood, but a > comment to indicate mask bits are preserved, value bits are set, > remaining bits are cleared would probably help the reader. > Added comment. >> + >> + if (!VFIO_DEVICE_STATE_VALID(device_state)) { >> + return -EINVAL; >> + } >> + >> + ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state), >> + dev_state_off); >> + if (ret < 0) { >> + ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), >> + dev_state_off); >> + if (ret < 0) { >> + return ret; > > Seems like we're in pretty bad shape here, should this be combined with > below to trigger a hw_error? > Ok. >> + } >> + >> + if (VFIO_DEVICE_STATE_IS_ERROR(device_state)) { >> + hw_error("%s: Device is in error state 0x%x", >> + vbasedev->name, device_state); >> + return -EFAULT; >> + } >> + } >> + >> + vbasedev->device_state = device_state; >> + trace_vfio_migration_set_state(vbasedev->name, device_state); >> + return 0; > > So we return success even if we failed to write the desired state as > long as we were able to read back any non-error state? > vbasedev->device_state remains correct, but it seems confusing form a > caller perspective that a set-state can succeed but it's then necessary > to check the state. > Correcting here. If vfio_mig_write() had retured error, return error from vfio_migration_set_state() >> +} >> + >> +static void vfio_vmstate_change(void *opaque, int running, RunState state) >> +{ >> + VFIODevice *vbasedev = opaque; >> + >> + if ((vbasedev->vm_running != running)) { >> + int ret; >> + uint32_t value = 0, mask = 0; >> + >> + if (running) { >> + value = VFIO_DEVICE_STATE_RUNNING; >> + if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) { >> + mask = ~VFIO_DEVICE_STATE_RESUMING; >> + } >> + } else { >> + mask = ~VFIO_DEVICE_STATE_RUNNING; >> + } >> + >> + ret = vfio_migration_set_state(vbasedev, mask, value); >> + if (ret) { >> + /* >> + * vm_state_notify() doesn't support reporting failure. If such >> + * error reporting support added in furure, migration should be >> + * aborted. >> + */ >> + error_report("%s: Failed to set device state 0x%x", >> + vbasedev->name, value & mask); >> + } > > Here for instance we assume that success means the device is now in the > desired state, but we'd actually need to evaluate > vbasedev->device_state to determine that. > Updating. >> + vbasedev->vm_running = running; >> + trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), >> + value & mask); >> + } >> +} >> + >> static int vfio_migration_init(VFIODevice *vbasedev, >> struct vfio_region_info *info) >> { >> @@ -87,8 +216,11 @@ static int vfio_migration_init(VFIODevice *vbasedev, >> vbasedev->name); >> g_free(vbasedev->migration); >> vbasedev->migration = NULL; >> + return ret; >> } >> >> + vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, >> + vbasedev); >> return ret; >> } >> >> @@ -131,6 +263,10 @@ add_blocker: >> >> void vfio_migration_finalize(VFIODevice *vbasedev) >> { >> + if (vbasedev->vm_state) { >> + qemu_del_vm_change_state_handler(vbasedev->vm_state); >> + } >> + >> if (vbasedev->migration_blocker) { >> migrate_del_blocker(vbasedev->migration_blocker); >> error_free(vbasedev->migration_blocker); >> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events >> index 8fe913175d85..6524734bf7b4 100644 >> --- a/hw/vfio/trace-events >> +++ b/hw/vfio/trace-events >> @@ -149,4 +149,5 @@ vfio_display_edid_write_error(void) "" >> >> # migration.c >> vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" >> - >> +vfio_migration_set_state(char *name, uint32_t state) " (%s) state %d" >> +vfio_vmstate_change(char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" >> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h >> index 8275c4c68f45..25e3b1a3b90a 100644 >> --- a/include/hw/vfio/vfio-common.h >> +++ b/include/hw/vfio/vfio-common.h >> @@ -29,6 +29,7 @@ >> #ifdef CONFIG_LINUX >> #include <linux/vfio.h> >> #endif >> +#include "sysemu/sysemu.h" >> >> #define VFIO_MSG_PREFIX "vfio %s: " >> >> @@ -119,6 +120,9 @@ typedef struct VFIODevice { >> unsigned int flags; >> VFIOMigration *migration; >> Error *migration_blocker; >> + VMChangeStateEntry *vm_state; >> + uint32_t device_state; >> + int vm_running; > > Could these be placed in VFIOMigration? Thanks, > I think device_state should be part of VFIODevice since its about device rather than only related to migration, others can be moved to VFIOMigration. Thanks, Kirti > Alex > >> } VFIODevice; >> >> struct VFIODeviceOps { >
On Sun, 18 Oct 2020 02:00:44 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > On 9/26/2020 1:50 AM, Alex Williamson wrote: > > On Wed, 23 Sep 2020 04:54:07 +0530 > > Kirti Wankhede <kwankhede@nvidia.com> wrote: > > > >> VM state change handler gets called on change in VM's state. This is used to set > >> VFIO device state to _RUNNING. > >> > >> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > >> Reviewed-by: Neo Jia <cjia@nvidia.com> > >> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > >> --- > >> hw/vfio/migration.c | 136 ++++++++++++++++++++++++++++++++++++++++++ > >> hw/vfio/trace-events | 3 +- > >> include/hw/vfio/vfio-common.h | 4 ++ > >> 3 files changed, 142 insertions(+), 1 deletion(-) > >> > >> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c > >> index 2f760f1f9c47..a30d628ba963 100644 > >> --- a/hw/vfio/migration.c > >> +++ b/hw/vfio/migration.c > >> @@ -10,6 +10,7 @@ > >> #include "qemu/osdep.h" > >> #include <linux/vfio.h> > >> > >> +#include "sysemu/runstate.h" > >> #include "hw/vfio/vfio-common.h" > >> #include "cpu.h" > >> #include "migration/migration.h" > >> @@ -22,6 +23,58 @@ > >> #include "exec/ram_addr.h" > >> #include "pci.h" > >> #include "trace.h" > >> +#include "hw/hw.h" > >> + > >> +static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, > >> + off_t off, bool iswrite) > >> +{ > >> + int ret; > >> + > >> + ret = iswrite ? pwrite(vbasedev->fd, val, count, off) : > >> + pread(vbasedev->fd, val, count, off); > >> + if (ret < count) { > >> + error_report("vfio_mig_%s%d %s: failed at offset 0x%lx, err: %s", > >> + iswrite ? "write" : "read", count * 8, > >> + vbasedev->name, off, strerror(errno)); > > > > This would suggest from the log that there's, for example, a > > vfio_mig_read8 function, which doesn't exist. > > > > Changing to: > error_report("vfio_mig_%s %d byte %s: failed at offset 0x%lx, err: %s", > iswrite ? "write" : "read", count, > vbasedev->name, off, strerror(errno)); > Hope this address your concern. > > >> + return (ret < 0) ? ret : -EINVAL; > >> + } > >> + return 0; > >> +} > >> + > >> +static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count, > >> + off_t off, bool iswrite) > >> +{ > >> + int ret, done = 0; > >> + __u8 *tbuf = buf; > >> + > >> + while (count) { > >> + int bytes = 0; > >> + > >> + if (count >= 8 && !(off % 8)) { > >> + bytes = 8; > >> + } else if (count >= 4 && !(off % 4)) { > >> + bytes = 4; > >> + } else if (count >= 2 && !(off % 2)) { > >> + bytes = 2; > >> + } else { > >> + bytes = 1; > >> + } > >> + > >> + ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite); > >> + if (ret) { > >> + return ret; > >> + } > >> + > >> + count -= bytes; > >> + done += bytes; > >> + off += bytes; > >> + tbuf += bytes; > >> + } > >> + return done; > >> +} > >> + > >> +#define vfio_mig_read(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, false) > >> +#define vfio_mig_write(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, true) > >> > >> static void vfio_migration_region_exit(VFIODevice *vbasedev) > >> { > >> @@ -70,6 +123,82 @@ err: > >> return ret; > >> } > >> > >> +static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, > >> + uint32_t value) > >> +{ > >> + VFIOMigration *migration = vbasedev->migration; > >> + VFIORegion *region = &migration->region; > >> + off_t dev_state_off = region->fd_offset + > >> + offsetof(struct vfio_device_migration_info, device_state); > >> + uint32_t device_state; > >> + int ret; > >> + > >> + ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), > >> + dev_state_off); > >> + if (ret < 0) { > >> + return ret; > >> + } > >> + > >> + device_state = (device_state & mask) | value; > > > > Agree with Connie that mask and value args are not immediately obvious > > how they're used. I don't have a naming convention that would be more > > clear and the names do make some sense once they're understood, but a > > comment to indicate mask bits are preserved, value bits are set, > > remaining bits are cleared would probably help the reader. > > > > Added comment. > > >> + > >> + if (!VFIO_DEVICE_STATE_VALID(device_state)) { > >> + return -EINVAL; > >> + } > >> + > >> + ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state), > >> + dev_state_off); > >> + if (ret < 0) { > >> + ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), > >> + dev_state_off); > >> + if (ret < 0) { > >> + return ret; > > > > Seems like we're in pretty bad shape here, should this be combined with > > below to trigger a hw_error? > > > > Ok. > > >> + } > >> + > >> + if (VFIO_DEVICE_STATE_IS_ERROR(device_state)) { > >> + hw_error("%s: Device is in error state 0x%x", > >> + vbasedev->name, device_state); > >> + return -EFAULT; > >> + } > >> + } > >> + > >> + vbasedev->device_state = device_state; > >> + trace_vfio_migration_set_state(vbasedev->name, device_state); > >> + return 0; > > > > So we return success even if we failed to write the desired state as > > long as we were able to read back any non-error state? > > vbasedev->device_state remains correct, but it seems confusing form a > > caller perspective that a set-state can succeed but it's then necessary > > to check the state. > > > > Correcting here. If vfio_mig_write() had retured error, return error > from vfio_migration_set_state() > > >> +} > >> + > >> +static void vfio_vmstate_change(void *opaque, int running, RunState state) > >> +{ > >> + VFIODevice *vbasedev = opaque; > >> + > >> + if ((vbasedev->vm_running != running)) { > >> + int ret; > >> + uint32_t value = 0, mask = 0; > >> + > >> + if (running) { > >> + value = VFIO_DEVICE_STATE_RUNNING; > >> + if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) { > >> + mask = ~VFIO_DEVICE_STATE_RESUMING; > >> + } > >> + } else { > >> + mask = ~VFIO_DEVICE_STATE_RUNNING; > >> + } > >> + > >> + ret = vfio_migration_set_state(vbasedev, mask, value); > >> + if (ret) { > >> + /* > >> + * vm_state_notify() doesn't support reporting failure. If such > >> + * error reporting support added in furure, migration should be > >> + * aborted. > >> + */ > >> + error_report("%s: Failed to set device state 0x%x", > >> + vbasedev->name, value & mask); > >> + } > > > > Here for instance we assume that success means the device is now in the > > desired state, but we'd actually need to evaluate > > vbasedev->device_state to determine that. > > > > Updating. > > >> + vbasedev->vm_running = running; > >> + trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), > >> + value & mask); > >> + } > >> +} > >> + > >> static int vfio_migration_init(VFIODevice *vbasedev, > >> struct vfio_region_info *info) > >> { > >> @@ -87,8 +216,11 @@ static int vfio_migration_init(VFIODevice *vbasedev, > >> vbasedev->name); > >> g_free(vbasedev->migration); > >> vbasedev->migration = NULL; > >> + return ret; > >> } > >> > >> + vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, > >> + vbasedev); > >> return ret; > >> } > >> > >> @@ -131,6 +263,10 @@ add_blocker: > >> > >> void vfio_migration_finalize(VFIODevice *vbasedev) > >> { > >> + if (vbasedev->vm_state) { > >> + qemu_del_vm_change_state_handler(vbasedev->vm_state); > >> + } > >> + > >> if (vbasedev->migration_blocker) { > >> migrate_del_blocker(vbasedev->migration_blocker); > >> error_free(vbasedev->migration_blocker); > >> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > >> index 8fe913175d85..6524734bf7b4 100644 > >> --- a/hw/vfio/trace-events > >> +++ b/hw/vfio/trace-events > >> @@ -149,4 +149,5 @@ vfio_display_edid_write_error(void) "" > >> > >> # migration.c > >> vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" > >> - > >> +vfio_migration_set_state(char *name, uint32_t state) " (%s) state %d" > >> +vfio_vmstate_change(char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" > >> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > >> index 8275c4c68f45..25e3b1a3b90a 100644 > >> --- a/include/hw/vfio/vfio-common.h > >> +++ b/include/hw/vfio/vfio-common.h > >> @@ -29,6 +29,7 @@ > >> #ifdef CONFIG_LINUX > >> #include <linux/vfio.h> > >> #endif > >> +#include "sysemu/sysemu.h" > >> > >> #define VFIO_MSG_PREFIX "vfio %s: " > >> > >> @@ -119,6 +120,9 @@ typedef struct VFIODevice { > >> unsigned int flags; > >> VFIOMigration *migration; > >> Error *migration_blocker; > >> + VMChangeStateEntry *vm_state; > >> + uint32_t device_state; > >> + int vm_running; > > > > Could these be placed in VFIOMigration? Thanks, > > > > I think device_state should be part of VFIODevice since its about device > rather than only related to migration, others can be moved to VFIOMigration. But these are only valid when migration is supported and thus when VFIOMigration exists. Thanks, Alex
<snip> >>>> +vfio_migration_set_state(char *name, uint32_t state) " (%s) state %d" >>>> +vfio_vmstate_change(char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" >>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h >>>> index 8275c4c68f45..25e3b1a3b90a 100644 >>>> --- a/include/hw/vfio/vfio-common.h >>>> +++ b/include/hw/vfio/vfio-common.h >>>> @@ -29,6 +29,7 @@ >>>> #ifdef CONFIG_LINUX >>>> #include <linux/vfio.h> >>>> #endif >>>> +#include "sysemu/sysemu.h" >>>> >>>> #define VFIO_MSG_PREFIX "vfio %s: " >>>> >>>> @@ -119,6 +120,9 @@ typedef struct VFIODevice { >>>> unsigned int flags; >>>> VFIOMigration *migration; >>>> Error *migration_blocker; >>>> + VMChangeStateEntry *vm_state; >>>> + uint32_t device_state; >>>> + int vm_running; >>> >>> Could these be placed in VFIOMigration? Thanks, >>> >> >> I think device_state should be part of VFIODevice since its about device >> rather than only related to migration, others can be moved to VFIOMigration. > > But these are only valid when migration is supported and thus when > VFIOMigration exists. Thanks, > Even though it is used when migration is supported, its device's attribute. Thanks, Kirti
On Sun, 18 Oct 2020 23:13:39 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > <snip> > > >>>> +vfio_migration_set_state(char *name, uint32_t state) " (%s) state %d" > >>>> +vfio_vmstate_change(char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" > >>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > >>>> index 8275c4c68f45..25e3b1a3b90a 100644 > >>>> --- a/include/hw/vfio/vfio-common.h > >>>> +++ b/include/hw/vfio/vfio-common.h > >>>> @@ -29,6 +29,7 @@ > >>>> #ifdef CONFIG_LINUX > >>>> #include <linux/vfio.h> > >>>> #endif > >>>> +#include "sysemu/sysemu.h" > >>>> > >>>> #define VFIO_MSG_PREFIX "vfio %s: " > >>>> > >>>> @@ -119,6 +120,9 @@ typedef struct VFIODevice { > >>>> unsigned int flags; > >>>> VFIOMigration *migration; > >>>> Error *migration_blocker; > >>>> + VMChangeStateEntry *vm_state; > >>>> + uint32_t device_state; > >>>> + int vm_running; > >>> > >>> Could these be placed in VFIOMigration? Thanks, > >>> > >> > >> I think device_state should be part of VFIODevice since its about device > >> rather than only related to migration, others can be moved to VFIOMigration. > > > > But these are only valid when migration is supported and thus when > > VFIOMigration exists. Thanks, > > > > Even though it is used when migration is supported, its device's attribute. device_state is a local copy of the migration region register, so it serves no purpose when a migration region is not present. In fact the initial value would indicate the device is stopped, which is incorrect. vm_running is never initialized and cannot be set other than through a migration region update of device_state, so at least two of these values show incorrect state when migration is not supported by the device. vm_state is unused when migration isn't present, so if nothing else the pointer here is wasteful. It's not clear to me what justification is being presented here as a "device's attribute", supporting migration as indicated by a non-NULL migration pointer is also a device attribute and these are attributes further defining the state of that support. BTW, device_state is used in patch 03/ but only defined here in 05/, so the series would fail to compile on bisect. Thanks, Alex
On Mon, 19 Oct 2020 11:51:36 -0600 Alex Williamson <alex.williamson@redhat.com> wrote: > On Sun, 18 Oct 2020 23:13:39 +0530 > Kirti Wankhede <kwankhede@nvidia.com> wrote: > > > <snip> > > > > >>>> +vfio_migration_set_state(char *name, uint32_t state) " (%s) state %d" > > >>>> +vfio_vmstate_change(char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" > > >>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > > >>>> index 8275c4c68f45..25e3b1a3b90a 100644 > > >>>> --- a/include/hw/vfio/vfio-common.h > > >>>> +++ b/include/hw/vfio/vfio-common.h > > >>>> @@ -29,6 +29,7 @@ > > >>>> #ifdef CONFIG_LINUX > > >>>> #include <linux/vfio.h> > > >>>> #endif > > >>>> +#include "sysemu/sysemu.h" > > >>>> > > >>>> #define VFIO_MSG_PREFIX "vfio %s: " > > >>>> > > >>>> @@ -119,6 +120,9 @@ typedef struct VFIODevice { > > >>>> unsigned int flags; > > >>>> VFIOMigration *migration; > > >>>> Error *migration_blocker; > > >>>> + VMChangeStateEntry *vm_state; > > >>>> + uint32_t device_state; > > >>>> + int vm_running; > > >>> > > >>> Could these be placed in VFIOMigration? Thanks, > > >>> > > >> > > >> I think device_state should be part of VFIODevice since its about device > > >> rather than only related to migration, others can be moved to VFIOMigration. > > > > > > But these are only valid when migration is supported and thus when > > > VFIOMigration exists. Thanks, > > > > > > > Even though it is used when migration is supported, its device's attribute. > > device_state is a local copy of the migration region register, so it > serves no purpose when a migration region is not present. In fact the > initial value would indicate the device is stopped, which is incorrect. > vm_running is never initialized and cannot be set other than through a > migration region update of device_state, so at least two of these > values show incorrect state when migration is not supported by the > device. vm_state is unused when migration isn't present, so if nothing > else the pointer here is wasteful. It's not clear to me what > justification is being presented here as a "device's attribute", > supporting migration as indicated by a non-NULL migration pointer is > also a device attribute and these are attributes further defining the > state of that support. Agreed. Also, it is not obvious from the naming that 'device_state' is related to migration, and it is easy to assume that this field is useful even in the non-migration case. Moving it would solve that problem.
On Sun, 18 Oct 2020 01:54:56 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > On 9/29/2020 4:33 PM, Dr. David Alan Gilbert wrote: > > * Cornelia Huck (cohuck@redhat.com) wrote: > >> On Wed, 23 Sep 2020 04:54:07 +0530 > >> Kirti Wankhede <kwankhede@nvidia.com> wrote: > >> > >>> VM state change handler gets called on change in VM's state. This is used to set > >>> VFIO device state to _RUNNING. > >>> > >>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > >>> Reviewed-by: Neo Jia <cjia@nvidia.com> > >>> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > >>> --- > >>> hw/vfio/migration.c | 136 ++++++++++++++++++++++++++++++++++++++++++ > >>> hw/vfio/trace-events | 3 +- > >>> include/hw/vfio/vfio-common.h | 4 ++ > >>> 3 files changed, 142 insertions(+), 1 deletion(-) > >>> > >> > >> (...) > >> > >>> +static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, > >>> + uint32_t value) > >> > >> I think I've mentioned that before, but this function could really > >> benefit from a comment what mask and value mean. > >> > > Adding a comment as: > > /* > * Write device_state field to inform the vendor driver about the > device state > * to be transitioned to. > * vbasedev: VFIO device > * mask : bits set in the mask are preserved in device_state > * value: bits set in the value are set in device_state > * Remaining bits in device_state are cleared. > */ Maybe: "Change the device_state register for device @vbasedev. Bits set in @mask are preserved, bits set in @value are set, and bits not set in either @mask or @value are cleared in device_state. If the register cannot be accessed, the resulting state would be invalid, or the device enters an error state, an error is returned." ? > > > >>> +{ > >>> + VFIOMigration *migration = vbasedev->migration; > >>> + VFIORegion *region = &migration->region; > >>> + off_t dev_state_off = region->fd_offset + > >>> + offsetof(struct vfio_device_migration_info, device_state); > >>> + uint32_t device_state; > >>> + int ret; > >>> + > >>> + ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), > >>> + dev_state_off); > >>> + if (ret < 0) { > >>> + return ret; > >>> + } > >>> + > >>> + device_state = (device_state & mask) | value; > >>> + > >>> + if (!VFIO_DEVICE_STATE_VALID(device_state)) { > >>> + return -EINVAL; > >>> + } > >>> + > >>> + ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state), > >>> + dev_state_off); > >>> + if (ret < 0) { > >>> + ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), > >>> + dev_state_off); > >>> + if (ret < 0) { > >>> + return ret; > >>> + } > >>> + > >>> + if (VFIO_DEVICE_STATE_IS_ERROR(device_state)) { > >>> + hw_error("%s: Device is in error state 0x%x", > >>> + vbasedev->name, device_state); > >>> + return -EFAULT; > >> > >> Is -EFAULT a good return value here? Maybe -EIO? > >> > > Ok. Changing to -EIO. > > >>> + } > >>> + } > >>> + > >>> + vbasedev->device_state = device_state; > >>> + trace_vfio_migration_set_state(vbasedev->name, device_state); > >>> + return 0; > >>> +} > >>> + > >>> +static void vfio_vmstate_change(void *opaque, int running, RunState state) > >>> +{ > >>> + VFIODevice *vbasedev = opaque; > >>> + > >>> + if ((vbasedev->vm_running != running)) { > >>> + int ret; > >>> + uint32_t value = 0, mask = 0; > >>> + > >>> + if (running) { > >>> + value = VFIO_DEVICE_STATE_RUNNING; > >>> + if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) { > >>> + mask = ~VFIO_DEVICE_STATE_RESUMING; > >> > >> I've been staring at this for some time and I think that the desired > >> result is > >> - set _RUNNING > >> - if _RESUMING was set, clear it, but leave the other bits intact > > Upto here, you're correct. > > >> - if _RESUMING was not set, clear everything previously set > >> This would really benefit from a comment (or am I the only one > >> struggling here?) > >> > > Here mask should be ~0. Correcting it. Hm, now I'm confused. With value == _RUNNING, ~_RUNNING and ~0 as mask should be equivalent, shouldn't they? > > > >>> + } > >>> + } else { > >>> + mask = ~VFIO_DEVICE_STATE_RUNNING; > >>> + } > >>> + > >>> + ret = vfio_migration_set_state(vbasedev, mask, value); > >>> + if (ret) { > >>> + /* > >>> + * vm_state_notify() doesn't support reporting failure. If such > >>> + * error reporting support added in furure, migration should be > >>> + * aborted. > >> > >> > >> "We should abort the migration in this case, but vm_state_notify() > >> currently does not support reporting failures." > >> > >> ? > >> > > Ok. Updating comment as suggested here. > > >> Can/should we mark the failing device in some way? > > > > I think you can call qemu_file_set_error on the migration stream to > > force an error. > > > > It should be as below, right? > qemu_file_set_error(migrate_get_current()->to_dst_file, ret); Does this indicate in any way which device was causing problems? (I'm not sure how visible the error_report would be?) > > > Thanks, > Kirti > > > Dave > > > >>> + */ > >>> + error_report("%s: Failed to set device state 0x%x", > >>> + vbasedev->name, value & mask); > >>> + } > >>> + vbasedev->vm_running = running; > >>> + trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), > >>> + value & mask); > >>> + } > >>> +} > >>> + > >>> static int vfio_migration_init(VFIODevice *vbasedev, > >>> struct vfio_region_info *info) > >>> { > >> > >> (...) >
On 10/20/2020 4:21 PM, Cornelia Huck wrote: > On Sun, 18 Oct 2020 01:54:56 +0530 > Kirti Wankhede <kwankhede@nvidia.com> wrote: > >> On 9/29/2020 4:33 PM, Dr. David Alan Gilbert wrote: >>> * Cornelia Huck (cohuck@redhat.com) wrote: >>>> On Wed, 23 Sep 2020 04:54:07 +0530 >>>> Kirti Wankhede <kwankhede@nvidia.com> wrote: >>>> >>>>> VM state change handler gets called on change in VM's state. This is used to set >>>>> VFIO device state to _RUNNING. >>>>> >>>>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> >>>>> Reviewed-by: Neo Jia <cjia@nvidia.com> >>>>> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> >>>>> --- >>>>> hw/vfio/migration.c | 136 ++++++++++++++++++++++++++++++++++++++++++ >>>>> hw/vfio/trace-events | 3 +- >>>>> include/hw/vfio/vfio-common.h | 4 ++ >>>>> 3 files changed, 142 insertions(+), 1 deletion(-) >>>>> >>>> >>>> (...) >>>> >>>>> +static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, >>>>> + uint32_t value) >>>> >>>> I think I've mentioned that before, but this function could really >>>> benefit from a comment what mask and value mean. >>>> >> >> Adding a comment as: >> >> /* >> * Write device_state field to inform the vendor driver about the >> device state >> * to be transitioned to. >> * vbasedev: VFIO device >> * mask : bits set in the mask are preserved in device_state >> * value: bits set in the value are set in device_state >> * Remaining bits in device_state are cleared. >> */ > > Maybe: > > "Change the device_state register for device @vbasedev. Bits set in > @mask are preserved, bits set in @value are set, and bits not set in > either @mask or @value are cleared in device_state. If the register > cannot be accessed, the resulting state would be invalid, or the device > enters an error state, an error is returned." ? > Ok. >> >> >>>>> +{ >>>>> + VFIOMigration *migration = vbasedev->migration; >>>>> + VFIORegion *region = &migration->region; >>>>> + off_t dev_state_off = region->fd_offset + >>>>> + offsetof(struct vfio_device_migration_info, device_state); >>>>> + uint32_t device_state; >>>>> + int ret; >>>>> + >>>>> + ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), >>>>> + dev_state_off); >>>>> + if (ret < 0) { >>>>> + return ret; >>>>> + } >>>>> + >>>>> + device_state = (device_state & mask) | value; >>>>> + >>>>> + if (!VFIO_DEVICE_STATE_VALID(device_state)) { >>>>> + return -EINVAL; >>>>> + } >>>>> + >>>>> + ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state), >>>>> + dev_state_off); >>>>> + if (ret < 0) { >>>>> + ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), >>>>> + dev_state_off); >>>>> + if (ret < 0) { >>>>> + return ret; >>>>> + } >>>>> + >>>>> + if (VFIO_DEVICE_STATE_IS_ERROR(device_state)) { >>>>> + hw_error("%s: Device is in error state 0x%x", >>>>> + vbasedev->name, device_state); >>>>> + return -EFAULT; >>>> >>>> Is -EFAULT a good return value here? Maybe -EIO? >>>> >> >> Ok. Changing to -EIO. >> >>>>> + } >>>>> + } >>>>> + >>>>> + vbasedev->device_state = device_state; >>>>> + trace_vfio_migration_set_state(vbasedev->name, device_state); >>>>> + return 0; >>>>> +} >>>>> + >>>>> +static void vfio_vmstate_change(void *opaque, int running, RunState state) >>>>> +{ >>>>> + VFIODevice *vbasedev = opaque; >>>>> + >>>>> + if ((vbasedev->vm_running != running)) { >>>>> + int ret; >>>>> + uint32_t value = 0, mask = 0; >>>>> + >>>>> + if (running) { >>>>> + value = VFIO_DEVICE_STATE_RUNNING; >>>>> + if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) { >>>>> + mask = ~VFIO_DEVICE_STATE_RESUMING; >>>> >>>> I've been staring at this for some time and I think that the desired >>>> result is >>>> - set _RUNNING >>>> - if _RESUMING was set, clear it, but leave the other bits intact >> >> Upto here, you're correct. >> >>>> - if _RESUMING was not set, clear everything previously set >>>> This would really benefit from a comment (or am I the only one >>>> struggling here?) >>>> >> >> Here mask should be ~0. Correcting it. > > Hm, now I'm confused. With value == _RUNNING, ~_RUNNING and ~0 as mask > should be equivalent, shouldn't they? > I too got confused after reading your comment. Lets walk through the device states and transitions can happen here: if running - device state could be either _SAVING or _RESUMING or _STOP. Both _SAVING and _RESUMING can't be set at a time, that is the error state. _STOP means 0. - Transition from _SAVING to _RUNNING can happen if there is migration failure, in that case we have to clear _SAVING - Transition from _RESUMING to _RUNNING can happen on resuming and we have to clear _RESUMING. - In both the above cases, we have to set _RUNNING and clear rest 2 bits. Then: mask = ~VFIO_DEVICE_STATE_MASK; value = VFIO_DEVICE_STATE_RUNNING; if !running - device state could be either _RUNNING or _SAVING|_RUNNING. Here we have to reset running bit. Then: mask = ~VFIO_DEVICE_STATE_RUNNING; value = 0; I'll add comment in the code above. >> >> >>>>> + } >>>>> + } else { >>>>> + mask = ~VFIO_DEVICE_STATE_RUNNING; >>>>> + } >>>>> + >>>>> + ret = vfio_migration_set_state(vbasedev, mask, value); >>>>> + if (ret) { >>>>> + /* >>>>> + * vm_state_notify() doesn't support reporting failure. If such >>>>> + * error reporting support added in furure, migration should be >>>>> + * aborted. >>>> >>>> >>>> "We should abort the migration in this case, but vm_state_notify() >>>> currently does not support reporting failures." >>>> >>>> ? >>>> >> >> Ok. Updating comment as suggested here. >> >>>> Can/should we mark the failing device in some way? >>> >>> I think you can call qemu_file_set_error on the migration stream to >>> force an error. >>> >> >> It should be as below, right? >> qemu_file_set_error(migrate_get_current()->to_dst_file, ret); > > Does this indicate in any way which device was causing problems? (I'm > not sure how visible the error_report would be?) > I think it doesn't indicate which device caused problem but it sets error on migration stream Thanks, Kirti >> >> >> Thanks, >> Kirti >> >>> Dave >>> >>>>> + */ >>>>> + error_report("%s: Failed to set device state 0x%x", >>>>> + vbasedev->name, value & mask); >>>>> + } >>>>> + vbasedev->vm_running = running; >>>>> + trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), >>>>> + value & mask); >>>>> + } >>>>> +} >>>>> + >>>>> static int vfio_migration_init(VFIODevice *vbasedev, >>>>> struct vfio_region_info *info) >>>>> { >>>> >>>> (...) >> >
On Wed, 21 Oct 2020 11:03:23 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > On 10/20/2020 4:21 PM, Cornelia Huck wrote: > > On Sun, 18 Oct 2020 01:54:56 +0530 > > Kirti Wankhede <kwankhede@nvidia.com> wrote: > > > >> On 9/29/2020 4:33 PM, Dr. David Alan Gilbert wrote: > >>> * Cornelia Huck (cohuck@redhat.com) wrote: > >>>> On Wed, 23 Sep 2020 04:54:07 +0530 > >>>> Kirti Wankhede <kwankhede@nvidia.com> wrote: > >>>>> +static void vfio_vmstate_change(void *opaque, int running, RunState state) > >>>>> +{ > >>>>> + VFIODevice *vbasedev = opaque; > >>>>> + > >>>>> + if ((vbasedev->vm_running != running)) { > >>>>> + int ret; > >>>>> + uint32_t value = 0, mask = 0; > >>>>> + > >>>>> + if (running) { > >>>>> + value = VFIO_DEVICE_STATE_RUNNING; > >>>>> + if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) { > >>>>> + mask = ~VFIO_DEVICE_STATE_RESUMING; > >>>> > >>>> I've been staring at this for some time and I think that the desired > >>>> result is > >>>> - set _RUNNING > >>>> - if _RESUMING was set, clear it, but leave the other bits intact > >> > >> Upto here, you're correct. > >> > >>>> - if _RESUMING was not set, clear everything previously set > >>>> This would really benefit from a comment (or am I the only one > >>>> struggling here?) > >>>> > >> > >> Here mask should be ~0. Correcting it. > > > > Hm, now I'm confused. With value == _RUNNING, ~_RUNNING and ~0 as mask > > should be equivalent, shouldn't they? > > > > I too got confused after reading your comment. > Lets walk through the device states and transitions can happen here: > > if running > - device state could be either _SAVING or _RESUMING or _STOP. Both > _SAVING and _RESUMING can't be set at a time, that is the error state. > _STOP means 0. > - Transition from _SAVING to _RUNNING can happen if there is migration > failure, in that case we have to clear _SAVING > - Transition from _RESUMING to _RUNNING can happen on resuming and we > have to clear _RESUMING. > - In both the above cases, we have to set _RUNNING and clear rest 2 bits. > Then: > mask = ~VFIO_DEVICE_STATE_MASK; > value = VFIO_DEVICE_STATE_RUNNING; ok > > if !running > - device state could be either _RUNNING or _SAVING|_RUNNING. Here we > have to reset running bit. > Then: > mask = ~VFIO_DEVICE_STATE_RUNNING; > value = 0; ok > > I'll add comment in the code above. That will help. I'm a bit worried though that all that reasoning which flags are set or cleared when is quite complex, and it's easy to make mistakes. Can we model this as a FSM, where an event (running state changes) transitions the device state from one state to another? I (personally) find FSMs easier to comprehend, but I'm not sure whether that change would be too invasive. If others can parse the state changes with that mask/value interface, I won't object to it. > > > >> > >> > >>>>> + } > >>>>> + } else { > >>>>> + mask = ~VFIO_DEVICE_STATE_RUNNING; > >>>>> + }
On 10/22/2020 1:21 PM, Cornelia Huck wrote: > On Wed, 21 Oct 2020 11:03:23 +0530 > Kirti Wankhede <kwankhede@nvidia.com> wrote: > >> On 10/20/2020 4:21 PM, Cornelia Huck wrote: >>> On Sun, 18 Oct 2020 01:54:56 +0530 >>> Kirti Wankhede <kwankhede@nvidia.com> wrote: >>> >>>> On 9/29/2020 4:33 PM, Dr. David Alan Gilbert wrote: >>>>> * Cornelia Huck (cohuck@redhat.com) wrote: >>>>>> On Wed, 23 Sep 2020 04:54:07 +0530 >>>>>> Kirti Wankhede <kwankhede@nvidia.com> wrote: > >>>>>>> +static void vfio_vmstate_change(void *opaque, int running, RunState state) >>>>>>> +{ >>>>>>> + VFIODevice *vbasedev = opaque; >>>>>>> + >>>>>>> + if ((vbasedev->vm_running != running)) { >>>>>>> + int ret; >>>>>>> + uint32_t value = 0, mask = 0; >>>>>>> + >>>>>>> + if (running) { >>>>>>> + value = VFIO_DEVICE_STATE_RUNNING; >>>>>>> + if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) { >>>>>>> + mask = ~VFIO_DEVICE_STATE_RESUMING; >>>>>> >>>>>> I've been staring at this for some time and I think that the desired >>>>>> result is >>>>>> - set _RUNNING >>>>>> - if _RESUMING was set, clear it, but leave the other bits intact >>>> >>>> Upto here, you're correct. >>>> >>>>>> - if _RESUMING was not set, clear everything previously set >>>>>> This would really benefit from a comment (or am I the only one >>>>>> struggling here?) >>>>>> >>>> >>>> Here mask should be ~0. Correcting it. >>> >>> Hm, now I'm confused. With value == _RUNNING, ~_RUNNING and ~0 as mask >>> should be equivalent, shouldn't they? >>> >> >> I too got confused after reading your comment. >> Lets walk through the device states and transitions can happen here: >> >> if running >> - device state could be either _SAVING or _RESUMING or _STOP. Both >> _SAVING and _RESUMING can't be set at a time, that is the error state. >> _STOP means 0. >> - Transition from _SAVING to _RUNNING can happen if there is migration >> failure, in that case we have to clear _SAVING >> - Transition from _RESUMING to _RUNNING can happen on resuming and we >> have to clear _RESUMING. >> - In both the above cases, we have to set _RUNNING and clear rest 2 bits. >> Then: >> mask = ~VFIO_DEVICE_STATE_MASK; >> value = VFIO_DEVICE_STATE_RUNNING; > > ok > >> >> if !running >> - device state could be either _RUNNING or _SAVING|_RUNNING. Here we >> have to reset running bit. >> Then: >> mask = ~VFIO_DEVICE_STATE_RUNNING; >> value = 0; > > ok > >> >> I'll add comment in the code above. > > That will help. > > I'm a bit worried though that all that reasoning which flags are set or > cleared when is quite complex, and it's easy to make mistakes. > > Can we model this as a FSM, where an event (running state changes) > transitions the device state from one state to another? I (personally) > find FSMs easier to comprehend, but I'm not sure whether that change > would be too invasive. If others can parse the state changes with that > mask/value interface, I won't object to it. > I agree FSM will be easy and for long term may be easy to maintain. But at this moment it will be intrusive change. For now we can go ahead with this code and later we can change to FSM model, if all agrees on it. Thanks, Kirti >> >> >>>> >>>> >>>>>>> + } >>>>>>> + } else { >>>>>>> + mask = ~VFIO_DEVICE_STATE_RUNNING; >>>>>>> + } >
On Thu, 22 Oct 2020 21:12:58 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > On 10/22/2020 1:21 PM, Cornelia Huck wrote: > > I'm a bit worried though that all that reasoning which flags are set or > > cleared when is quite complex, and it's easy to make mistakes. > > > > Can we model this as a FSM, where an event (running state changes) > > transitions the device state from one state to another? I (personally) > > find FSMs easier to comprehend, but I'm not sure whether that change > > would be too invasive. If others can parse the state changes with that > > mask/value interface, I won't object to it. > > > > I agree FSM will be easy and for long term may be easy to maintain. But > at this moment it will be intrusive change. For now we can go ahead with > this code and later we can change to FSM model, if all agrees on it. Yes, we can certainly revisit this later.
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index 2f760f1f9c47..a30d628ba963 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -10,6 +10,7 @@ #include "qemu/osdep.h" #include <linux/vfio.h> +#include "sysemu/runstate.h" #include "hw/vfio/vfio-common.h" #include "cpu.h" #include "migration/migration.h" @@ -22,6 +23,58 @@ #include "exec/ram_addr.h" #include "pci.h" #include "trace.h" +#include "hw/hw.h" + +static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, + off_t off, bool iswrite) +{ + int ret; + + ret = iswrite ? pwrite(vbasedev->fd, val, count, off) : + pread(vbasedev->fd, val, count, off); + if (ret < count) { + error_report("vfio_mig_%s%d %s: failed at offset 0x%lx, err: %s", + iswrite ? "write" : "read", count * 8, + vbasedev->name, off, strerror(errno)); + return (ret < 0) ? ret : -EINVAL; + } + return 0; +} + +static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count, + off_t off, bool iswrite) +{ + int ret, done = 0; + __u8 *tbuf = buf; + + while (count) { + int bytes = 0; + + if (count >= 8 && !(off % 8)) { + bytes = 8; + } else if (count >= 4 && !(off % 4)) { + bytes = 4; + } else if (count >= 2 && !(off % 2)) { + bytes = 2; + } else { + bytes = 1; + } + + ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite); + if (ret) { + return ret; + } + + count -= bytes; + done += bytes; + off += bytes; + tbuf += bytes; + } + return done; +} + +#define vfio_mig_read(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, false) +#define vfio_mig_write(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, true) static void vfio_migration_region_exit(VFIODevice *vbasedev) { @@ -70,6 +123,82 @@ err: return ret; } +static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, + uint32_t value) +{ + VFIOMigration *migration = vbasedev->migration; + VFIORegion *region = &migration->region; + off_t dev_state_off = region->fd_offset + + offsetof(struct vfio_device_migration_info, device_state); + uint32_t device_state; + int ret; + + ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), + dev_state_off); + if (ret < 0) { + return ret; + } + + device_state = (device_state & mask) | value; + + if (!VFIO_DEVICE_STATE_VALID(device_state)) { + return -EINVAL; + } + + ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state), + dev_state_off); + if (ret < 0) { + ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), + dev_state_off); + if (ret < 0) { + return ret; + } + + if (VFIO_DEVICE_STATE_IS_ERROR(device_state)) { + hw_error("%s: Device is in error state 0x%x", + vbasedev->name, device_state); + return -EFAULT; + } + } + + vbasedev->device_state = device_state; + trace_vfio_migration_set_state(vbasedev->name, device_state); + return 0; +} + +static void vfio_vmstate_change(void *opaque, int running, RunState state) +{ + VFIODevice *vbasedev = opaque; + + if ((vbasedev->vm_running != running)) { + int ret; + uint32_t value = 0, mask = 0; + + if (running) { + value = VFIO_DEVICE_STATE_RUNNING; + if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) { + mask = ~VFIO_DEVICE_STATE_RESUMING; + } + } else { + mask = ~VFIO_DEVICE_STATE_RUNNING; + } + + ret = vfio_migration_set_state(vbasedev, mask, value); + if (ret) { + /* + * vm_state_notify() doesn't support reporting failure. If such + * error reporting support added in furure, migration should be + * aborted. + */ + error_report("%s: Failed to set device state 0x%x", + vbasedev->name, value & mask); + } + vbasedev->vm_running = running; + trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), + value & mask); + } +} + static int vfio_migration_init(VFIODevice *vbasedev, struct vfio_region_info *info) { @@ -87,8 +216,11 @@ static int vfio_migration_init(VFIODevice *vbasedev, vbasedev->name); g_free(vbasedev->migration); vbasedev->migration = NULL; + return ret; } + vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, + vbasedev); return ret; } @@ -131,6 +263,10 @@ add_blocker: void vfio_migration_finalize(VFIODevice *vbasedev) { + if (vbasedev->vm_state) { + qemu_del_vm_change_state_handler(vbasedev->vm_state); + } + if (vbasedev->migration_blocker) { migrate_del_blocker(vbasedev->migration_blocker); error_free(vbasedev->migration_blocker); diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index 8fe913175d85..6524734bf7b4 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -149,4 +149,5 @@ vfio_display_edid_write_error(void) "" # migration.c vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" - +vfio_migration_set_state(char *name, uint32_t state) " (%s) state %d" +vfio_vmstate_change(char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 8275c4c68f45..25e3b1a3b90a 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -29,6 +29,7 @@ #ifdef CONFIG_LINUX #include <linux/vfio.h> #endif +#include "sysemu/sysemu.h" #define VFIO_MSG_PREFIX "vfio %s: " @@ -119,6 +120,9 @@ typedef struct VFIODevice { unsigned int flags; VFIOMigration *migration; Error *migration_blocker; + VMChangeStateEntry *vm_state; + uint32_t device_state; + int vm_running; } VFIODevice; struct VFIODeviceOps {