Message ID | 1573578324-8389-9-git-send-email-kwankhede@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add migration support for VFIO devices | expand |
On Tue, 12 Nov 2019 22:35:17 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > VM state change handler gets called on change in VM's state. This is used to set > VFIO device state to _RUNNING. > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > Reviewed-by: Neo Jia <cjia@nvidia.com> > --- > hw/vfio/migration.c | 69 +++++++++++++++++++++++++++++++++++++++++++ > hw/vfio/trace-events | 2 ++ > include/hw/vfio/vfio-common.h | 4 +++ > 3 files changed, 75 insertions(+) > > diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c > index c17bd1b0b934..28981a759e6c 100644 > --- a/hw/vfio/migration.c > +++ b/hw/vfio/migration.c > @@ -10,6 +10,7 @@ > #include "qemu/osdep.h" > #include <linux/vfio.h> > > +#include "sysemu/runstate.h" > #include "hw/vfio/vfio-common.h" > #include "cpu.h" > #include "migration/migration.h" > @@ -74,6 +75,67 @@ err: > return ret; > } > > +static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t set_flags, > + uint32_t clear_flags) > +{ Perhaps a mask and value interface like we have elsewhere? > + VFIOMigration *migration = vbasedev->migration; > + VFIORegion *region = &migration->region; > + uint32_t device_state; > + int ret = 0; > + > + /* same flags should not be set or clear */ > + assert(!(set_flags & clear_flags)); mask/value avoids this sort of thing. > + device_state = (vbasedev->device_state | set_flags) & ~clear_flags; Don't we need to re-read device_state from the region? We can't predict what those reserved bits will be used for, they could be volatile. If we adopt the convention that a reset returns the device to running, our cached state may be stale. > + > + switch (device_state & VFIO_DEVICE_STATE_MASK) { > + case VFIO_DEVICE_STATE_INVALID_CASE1: > + case VFIO_DEVICE_STATE_INVALID_CASE2: > + return -EINVAL; > + } I like the VALID macro better. 
> + > + ret = pwrite(vbasedev->fd, &device_state, sizeof(device_state), > + region->fd_offset + offsetof(struct vfio_device_migration_info, > + device_state)); > + if (ret < 0) { > + error_report("%s: Failed to set device state %d %s", > + vbasedev->name, ret, strerror(errno)); > + return ret; > + } > + > + vbasedev->device_state = device_state; Are we opposed to re-reading device_state, here and in the error case above? > + trace_vfio_migration_set_state(vbasedev->name, device_state); > + return 0; > +} > + > +static void vfio_vmstate_change(void *opaque, int running, RunState state) > +{ > + VFIODevice *vbasedev = opaque; > + > + if ((vbasedev->vm_running != running)) { > + int ret; > + uint32_t set_flags = 0, clear_flags = 0; > + > + if (running) { > + set_flags = VFIO_DEVICE_STATE_RUNNING; > + if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) { > + clear_flags = VFIO_DEVICE_STATE_RESUMING; > + } > + } else { > + clear_flags = VFIO_DEVICE_STATE_RUNNING; > + } > + > + ret = vfio_migration_set_state(vbasedev, set_flags, clear_flags); > + if (ret) { > + error_report("%s: Failed to set device state 0x%x", > + vbasedev->name, set_flags & ~clear_flags); > + } > + vbasedev->vm_running = running; We're effectively storing running both in vbasedev->device_state and vbasedev->vm_running, why? Seems like this could trivially know the initial state of the device is running. 
> + trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), > + set_flags & ~clear_flags); > + } > +} > + > static int vfio_migration_init(VFIODevice *vbasedev, > struct vfio_region_info *info) > { > @@ -89,6 +151,9 @@ static int vfio_migration_init(VFIODevice *vbasedev, > return ret; > } > > + vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, > + vbasedev); > + > return 0; > } > > @@ -127,6 +192,10 @@ add_blocker: > > void vfio_migration_finalize(VFIODevice *vbasedev) > { > + if (vbasedev->vm_state) { > + qemu_del_vm_change_state_handler(vbasedev->vm_state); > + } > + > if (vbasedev->migration_blocker) { > migrate_del_blocker(vbasedev->migration_blocker); > error_free(vbasedev->migration_blocker); > diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > index 191a726a1312..3d15bacd031a 100644 > --- a/hw/vfio/trace-events > +++ b/hw/vfio/trace-events > @@ -146,3 +146,5 @@ vfio_display_edid_write_error(void) "" > > # migration.c > vfio_migration_probe(char *name, uint32_t index) " (%s) Region %d" > +vfio_migration_set_state(char *name, uint32_t state) " (%s) state %d" > +vfio_vmstate_change(char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > index 927511897a44..6573acd6738e 100644 > --- a/include/hw/vfio/vfio-common.h > +++ b/include/hw/vfio/vfio-common.h > @@ -29,6 +29,7 @@ > #ifdef CONFIG_LINUX > #include <linux/vfio.h> > #endif > +#include "sysemu/sysemu.h" > > #define VFIO_MSG_PREFIX "vfio %s: " > > @@ -120,6 +121,9 @@ typedef struct VFIODevice { > unsigned int flags; > VFIOMigration *migration; > Error *migration_blocker; > + uint32_t device_state; > + VMChangeStateEntry *vm_state; > + int vm_running; Isn't this effectively a bool per our usage? Field ordering is wasteful. > } VFIODevice; > > struct VFIODeviceOps {
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index c17bd1b0b934..28981a759e6c 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -10,6 +10,7 @@ #include "qemu/osdep.h" #include <linux/vfio.h> +#include "sysemu/runstate.h" #include "hw/vfio/vfio-common.h" #include "cpu.h" #include "migration/migration.h" @@ -74,6 +75,67 @@ err: return ret; } +static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t set_flags, + uint32_t clear_flags) +{ + VFIOMigration *migration = vbasedev->migration; + VFIORegion *region = &migration->region; + uint32_t device_state; + int ret = 0; + + /* same flags should not be set or clear */ + assert(!(set_flags & clear_flags)); + + device_state = (vbasedev->device_state | set_flags) & ~clear_flags; + + switch (device_state & VFIO_DEVICE_STATE_MASK) { + case VFIO_DEVICE_STATE_INVALID_CASE1: + case VFIO_DEVICE_STATE_INVALID_CASE2: + return -EINVAL; + } + + ret = pwrite(vbasedev->fd, &device_state, sizeof(device_state), + region->fd_offset + offsetof(struct vfio_device_migration_info, + device_state)); + if (ret < 0) { + error_report("%s: Failed to set device state %d %s", + vbasedev->name, ret, strerror(errno)); + return ret; + } + + vbasedev->device_state = device_state; + trace_vfio_migration_set_state(vbasedev->name, device_state); + return 0; +} + +static void vfio_vmstate_change(void *opaque, int running, RunState state) +{ + VFIODevice *vbasedev = opaque; + + if ((vbasedev->vm_running != running)) { + int ret; + uint32_t set_flags = 0, clear_flags = 0; + + if (running) { + set_flags = VFIO_DEVICE_STATE_RUNNING; + if (vbasedev->device_state & VFIO_DEVICE_STATE_RESUMING) { + clear_flags = VFIO_DEVICE_STATE_RESUMING; + } + } else { + clear_flags = VFIO_DEVICE_STATE_RUNNING; + } + + ret = vfio_migration_set_state(vbasedev, set_flags, clear_flags); + if (ret) { + error_report("%s: Failed to set device state 0x%x", + vbasedev->name, set_flags & ~clear_flags); + } + vbasedev->vm_running = running; + 
trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), + set_flags & ~clear_flags); + } +} + static int vfio_migration_init(VFIODevice *vbasedev, struct vfio_region_info *info) { @@ -89,6 +151,9 @@ static int vfio_migration_init(VFIODevice *vbasedev, return ret; } + vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, + vbasedev); + return 0; } @@ -127,6 +192,10 @@ add_blocker: void vfio_migration_finalize(VFIODevice *vbasedev) { + if (vbasedev->vm_state) { + qemu_del_vm_change_state_handler(vbasedev->vm_state); + } + if (vbasedev->migration_blocker) { migrate_del_blocker(vbasedev->migration_blocker); error_free(vbasedev->migration_blocker); diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index 191a726a1312..3d15bacd031a 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -146,3 +146,5 @@ vfio_display_edid_write_error(void) "" # migration.c vfio_migration_probe(char *name, uint32_t index) " (%s) Region %d" +vfio_migration_set_state(char *name, uint32_t state) " (%s) state %d" +vfio_vmstate_change(char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 927511897a44..6573acd6738e 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -29,6 +29,7 @@ #ifdef CONFIG_LINUX #include <linux/vfio.h> #endif +#include "sysemu/sysemu.h" #define VFIO_MSG_PREFIX "vfio %s: " @@ -120,6 +121,9 @@ typedef struct VFIODevice { unsigned int flags; VFIOMigration *migration; Error *migration_blocker; + uint32_t device_state; + VMChangeStateEntry *vm_state; + int vm_running; } VFIODevice; struct VFIODeviceOps {