Message ID | e3bff515a8d61c582b94b409eb12a45b1a143a69.1741124640.git.maciej.szmigiero@oracle.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Multifd | expand |
On 3/4/25 23:03, Maciej S. Szmigiero wrote: > From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> > > The multifd received data needs to be reassembled since device state > packets sent via different multifd channels can arrive out-of-order. > > Therefore, each VFIO device state packet carries a header indicating its > position in the stream. > The raw device state data is saved into a VFIOStateBuffer for later > in-order loading into the device. > > The last such VFIO device state packet should have > VFIO_DEVICE_STATE_CONFIG_STATE flag set and carry the device config state. > > Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> Reviewed-by: Cédric Le Goater <clg@redhat.com> Thanks, C. > --- > hw/vfio/migration-multifd.c | 163 ++++++++++++++++++++++++++++++++++++ > hw/vfio/migration-multifd.h | 3 + > hw/vfio/migration.c | 1 + > hw/vfio/trace-events | 1 + > 4 files changed, 168 insertions(+) > > diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c > index 091dc43210ad..79df11b7baa9 100644 > --- a/hw/vfio/migration-multifd.c > +++ b/hw/vfio/migration-multifd.c > @@ -32,18 +32,181 @@ typedef struct VFIODeviceStatePacket { > uint8_t data[0]; > } QEMU_PACKED VFIODeviceStatePacket; > > +/* type safety */ > +typedef struct VFIOStateBuffers { > + GArray *array; > +} VFIOStateBuffers; > + > +typedef struct VFIOStateBuffer { > + bool is_present; > + char *data; > + size_t len; > +} VFIOStateBuffer; > + > typedef struct VFIOMultifd { > + VFIOStateBuffers load_bufs; > + QemuCond load_bufs_buffer_ready_cond; > + QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */ > + uint32_t load_buf_idx; > + uint32_t load_buf_idx_last; > } VFIOMultifd; > > +static void vfio_state_buffer_clear(gpointer data) > +{ > + VFIOStateBuffer *lb = data; > + > + if (!lb->is_present) { > + return; > + } > + > + g_clear_pointer(&lb->data, g_free); > + lb->is_present = false; > +} > + > +static void vfio_state_buffers_init(VFIOStateBuffers *bufs) > +{ > + bufs->array = g_array_new(FALSE, TRUE, sizeof(VFIOStateBuffer)); > + g_array_set_clear_func(bufs->array, vfio_state_buffer_clear); > +} > + > +static void vfio_state_buffers_destroy(VFIOStateBuffers *bufs) > +{ > + g_clear_pointer(&bufs->array, g_array_unref); > +} > + > +static void vfio_state_buffers_assert_init(VFIOStateBuffers *bufs) > +{ > + assert(bufs->array); > +} > + > +static unsigned int vfio_state_buffers_size_get(VFIOStateBuffers *bufs) > +{ > + return bufs->array->len; > +} > + > +static void vfio_state_buffers_size_set(VFIOStateBuffers *bufs, > + unsigned int size) > +{ > + g_array_set_size(bufs->array, size); > +} > + > +static VFIOStateBuffer *vfio_state_buffers_at(VFIOStateBuffers *bufs, > + unsigned int idx) > +{ > + return &g_array_index(bufs->array, VFIOStateBuffer, idx); > +} > + > +/* called with load_bufs_mutex locked */ > +static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev, > + VFIODeviceStatePacket *packet, > + size_t packet_total_size, > + Error **errp) > +{ > + VFIOMigration *migration = vbasedev->migration; > + VFIOMultifd *multifd = migration->multifd; > + VFIOStateBuffer *lb; > + > + vfio_state_buffers_assert_init(&multifd->load_bufs); > + if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) { > + vfio_state_buffers_size_set(&multifd->load_bufs, packet->idx + 1); > + } > + > + lb = vfio_state_buffers_at(&multifd->load_bufs, packet->idx); > + if (lb->is_present) { > + error_setg(errp, "%s: state buffer %" PRIu32 " already filled", > + vbasedev->name, packet->idx); > + return false; > + } > + > + assert(packet->idx >= multifd->load_buf_idx); > + > + lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet)); > + lb->len = packet_total_size - sizeof(*packet); > + lb->is_present = true; > + > + return true; > +} > + > +bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size, > + Error **errp) > +{ > + VFIODevice *vbasedev = opaque; > + VFIOMigration *migration = vbasedev->migration; > + VFIOMultifd *multifd = migration->multifd; > + VFIODeviceStatePacket *packet = (VFIODeviceStatePacket *)data; > + > + if (!vfio_multifd_transfer_enabled(vbasedev)) { > + error_setg(errp, > + "%s: got device state packet but not doing multifd transfer", > + vbasedev->name); > + return false; > + } > + > + assert(multifd); > + > + if (data_size < sizeof(*packet)) { > + error_setg(errp, "%s: packet too short at %zu (min is %zu)", > + vbasedev->name, data_size, sizeof(*packet)); > + return false; > + } > + > + if (packet->version != VFIO_DEVICE_STATE_PACKET_VER_CURRENT) { > + error_setg(errp, "%s: packet has unknown version %" PRIu32, > + vbasedev->name, packet->version); > + return false; > + } > + > + if (packet->idx == UINT32_MAX) { > + error_setg(errp, "%s: packet index is invalid", vbasedev->name); > + return false; > + } > + > + trace_vfio_load_state_device_buffer_incoming(vbasedev->name, packet->idx); > + > + /* > + * Holding BQL here would violate the lock order and can cause > + * a deadlock once we attempt to lock load_bufs_mutex below. > + */ > + assert(!bql_locked()); > + > + WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) { > + /* config state packet should be the last one in the stream */ > + if (packet->flags & VFIO_DEVICE_STATE_CONFIG_STATE) { > + multifd->load_buf_idx_last = packet->idx; > + } > + > + if (!vfio_load_state_buffer_insert(vbasedev, packet, data_size, > + errp)) { > + return false; > + } > + > + qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond); > + } > + > + return true; > +} > + > static VFIOMultifd *vfio_multifd_new(void) > { > VFIOMultifd *multifd = g_new(VFIOMultifd, 1); > > + vfio_state_buffers_init(&multifd->load_bufs); > + > + qemu_mutex_init(&multifd->load_bufs_mutex); > + > + multifd->load_buf_idx = 0; > + multifd->load_buf_idx_last = UINT32_MAX; > + qemu_cond_init(&multifd->load_bufs_buffer_ready_cond); > + > return multifd; > } > > static void vfio_multifd_free(VFIOMultifd *multifd) > { > + vfio_state_buffers_destroy(&multifd->load_bufs); > + qemu_cond_destroy(&multifd->load_bufs_buffer_ready_cond); > + qemu_mutex_destroy(&multifd->load_bufs_mutex); > + > g_free(multifd); > } > > diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h > index 2a7a76164f29..8c6320fcb484 100644 > --- a/hw/vfio/migration-multifd.h > +++ b/hw/vfio/migration-multifd.h > @@ -20,4 +20,7 @@ void vfio_multifd_cleanup(VFIODevice *vbasedev); > bool vfio_multifd_transfer_supported(void); > bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev); > > +bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size, > + Error **errp); > + > #endif > diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c > index 3c8286ae6230..ecc4ee940567 100644 > --- a/hw/vfio/migration.c > +++ b/hw/vfio/migration.c > @@ -801,6 +801,7 @@ static const SaveVMHandlers savevm_vfio_handlers = { > .load_setup = vfio_load_setup, > .load_cleanup = vfio_load_cleanup, > .load_state = vfio_load_state, > + .load_state_buffer = vfio_multifd_load_state_buffer, > .switchover_ack_needed = vfio_switchover_ack_needed, > }; > > diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > index a02c668f28a4..404ea079b25c 100644 > --- a/hw/vfio/trace-events > +++ b/hw/vfio/trace-events > @@ -154,6 +154,7 @@ vfio_load_device_config_state_start(const char *name) " (%s)" > vfio_load_device_config_state_end(const char *name) " (%s)" > vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 > vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size %"PRIu64" ret %d" > +vfio_load_state_device_buffer_incoming(const char *name, uint32_t idx) " (%s) idx %"PRIu32 > vfio_migration_realize(const char *name) " (%s)" > vfio_migration_set_device_state(const char *name, const char *state) " (%s) state %s" > vfio_migration_set_state(const char *name, const char *new_state, const char *recover_state) " (%s) new state %s, recover state %s" >
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c index 091dc43210ad..79df11b7baa9 100644 --- a/hw/vfio/migration-multifd.c +++ b/hw/vfio/migration-multifd.c @@ -32,18 +32,181 @@ typedef struct VFIODeviceStatePacket { uint8_t data[0]; } QEMU_PACKED VFIODeviceStatePacket; +/* type safety */ +typedef struct VFIOStateBuffers { + GArray *array; +} VFIOStateBuffers; + +typedef struct VFIOStateBuffer { + bool is_present; + char *data; + size_t len; +} VFIOStateBuffer; + typedef struct VFIOMultifd { + VFIOStateBuffers load_bufs; + QemuCond load_bufs_buffer_ready_cond; + QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */ + uint32_t load_buf_idx; + uint32_t load_buf_idx_last; } VFIOMultifd; +static void vfio_state_buffer_clear(gpointer data) +{ + VFIOStateBuffer *lb = data; + + if (!lb->is_present) { + return; + } + + g_clear_pointer(&lb->data, g_free); + lb->is_present = false; +} + +static void vfio_state_buffers_init(VFIOStateBuffers *bufs) +{ + bufs->array = g_array_new(FALSE, TRUE, sizeof(VFIOStateBuffer)); + g_array_set_clear_func(bufs->array, vfio_state_buffer_clear); +} + +static void vfio_state_buffers_destroy(VFIOStateBuffers *bufs) +{ + g_clear_pointer(&bufs->array, g_array_unref); +} + +static void vfio_state_buffers_assert_init(VFIOStateBuffers *bufs) +{ + assert(bufs->array); +} + +static unsigned int vfio_state_buffers_size_get(VFIOStateBuffers *bufs) +{ + return bufs->array->len; +} + +static void vfio_state_buffers_size_set(VFIOStateBuffers *bufs, + unsigned int size) +{ + g_array_set_size(bufs->array, size); +} + +static VFIOStateBuffer *vfio_state_buffers_at(VFIOStateBuffers *bufs, + unsigned int idx) +{ + return &g_array_index(bufs->array, VFIOStateBuffer, idx); +} + +/* called with load_bufs_mutex locked */ +static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev, + VFIODeviceStatePacket *packet, + size_t packet_total_size, + Error **errp) +{ + VFIOMigration *migration = vbasedev->migration; + VFIOMultifd *multifd = migration->multifd; + VFIOStateBuffer *lb; + + vfio_state_buffers_assert_init(&multifd->load_bufs); + if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) { + vfio_state_buffers_size_set(&multifd->load_bufs, packet->idx + 1); + } + + lb = vfio_state_buffers_at(&multifd->load_bufs, packet->idx); + if (lb->is_present) { + error_setg(errp, "%s: state buffer %" PRIu32 " already filled", + vbasedev->name, packet->idx); + return false; + } + + assert(packet->idx >= multifd->load_buf_idx); + + lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet)); + lb->len = packet_total_size - sizeof(*packet); + lb->is_present = true; + + return true; +} + +bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size, + Error **errp) +{ + VFIODevice *vbasedev = opaque; + VFIOMigration *migration = vbasedev->migration; + VFIOMultifd *multifd = migration->multifd; + VFIODeviceStatePacket *packet = (VFIODeviceStatePacket *)data; + + if (!vfio_multifd_transfer_enabled(vbasedev)) { + error_setg(errp, + "%s: got device state packet but not doing multifd transfer", + vbasedev->name); + return false; + } + + assert(multifd); + + if (data_size < sizeof(*packet)) { + error_setg(errp, "%s: packet too short at %zu (min is %zu)", + vbasedev->name, data_size, sizeof(*packet)); + return false; + } + + if (packet->version != VFIO_DEVICE_STATE_PACKET_VER_CURRENT) { + error_setg(errp, "%s: packet has unknown version %" PRIu32, + vbasedev->name, packet->version); + return false; + } + + if (packet->idx == UINT32_MAX) { + error_setg(errp, "%s: packet index is invalid", vbasedev->name); + return false; + } + + trace_vfio_load_state_device_buffer_incoming(vbasedev->name, packet->idx); + + /* + * Holding BQL here would violate the lock order and can cause + * a deadlock once we attempt to lock load_bufs_mutex below. + */ + assert(!bql_locked()); + + WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) { + /* config state packet should be the last one in the stream */ + if (packet->flags & VFIO_DEVICE_STATE_CONFIG_STATE) { + multifd->load_buf_idx_last = packet->idx; + } + + if (!vfio_load_state_buffer_insert(vbasedev, packet, data_size, + errp)) { + return false; + } + + qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond); + } + + return true; +} + static VFIOMultifd *vfio_multifd_new(void) { VFIOMultifd *multifd = g_new(VFIOMultifd, 1); + vfio_state_buffers_init(&multifd->load_bufs); + + qemu_mutex_init(&multifd->load_bufs_mutex); + + multifd->load_buf_idx = 0; + multifd->load_buf_idx_last = UINT32_MAX; + qemu_cond_init(&multifd->load_bufs_buffer_ready_cond); + return multifd; } static void vfio_multifd_free(VFIOMultifd *multifd) { + vfio_state_buffers_destroy(&multifd->load_bufs); + qemu_cond_destroy(&multifd->load_bufs_buffer_ready_cond); + qemu_mutex_destroy(&multifd->load_bufs_mutex); + g_free(multifd); } diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h index 2a7a76164f29..8c6320fcb484 100644 --- a/hw/vfio/migration-multifd.h +++ b/hw/vfio/migration-multifd.h @@ -20,4 +20,7 @@ void vfio_multifd_cleanup(VFIODevice *vbasedev); bool vfio_multifd_transfer_supported(void); bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev); +bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size, + Error **errp); + #endif diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index 3c8286ae6230..ecc4ee940567 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -801,6 +801,7 @@ static const SaveVMHandlers savevm_vfio_handlers = { .load_setup = vfio_load_setup, .load_cleanup = vfio_load_cleanup, .load_state = vfio_load_state, + .load_state_buffer = vfio_multifd_load_state_buffer, .switchover_ack_needed = vfio_switchover_ack_needed, }; diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index a02c668f28a4..404ea079b25c 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -154,6 +154,7 @@ vfio_load_device_config_state_start(const char *name) " (%s)" vfio_load_device_config_state_end(const char *name) " (%s)" vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size %"PRIu64" ret %d" +vfio_load_state_device_buffer_incoming(const char *name, uint32_t idx) " (%s) idx %"PRIu32 vfio_migration_realize(const char *name) " (%s)" vfio_migration_set_device_state(const char *name, const char *state) " (%s) state %s" vfio_migration_set_state(const char *name, const char *new_state, const char *recover_state) " (%s) new state %s, recover state %s"