Message ID | 09463235e0aa30e48e40bd6c89d07f56f4140a93.1741124640.git.maciej.szmigiero@oracle.com (mailing list archive)
State      | New
Series     | Multifd
On 3/4/25 23:04, Maciej S. Szmigiero wrote:
> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>
> Allow capping the maximum count of in-flight VFIO device state buffers
> queued at the destination, otherwise a malicious QEMU source could
> theoretically cause the target QEMU to allocate unlimited amounts of memory
> for buffers-in-flight.
>
> Since this is not expected to be a realistic threat in most of VFIO live
> migration use cases and the right value depends on the particular setup
> disable the limit by default by setting it to UINT64_MAX.

I agree with Avihai that a limit on bytes would make more sense.
-rc0 is in ~2w. We have time to prepare a patch for this.

Should there be a correlation with :

/*
 * This is an arbitrary size based on migration of mlx5 devices, where typically
 * total device migration size is on the order of 100s of MB. Testing with
 * larger values, e.g. 128MB and 1GB, did not show a performance improvement.
 */
#define VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE (1 * MiB)

Thanks,

C.

> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
> ---
>  hw/vfio/migration-multifd.c   | 16 ++++++++++++++++
>  hw/vfio/pci.c                 |  9 +++++++++
>  include/hw/vfio/vfio-common.h |  1 +
>  3 files changed, 26 insertions(+)
> [...]
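[Editor's note: for illustration of the correlation Cédric asks about, a byte cap could be expressed as a multiple of the existing chunk size. The macro name and the multiplier below are hypothetical and not from any posted patch:

/* Hypothetical: cap queued destination state at N transfer chunks' worth of bytes */
#define VFIO_MIG_DEFAULT_MAX_QUEUED_BYTES (128 * VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE)
]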
On 5.03.2025 10:19, Cédric Le Goater wrote:
> On 3/4/25 23:04, Maciej S. Szmigiero wrote:
>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>>
>> Allow capping the maximum count of in-flight VFIO device state buffers
>> queued at the destination, otherwise a malicious QEMU source could
>> theoretically cause the target QEMU to allocate unlimited amounts of memory
>> for buffers-in-flight.
>>
>> Since this is not expected to be a realistic threat in most of VFIO live
>> migration use cases and the right value depends on the particular setup
>> disable the limit by default by setting it to UINT64_MAX.
>
> I agree with Avihai that a limit on bytes would make more sense.
> -rc0 is in ~2w. We have time to prepare a patch for this.

According to https://wiki.qemu.org/Planning/10.0 "Soft feature freeze"
is next Tuesday.

Do you still want to have that patch with a new byte limit applied
after that?

>
> Should there be a correlation with :
>
> /*
>  * This is an arbitrary size based on migration of mlx5 devices, where typically
>  * total device migration size is on the order of 100s of MB. Testing with
>  * larger values, e.g. 128MB and 1GB, did not show a performance improvement.
>  */
> #define VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE (1 * MiB)

I think we could simply have a counter of queued bytes up to this point
and then abort/error out if the set amount of bytes is exceeded.

> Thanks,
>
> C.

Thanks,
Maciej
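[Editor's note: a minimal sketch of the byte-counter approach described above, mirroring the structure of the count-based check in vfio_load_state_buffer_insert(). The load_buf_queued_pending_bytes field and migration_max_queued_bytes property are hypothetical names, not part of the posted series:

    /* Hypothetical byte-based variant of the count check (sketch only) */
    uint64_t data_size = packet_total_size - sizeof(*packet);

    multifd->load_buf_queued_pending_bytes += data_size;
    if (multifd->load_buf_queued_pending_bytes >
        vbasedev->migration_max_queued_bytes) {
        error_setg(errp,
                   "%s: queuing state buffer %" PRIu32
                   " would exceed the queued bytes limit of %" PRIu64,
                   vbasedev->name, packet->idx,
                   vbasedev->migration_max_queued_bytes);
        return false;
    }

The counter would then be decremented by lb->len in vfio_load_bufs_thread() once a buffer has been loaded into the device, analogously to how load_buf_queued_pending_buffers is decremented in the patch below.]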
On 3/5/25 16:11, Maciej S. Szmigiero wrote:
> On 5.03.2025 10:19, Cédric Le Goater wrote:
>> On 3/4/25 23:04, Maciej S. Szmigiero wrote:
>>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>>>
>>> Allow capping the maximum count of in-flight VFIO device state buffers
>>> queued at the destination, otherwise a malicious QEMU source could
>>> theoretically cause the target QEMU to allocate unlimited amounts of memory
>>> for buffers-in-flight.
>>>
>>> Since this is not expected to be a realistic threat in most of VFIO live
>>> migration use cases and the right value depends on the particular setup
>>> disable the limit by default by setting it to UINT64_MAX.
>>
>> I agree with Avihai that a limit on bytes would make more sense.
>> -rc0 is in ~2w. We have time to prepare a patch for this.
>
> According to https://wiki.qemu.org/Planning/10.0 "Soft feature freeze"
> is next Tuesday.
>
> Do you still want to have that patch with a new byte limit applied
> after that?

yes. It has been discussed and we can still merge stuff until the
hard freeze. After that, it's fixes only.

Thanks,

C.

>
>>
>> Should there be a correlation with :
>>
>> /*
>>  * This is an arbitrary size based on migration of mlx5 devices, where typically
>>  * total device migration size is on the order of 100s of MB. Testing with
>>  * larger values, e.g. 128MB and 1GB, did not show a performance improvement.
>>  */
>> #define VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE (1 * MiB)
>
> I think we could simply have a counter of queued bytes up to this point
> and then abort/error out if the set amount of bytes is exceeded.
>
>> Thanks,
>>
>> C.
>
> Thanks,
> Maciej
>
On 5.03.2025 17:39, Cédric Le Goater wrote:
> On 3/5/25 16:11, Maciej S. Szmigiero wrote:
>> On 5.03.2025 10:19, Cédric Le Goater wrote:
>>> On 3/4/25 23:04, Maciej S. Szmigiero wrote:
>>>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>>>>
>>>> Allow capping the maximum count of in-flight VFIO device state buffers
>>>> queued at the destination, otherwise a malicious QEMU source could
>>>> theoretically cause the target QEMU to allocate unlimited amounts of memory
>>>> for buffers-in-flight.
>>>>
>>>> Since this is not expected to be a realistic threat in most of VFIO live
>>>> migration use cases and the right value depends on the particular setup
>>>> disable the limit by default by setting it to UINT64_MAX.
>>>
>>> I agree with Avihai that a limit on bytes would make more sense.
>>> -rc0 is in ~2w. We have time to prepare a patch for this.
>>
>> According to https://wiki.qemu.org/Planning/10.0 "Soft feature freeze"
>> is next Tuesday.
>>
>> Do you still want to have that patch with a new byte limit applied
>> after that?
>
> yes. It has been discussed and we can still merge stuff until the
> hard freeze. After that, it's fixes only.

All right, I can/will prepare such a patch then after we're done with
the discussion on the existing/basic patch set.

> Thanks,
>
> C.
>

Thanks,
Maciej
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
index 233724710b37..d6dabaf869ca 100644
--- a/hw/vfio/migration-multifd.c
+++ b/hw/vfio/migration-multifd.c
@@ -54,6 +54,7 @@ typedef struct VFIOMultifd {
     QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */
     uint32_t load_buf_idx;
     uint32_t load_buf_idx_last;
+    uint32_t load_buf_queued_pending_buffers;
 } VFIOMultifd;

 static void vfio_state_buffer_clear(gpointer data)
@@ -125,6 +126,17 @@ static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev,

     assert(packet->idx >= multifd->load_buf_idx);

+    multifd->load_buf_queued_pending_buffers++;
+    if (multifd->load_buf_queued_pending_buffers >
+        vbasedev->migration_max_queued_buffers) {
+        error_setg(errp,
+                   "%s: queuing state buffer %" PRIu32
+                   " would exceed the max of %" PRIu64,
+                   vbasedev->name, packet->idx,
+                   vbasedev->migration_max_queued_buffers);
+        return false;
+    }
+
     lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet));
     lb->len = packet_total_size - sizeof(*packet);
     lb->is_present = true;
@@ -381,6 +393,9 @@ static bool vfio_load_bufs_thread(void *opaque, bool *should_quit, Error **errp)
         goto thread_exit;
     }

+    assert(multifd->load_buf_queued_pending_buffers > 0);
+    multifd->load_buf_queued_pending_buffers--;
+
     if (multifd->load_buf_idx == multifd->load_buf_idx_last - 1) {
         trace_vfio_load_state_device_buffer_end(vbasedev->name);
     }
@@ -417,6 +432,7 @@ static VFIOMultifd *vfio_multifd_new(void)

     multifd->load_buf_idx = 0;
     multifd->load_buf_idx_last = UINT32_MAX;
+    multifd->load_buf_queued_pending_buffers = 0;
     qemu_cond_init(&multifd->load_bufs_buffer_ready_cond);

     multifd->load_bufs_thread_running = false;
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 21605bac2fb0..ce407f971000 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3383,6 +3383,8 @@ static const Property vfio_pci_dev_properties[] = {
                             vbasedev.migration_multifd_transfer,
                             vfio_pci_migration_multifd_transfer_prop, OnOffAuto,
                             .set_default = true, .defval.i = ON_OFF_AUTO_AUTO),
+    DEFINE_PROP_UINT64("x-migration-max-queued-buffers", VFIOPCIDevice,
+                       vbasedev.migration_max_queued_buffers, UINT64_MAX),
     DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice,
                      vbasedev.migration_events, false),
     DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
@@ -3444,6 +3446,13 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
                                           "x-migration-multifd-transfer",
                                           "Transfer this device state via "
                                           "multifd channels when live migrating it");
+    object_class_property_set_description(klass, /* 10.0 */
+                                          "x-migration-max-queued-buffers",
+                                          "Maximum count of in-flight VFIO "
+                                          "device state buffers queued at the "
+                                          "destination when doing live "
+                                          "migration of device state via "
+                                          "multifd channels");
 }

 static const TypeInfo vfio_pci_dev_info = {
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 04b123a6c929..c033c3c5134f 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -155,6 +155,7 @@ typedef struct VFIODevice {
     bool ram_block_discard_allowed;
     OnOffAuto enable_migration;
     OnOffAuto migration_multifd_transfer;
+    uint64_t migration_max_queued_buffers;
     bool migration_events;
     VFIODeviceOps *ops;
     unsigned int num_irqs;
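[Editor's note: with this patch applied, the cap is set per device on the destination via the new x-migration-max-queued-buffers property; the default of UINT64_MAX leaves the limit effectively disabled. A destination-side device option might look like the following, where the host PCI address and the value 128 are placeholders:

    -device vfio-pci,host=0000:3b:00.0,x-migration-max-queued-buffers=128
]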