Message ID | 1738161802-172631-12-git-send-email-steven.sistare@oracle.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Live update: vfio and iommufd | expand |
On 1/29/25 15:43, Steve Sistare wrote: > Do not reset a vfio-pci device during CPR, and do not complain if the > kernel's PCI config space changes for non-emulated bits between the > vmstate save and load, which can happen due to ongoing interrupt activity. > > Signed-off-by: Steve Sistare <steven.sistare@oracle.com> > --- > hw/vfio/pci.c | 37 +++++++++++++++++++++++++++++++++++++ > 1 file changed, 37 insertions(+) > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index 24ebd69..fa77c36 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -29,6 +29,8 @@ > #include "hw/pci/pci_bridge.h" > #include "hw/qdev-properties.h" > #include "hw/qdev-properties-system.h" > +#include "migration/misc.h" > +#include "migration/cpr.h" > #include "migration/vmstate.h" > #include "qapi/qmp/qdict.h" > #include "qemu/error-report.h" > @@ -3324,6 +3326,11 @@ static void vfio_pci_reset(DeviceState *dev) > { > VFIOPCIDevice *vdev = VFIO_PCI(dev); > > + /* Do not reset the device during qemu_system_reset prior to cpr load */ > + if (vdev->vbasedev.reused) { > + return; > + } > + sometime we use : MigMode mode = migrate_mode(); if (mode == MIG_MODE_CPR_TRANSFER) { return; } Why is this different ? This is confusing. Thanks, C. > trace_vfio_pci_reset(vdev->vbasedev.name); > > vfio_pci_pre_reset(vdev); > @@ -3447,6 +3454,35 @@ static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp) > } > #endif > > +/* > + * The kernel may change non-emulated config bits. Exclude them from the > + * changed-bits check in get_pci_config_device. > + */ > +static int vfio_pci_pre_load(void *opaque) > +{ > + VFIOPCIDevice *vdev = opaque; > + PCIDevice *pdev = &vdev->pdev; > + int size = MIN(pci_config_size(pdev), vdev->config_size); > + int i; > + > + for (i = 0; i < size; i++) { > + pdev->cmask[i] &= vdev->emulated_config_bits[i]; > + } > + > + return 0; > +} > + > +static const VMStateDescription vfio_pci_vmstate = { > + .name = "vfio-pci", > + .version_id = 0, > + .minimum_version_id = 0, > + .pre_load = vfio_pci_pre_load, > + .needed = cpr_needed_for_reuse, > + .fields = (VMStateField[]) { > + VMSTATE_END_OF_LIST() > + } > +}; > + > static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) > { > DeviceClass *dc = DEVICE_CLASS(klass); > @@ -3457,6 +3493,7 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) > #ifdef CONFIG_IOMMUFD > object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd); > #endif > + dc->vmsd = &vfio_pci_vmstate; > dc->desc = "VFIO-based PCI device assignment"; > set_bit(DEVICE_CATEGORY_MISC, dc->categories); > pdc->realize = vfio_realize;
On 2/4/2025 9:56 AM, Cédric Le Goater wrote: > On 1/29/25 15:43, Steve Sistare wrote: >> Do not reset a vfio-pci device during CPR, and do not complain if the >> kernel's PCI config space changes for non-emulated bits between the >> vmstate save and load, which can happen due to ongoing interrupt activity. >> >> Signed-off-by: Steve Sistare <steven.sistare@oracle.com> >> --- >> hw/vfio/pci.c | 37 +++++++++++++++++++++++++++++++++++++ >> 1 file changed, 37 insertions(+) >> >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c >> index 24ebd69..fa77c36 100644 >> --- a/hw/vfio/pci.c >> +++ b/hw/vfio/pci.c >> @@ -29,6 +29,8 @@ >> #include "hw/pci/pci_bridge.h" >> #include "hw/qdev-properties.h" >> #include "hw/qdev-properties-system.h" >> +#include "migration/misc.h" >> +#include "migration/cpr.h" >> #include "migration/vmstate.h" >> #include "qapi/qmp/qdict.h" >> #include "qemu/error-report.h" >> @@ -3324,6 +3326,11 @@ static void vfio_pci_reset(DeviceState *dev) >> { >> VFIOPCIDevice *vdev = VFIO_PCI(dev); >> + /* Do not reset the device during qemu_system_reset prior to cpr load */ >> + if (vdev->vbasedev.reused) { >> + return; >> + } >> + > > sometime we use : > > MigMode mode = migrate_mode(); > if (mode == MIG_MODE_CPR_TRANSFER) { > return; > } > > Why is this different ? This is confusing. I try to use local state -- object's knowledge about themselves -- whenever possible. Sometimes that is less desirable. For example, in pci_do_device_reset I test mode, rather than add a reused field to the generic PCIDevice, because the pci code would not use the reused field anywhere else. - Steve >> trace_vfio_pci_reset(vdev->vbasedev.name); >> vfio_pci_pre_reset(vdev); >> @@ -3447,6 +3454,35 @@ static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp) >> } >> #endif >> +/* >> + * The kernel may change non-emulated config bits. Exclude them from the >> + * changed-bits check in get_pci_config_device. >> + */ >> +static int vfio_pci_pre_load(void *opaque) >> +{ >> + VFIOPCIDevice *vdev = opaque; >> + PCIDevice *pdev = &vdev->pdev; >> + int size = MIN(pci_config_size(pdev), vdev->config_size); >> + int i; >> + >> + for (i = 0; i < size; i++) { >> + pdev->cmask[i] &= vdev->emulated_config_bits[i]; >> + } >> + >> + return 0; >> +} >> + >> +static const VMStateDescription vfio_pci_vmstate = { >> + .name = "vfio-pci", >> + .version_id = 0, >> + .minimum_version_id = 0, >> + .pre_load = vfio_pci_pre_load, >> + .needed = cpr_needed_for_reuse, >> + .fields = (VMStateField[]) { >> + VMSTATE_END_OF_LIST() >> + } >> +}; >> + >> static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) >> { >> DeviceClass *dc = DEVICE_CLASS(klass); >> @@ -3457,6 +3493,7 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) >> #ifdef CONFIG_IOMMUFD >> object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd); >> #endif >> + dc->vmsd = &vfio_pci_vmstate; >> dc->desc = "VFIO-based PCI device assignment"; >> set_bit(DEVICE_CATEGORY_MISC, dc->categories); >> pdc->realize = vfio_realize; >
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 24ebd69..fa77c36 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -29,6 +29,8 @@ #include "hw/pci/pci_bridge.h" #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" +#include "migration/misc.h" +#include "migration/cpr.h" #include "migration/vmstate.h" #include "qapi/qmp/qdict.h" #include "qemu/error-report.h" @@ -3324,6 +3326,11 @@ static void vfio_pci_reset(DeviceState *dev) { VFIOPCIDevice *vdev = VFIO_PCI(dev); + /* Do not reset the device during qemu_system_reset prior to cpr load */ + if (vdev->vbasedev.reused) { + return; + } + trace_vfio_pci_reset(vdev->vbasedev.name); vfio_pci_pre_reset(vdev); @@ -3447,6 +3454,35 @@ static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp) } #endif +/* + * The kernel may change non-emulated config bits. Exclude them from the + * changed-bits check in get_pci_config_device. + */ +static int vfio_pci_pre_load(void *opaque) +{ + VFIOPCIDevice *vdev = opaque; + PCIDevice *pdev = &vdev->pdev; + int size = MIN(pci_config_size(pdev), vdev->config_size); + int i; + + for (i = 0; i < size; i++) { + pdev->cmask[i] &= vdev->emulated_config_bits[i]; + } + + return 0; +} + +static const VMStateDescription vfio_pci_vmstate = { + .name = "vfio-pci", + .version_id = 0, + .minimum_version_id = 0, + .pre_load = vfio_pci_pre_load, + .needed = cpr_needed_for_reuse, + .fields = (VMStateField[]) { + VMSTATE_END_OF_LIST() + } +}; + static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -3457,6 +3493,7 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) #ifdef CONFIG_IOMMUFD object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd); #endif + dc->vmsd = &vfio_pci_vmstate; dc->desc = "VFIO-based PCI device assignment"; set_bit(DEVICE_CATEGORY_MISC, dc->categories); pdc->realize = vfio_realize;
Do not reset a vfio-pci device during CPR, and do not complain if the kernel's PCI config space changes for non-emulated bits between the vmstate save and load, which can happen due to ongoing interrupt activity. Signed-off-by: Steve Sistare <steven.sistare@oracle.com> --- hw/vfio/pci.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+)