Message ID | 20230307125450.62409-10-joao.m.martins@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | vfio/migration: Device dirty page tracking | expand |
On 3/7/23 13:54, Joao Martins wrote:
> According to the device DMA logging uAPI, IOVA ranges to be logged by
> the device must be provided all at once upon DMA logging start.
>
> As preparation for the following patches which will add device dirty
> page tracking, keep a record of all DMA mapped IOVA ranges so later they
> can be used for DMA logging start.
>
> Signed-off-by: Avihai Horon <avihaih@nvidia.com>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> ---
>  hw/vfio/common.c     | 85 ++++++++++++++++++++++++++++++++++++++++++++
>  hw/vfio/trace-events |  1 +
>  2 files changed, 86 insertions(+)
>
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 63831eab78a1..811502dbc97c 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -1325,11 +1325,96 @@ static int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
>      return ret;
>  }
>
> +typedef struct VFIODirtyRanges {
> +    hwaddr min32;
> +    hwaddr max32;
> +    hwaddr min64;
> +    hwaddr max64;
> +} VFIODirtyRanges;
> +
> +typedef struct VFIODirtyRangesListener {
> +    VFIOContainer *container;
> +    VFIODirtyRanges ranges;

I would have introduced a pointer instead, to avoid the memcpy.

Anyhow, this is minor.

Reviewed-by: Cédric Le Goater <clg@redhat.com>

Thanks,

C.

> +    MemoryListener listener;
> +} VFIODirtyRangesListener;
> +
> +static void vfio_dirty_tracking_update(MemoryListener *listener,
> +                                       MemoryRegionSection *section)
> +{
> +    VFIODirtyRangesListener *dirty = container_of(listener,
> +                                                  VFIODirtyRangesListener,
> +                                                  listener);
> +    VFIODirtyRanges *range = &dirty->ranges;
> +    hwaddr iova, end, *min, *max;
> +
> +    if (!vfio_listener_valid_section(section, "tracking_update") ||
> +        !vfio_get_section_iova_range(dirty->container, section,
> +                                     &iova, &end, NULL)) {
> +        return;
> +    }
> +
> +    /*
> +     * The address space passed to the dirty tracker is reduced to two ranges:
> +     * one for 32-bit DMA ranges, and another one for 64-bit DMA ranges.
> +     * The underlying reports of dirty will query a sub-interval of each of
> +     * these ranges.
> +     *
> +     * The purpose of the dual range handling is to handle known cases of big
> +     * holes in the address space, like the x86 AMD 1T hole. The alternative
> +     * would be an IOVATree but that has a much bigger runtime overhead and
> +     * unnecessary complexity.
> +     */
> +    min = (end <= UINT32_MAX) ? &range->min32 : &range->min64;
> +    max = (end <= UINT32_MAX) ? &range->max32 : &range->max64;
> +
> +    if (*min > iova) {
> +        *min = iova;
> +    }
> +    if (*max < end) {
> +        *max = end;
> +    }
> +
> +    trace_vfio_device_dirty_tracking_update(iova, end, *min, *max);
> +    return;
> +}
> +
> +static const MemoryListener vfio_dirty_tracking_listener = {
> +    .name = "vfio-tracking",
> +    .region_add = vfio_dirty_tracking_update,
> +};
> +
> +static void vfio_dirty_tracking_init(VFIOContainer *container,
> +                                     VFIODirtyRanges *ranges)
> +{
> +    VFIODirtyRangesListener dirty;
> +
> +    memset(&dirty, 0, sizeof(dirty));
> +    dirty.ranges.min32 = UINT32_MAX;
> +    dirty.ranges.min64 = UINT64_MAX;
> +    dirty.listener = vfio_dirty_tracking_listener;
> +    dirty.container = container;
> +
> +    memory_listener_register(&dirty.listener,
> +                             container->space->as);
> +
> +    *ranges = dirty.ranges;
> +
> +    /*
> +     * The memory listener is synchronous, and used to calculate the range
> +     * to dirty tracking. Unregister it after we are done as we are not
> +     * interested in any follow-up updates.
> +     */
> +    memory_listener_unregister(&dirty.listener);
> +}
> +
>  static void vfio_listener_log_global_start(MemoryListener *listener)
>  {
>      VFIOContainer *container = container_of(listener, VFIOContainer, listener);
> +    VFIODirtyRanges ranges;
>      int ret;
>
> +    vfio_dirty_tracking_init(container, &ranges);
> +
>      ret = vfio_set_dirty_page_tracking(container, true);
>      if (ret) {
>          vfio_set_migration_error(ret);
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index 7173e6a5c721..dd9fd7b9bddb 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -103,6 +103,7 @@ vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr
>  vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR
>  vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA"
>  vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64
> +vfio_device_dirty_tracking_update(uint64_t start, uint64_t end, uint64_t min, uint64_t max) "section 0x%"PRIx64" - 0x%"PRIx64" -> update [0x%"PRIx64" - 0x%"PRIx64"]"
>  vfio_disconnect_container(int fd) "close container->fd=%d"
>  vfio_put_group(int fd) "close group->fd=%d"
>  vfio_get_device(const char * name, unsigned int flags, unsigned int num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: %u"
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 63831eab78a1..811502dbc97c 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1325,11 +1325,96 @@ static int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
     return ret;
 }
 
+typedef struct VFIODirtyRanges {
+    hwaddr min32;
+    hwaddr max32;
+    hwaddr min64;
+    hwaddr max64;
+} VFIODirtyRanges;
+
+typedef struct VFIODirtyRangesListener {
+    VFIOContainer *container;
+    VFIODirtyRanges ranges;
+    MemoryListener listener;
+} VFIODirtyRangesListener;
+
+static void vfio_dirty_tracking_update(MemoryListener *listener,
+                                       MemoryRegionSection *section)
+{
+    VFIODirtyRangesListener *dirty = container_of(listener,
+                                                  VFIODirtyRangesListener,
+                                                  listener);
+    VFIODirtyRanges *range = &dirty->ranges;
+    hwaddr iova, end, *min, *max;
+
+    if (!vfio_listener_valid_section(section, "tracking_update") ||
+        !vfio_get_section_iova_range(dirty->container, section,
+                                     &iova, &end, NULL)) {
+        return;
+    }
+
+    /*
+     * The address space passed to the dirty tracker is reduced to two ranges:
+     * one for 32-bit DMA ranges, and another one for 64-bit DMA ranges.
+     * The underlying reports of dirty will query a sub-interval of each of
+     * these ranges.
+     *
+     * The purpose of the dual range handling is to handle known cases of big
+     * holes in the address space, like the x86 AMD 1T hole. The alternative
+     * would be an IOVATree but that has a much bigger runtime overhead and
+     * unnecessary complexity.
+     */
+    min = (end <= UINT32_MAX) ? &range->min32 : &range->min64;
+    max = (end <= UINT32_MAX) ? &range->max32 : &range->max64;
+
+    if (*min > iova) {
+        *min = iova;
+    }
+    if (*max < end) {
+        *max = end;
+    }
+
+    trace_vfio_device_dirty_tracking_update(iova, end, *min, *max);
+    return;
+}
+
+static const MemoryListener vfio_dirty_tracking_listener = {
+    .name = "vfio-tracking",
+    .region_add = vfio_dirty_tracking_update,
+};
+
+static void vfio_dirty_tracking_init(VFIOContainer *container,
+                                     VFIODirtyRanges *ranges)
+{
+    VFIODirtyRangesListener dirty;
+
+    memset(&dirty, 0, sizeof(dirty));
+    dirty.ranges.min32 = UINT32_MAX;
+    dirty.ranges.min64 = UINT64_MAX;
+    dirty.listener = vfio_dirty_tracking_listener;
+    dirty.container = container;
+
+    memory_listener_register(&dirty.listener,
+                             container->space->as);
+
+    *ranges = dirty.ranges;
+
+    /*
+     * The memory listener is synchronous, and used to calculate the range
+     * to dirty tracking. Unregister it after we are done as we are not
+     * interested in any follow-up updates.
+     */
+    memory_listener_unregister(&dirty.listener);
+}
+
 static void vfio_listener_log_global_start(MemoryListener *listener)
 {
     VFIOContainer *container = container_of(listener, VFIOContainer, listener);
+    VFIODirtyRanges ranges;
     int ret;
 
+    vfio_dirty_tracking_init(container, &ranges);
+
     ret = vfio_set_dirty_page_tracking(container, true);
     if (ret) {
         vfio_set_migration_error(ret);
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 7173e6a5c721..dd9fd7b9bddb 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -103,6 +103,7 @@ vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr
 vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR
 vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA"
 vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64
+vfio_device_dirty_tracking_update(uint64_t start, uint64_t end, uint64_t min, uint64_t max) "section 0x%"PRIx64" - 0x%"PRIx64" -> update [0x%"PRIx64" - 0x%"PRIx64"]"
 vfio_disconnect_container(int fd) "close container->fd=%d"
 vfio_put_group(int fd) "close group->fd=%d"
 vfio_get_device(const char * name, unsigned int flags, unsigned int num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: %u"