Message ID | 20230126184948.10478-9-avihaih@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | vfio: Add migration pre-copy support and device dirty tracking | expand |
On Thu, 26 Jan 2023 20:49:38 +0200 Avihai Horon <avihaih@nvidia.com> wrote: > From: Joao Martins <joao.m.martins@oracle.com> > > According to the device DMA logging uAPI, IOVA ranges to be logged by > the device must be provided all at once upon DMA logging start. > > As preparation for the following patches which will add device dirty > page tracking, keep a record of all DMA mapped IOVA ranges so later they > can be used for DMA logging start. > > Note that when vIOMMU is enabled DMA mapped IOVA ranges are not tracked. > This is due to the dynamic nature of vIOMMU DMA mapping/unmapping. > Following patches will address the vIOMMU case specifically. > > Signed-off-by: Joao Martins <joao.m.martins@oracle.com> > Signed-off-by: Avihai Horon <avihaih@nvidia.com> > --- > include/hw/vfio/vfio-common.h | 3 ++ > hw/vfio/common.c | 86 +++++++++++++++++++++++++++++++++-- > 2 files changed, 86 insertions(+), 3 deletions(-) > > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > index 88c2194fb9..d54000d7ae 100644 > --- a/include/hw/vfio/vfio-common.h > +++ b/include/hw/vfio/vfio-common.h > @@ -23,6 +23,7 @@ > > #include "exec/memory.h" > #include "qemu/queue.h" > +#include "qemu/iova-tree.h" > #include "qemu/notify.h" > #include "ui/console.h" > #include "hw/display/ramfb.h" > @@ -94,6 +95,8 @@ typedef struct VFIOContainer { > uint64_t max_dirty_bitmap_size; > unsigned long pgsizes; > unsigned int dma_max_mappings; > + IOVATree *mappings; > + QemuMutex mappings_mutex; > QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; > QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; > QLIST_HEAD(, VFIOGroup) group_list; > diff --git a/hw/vfio/common.c b/hw/vfio/common.c > index e554573eb5..fafc361cea 100644 > --- a/hw/vfio/common.c > +++ b/hw/vfio/common.c > @@ -43,6 +43,7 @@ > #include "migration/misc.h" > #include "migration/qemu-file.h" > #include "sysemu/tpm.h" > +#include "qemu/iova-tree.h" > > VFIOGroupList vfio_group_list = > QLIST_HEAD_INITIALIZER(vfio_group_list); > 
@@ -373,6 +374,11 @@ bool vfio_mig_active(void) > return true; > } > > +static bool vfio_have_giommu(VFIOContainer *container) > +{ > + return !QLIST_EMPTY(&container->giommu_list); > +} > + > static void vfio_set_migration_error(int err) > { > MigrationState *ms = migrate_get_current(); > @@ -450,6 +456,51 @@ static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) > return true; > } > > +static int vfio_record_mapping(VFIOContainer *container, hwaddr iova, > + hwaddr size, bool readonly) > +{ > + DMAMap map = { > + .iova = iova, > + .size = size - 1, /* IOVATree is inclusive, so subtract 1 from size */ <facepalm>
On 27/01/2023 23:42, Alex Williamson wrote: > External email: Use caution opening links or attachments > > > On Thu, 26 Jan 2023 20:49:38 +0200 > Avihai Horon <avihaih@nvidia.com> wrote: > >> From: Joao Martins <joao.m.martins@oracle.com> >> >> According to the device DMA logging uAPI, IOVA ranges to be logged by >> the device must be provided all at once upon DMA logging start. >> >> As preparation for the following patches which will add device dirty >> page tracking, keep a record of all DMA mapped IOVA ranges so later they >> can be used for DMA logging start. >> >> Note that when vIOMMU is enabled DMA mapped IOVA ranges are not tracked. >> This is due to the dynamic nature of vIOMMU DMA mapping/unmapping. >> Following patches will address the vIOMMU case specifically. >> >> Signed-off-by: Joao Martins <joao.m.martins@oracle.com> >> Signed-off-by: Avihai Horon <avihaih@nvidia.com> >> --- >> include/hw/vfio/vfio-common.h | 3 ++ >> hw/vfio/common.c | 86 +++++++++++++++++++++++++++++++++-- >> 2 files changed, 86 insertions(+), 3 deletions(-) >> >> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h >> index 88c2194fb9..d54000d7ae 100644 >> --- a/include/hw/vfio/vfio-common.h >> +++ b/include/hw/vfio/vfio-common.h >> @@ -23,6 +23,7 @@ >> >> #include "exec/memory.h" >> #include "qemu/queue.h" >> +#include "qemu/iova-tree.h" >> #include "qemu/notify.h" >> #include "ui/console.h" >> #include "hw/display/ramfb.h" >> @@ -94,6 +95,8 @@ typedef struct VFIOContainer { >> uint64_t max_dirty_bitmap_size; >> unsigned long pgsizes; >> unsigned int dma_max_mappings; >> + IOVATree *mappings; >> + QemuMutex mappings_mutex; >> QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; >> QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; >> QLIST_HEAD(, VFIOGroup) group_list; >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c >> index e554573eb5..fafc361cea 100644 >> --- a/hw/vfio/common.c >> +++ b/hw/vfio/common.c >> @@ -43,6 +43,7 @@ >> #include "migration/misc.h" >> #include 
"migration/qemu-file.h" >> #include "sysemu/tpm.h" >> +#include "qemu/iova-tree.h" >> >> VFIOGroupList vfio_group_list = >> QLIST_HEAD_INITIALIZER(vfio_group_list); >> @@ -373,6 +374,11 @@ bool vfio_mig_active(void) >> return true; >> } >> >> +static bool vfio_have_giommu(VFIOContainer *container) >> +{ >> + return !QLIST_EMPTY(&container->giommu_list); >> +} >> + >> static void vfio_set_migration_error(int err) >> { >> MigrationState *ms = migrate_get_current(); >> @@ -450,6 +456,51 @@ static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) >> return true; >> } >> >> +static int vfio_record_mapping(VFIOContainer *container, hwaddr iova, >> + hwaddr size, bool readonly) >> +{ >> + DMAMap map = { >> + .iova = iova, >> + .size = size - 1, /* IOVATree is inclusive, so subtract 1 from size */ > <facepalm> I am not sure what error you are referring to here. Is it because DMAMap.size is not actually the size? Something else? Thanks.
On Sun, 12 Feb 2023 17:40:06 +0200 Avihai Horon <avihaih@nvidia.com> wrote: > On 27/01/2023 23:42, Alex Williamson wrote: > > External email: Use caution opening links or attachments > > > > > > On Thu, 26 Jan 2023 20:49:38 +0200 > > Avihai Horon <avihaih@nvidia.com> wrote: > > > >> From: Joao Martins <joao.m.martins@oracle.com> > >> > >> According to the device DMA logging uAPI, IOVA ranges to be logged by > >> the device must be provided all at once upon DMA logging start. > >> > >> As preparation for the following patches which will add device dirty > >> page tracking, keep a record of all DMA mapped IOVA ranges so later they > >> can be used for DMA logging start. > >> > >> Note that when vIOMMU is enabled DMA mapped IOVA ranges are not tracked. > >> This is due to the dynamic nature of vIOMMU DMA mapping/unmapping. > >> Following patches will address the vIOMMU case specifically. > >> > >> Signed-off-by: Joao Martins <joao.m.martins@oracle.com> > >> Signed-off-by: Avihai Horon <avihaih@nvidia.com> > >> --- > >> include/hw/vfio/vfio-common.h | 3 ++ > >> hw/vfio/common.c | 86 +++++++++++++++++++++++++++++++++-- > >> 2 files changed, 86 insertions(+), 3 deletions(-) > >> > >> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > >> index 88c2194fb9..d54000d7ae 100644 > >> --- a/include/hw/vfio/vfio-common.h > >> +++ b/include/hw/vfio/vfio-common.h > >> @@ -23,6 +23,7 @@ > >> > >> #include "exec/memory.h" > >> #include "qemu/queue.h" > >> +#include "qemu/iova-tree.h" > >> #include "qemu/notify.h" > >> #include "ui/console.h" > >> #include "hw/display/ramfb.h" > >> @@ -94,6 +95,8 @@ typedef struct VFIOContainer { > >> uint64_t max_dirty_bitmap_size; > >> unsigned long pgsizes; > >> unsigned int dma_max_mappings; > >> + IOVATree *mappings; > >> + QemuMutex mappings_mutex; > >> QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; > >> QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; > >> QLIST_HEAD(, VFIOGroup) group_list; > >> diff --git a/hw/vfio/common.c 
b/hw/vfio/common.c > >> index e554573eb5..fafc361cea 100644 > >> --- a/hw/vfio/common.c > >> +++ b/hw/vfio/common.c > >> @@ -43,6 +43,7 @@ > >> #include "migration/misc.h" > >> #include "migration/qemu-file.h" > >> #include "sysemu/tpm.h" > >> +#include "qemu/iova-tree.h" > >> > >> VFIOGroupList vfio_group_list = > >> QLIST_HEAD_INITIALIZER(vfio_group_list); > >> @@ -373,6 +374,11 @@ bool vfio_mig_active(void) > >> return true; > >> } > >> > >> +static bool vfio_have_giommu(VFIOContainer *container) > >> +{ > >> + return !QLIST_EMPTY(&container->giommu_list); > >> +} > >> + > >> static void vfio_set_migration_error(int err) > >> { > >> MigrationState *ms = migrate_get_current(); > >> @@ -450,6 +456,51 @@ static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) > >> return true; > >> } > >> > >> +static int vfio_record_mapping(VFIOContainer *container, hwaddr iova, > >> + hwaddr size, bool readonly) > >> +{ > >> + DMAMap map = { > >> + .iova = iova, > >> + .size = size - 1, /* IOVATree is inclusive, so subtract 1 from size */ > > <facepalm> > > I am not sure what's the error you are referring here. > Is it because DMAMap.size is not actually the size? > Something else? Sorry, I'm just lamenting what a terrible interface IOVATree provides with this inclusive range nonsense. Thanks, Alex
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 88c2194fb9..d54000d7ae 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -23,6 +23,7 @@ #include "exec/memory.h" #include "qemu/queue.h" +#include "qemu/iova-tree.h" #include "qemu/notify.h" #include "ui/console.h" #include "hw/display/ramfb.h" @@ -94,6 +95,8 @@ typedef struct VFIOContainer { uint64_t max_dirty_bitmap_size; unsigned long pgsizes; unsigned int dma_max_mappings; + IOVATree *mappings; + QemuMutex mappings_mutex; QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; QLIST_HEAD(, VFIOGroup) group_list; diff --git a/hw/vfio/common.c b/hw/vfio/common.c index e554573eb5..fafc361cea 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -43,6 +43,7 @@ #include "migration/misc.h" #include "migration/qemu-file.h" #include "sysemu/tpm.h" +#include "qemu/iova-tree.h" VFIOGroupList vfio_group_list = QLIST_HEAD_INITIALIZER(vfio_group_list); @@ -373,6 +374,11 @@ bool vfio_mig_active(void) return true; } +static bool vfio_have_giommu(VFIOContainer *container) +{ + return !QLIST_EMPTY(&container->giommu_list); +} + static void vfio_set_migration_error(int err) { MigrationState *ms = migrate_get_current(); @@ -450,6 +456,51 @@ static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) return true; } +static int vfio_record_mapping(VFIOContainer *container, hwaddr iova, + hwaddr size, bool readonly) +{ + DMAMap map = { + .iova = iova, + .size = size - 1, /* IOVATree is inclusive, so subtract 1 from size */ + .perm = readonly ? 
IOMMU_RO : IOMMU_RW, + }; + int ret; + + if (vfio_have_giommu(container)) { + return 0; + } + + WITH_QEMU_LOCK_GUARD(&container->mappings_mutex) { + ret = iova_tree_insert(container->mappings, &map); + if (ret) { + if (ret == IOVA_ERR_INVALID) { + ret = -EINVAL; + } else if (ret == IOVA_ERR_OVERLAP) { + ret = -EEXIST; + } + } + } + + return ret; +} + +static void vfio_erase_mapping(VFIOContainer *container, hwaddr iova, + hwaddr size) +{ + DMAMap map = { + .iova = iova, + .size = size - 1, /* IOVATree is inclusive, so subtract 1 from size */ + }; + + if (vfio_have_giommu(container)) { + return; + } + + WITH_QEMU_LOCK_GUARD(&container->mappings_mutex) { + iova_tree_remove(container->mappings, map); + } +} + static int vfio_dma_unmap_bitmap(VFIOContainer *container, hwaddr iova, ram_addr_t size, IOMMUTLBEntry *iotlb) @@ -550,6 +601,8 @@ static int vfio_dma_unmap(VFIOContainer *container, DIRTY_CLIENTS_NOCODE); } + vfio_erase_mapping(container, iova, size); + return 0; } @@ -563,6 +616,16 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova, .iova = iova, .size = size, }; + int ret; + + ret = vfio_record_mapping(container, iova, size, readonly); + if (ret) { + error_report("vfio: Failed to record mapping, iova: 0x%" HWADDR_PRIx + ", size: 0x" RAM_ADDR_FMT ", ret: %d (%s)", + iova, size, ret, strerror(-ret)); + + return ret; + } if (!readonly) { map.flags |= VFIO_DMA_MAP_FLAG_WRITE; @@ -579,8 +642,12 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova, return 0; } + ret = -errno; error_report("VFIO_MAP_DMA failed: %s", strerror(errno)); - return -errno; + + vfio_erase_mapping(container, iova, size); + + return ret; } static void vfio_host_win_add(VFIOContainer *container, @@ -2134,16 +2201,23 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, QLIST_INIT(&container->giommu_list); QLIST_INIT(&container->hostwin_list); QLIST_INIT(&container->vrdl_list); + container->mappings = iova_tree_new(); + if (!container->mappings) { + 
error_setg(errp, "Cannot allocate DMA mappings tree"); + ret = -ENOMEM; + goto free_container_exit; + } + qemu_mutex_init(&container->mappings_mutex); ret = vfio_init_container(container, group->fd, errp); if (ret) { - goto free_container_exit; + goto destroy_mappings_exit; } ret = vfio_ram_block_discard_disable(container, true); if (ret) { error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); - goto free_container_exit; + goto destroy_mappings_exit; } switch (container->iommu_type) { @@ -2279,6 +2353,10 @@ listener_release_exit: enable_discards_exit: vfio_ram_block_discard_disable(container, false); +destroy_mappings_exit: + qemu_mutex_destroy(&container->mappings_mutex); + iova_tree_destroy(container->mappings); + free_container_exit: g_free(container); @@ -2333,6 +2411,8 @@ static void vfio_disconnect_container(VFIOGroup *group) } trace_vfio_disconnect_container(container->fd); + qemu_mutex_destroy(&container->mappings_mutex); + iova_tree_destroy(container->mappings); close(container->fd); g_free(container);