Message ID | 1573578324-8389-15-git-send-email-kwankhede@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Series | Add migration support for VFIO devices |
On Wed, Nov 13, 2019 at 01:05:23AM +0800, Kirti Wankhede wrote:
> With vIOMMU, IO virtual address range can get unmapped while in pre-copy phase
> of migration. In that case, unmap ioctl should return pages pinned in that range
> and QEMU should find its correcponding guest physical addresses and report
> those dirty.
>
> Note: This patch is not yet tested. I'm trying to see how I can test this code
> path.
>
> Suggested-by: Alex Williamson <alex.williamson@redhat.com>
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Reviewed-by: Neo Jia <cjia@nvidia.com>
> ---
>  hw/vfio/common.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 61 insertions(+), 4 deletions(-)
>
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 66f1c64bf074..dc5768219d44 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -311,11 +311,30 @@ static bool vfio_devices_are_stopped_and_saving(void)
>      return true;
>  }
>
> +static bool vfio_devices_are_running_and_saving(void)
> +{
> +    VFIOGroup *group;
> +    VFIODevice *vbasedev;
> +
> +    QLIST_FOREACH(group, &vfio_group_list, next) {
> +        QLIST_FOREACH(vbasedev, &group->device_list, next) {
> +            if ((vbasedev->device_state & VFIO_DEVICE_STATE_SAVING) &&
> +                (vbasedev->device_state & VFIO_DEVICE_STATE_RUNNING)) {
> +                continue;
> +            } else {
> +                return false;
> +            }
> +        }
> +    }
> +    return true;
> +}
> +
>  /*
>   * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
>   */
>  static int vfio_dma_unmap(VFIOContainer *container,
> -                          hwaddr iova, ram_addr_t size)
> +                          hwaddr iova, ram_addr_t size,
> +                          VFIOGuestIOMMU *giommu)
>  {
>      struct vfio_iommu_type1_dma_unmap unmap = {
>          .argsz = sizeof(unmap),
> @@ -324,6 +343,44 @@ static int vfio_dma_unmap(VFIOContainer *container,
>          .size = size,
>      };
>
> +    if (giommu && vfio_devices_are_running_and_saving()) {
> +        int ret;
> +        uint64_t bitmap_size;
> +        struct vfio_iommu_type1_dma_unmap_bitmap unmap_bitmap = {
> +            .argsz = sizeof(unmap_bitmap),
> +            .flags = VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP,
> +            .iova = iova,
> +            .size = size,
> +        };
> +
> +        bitmap_size = BITS_TO_LONGS(size >> TARGET_PAGE_BITS) *
> +                      sizeof(uint64_t);
> +
> +        unmap_bitmap.bitmap = g_try_malloc0(bitmap_size);
> +        if (!unmap_bitmap.bitmap) {
> +            error_report("%s: Error allocating bitmap buffer of size 0x%lx",
> +                         __func__, bitmap_size);
> +            return -ENOMEM;
> +        }
> +
> +        unmap_bitmap.bitmap_size = bitmap_size;
> +
> +        ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA_GET_BITMAP,
> +                    &unmap_bitmap);

Once vfio_dma_unmap is reached, the IOVAs being unmapped will fail to be
translated by the vIOMMU, because the shadow page tables have already been
updated. So, except for IOTLB entries that were already generated and whose
invalidation is delayed until after the unmap notification, IOVA to GPA
translation would fail.

> +
> +        if (!ret) {
> +            cpu_physical_memory_set_dirty_lebitmap(
> +                                (uint64_t *)unmap_bitmap.bitmap,
> +                                giommu->iommu_offset + giommu->n.start,
> +                                bitmap_size >> TARGET_PAGE_BITS);

Also, why can IOVAs be used directly here?

Thanks
Yan

> +        } else {
> +            error_report("VFIO_IOMMU_GET_DIRTY_BITMAP: %d %d", ret, errno);
> +        }
> +
> +        g_free(unmap_bitmap.bitmap);
> +        return ret;
> +    }
> +
>      while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
>          /*
>           * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
> @@ -371,7 +428,7 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
>       * the VGA ROM space.
>       */
>      if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
> -        (errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 &&
> +        (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 &&
>           ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
>          return 0;
>      }
> @@ -511,7 +568,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
>                           iotlb->addr_mask + 1, vaddr, ret);
>          }
>      } else {
> -        ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1);
> +        ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1, giommu);
>          if (ret) {
>              error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
>                           "0x%"HWADDR_PRIx") = %d (%m)",
> @@ -814,7 +871,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
>      }
>
>      if (try_unmap) {
> -        ret = vfio_dma_unmap(container, iova, int128_get64(llsize));
> +        ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
>          if (ret) {
>              error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
>                           "0x%"HWADDR_PRIx") = %d (%m)",
> --
> 2.7.0
>
On Tue, 12 Nov 2019 22:35:23 +0530
Kirti Wankhede <kwankhede@nvidia.com> wrote:

> With vIOMMU, IO virtual address range can get unmapped while in pre-copy phase
> of migration. In that case, unmap ioctl should return pages pinned in that range
> and QEMU should find its correcponding guest physical addresses and report

corresponding

> those dirty.
>
> Note: This patch is not yet tested. I'm trying to see how I can test this code
> path.
>
> Suggested-by: Alex Williamson <alex.williamson@redhat.com>
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Reviewed-by: Neo Jia <cjia@nvidia.com>
> ---
>  hw/vfio/common.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 61 insertions(+), 4 deletions(-)
>
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 66f1c64bf074..dc5768219d44 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -311,11 +311,30 @@ static bool vfio_devices_are_stopped_and_saving(void)
>      return true;
>  }
>
> +static bool vfio_devices_are_running_and_saving(void)
> +{
> +    VFIOGroup *group;
> +    VFIODevice *vbasedev;
> +
> +    QLIST_FOREACH(group, &vfio_group_list, next) {
> +        QLIST_FOREACH(vbasedev, &group->device_list, next) {
> +            if ((vbasedev->device_state & VFIO_DEVICE_STATE_SAVING) &&
> +                (vbasedev->device_state & VFIO_DEVICE_STATE_RUNNING)) {
> +                continue;
> +            } else {
> +                return false;
> +            }
> +        }
> +    }
> +    return true;
> +}

This suggests generalizing the other function to allow the caller to provide
the mask and value to test for.

> +
>  /*
>   * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
>   */
>  static int vfio_dma_unmap(VFIOContainer *container,
> -                          hwaddr iova, ram_addr_t size)
> +                          hwaddr iova, ram_addr_t size,
> +                          VFIOGuestIOMMU *giommu)
>  {
>      struct vfio_iommu_type1_dma_unmap unmap = {
>          .argsz = sizeof(unmap),
> @@ -324,6 +343,44 @@ static int vfio_dma_unmap(VFIOContainer *container,
>          .size = size,
>      };
>
> +    if (giommu && vfio_devices_are_running_and_saving()) {
> +        int ret;
> +        uint64_t bitmap_size;
> +        struct vfio_iommu_type1_dma_unmap_bitmap unmap_bitmap = {
> +            .argsz = sizeof(unmap_bitmap),
> +            .flags = VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP,
> +            .iova = iova,
> +            .size = size,
> +        };
> +
> +        bitmap_size = BITS_TO_LONGS(size >> TARGET_PAGE_BITS) *
> +                      sizeof(uint64_t);
> +
> +        unmap_bitmap.bitmap = g_try_malloc0(bitmap_size);
> +        if (!unmap_bitmap.bitmap) {
> +            error_report("%s: Error allocating bitmap buffer of size 0x%lx",
> +                         __func__, bitmap_size);
> +            return -ENOMEM;
> +        }
> +
> +        unmap_bitmap.bitmap_size = bitmap_size;
> +
> +        ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA_GET_BITMAP,
> +                    &unmap_bitmap);
> +
> +        if (!ret) {
> +            cpu_physical_memory_set_dirty_lebitmap(
> +                                (uint64_t *)unmap_bitmap.bitmap,
> +                                giommu->iommu_offset + giommu->n.start,
> +                                bitmap_size >> TARGET_PAGE_BITS);

+1 Yan's comments.

> +        } else {
> +            error_report("VFIO_IOMMU_GET_DIRTY_BITMAP: %d %d", ret, errno);
> +        }
> +
> +        g_free(unmap_bitmap.bitmap);
> +        return ret;
> +    }
> +
>      while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
>          /*
>           * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
> @@ -371,7 +428,7 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
>       * the VGA ROM space.
>       */
>      if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
> -        (errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 &&
> +        (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 &&
>           ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
>          return 0;
>      }
> @@ -511,7 +568,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
>                           iotlb->addr_mask + 1, vaddr, ret);
>          }
>      } else {
> -        ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1);
> +        ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1, giommu);
>          if (ret) {
>              error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
>                           "0x%"HWADDR_PRIx") = %d (%m)",
> @@ -814,7 +871,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
>      }
>
>      if (try_unmap) {
> -        ret = vfio_dma_unmap(container, iova, int128_get64(llsize));
> +        ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
>          if (ret) {
>              error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
>                           "0x%"HWADDR_PRIx") = %d (%m)",
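Alex's suggestion above is to fold the two device-state walkers into a single helper that takes the mask of state bits to test and the value they must have. A minimal sketch of what that could look like follows; the helper name vfio_devices_all_states_match() and the wrapper shapes are illustrative assumptions, not part of the posted series.

/*
 * Sketch only: a generalized walker over the same VFIOGroup/VFIODevice
 * lists used by the existing helpers.  The caller supplies the state bits
 * to test (mask) and the value those bits must have on every device.
 */
static bool vfio_devices_all_states_match(uint32_t mask, uint32_t value)
{
    VFIOGroup *group;
    VFIODevice *vbasedev;

    QLIST_FOREACH(group, &vfio_group_list, next) {
        QLIST_FOREACH(vbasedev, &group->device_list, next) {
            if ((vbasedev->device_state & mask) != value) {
                return false;
            }
        }
    }
    return true;
}

/* The two existing checks would then become thin wrappers, for example: */
static bool vfio_devices_are_stopped_and_saving(void)
{
    return vfio_devices_all_states_match(VFIO_DEVICE_STATE_RUNNING |
                                         VFIO_DEVICE_STATE_SAVING,
                                         VFIO_DEVICE_STATE_SAVING);
}

static bool vfio_devices_are_running_and_saving(void)
{
    return vfio_devices_all_states_match(VFIO_DEVICE_STATE_RUNNING |
                                         VFIO_DEVICE_STATE_SAVING,
                                         VFIO_DEVICE_STATE_RUNNING |
                                         VFIO_DEVICE_STATE_SAVING);
}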
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 66f1c64bf074..dc5768219d44 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -311,11 +311,30 @@ static bool vfio_devices_are_stopped_and_saving(void)
     return true;
 }
 
+static bool vfio_devices_are_running_and_saving(void)
+{
+    VFIOGroup *group;
+    VFIODevice *vbasedev;
+
+    QLIST_FOREACH(group, &vfio_group_list, next) {
+        QLIST_FOREACH(vbasedev, &group->device_list, next) {
+            if ((vbasedev->device_state & VFIO_DEVICE_STATE_SAVING) &&
+                (vbasedev->device_state & VFIO_DEVICE_STATE_RUNNING)) {
+                continue;
+            } else {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
 /*
  * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
  */
 static int vfio_dma_unmap(VFIOContainer *container,
-                          hwaddr iova, ram_addr_t size)
+                          hwaddr iova, ram_addr_t size,
+                          VFIOGuestIOMMU *giommu)
 {
     struct vfio_iommu_type1_dma_unmap unmap = {
         .argsz = sizeof(unmap),
@@ -324,6 +343,44 @@ static int vfio_dma_unmap(VFIOContainer *container,
         .size = size,
     };
 
+    if (giommu && vfio_devices_are_running_and_saving()) {
+        int ret;
+        uint64_t bitmap_size;
+        struct vfio_iommu_type1_dma_unmap_bitmap unmap_bitmap = {
+            .argsz = sizeof(unmap_bitmap),
+            .flags = VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP,
+            .iova = iova,
+            .size = size,
+        };
+
+        bitmap_size = BITS_TO_LONGS(size >> TARGET_PAGE_BITS) *
+                      sizeof(uint64_t);
+
+        unmap_bitmap.bitmap = g_try_malloc0(bitmap_size);
+        if (!unmap_bitmap.bitmap) {
+            error_report("%s: Error allocating bitmap buffer of size 0x%lx",
+                         __func__, bitmap_size);
+            return -ENOMEM;
+        }
+
+        unmap_bitmap.bitmap_size = bitmap_size;
+
+        ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA_GET_BITMAP,
+                    &unmap_bitmap);
+
+        if (!ret) {
+            cpu_physical_memory_set_dirty_lebitmap(
+                                (uint64_t *)unmap_bitmap.bitmap,
+                                giommu->iommu_offset + giommu->n.start,
+                                bitmap_size >> TARGET_PAGE_BITS);
+        } else {
+            error_report("VFIO_IOMMU_GET_DIRTY_BITMAP: %d %d", ret, errno);
+        }
+
+        g_free(unmap_bitmap.bitmap);
+        return ret;
+    }
+
     while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
         /*
          * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
@@ -371,7 +428,7 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
      * the VGA ROM space.
      */
     if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
-        (errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 &&
+        (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 &&
          ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
         return 0;
     }
@@ -511,7 +568,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
                          iotlb->addr_mask + 1, vaddr, ret);
         }
     } else {
-        ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1);
+        ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1, giommu);
         if (ret) {
             error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
                          "0x%"HWADDR_PRIx") = %d (%m)",
@@ -814,7 +871,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
     }
 
     if (try_unmap) {
-        ret = vfio_dma_unmap(container, iova, int128_get64(llsize));
+        ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
         if (ret) {
             error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
                          "0x%"HWADDR_PRIx") = %d (%m)",
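As a side note on the sizing arithmetic in the patch: the unmap bitmap carries one bit per TARGET_PAGE_SIZE page of the unmapped range, rounded up to whole 64-bit words. The standalone sketch below only illustrates that calculation; the 4 KiB page size and the local BITS_TO_LONGS definition are assumptions for the example, whereas the real values come from the QEMU and kernel UAPI headers.

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

#define TARGET_PAGE_BITS 12                       /* assume 4 KiB target pages */
#define BITS_PER_LONG    64
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

int main(void)
{
    uint64_t size = 1ULL << 20;                   /* a 1 MiB unmap request */
    uint64_t pages = size >> TARGET_PAGE_BITS;    /* 256 pages to track */
    uint64_t bitmap_size = BITS_TO_LONGS(pages) * sizeof(uint64_t);

    /* 256 bits round up to 4 uint64_t words, i.e. a 32-byte bitmap */
    printf("pages=%" PRIu64 " bitmap_bytes=%" PRIu64 "\n", pages, bitmap_size);
    return 0;
}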