Message ID | 20230222174915.5647-8-avihaih@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | vfio: Add migration pre-copy support and device dirty tracking | expand |
On Wed, 22 Feb 2023 19:49:02 +0200 Avihai Horon <avihaih@nvidia.com> wrote: > There are already two places where dirty page bitmap allocation and > calculations are done in open code. With device dirty page tracking > being added in next patches, there are going to be even more places. > > To avoid code duplication, introduce VFIOBitmap struct and corresponding > alloc and dealloc functions and use them where applicable. > > Signed-off-by: Avihai Horon <avihaih@nvidia.com> > --- > hw/vfio/common.c | 89 ++++++++++++++++++++++++++++++++---------------- > 1 file changed, 60 insertions(+), 29 deletions(-) > > diff --git a/hw/vfio/common.c b/hw/vfio/common.c > index ac93b85632..84f08bdbbb 100644 > --- a/hw/vfio/common.c > +++ b/hw/vfio/common.c > @@ -320,6 +320,41 @@ const MemoryRegionOps vfio_region_ops = { > * Device state interfaces > */ > > +typedef struct { > + unsigned long *bitmap; > + hwaddr size; > + hwaddr pages; > +} VFIOBitmap; > + > +static VFIOBitmap *vfio_bitmap_alloc(hwaddr size) > +{ > + VFIOBitmap *vbmap = g_try_new0(VFIOBitmap, 1); > + if (!vbmap) { > + errno = ENOMEM; > + > + return NULL; > + } > + > + vbmap->pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size(); > + vbmap->size = ROUND_UP(vbmap->pages, sizeof(__u64) * BITS_PER_BYTE) / > + BITS_PER_BYTE; > + vbmap->bitmap = g_try_malloc0(vbmap->size); > + if (!vbmap->bitmap) { > + g_free(vbmap); > + errno = ENOMEM; > + > + return NULL; > + } > + > + return vbmap; > +} > + > +static void vfio_bitmap_dealloc(VFIOBitmap *vbmap) > +{ > + g_free(vbmap->bitmap); > + g_free(vbmap); > +} Nit, '_alloc' and '_free' seems like a more standard convention. Thanks, Alex
On 22/02/2023 23:40, Alex Williamson wrote: > External email: Use caution opening links or attachments > > > On Wed, 22 Feb 2023 19:49:02 +0200 > Avihai Horon <avihaih@nvidia.com> wrote: > >> There are already two places where dirty page bitmap allocation and >> calculations are done in open code. With device dirty page tracking >> being added in next patches, there are going to be even more places. >> >> To avoid code duplication, introduce VFIOBitmap struct and corresponding >> alloc and dealloc functions and use them where applicable. >> >> Signed-off-by: Avihai Horon <avihaih@nvidia.com> >> --- >> hw/vfio/common.c | 89 ++++++++++++++++++++++++++++++++---------------- >> 1 file changed, 60 insertions(+), 29 deletions(-) >> >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c >> index ac93b85632..84f08bdbbb 100644 >> --- a/hw/vfio/common.c >> +++ b/hw/vfio/common.c >> @@ -320,6 +320,41 @@ const MemoryRegionOps vfio_region_ops = { >> * Device state interfaces >> */ >> >> +typedef struct { >> + unsigned long *bitmap; >> + hwaddr size; >> + hwaddr pages; >> +} VFIOBitmap; >> + >> +static VFIOBitmap *vfio_bitmap_alloc(hwaddr size) >> +{ >> + VFIOBitmap *vbmap = g_try_new0(VFIOBitmap, 1); >> + if (!vbmap) { >> + errno = ENOMEM; >> + >> + return NULL; >> + } >> + >> + vbmap->pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size(); >> + vbmap->size = ROUND_UP(vbmap->pages, sizeof(__u64) * BITS_PER_BYTE) / >> + BITS_PER_BYTE; >> + vbmap->bitmap = g_try_malloc0(vbmap->size); >> + if (!vbmap->bitmap) { >> + g_free(vbmap); >> + errno = ENOMEM; >> + >> + return NULL; >> + } >> + >> + return vbmap; >> +} >> + >> +static void vfio_bitmap_dealloc(VFIOBitmap *vbmap) >> +{ >> + g_free(vbmap->bitmap); >> + g_free(vbmap); >> +} > Nit, '_alloc' and '_free' seems like a more standard convention. Sure, will change. Thanks.
On 2/22/23 18:49, Avihai Horon wrote: > There are already two places where dirty page bitmap allocation and > calculations are done in open code. With device dirty page tracking > being added in next patches, there are going to be even more places. > > To avoid code duplication, introduce VFIOBitmap struct and corresponding > alloc and dealloc functions and use them where applicable. > > Signed-off-by: Avihai Horon <avihaih@nvidia.com> > --- > hw/vfio/common.c | 89 ++++++++++++++++++++++++++++++++---------------- > 1 file changed, 60 insertions(+), 29 deletions(-) > > diff --git a/hw/vfio/common.c b/hw/vfio/common.c > index ac93b85632..84f08bdbbb 100644 > --- a/hw/vfio/common.c > +++ b/hw/vfio/common.c > @@ -320,6 +320,41 @@ const MemoryRegionOps vfio_region_ops = { > * Device state interfaces > */ > > +typedef struct { > + unsigned long *bitmap; > + hwaddr size; > + hwaddr pages; > +} VFIOBitmap; > + > +static VFIOBitmap *vfio_bitmap_alloc(hwaddr size) > +{ > + VFIOBitmap *vbmap = g_try_new0(VFIOBitmap, 1); I think using g_malloc0() for the VFIOBitmap should be fine. If QEMU can not allocate a couple of bytes, we are in trouble anyway. Thanks, C. > + if (!vbmap) { > + errno = ENOMEM; > + > + return NULL; > + } > + > + vbmap->pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size(); > + vbmap->size = ROUND_UP(vbmap->pages, sizeof(__u64) * BITS_PER_BYTE) / > + BITS_PER_BYTE; > + vbmap->bitmap = g_try_malloc0(vbmap->size); > + if (!vbmap->bitmap) { > + g_free(vbmap); > + errno = ENOMEM; > + > + return NULL; > + } > + > + return vbmap; > +} > + > +static void vfio_bitmap_dealloc(VFIOBitmap *vbmap) > +{ > + g_free(vbmap->bitmap); > + g_free(vbmap); > +} > + > bool vfio_mig_active(void) > { > VFIOGroup *group; > @@ -470,9 +505,14 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container, > { > struct vfio_iommu_type1_dma_unmap *unmap; > struct vfio_bitmap *bitmap; > - uint64_t pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size(); > + VFIOBitmap *vbmap; > int ret; > > + vbmap = vfio_bitmap_alloc(size); > + if (!vbmap) { > + return -errno; > + } > + > unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap)); > > unmap->argsz = sizeof(*unmap) + sizeof(*bitmap); > @@ -486,35 +526,28 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container, > * qemu_real_host_page_size to mark those dirty. Hence set bitmap_pgsize > * to qemu_real_host_page_size. > */ > - > bitmap->pgsize = qemu_real_host_page_size(); > - bitmap->size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) / > - BITS_PER_BYTE; > + bitmap->size = vbmap->size; > + bitmap->data = (__u64 *)vbmap->bitmap; > > - if (bitmap->size > container->max_dirty_bitmap_size) { > - error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, > - (uint64_t)bitmap->size); > + if (vbmap->size > container->max_dirty_bitmap_size) { > + error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, vbmap->size); > ret = -E2BIG; > goto unmap_exit; > } > > - bitmap->data = g_try_malloc0(bitmap->size); > - if (!bitmap->data) { > - ret = -ENOMEM; > - goto unmap_exit; > - } > - > ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap); > if (!ret) { > - cpu_physical_memory_set_dirty_lebitmap((unsigned long *)bitmap->data, > - iotlb->translated_addr, pages); > + cpu_physical_memory_set_dirty_lebitmap(vbmap->bitmap, > + iotlb->translated_addr, vbmap->pages); > } else { > error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m"); > } > > - g_free(bitmap->data); > unmap_exit: > g_free(unmap); > + vfio_bitmap_dealloc(vbmap); > + > return ret; > } > > @@ -1331,7 +1364,7 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, > { > struct vfio_iommu_type1_dirty_bitmap *dbitmap; > struct vfio_iommu_type1_dirty_bitmap_get *range; > - uint64_t pages; > + VFIOBitmap *vbmap; > int ret; > > if (!container->dirty_pages_supported) { > @@ -1341,6 +1374,11 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, > return 0; > } > > + vbmap = vfio_bitmap_alloc(size); > + if (!vbmap) { > + return -errno; > + } > + > dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range)); > > dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range); > @@ -1355,15 +1393,8 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, > * to qemu_real_host_page_size. > */ > range->bitmap.pgsize = qemu_real_host_page_size(); > - > - pages = REAL_HOST_PAGE_ALIGN(range->size) / qemu_real_host_page_size(); > - range->bitmap.size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) / > - BITS_PER_BYTE; > - range->bitmap.data = g_try_malloc0(range->bitmap.size); > - if (!range->bitmap.data) { > - ret = -ENOMEM; > - goto err_out; > - } > + range->bitmap.size = vbmap->size; > + range->bitmap.data = (__u64 *)vbmap->bitmap; > > ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap); > if (ret) { > @@ -1374,14 +1405,14 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, > goto err_out; > } > > - cpu_physical_memory_set_dirty_lebitmap((unsigned long *)range->bitmap.data, > - ram_addr, pages); > + cpu_physical_memory_set_dirty_lebitmap(vbmap->bitmap, ram_addr, > + vbmap->pages); > > trace_vfio_get_dirty_bitmap(container->fd, range->iova, range->size, > range->bitmap.size, ram_addr); > err_out: > - g_free(range->bitmap.data); > g_free(dbitmap); > + vfio_bitmap_dealloc(vbmap); > > return ret; > }
On 27/02/2023 16:09, Cédric Le Goater wrote: > External email: Use caution opening links or attachments > > > On 2/22/23 18:49, Avihai Horon wrote: >> There are already two places where dirty page bitmap allocation and >> calculations are done in open code. With device dirty page tracking >> being added in next patches, there are going to be even more places. >> >> To avoid code duplication, introduce VFIOBitmap struct and corresponding >> alloc and dealloc functions and use them where applicable. >> >> Signed-off-by: Avihai Horon <avihaih@nvidia.com> >> --- >> hw/vfio/common.c | 89 ++++++++++++++++++++++++++++++++---------------- >> 1 file changed, 60 insertions(+), 29 deletions(-) >> >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c >> index ac93b85632..84f08bdbbb 100644 >> --- a/hw/vfio/common.c >> +++ b/hw/vfio/common.c >> @@ -320,6 +320,41 @@ const MemoryRegionOps vfio_region_ops = { >> * Device state interfaces >> */ >> >> +typedef struct { >> + unsigned long *bitmap; >> + hwaddr size; >> + hwaddr pages; >> +} VFIOBitmap; >> + >> +static VFIOBitmap *vfio_bitmap_alloc(hwaddr size) >> +{ >> + VFIOBitmap *vbmap = g_try_new0(VFIOBitmap, 1); > > I think using g_malloc0() for the VFIOBitmap should be fine. If QEMU can > not allocate a couple of bytes, we are in trouble anyway. > Sure, this will simplify the code a bit. I will change it. Thanks. > > >> + if (!vbmap) { >> + errno = ENOMEM; >> + >> + return NULL; >> + } >> + >> + vbmap->pages = REAL_HOST_PAGE_ALIGN(size) / >> qemu_real_host_page_size(); >> + vbmap->size = ROUND_UP(vbmap->pages, sizeof(__u64) * >> BITS_PER_BYTE) / >> + BITS_PER_BYTE; >> + vbmap->bitmap = g_try_malloc0(vbmap->size); >> + if (!vbmap->bitmap) { >> + g_free(vbmap); >> + errno = ENOMEM; >> + >> + return NULL; >> + } >> + >> + return vbmap; >> +} >> + >> +static void vfio_bitmap_dealloc(VFIOBitmap *vbmap) >> +{ >> + g_free(vbmap->bitmap); >> + g_free(vbmap); >> +} >> + >> bool vfio_mig_active(void) >> { >> VFIOGroup *group; >> @@ -470,9 +505,14 @@ static int vfio_dma_unmap_bitmap(VFIOContainer >> *container, >> { >> struct vfio_iommu_type1_dma_unmap *unmap; >> struct vfio_bitmap *bitmap; >> - uint64_t pages = REAL_HOST_PAGE_ALIGN(size) / >> qemu_real_host_page_size(); >> + VFIOBitmap *vbmap; >> int ret; >> >> + vbmap = vfio_bitmap_alloc(size); >> + if (!vbmap) { >> + return -errno; >> + } >> + >> unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap)); >> >> unmap->argsz = sizeof(*unmap) + sizeof(*bitmap); >> @@ -486,35 +526,28 @@ static int vfio_dma_unmap_bitmap(VFIOContainer >> *container, >> * qemu_real_host_page_size to mark those dirty. Hence set >> bitmap_pgsize >> * to qemu_real_host_page_size. >> */ >> - >> bitmap->pgsize = qemu_real_host_page_size(); >> - bitmap->size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) / >> - BITS_PER_BYTE; >> + bitmap->size = vbmap->size; >> + bitmap->data = (__u64 *)vbmap->bitmap; >> >> - if (bitmap->size > container->max_dirty_bitmap_size) { >> - error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, >> - (uint64_t)bitmap->size); >> + if (vbmap->size > container->max_dirty_bitmap_size) { >> + error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, >> vbmap->size); >> ret = -E2BIG; >> goto unmap_exit; >> } >> >> - bitmap->data = g_try_malloc0(bitmap->size); >> - if (!bitmap->data) { >> - ret = -ENOMEM; >> - goto unmap_exit; >> - } >> - >> ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap); >> if (!ret) { >> - cpu_physical_memory_set_dirty_lebitmap((unsigned long >> *)bitmap->data, >> - iotlb->translated_addr, pages); >> + cpu_physical_memory_set_dirty_lebitmap(vbmap->bitmap, >> + iotlb->translated_addr, vbmap->pages); >> } else { >> error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m"); >> } >> >> - g_free(bitmap->data); >> unmap_exit: >> g_free(unmap); >> + vfio_bitmap_dealloc(vbmap); >> + >> return ret; >> } >> >> @@ -1331,7 +1364,7 @@ static int vfio_get_dirty_bitmap(VFIOContainer >> *container, uint64_t iova, >> { >> struct vfio_iommu_type1_dirty_bitmap *dbitmap; >> struct vfio_iommu_type1_dirty_bitmap_get *range; >> - uint64_t pages; >> + VFIOBitmap *vbmap; >> int ret; >> >> if (!container->dirty_pages_supported) { >> @@ -1341,6 +1374,11 @@ static int vfio_get_dirty_bitmap(VFIOContainer >> *container, uint64_t iova, >> return 0; >> } >> >> + vbmap = vfio_bitmap_alloc(size); >> + if (!vbmap) { >> + return -errno; >> + } >> + >> dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range)); >> >> dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range); >> @@ -1355,15 +1393,8 @@ static int vfio_get_dirty_bitmap(VFIOContainer >> *container, uint64_t iova, >> * to qemu_real_host_page_size. >> */ >> range->bitmap.pgsize = qemu_real_host_page_size(); >> - >> - pages = REAL_HOST_PAGE_ALIGN(range->size) / >> qemu_real_host_page_size(); >> - range->bitmap.size = ROUND_UP(pages, sizeof(__u64) * >> BITS_PER_BYTE) / >> - BITS_PER_BYTE; >> - range->bitmap.data = g_try_malloc0(range->bitmap.size); >> - if (!range->bitmap.data) { >> - ret = -ENOMEM; >> - goto err_out; >> - } >> + range->bitmap.size = vbmap->size; >> + range->bitmap.data = (__u64 *)vbmap->bitmap; >> >> ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap); >> if (ret) { >> @@ -1374,14 +1405,14 @@ static int >> vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, >> goto err_out; >> } >> >> - cpu_physical_memory_set_dirty_lebitmap((unsigned long >> *)range->bitmap.data, >> - ram_addr, pages); >> + cpu_physical_memory_set_dirty_lebitmap(vbmap->bitmap, ram_addr, >> + vbmap->pages); >> >> trace_vfio_get_dirty_bitmap(container->fd, range->iova, >> range->size, >> range->bitmap.size, ram_addr); >> err_out: >> - g_free(range->bitmap.data); >> g_free(dbitmap); >> + vfio_bitmap_dealloc(vbmap); >> >> return ret; >> } >
On 27/02/2023 14:09, Cédric Le Goater wrote: > On 2/22/23 18:49, Avihai Horon wrote: >> --- a/hw/vfio/common.c >> +++ b/hw/vfio/common.c >> @@ -320,6 +320,41 @@ const MemoryRegionOps vfio_region_ops = { >> * Device state interfaces >> */ >> +typedef struct { >> + unsigned long *bitmap; >> + hwaddr size; >> + hwaddr pages; >> +} VFIOBitmap; >> + >> +static VFIOBitmap *vfio_bitmap_alloc(hwaddr size) >> +{ >> + VFIOBitmap *vbmap = g_try_new0(VFIOBitmap, 1); > > I think using g_malloc0() for the VFIOBitmap should be fine. If QEMU can > not allocate a couple of bytes, we are in trouble anyway. > OOM situations are rather unpredictable, and switching to g_malloc0 means we will exit ungracefully in the middle of fetching dirty bitmaps. And this function (vfio_bitmap_alloc) overall will be allocating megabytes for terabyte guests. It would be ok if we are initializing, but this is at runtime when we do migration. I think we should stick with g_try_new0. exit on failure should be reserved to failure to switch the kernel migration state whereby we are likely to be dealing with a hardware failure and thus requires something more drastic. Joao
Hello Joao, On 3/2/23 14:24, Joao Martins wrote: > On 27/02/2023 14:09, Cédric Le Goater wrote: >> On 2/22/23 18:49, Avihai Horon wrote: >>> --- a/hw/vfio/common.c >>> +++ b/hw/vfio/common.c >>> @@ -320,6 +320,41 @@ const MemoryRegionOps vfio_region_ops = { >>> * Device state interfaces >>> */ >>> +typedef struct { >>> + unsigned long *bitmap; >>> + hwaddr size; >>> + hwaddr pages; >>> +} VFIOBitmap; >>> + >>> +static VFIOBitmap *vfio_bitmap_alloc(hwaddr size) >>> +{ >>> + VFIOBitmap *vbmap = g_try_new0(VFIOBitmap, 1); >> >> I think using g_malloc0() for the VFIOBitmap should be fine. If QEMU can >> not allocate a couple of bytes, we are in trouble anyway. >> > > OOM situations are rather unpredictable, and switching to g_malloc0 means we > will exit ungracefully in the middle of fetching dirty bitmaps. And this > function (vfio_bitmap_alloc) overall will be allocating megabytes for terabyte > guests. > > It would be ok if we are initializing, but this is at runtime when we do > migration. I think we should stick with g_try_new0. exit on failure should be > reserved to failure to switch the kernel migration state whereby we are likely > to be dealing with a hardware failure and thus requires something more drastic. I agree for large allocation : vbmap->bitmap = g_try_malloc0(vbmap->size); but not for the smaller ones, like VFIOBitmap. You would have to convert some other g_malloc0() calls, like the one allocating 'unmap' in vfio_dma_unmap_bitmap(), to be consistent. Given the size of VFIOBitmap, I think it could live on the stack in routine vfio_dma_unmap_bitmap() and routine vfio_get_dirty_bitmap() since the reference is not kept. The 'vbmap' attribute of vfio_giommu_dirty_notifier does not need to be a pointer either. vfio_bitmap_alloc(hwaddr size) could then become vfio_bitmap_init(VFIOBitmap *vbmap, hwaddr size). Anyhow, this is minor. It would simplify a bit the exit path and error handling. Thanks, C.
On 02/03/2023 14:52, Cédric Le Goater wrote: > Hello Joao, > On 3/2/23 14:24, Joao Martins wrote: >> On 27/02/2023 14:09, Cédric Le Goater wrote: >>> On 2/22/23 18:49, Avihai Horon wrote: >>>> --- a/hw/vfio/common.c >>>> +++ b/hw/vfio/common.c >>>> @@ -320,6 +320,41 @@ const MemoryRegionOps vfio_region_ops = { >>>> * Device state interfaces >>>> */ >>>> +typedef struct { >>>> + unsigned long *bitmap; >>>> + hwaddr size; >>>> + hwaddr pages; >>>> +} VFIOBitmap; >>>> + >>>> +static VFIOBitmap *vfio_bitmap_alloc(hwaddr size) >>>> +{ >>>> + VFIOBitmap *vbmap = g_try_new0(VFIOBitmap, 1); >>> >>> I think using g_malloc0() for the VFIOBitmap should be fine. If QEMU can >>> not allocate a couple of bytes, we are in trouble anyway. >>> >> >> OOM situations are rather unpredictable, and switching to g_malloc0 means we >> will exit ungracefully in the middle of fetching dirty bitmaps. And this >> function (vfio_bitmap_alloc) overall will be allocating megabytes for terabyte >> guests. >> >> It would be ok if we are initializing, but this is at runtime when we do >> migration. I think we should stick with g_try_new0. exit on failure should be >> reserved to failure to switch the kernel migration state whereby we are likely >> to be dealing with a hardware failure and thus requires something more drastic. > > I agree for large allocation : > > vbmap->bitmap = g_try_malloc0(vbmap->size); > > but not for the smaller ones, like VFIOBitmap. You would have to > convert some other g_malloc0() calls, like the one allocating 'unmap' > in vfio_dma_unmap_bitmap(), to be consistent. > > Given the size of VFIOBitmap, I think it could live on the stack in > routine vfio_dma_unmap_bitmap() and routine vfio_get_dirty_bitmap() > since the reference is not kept. > Both good points. Specially the g_malloc0 ones, though the dma unmap wouldn't be in use for a device that supports dirty tracking. But there's one where we add by mistake and that's the one vfio_device_feature_dma_logging_start_create(). It shouldn't be g_malloc0 there too. The rest, except dma_unmap and type1-iommu get_dirty_Bitmap functions, the others would argue that only happen in the initialization. > The 'vbmap' attribute of vfio_giommu_dirty_notifier does not need > to be a pointer either. > > vfio_bitmap_alloc(hwaddr size) could then become > vfio_bitmap_init(VFIOBitmap *vbmap, hwaddr size). > > Anyhow, this is minor. It would simplify a bit the exit path > and error handling. > By simplify presumably it's because vfio_bitmap_free() would be a single line and thus avoiding the new helper and instead we would just live with the vfio_bitmap_alloc(). I am at two minds with alloc vs init, considering we are still allocating the actual bitmap. Still lingering more over staying with alloc than init.
On 02/03/2023 14:52, Cédric Le Goater wrote: > Hello Joao, > > On 3/2/23 14:24, Joao Martins wrote: >> On 27/02/2023 14:09, Cédric Le Goater wrote: >>> On 2/22/23 18:49, Avihai Horon wrote: >>>> --- a/hw/vfio/common.c >>>> +++ b/hw/vfio/common.c >>>> @@ -320,6 +320,41 @@ const MemoryRegionOps vfio_region_ops = { >>>> * Device state interfaces >>>> */ >>>> +typedef struct { >>>> + unsigned long *bitmap; >>>> + hwaddr size; >>>> + hwaddr pages; >>>> +} VFIOBitmap; >>>> + >>>> +static VFIOBitmap *vfio_bitmap_alloc(hwaddr size) >>>> +{ >>>> + VFIOBitmap *vbmap = g_try_new0(VFIOBitmap, 1); >>> >>> I think using g_malloc0() for the VFIOBitmap should be fine. If QEMU can >>> not allocate a couple of bytes, we are in trouble anyway. >>> >> >> OOM situations are rather unpredictable, and switching to g_malloc0 means we >> will exit ungracefully in the middle of fetching dirty bitmaps. And this >> function (vfio_bitmap_alloc) overall will be allocating megabytes for terabyte >> guests. >> >> It would be ok if we are initializing, but this is at runtime when we do >> migration. I think we should stick with g_try_new0. exit on failure should be >> reserved to failure to switch the kernel migration state whereby we are likely >> to be dealing with a hardware failure and thus requires something more drastic. > > I agree for large allocation : > > vbmap->bitmap = g_try_malloc0(vbmap->size); > > but not for the smaller ones, like VFIOBitmap. You would have to > convert some other g_malloc0() calls, like the one allocating 'unmap' > in vfio_dma_unmap_bitmap(), to be consistent. > > Given the size of VFIOBitmap, I think it could live on the stack in > routine vfio_dma_unmap_bitmap() and routine vfio_get_dirty_bitmap() > since the reference is not kept. > > The 'vbmap' attribute of vfio_giommu_dirty_notifier does not need > to be a pointer either. > > vfio_bitmap_alloc(hwaddr size) could then become > vfio_bitmap_init(VFIOBitmap *vbmap, hwaddr size). > > Anyhow, this is minor. It would simplify a bit the exit path > and error handling. > FWIW, I've addressed this on v3, following your suggestion. Joao
diff --git a/hw/vfio/common.c b/hw/vfio/common.c index ac93b85632..84f08bdbbb 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -320,6 +320,41 @@ const MemoryRegionOps vfio_region_ops = { * Device state interfaces */ +typedef struct { + unsigned long *bitmap; + hwaddr size; + hwaddr pages; +} VFIOBitmap; + +static VFIOBitmap *vfio_bitmap_alloc(hwaddr size) +{ + VFIOBitmap *vbmap = g_try_new0(VFIOBitmap, 1); + if (!vbmap) { + errno = ENOMEM; + + return NULL; + } + + vbmap->pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size(); + vbmap->size = ROUND_UP(vbmap->pages, sizeof(__u64) * BITS_PER_BYTE) / + BITS_PER_BYTE; + vbmap->bitmap = g_try_malloc0(vbmap->size); + if (!vbmap->bitmap) { + g_free(vbmap); + errno = ENOMEM; + + return NULL; + } + + return vbmap; +} + +static void vfio_bitmap_dealloc(VFIOBitmap *vbmap) +{ + g_free(vbmap->bitmap); + g_free(vbmap); +} + bool vfio_mig_active(void) { VFIOGroup *group; @@ -470,9 +505,14 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container, { struct vfio_iommu_type1_dma_unmap *unmap; struct vfio_bitmap *bitmap; - uint64_t pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size(); + VFIOBitmap *vbmap; int ret; + vbmap = vfio_bitmap_alloc(size); + if (!vbmap) { + return -errno; + } + unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap)); unmap->argsz = sizeof(*unmap) + sizeof(*bitmap); @@ -486,35 +526,28 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container, * qemu_real_host_page_size to mark those dirty. Hence set bitmap_pgsize * to qemu_real_host_page_size. */ - bitmap->pgsize = qemu_real_host_page_size(); - bitmap->size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) / - BITS_PER_BYTE; + bitmap->size = vbmap->size; + bitmap->data = (__u64 *)vbmap->bitmap; - if (bitmap->size > container->max_dirty_bitmap_size) { - error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, - (uint64_t)bitmap->size); + if (vbmap->size > container->max_dirty_bitmap_size) { + error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, vbmap->size); ret = -E2BIG; goto unmap_exit; } - bitmap->data = g_try_malloc0(bitmap->size); - if (!bitmap->data) { - ret = -ENOMEM; - goto unmap_exit; - } - ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap); if (!ret) { - cpu_physical_memory_set_dirty_lebitmap((unsigned long *)bitmap->data, - iotlb->translated_addr, pages); + cpu_physical_memory_set_dirty_lebitmap(vbmap->bitmap, + iotlb->translated_addr, vbmap->pages); } else { error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m"); } - g_free(bitmap->data); unmap_exit: g_free(unmap); + vfio_bitmap_dealloc(vbmap); + return ret; } @@ -1331,7 +1364,7 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, { struct vfio_iommu_type1_dirty_bitmap *dbitmap; struct vfio_iommu_type1_dirty_bitmap_get *range; - uint64_t pages; + VFIOBitmap *vbmap; int ret; if (!container->dirty_pages_supported) { @@ -1341,6 +1374,11 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, return 0; } + vbmap = vfio_bitmap_alloc(size); + if (!vbmap) { + return -errno; + } + dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range)); dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range); @@ -1355,15 +1393,8 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, * to qemu_real_host_page_size. */ range->bitmap.pgsize = qemu_real_host_page_size(); - - pages = REAL_HOST_PAGE_ALIGN(range->size) / qemu_real_host_page_size(); - range->bitmap.size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) / - BITS_PER_BYTE; - range->bitmap.data = g_try_malloc0(range->bitmap.size); - if (!range->bitmap.data) { - ret = -ENOMEM; - goto err_out; - } + range->bitmap.size = vbmap->size; + range->bitmap.data = (__u64 *)vbmap->bitmap; ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap); if (ret) { @@ -1374,14 +1405,14 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, goto err_out; } - cpu_physical_memory_set_dirty_lebitmap((unsigned long *)range->bitmap.data, - ram_addr, pages); + cpu_physical_memory_set_dirty_lebitmap(vbmap->bitmap, ram_addr, + vbmap->pages); trace_vfio_get_dirty_bitmap(container->fd, range->iova, range->size, range->bitmap.size, ram_addr); err_out: - g_free(range->bitmap.data); g_free(dbitmap); + vfio_bitmap_dealloc(vbmap); return ret; }
There are already two places where dirty page bitmap allocation and calculations are done in open code. With device dirty page tracking being added in next patches, there are going to be even more places. To avoid code duplication, introduce VFIOBitmap struct and corresponding alloc and dealloc functions and use them where applicable. Signed-off-by: Avihai Horon <avihaih@nvidia.com> --- hw/vfio/common.c | 89 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 60 insertions(+), 29 deletions(-)