diff mbox series

[v2,03/12] vfio: Collect container iova range info

Message ID 20230913080423.523953-4-eric.auger@redhat.com (mailing list archive)
State New, archived
Headers show
Series VIRTIO-IOMMU/VFIO: Don't assume 64b IOVA space | expand

Commit Message

Eric Auger Sept. 13, 2023, 8:01 a.m. UTC
Collect iova range information if VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE
capability is supported.

This allows to propagate the information though the IOMMU MR
set_iova_ranges() callback so that virtual IOMMUs
get aware of those aperture constraints.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
 include/hw/vfio/vfio-common.h |  2 ++
 hw/vfio/common.c              | 45 +++++++++++++++++++++++++++++++++--
 2 files changed, 45 insertions(+), 2 deletions(-)

Comments

Cédric Le Goater Sept. 13, 2023, 12:55 p.m. UTC | #1
On 9/13/23 10:01, Eric Auger wrote:
> Collect iova range information if VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE
> capability is supported.
> 
> This allows to propagate the information though the IOMMU MR
> set_iova_ranges() callback so that virtual IOMMUs
> get aware of those aperture constraints.
> 
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> ---
>   include/hw/vfio/vfio-common.h |  2 ++
>   hw/vfio/common.c              | 45 +++++++++++++++++++++++++++++++++--
>   2 files changed, 45 insertions(+), 2 deletions(-)
> 
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index da43d27352..74b9b27270 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -98,6 +98,8 @@ typedef struct VFIOContainer {
>       QLIST_HEAD(, VFIOGroup) group_list;
>       QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
>       QLIST_ENTRY(VFIOContainer) next;
> +    unsigned nr_iovas;
> +    struct  vfio_iova_range *iova_ranges;
>   } VFIOContainer;
>   
>   typedef struct VFIOGuestIOMMU {
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 9aac21abb7..26da38de05 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -1157,6 +1157,14 @@ static void vfio_listener_region_add(MemoryListener *listener,
>               goto fail;
>           }
>   
> +        ret = memory_region_iommu_set_iova_ranges(giommu->iommu_mr,
> +                container->nr_iovas, (struct Range *)container->iova_ranges,
> +                &err);
> +        if (ret) {
> +            g_free(giommu);
> +            goto fail;
> +        }
> +
>           ret = memory_region_register_iommu_notifier(section->mr, &giommu->n,
>                                                       &err);
>           if (ret) {
> @@ -1981,6 +1989,29 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
>       return true;
>   }
>   
> +static void vfio_get_info_iova_range(struct vfio_iommu_type1_info *info,
> +                                     unsigned int *nr_iovas,
> +                                     struct  vfio_iova_range **iova_ranges)

I guess there is no point in returning an error since we can not
assign default values.

> +{
> +    struct vfio_info_cap_header *hdr;
> +    struct vfio_iommu_type1_info_cap_iova_range *cap;
> +
> +    hdr = vfio_get_iommu_type1_info_cap(info,
> +                                        VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
> +    if (hdr == NULL) {

May be :

     if (!hdr) {


> +        return;
> +    }
> +
> +    cap = (void *)hdr;
> +    *nr_iovas = cap->nr_iovas;
> +

I would add a trace event with the #iovas.

Thanks,

C.


> +    if (*nr_iovas == 0) {
> +        return;
> +    }
> +    *iova_ranges = g_memdup2(cap->iova_ranges,
> +                             *nr_iovas * sizeof(struct  vfio_iova_range));
> +}
> +
>   static int vfio_setup_region_sparse_mmaps(VFIORegion *region,
>                                             struct vfio_region_info *info)
>   {
> @@ -2433,6 +2464,12 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container,
>       }
>   }
>   
> +static void vfio_free_container(VFIOContainer *container)
> +{
> +    g_free(container->iova_ranges);
> +    g_free(container);
> +}
> +
>   static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
>                                     Error **errp)
>   {
> @@ -2550,6 +2587,10 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
>           if (!vfio_get_info_dma_avail(info, &container->dma_max_mappings)) {
>               container->dma_max_mappings = 65535;
>           }
> +
> +        vfio_get_info_iova_range(info, &container->nr_iovas,
> +                                 &container->iova_ranges);
> +
>           vfio_get_iommu_info_migration(container, info);
>           g_free(info);
>   
> @@ -2663,7 +2704,7 @@ enable_discards_exit:
>       vfio_ram_block_discard_disable(container, false);
>   
>   free_container_exit:
> -    g_free(container);
> +    vfio_free_container(container);
>   
>   close_fd_exit:
>       close(fd);
> @@ -2717,7 +2758,7 @@ static void vfio_disconnect_container(VFIOGroup *group)
>   
>           trace_vfio_disconnect_container(container->fd);
>           close(container->fd);
> -        g_free(container);
> +        vfio_free_container(container);
>   
>           vfio_put_address_space(space);
>       }
Alex Williamson Sept. 19, 2023, 3:44 p.m. UTC | #2
On Wed, 13 Sep 2023 10:01:38 +0200
Eric Auger <eric.auger@redhat.com> wrote:

> Collect iova range information if VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE
> capability is supported.
> 
> This allows to propagate the information though the IOMMU MR
> set_iova_ranges() callback so that virtual IOMMUs
> get aware of those aperture constraints.
> 
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> ---
>  include/hw/vfio/vfio-common.h |  2 ++
>  hw/vfio/common.c              | 45 +++++++++++++++++++++++++++++++++--
>  2 files changed, 45 insertions(+), 2 deletions(-)
> 
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index da43d27352..74b9b27270 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -98,6 +98,8 @@ typedef struct VFIOContainer {
>      QLIST_HEAD(, VFIOGroup) group_list;
>      QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
>      QLIST_ENTRY(VFIOContainer) next;
> +    unsigned nr_iovas;
> +    struct  vfio_iova_range *iova_ranges;
>  } VFIOContainer;
>  
>  typedef struct VFIOGuestIOMMU {
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 9aac21abb7..26da38de05 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -1157,6 +1157,14 @@ static void vfio_listener_region_add(MemoryListener *listener,
>              goto fail;
>          }
>  
> +        ret = memory_region_iommu_set_iova_ranges(giommu->iommu_mr,
> +                container->nr_iovas, (struct Range *)container->iova_ranges,
> +                &err);

The semantics of calling this with nr_iovas == 0 and the vIOMMU driver
ignoring that it's being told there are no usable iova ranges is rather
strange.  Should nr_iovas be initialized to -1 for that or should this
call be conditional on non-zero nr_iovas?

Also, vfio_get_info_iova_range() is only called in the type1 container
path and the IOVA range info capability has only existed since kernel
v5.4.  So we need to do something sane even if we don't have the kernel
telling us about the IOVA ranges.  I think this precludes the assert in
the final patch of the series or else new QEMU on an old kernel is
broken.

> +        if (ret) {
> +            g_free(giommu);
> +            goto fail;
> +        }
> +
>          ret = memory_region_register_iommu_notifier(section->mr, &giommu->n,
>                                                      &err);
>          if (ret) {
> @@ -1981,6 +1989,29 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
>      return true;
>  }
>  
> +static void vfio_get_info_iova_range(struct vfio_iommu_type1_info *info,
> +                                     unsigned int *nr_iovas,
> +                                     struct  vfio_iova_range **iova_ranges)

Just pass the VFIOContainer pointer?  Thanks,

Alex

> +{
> +    struct vfio_info_cap_header *hdr;
> +    struct vfio_iommu_type1_info_cap_iova_range *cap;
> +
> +    hdr = vfio_get_iommu_type1_info_cap(info,
> +                                        VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
> +    if (hdr == NULL) {
> +        return;
> +    }
> +
> +    cap = (void *)hdr;
> +    *nr_iovas = cap->nr_iovas;
> +
> +    if (*nr_iovas == 0) {
> +        return;
> +    }
> +    *iova_ranges = g_memdup2(cap->iova_ranges,
> +                             *nr_iovas * sizeof(struct  vfio_iova_range));
> +}
> +
>  static int vfio_setup_region_sparse_mmaps(VFIORegion *region,
>                                            struct vfio_region_info *info)
>  {
> @@ -2433,6 +2464,12 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container,
>      }
>  }
>  
> +static void vfio_free_container(VFIOContainer *container)
> +{
> +    g_free(container->iova_ranges);
> +    g_free(container);
> +}
> +
>  static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
>                                    Error **errp)
>  {
> @@ -2550,6 +2587,10 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
>          if (!vfio_get_info_dma_avail(info, &container->dma_max_mappings)) {
>              container->dma_max_mappings = 65535;
>          }
> +
> +        vfio_get_info_iova_range(info, &container->nr_iovas,
> +                                 &container->iova_ranges);
> +
>          vfio_get_iommu_info_migration(container, info);
>          g_free(info);
>  
> @@ -2663,7 +2704,7 @@ enable_discards_exit:
>      vfio_ram_block_discard_disable(container, false);
>  
>  free_container_exit:
> -    g_free(container);
> +    vfio_free_container(container);
>  
>  close_fd_exit:
>      close(fd);
> @@ -2717,7 +2758,7 @@ static void vfio_disconnect_container(VFIOGroup *group)
>  
>          trace_vfio_disconnect_container(container->fd);
>          close(container->fd);
> -        g_free(container);
> +        vfio_free_container(container);
>  
>          vfio_put_address_space(space);
>      }
Eric Auger Sept. 20, 2023, 7:15 a.m. UTC | #3
Hi Alex,

On 9/19/23 17:44, Alex Williamson wrote:
> On Wed, 13 Sep 2023 10:01:38 +0200
> Eric Auger <eric.auger@redhat.com> wrote:
>
>> Collect iova range information if VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE
>> capability is supported.
>>
>> This allows to propagate the information though the IOMMU MR
>> set_iova_ranges() callback so that virtual IOMMUs
>> get aware of those aperture constraints.
>>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>> ---
>>  include/hw/vfio/vfio-common.h |  2 ++
>>  hw/vfio/common.c              | 45 +++++++++++++++++++++++++++++++++--
>>  2 files changed, 45 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index da43d27352..74b9b27270 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -98,6 +98,8 @@ typedef struct VFIOContainer {
>>      QLIST_HEAD(, VFIOGroup) group_list;
>>      QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
>>      QLIST_ENTRY(VFIOContainer) next;
>> +    unsigned nr_iovas;
>> +    struct  vfio_iova_range *iova_ranges;
>>  } VFIOContainer;
>>  
>>  typedef struct VFIOGuestIOMMU {
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 9aac21abb7..26da38de05 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -1157,6 +1157,14 @@ static void vfio_listener_region_add(MemoryListener *listener,
>>              goto fail;
>>          }
>>  
>> +        ret = memory_region_iommu_set_iova_ranges(giommu->iommu_mr,
>> +                container->nr_iovas, (struct Range *)container->iova_ranges,
>> +                &err);
> The semantics of calling this with nr_iovas == 0 and the vIOMMU driver
> ignoring that it's being told there are no usable iova ranges is rather
> strange.  Should nr_iovas be initialized to -1 for that or should this
> call be conditional on non-zero nr_iovas?
>
> Also, vfio_get_info_iova_range() is only called in the type1 container
> path and the IOVA range info capability has only existed since kernel
> v5.4.  So we need to do something sane even if we don't have the kernel
> telling us about the IOVA ranges.  I think this precludes the assert in
> the final patch of the series or else new QEMU on an old kernel is
> broken.

Correct. I totally missed that point.

Thanks!

Eric
>
>> +        if (ret) {
>> +            g_free(giommu);
>> +            goto fail;
>> +        }
>> +
>>          ret = memory_region_register_iommu_notifier(section->mr, &giommu->n,
>>                                                      &err);
>>          if (ret) {
>> @@ -1981,6 +1989,29 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
>>      return true;
>>  }
>>  
>> +static void vfio_get_info_iova_range(struct vfio_iommu_type1_info *info,
>> +                                     unsigned int *nr_iovas,
>> +                                     struct  vfio_iova_range **iova_ranges)
> Just pass the VFIOContainer pointer?  Thanks,
>
> Alex
>
>> +{
>> +    struct vfio_info_cap_header *hdr;
>> +    struct vfio_iommu_type1_info_cap_iova_range *cap;
>> +
>> +    hdr = vfio_get_iommu_type1_info_cap(info,
>> +                                        VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
>> +    if (hdr == NULL) {
>> +        return;
>> +    }
>> +
>> +    cap = (void *)hdr;
>> +    *nr_iovas = cap->nr_iovas;
>> +
>> +    if (*nr_iovas == 0) {
>> +        return;
>> +    }
>> +    *iova_ranges = g_memdup2(cap->iova_ranges,
>> +                             *nr_iovas * sizeof(struct  vfio_iova_range));
>> +}
>> +
>>  static int vfio_setup_region_sparse_mmaps(VFIORegion *region,
>>                                            struct vfio_region_info *info)
>>  {
>> @@ -2433,6 +2464,12 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container,
>>      }
>>  }
>>  
>> +static void vfio_free_container(VFIOContainer *container)
>> +{
>> +    g_free(container->iova_ranges);
>> +    g_free(container);
>> +}
>> +
>>  static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
>>                                    Error **errp)
>>  {
>> @@ -2550,6 +2587,10 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
>>          if (!vfio_get_info_dma_avail(info, &container->dma_max_mappings)) {
>>              container->dma_max_mappings = 65535;
>>          }
>> +
>> +        vfio_get_info_iova_range(info, &container->nr_iovas,
>> +                                 &container->iova_ranges);
>> +
>>          vfio_get_iommu_info_migration(container, info);
>>          g_free(info);
>>  
>> @@ -2663,7 +2704,7 @@ enable_discards_exit:
>>      vfio_ram_block_discard_disable(container, false);
>>  
>>  free_container_exit:
>> -    g_free(container);
>> +    vfio_free_container(container);
>>  
>>  close_fd_exit:
>>      close(fd);
>> @@ -2717,7 +2758,7 @@ static void vfio_disconnect_container(VFIOGroup *group)
>>  
>>          trace_vfio_disconnect_container(container->fd);
>>          close(container->fd);
>> -        g_free(container);
>> +        vfio_free_container(container);
>>  
>>          vfio_put_address_space(space);
>>      }
Eric Auger Sept. 20, 2023, 7:38 a.m. UTC | #4
Hi Cédric,

On 9/13/23 14:55, Cédric Le Goater wrote:
> On 9/13/23 10:01, Eric Auger wrote:
>> Collect iova range information if VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE
>> capability is supported.
>>
>> This allows to propagate the information though the IOMMU MR
>> set_iova_ranges() callback so that virtual IOMMUs
>> get aware of those aperture constraints.
>>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>> ---
>>   include/hw/vfio/vfio-common.h |  2 ++
>>   hw/vfio/common.c              | 45 +++++++++++++++++++++++++++++++++--
>>   2 files changed, 45 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/hw/vfio/vfio-common.h
>> b/include/hw/vfio/vfio-common.h
>> index da43d27352..74b9b27270 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -98,6 +98,8 @@ typedef struct VFIOContainer {
>>       QLIST_HEAD(, VFIOGroup) group_list;
>>       QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
>>       QLIST_ENTRY(VFIOContainer) next;
>> +    unsigned nr_iovas;
>> +    struct  vfio_iova_range *iova_ranges;
>>   } VFIOContainer;
>>     typedef struct VFIOGuestIOMMU {
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 9aac21abb7..26da38de05 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -1157,6 +1157,14 @@ static void
>> vfio_listener_region_add(MemoryListener *listener,
>>               goto fail;
>>           }
>>   +        ret = memory_region_iommu_set_iova_ranges(giommu->iommu_mr,
>> +                container->nr_iovas, (struct Range
>> *)container->iova_ranges,
>> +                &err);
>> +        if (ret) {
>> +            g_free(giommu);
>> +            goto fail;
>> +        }
>> +
>>           ret = memory_region_register_iommu_notifier(section->mr,
>> &giommu->n,
>>                                                       &err);
>>           if (ret) {
>> @@ -1981,6 +1989,29 @@ bool vfio_get_info_dma_avail(struct
>> vfio_iommu_type1_info *info,
>>       return true;
>>   }
>>   +static void vfio_get_info_iova_range(struct vfio_iommu_type1_info
>> *info,
>> +                                     unsigned int *nr_iovas,
>> +                                     struct  vfio_iova_range
>> **iova_ranges)
>
> I guess there is no point in returning an error since we can not
> assign default values.
Actually this will return a boolean depending on the whether the
capability is supported,
as reported by Alex. I should have get inspired of
vfio_get_info_dma_avail()!
>
>> +{
>> +    struct vfio_info_cap_header *hdr;
>> +    struct vfio_iommu_type1_info_cap_iova_range *cap;
>> +
>> +    hdr = vfio_get_iommu_type1_info_cap(info,
>> +                                       
>> VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
>> +    if (hdr == NULL) {
>
> May be :
>
>     if (!hdr) {
yep. I guess I copy/pasted the vfio_get_info_dma_avail() code here ;-)
>
>
>> +        return;
>> +    }
>> +
>> +    cap = (void *)hdr;
>> +    *nr_iovas = cap->nr_iovas;
>> +
>
> I would add a trace event with the #iovas.
I do agree tracing the resv regions is usefyl however I would be tempted
to have this trace point elsewhere, maybe at the place where
vfio_host_win_add is called to trace aperture min/max and in the
set_iova cb. Because here I would need to enumerate the regions to trace
them and usually trace points do not add any code.

Thanks

Eric
>
> Thanks,
>
> C.
>
>
>> +    if (*nr_iovas == 0) {
>> +        return;
>> +    }
>> +    *iova_ranges = g_memdup2(cap->iova_ranges,
>> +                             *nr_iovas * sizeof(struct 
>> vfio_iova_range));
>> +}
>> +
>>   static int vfio_setup_region_sparse_mmaps(VFIORegion *region,
>>                                             struct vfio_region_info
>> *info)
>>   {
>> @@ -2433,6 +2464,12 @@ static void
>> vfio_get_iommu_info_migration(VFIOContainer *container,
>>       }
>>   }
>>   +static void vfio_free_container(VFIOContainer *container)
>> +{
>> +    g_free(container->iova_ranges);
>> +    g_free(container);
>> +}
>> +
>>   static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
>>                                     Error **errp)
>>   {
>> @@ -2550,6 +2587,10 @@ static int vfio_connect_container(VFIOGroup
>> *group, AddressSpace *as,
>>           if (!vfio_get_info_dma_avail(info,
>> &container->dma_max_mappings)) {
>>               container->dma_max_mappings = 65535;
>>           }
>> +
>> +        vfio_get_info_iova_range(info, &container->nr_iovas,
>> +                                 &container->iova_ranges);
>> +
>>           vfio_get_iommu_info_migration(container, info);
>>           g_free(info);
>>   @@ -2663,7 +2704,7 @@ enable_discards_exit:
>>       vfio_ram_block_discard_disable(container, false);
>>     free_container_exit:
>> -    g_free(container);
>> +    vfio_free_container(container);
>>     close_fd_exit:
>>       close(fd);
>> @@ -2717,7 +2758,7 @@ static void vfio_disconnect_container(VFIOGroup
>> *group)
>>             trace_vfio_disconnect_container(container->fd);
>>           close(container->fd);
>> -        g_free(container);
>> +        vfio_free_container(container);
>>             vfio_put_address_space(space);
>>       }
>
Eric Auger Sept. 20, 2023, 7:39 a.m. UTC | #5
Hi Alex,

On 9/19/23 17:44, Alex Williamson wrote:
> On Wed, 13 Sep 2023 10:01:38 +0200
> Eric Auger <eric.auger@redhat.com> wrote:
>
>> Collect iova range information if VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE
>> capability is supported.
>>
>> This allows to propagate the information though the IOMMU MR
>> set_iova_ranges() callback so that virtual IOMMUs
>> get aware of those aperture constraints.
>>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>> ---
>>  include/hw/vfio/vfio-common.h |  2 ++
>>  hw/vfio/common.c              | 45 +++++++++++++++++++++++++++++++++--
>>  2 files changed, 45 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index da43d27352..74b9b27270 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -98,6 +98,8 @@ typedef struct VFIOContainer {
>>      QLIST_HEAD(, VFIOGroup) group_list;
>>      QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
>>      QLIST_ENTRY(VFIOContainer) next;
>> +    unsigned nr_iovas;
>> +    struct  vfio_iova_range *iova_ranges;
>>  } VFIOContainer;
>>  
>>  typedef struct VFIOGuestIOMMU {
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 9aac21abb7..26da38de05 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -1157,6 +1157,14 @@ static void vfio_listener_region_add(MemoryListener *listener,
>>              goto fail;
>>          }
>>  
>> +        ret = memory_region_iommu_set_iova_ranges(giommu->iommu_mr,
>> +                container->nr_iovas, (struct Range *)container->iova_ranges,
>> +                &err);
> The semantics of calling this with nr_iovas == 0 and the vIOMMU driver
> ignoring that it's being told there are no usable iova ranges is rather
> strange.  Should nr_iovas be initialized to -1 for that or should this
> call be conditional on non-zero nr_iovas?
>
> Also, vfio_get_info_iova_range() is only called in the type1 container
> path and the IOVA range info capability has only existed since kernel
> v5.4.  So we need to do something sane even if we don't have the kernel
> telling us about the IOVA ranges.  I think this precludes the assert in
> the final patch of the series or else new QEMU on an old kernel is
> broken.
>
>> +        if (ret) {
>> +            g_free(giommu);
>> +            goto fail;
>> +        }
>> +
>>          ret = memory_region_register_iommu_notifier(section->mr, &giommu->n,
>>                                                      &err);
>>          if (ret) {
>> @@ -1981,6 +1989,29 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
>>      return true;
>>  }
>>  
>> +static void vfio_get_info_iova_range(struct vfio_iommu_type1_info *info,
>> +                                     unsigned int *nr_iovas,
>> +                                     struct  vfio_iova_range **iova_ranges)
> Just pass the VFIOContainer pointer?  Thanks,
Sorry I miss this comment: yup.

Eric
>
> Alex
>
>> +{
>> +    struct vfio_info_cap_header *hdr;
>> +    struct vfio_iommu_type1_info_cap_iova_range *cap;
>> +
>> +    hdr = vfio_get_iommu_type1_info_cap(info,
>> +                                        VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
>> +    if (hdr == NULL) {
>> +        return;
>> +    }
>> +
>> +    cap = (void *)hdr;
>> +    *nr_iovas = cap->nr_iovas;
>> +
>> +    if (*nr_iovas == 0) {
>> +        return;
>> +    }
>> +    *iova_ranges = g_memdup2(cap->iova_ranges,
>> +                             *nr_iovas * sizeof(struct  vfio_iova_range));
>> +}
>> +
>>  static int vfio_setup_region_sparse_mmaps(VFIORegion *region,
>>                                            struct vfio_region_info *info)
>>  {
>> @@ -2433,6 +2464,12 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container,
>>      }
>>  }
>>  
>> +static void vfio_free_container(VFIOContainer *container)
>> +{
>> +    g_free(container->iova_ranges);
>> +    g_free(container);
>> +}
>> +
>>  static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
>>                                    Error **errp)
>>  {
>> @@ -2550,6 +2587,10 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
>>          if (!vfio_get_info_dma_avail(info, &container->dma_max_mappings)) {
>>              container->dma_max_mappings = 65535;
>>          }
>> +
>> +        vfio_get_info_iova_range(info, &container->nr_iovas,
>> +                                 &container->iova_ranges);
>> +
>>          vfio_get_iommu_info_migration(container, info);
>>          g_free(info);
>>  
>> @@ -2663,7 +2704,7 @@ enable_discards_exit:
>>      vfio_ram_block_discard_disable(container, false);
>>  
>>  free_container_exit:
>> -    g_free(container);
>> +    vfio_free_container(container);
>>  
>>  close_fd_exit:
>>      close(fd);
>> @@ -2717,7 +2758,7 @@ static void vfio_disconnect_container(VFIOGroup *group)
>>  
>>          trace_vfio_disconnect_container(container->fd);
>>          close(container->fd);
>> -        g_free(container);
>> +        vfio_free_container(container);
>>  
>>          vfio_put_address_space(space);
>>      }
diff mbox series

Patch

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index da43d27352..74b9b27270 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -98,6 +98,8 @@  typedef struct VFIOContainer {
     QLIST_HEAD(, VFIOGroup) group_list;
     QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
     QLIST_ENTRY(VFIOContainer) next;
+    unsigned nr_iovas;
+    struct  vfio_iova_range *iova_ranges;
 } VFIOContainer;
 
 typedef struct VFIOGuestIOMMU {
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 9aac21abb7..26da38de05 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1157,6 +1157,14 @@  static void vfio_listener_region_add(MemoryListener *listener,
             goto fail;
         }
 
+        ret = memory_region_iommu_set_iova_ranges(giommu->iommu_mr,
+                container->nr_iovas, (struct Range *)container->iova_ranges,
+                &err);
+        if (ret) {
+            g_free(giommu);
+            goto fail;
+        }
+
         ret = memory_region_register_iommu_notifier(section->mr, &giommu->n,
                                                     &err);
         if (ret) {
@@ -1981,6 +1989,29 @@  bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
     return true;
 }
 
+static void vfio_get_info_iova_range(struct vfio_iommu_type1_info *info,
+                                     unsigned int *nr_iovas,
+                                     struct  vfio_iova_range **iova_ranges)
+{
+    struct vfio_info_cap_header *hdr;
+    struct vfio_iommu_type1_info_cap_iova_range *cap;
+
+    hdr = vfio_get_iommu_type1_info_cap(info,
+                                        VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
+    if (hdr == NULL) {
+        return;
+    }
+
+    cap = (void *)hdr;
+    *nr_iovas = cap->nr_iovas;
+
+    if (*nr_iovas == 0) {
+        return;
+    }
+    *iova_ranges = g_memdup2(cap->iova_ranges,
+                             *nr_iovas * sizeof(struct  vfio_iova_range));
+}
+
 static int vfio_setup_region_sparse_mmaps(VFIORegion *region,
                                           struct vfio_region_info *info)
 {
@@ -2433,6 +2464,12 @@  static void vfio_get_iommu_info_migration(VFIOContainer *container,
     }
 }
 
+static void vfio_free_container(VFIOContainer *container)
+{
+    g_free(container->iova_ranges);
+    g_free(container);
+}
+
 static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
                                   Error **errp)
 {
@@ -2550,6 +2587,10 @@  static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
         if (!vfio_get_info_dma_avail(info, &container->dma_max_mappings)) {
             container->dma_max_mappings = 65535;
         }
+
+        vfio_get_info_iova_range(info, &container->nr_iovas,
+                                 &container->iova_ranges);
+
         vfio_get_iommu_info_migration(container, info);
         g_free(info);
 
@@ -2663,7 +2704,7 @@  enable_discards_exit:
     vfio_ram_block_discard_disable(container, false);
 
 free_container_exit:
-    g_free(container);
+    vfio_free_container(container);
 
 close_fd_exit:
     close(fd);
@@ -2717,7 +2758,7 @@  static void vfio_disconnect_container(VFIOGroup *group)
 
         trace_vfio_disconnect_container(container->fd);
         close(container->fd);
-        g_free(container);
+        vfio_free_container(container);
 
         vfio_put_address_space(space);
     }