diff mbox series

[RFC,v2,1/2] vhost-vdpa: Implement IOVA->GPA tree

Message ID 20241004124445.3802090-2-jonah.palmer@oracle.com (mailing list archive)
State New
Headers show
Series Handling aliased guest memory maps in vhost-vDPA SVQs | expand

Commit Message

Jonah Palmer Oct. 4, 2024, 12:44 p.m. UTC
Implements the IOVA->GPA tree for handling mapping, unmapping, and
translations for guest memory regions.

When the guest has overlapping memory regions, an HVA to IOVA translation
may return an incorrect IOVA when searching the IOVA->HVA tree. This is
due to one HVA range being contained (overlapping) in another HVA range
in the IOVA->HVA tree. By creating an IOVA->GPA tree, we can use GPAs to
translate and find the correct IOVA for guest memory regions.

Signed-off-by: Jonah Palmer <jonah.palmer@oracle.com>
---
 hw/virtio/vhost-iova-tree.c | 78 +++++++++++++++++++++++++++++++++++--
 hw/virtio/vhost-iova-tree.h |  5 +++
 hw/virtio/vhost-vdpa.c      | 20 ++++++----
 3 files changed, 92 insertions(+), 11 deletions(-)

Comments

Eugenio Perez Martin Oct. 4, 2024, 3:17 p.m. UTC | #1
On Fri, Oct 4, 2024 at 2:45 PM Jonah Palmer <jonah.palmer@oracle.com> wrote:
>
> Implements the IOVA->GPA tree for handling mapping, unmapping, and
> translations for guest memory regions.
>
> When the guest has overlapping memory regions, an HVA to IOVA translation
> may return an incorrect IOVA when searching the IOVA->HVA tree. This is
> due to one HVA range being contained (overlapping) in another HVA range
> in the IOVA->HVA tree. By creating an IOVA->GPA tree, we can use GPAs to
> translate and find the correct IOVA for guest memory regions.
>

Yes, this first patch is super close to what I meant, just one issue
and a pair of nits here and there.

I'd leave the second patch as an optimization on top, if the numbers
prove that adding the code is worth it.

> Signed-off-by: Jonah Palmer <jonah.palmer@oracle.com>
> ---
>  hw/virtio/vhost-iova-tree.c | 78 +++++++++++++++++++++++++++++++++++--
>  hw/virtio/vhost-iova-tree.h |  5 +++
>  hw/virtio/vhost-vdpa.c      | 20 ++++++----
>  3 files changed, 92 insertions(+), 11 deletions(-)
>
> diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c
> index 3d03395a77..e33fd56225 100644
> --- a/hw/virtio/vhost-iova-tree.c
> +++ b/hw/virtio/vhost-iova-tree.c
> @@ -28,12 +28,15 @@ struct VhostIOVATree {
>
>      /* IOVA address to qemu memory maps. */
>      IOVATree *iova_taddr_map;
> +
> +    /* IOVA address to guest memory maps. */
> +    IOVATree *iova_gpa_map;
>  };
>
>  /**
> - * Create a new IOVA tree
> + * Create a new VhostIOVATree
>   *
> - * Returns the new IOVA tree
> + * Returns the new VhostIOVATree
>   */
>  VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
>  {
> @@ -44,6 +47,7 @@ VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
>      tree->iova_last = iova_last;
>
>      tree->iova_taddr_map = iova_tree_new();
> +    tree->iova_gpa_map = iova_tree_new();
>      return tree;
>  }
>
> @@ -53,6 +57,7 @@ VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
>  void vhost_iova_tree_delete(VhostIOVATree *iova_tree)
>  {
>      iova_tree_destroy(iova_tree->iova_taddr_map);
> +    iova_tree_destroy(iova_tree->iova_gpa_map);
>      g_free(iova_tree);
>  }
>
> @@ -71,7 +76,7 @@ const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *tree,
>  }
>
>  /**
> - * Allocate a new mapping
> + * Allocate a new mapping in the IOVA->HVA tree
>   *
>   * @tree: The iova tree
>   * @map: The iova map
> @@ -108,3 +113,70 @@ void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map)
>  {
>      iova_tree_remove(iova_tree->iova_taddr_map, map);
>  }
> +
> +/**
> + * Find the IOVA address stored from a guest memory address
> + *
> + * @tree: The VhostIOVATree
> + * @map: The map with the guest memory address
> + *
> + * Return the stored mapping, or NULL if not found.
> + */
> +const DMAMap *vhost_iova_gpa_tree_find_iova(const VhostIOVATree *tree,
> +                                            const DMAMap *map)

Nit: Not an english native, but I find vhost_iova_tree should not be
broken for coherency with the rest of the functions. What about
vhost_iova_tree_find_iova_gpa, like _gpa variant?

> +{
> +    return iova_tree_find_iova(tree->iova_gpa_map, map);
> +}
> +
> +/**
> + * Allocate new mappings in the IOVA->HVA & IOVA->GPA trees
> + *
> + * @tree: The VhostIOVATree
> + * @map: The iova map
> + * @gpa: The guest physical address (GPA)
> + *
> + * Returns:
> + * - IOVA_OK if the map fits both containers
> + * - IOVA_ERR_INVALID if the map does not make sense (like size overflow)
> + * - IOVA_ERR_NOMEM if the IOVA->HVA tree cannot allocate more space
> + *
> + * It returns an assigned iova in map->iova if return value is IOVA_OK.
> + */
> +int vhost_iova_tree_map_alloc_gpa(VhostIOVATree *tree, DMAMap *map, hwaddr gpa)
> +{
> +    int ret;
> +
> +    /* Some vhost devices don't like addr 0. Skip first page */
> +    hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size();
> +
> +    if (map->translated_addr + map->size < map->translated_addr ||
> +        map->perm == IOMMU_NONE) {
> +        return IOVA_ERR_INVALID;
> +    }
> +
> +    /* Allocate a node in the IOVA->HVA tree */
> +    ret = iova_tree_alloc_map(tree->iova_taddr_map, map, iova_first,
> +                              tree->iova_last);

Why not call vhost_iova_tree_map_alloc instead of duplicating it here?

> +    if (unlikely(ret != IOVA_OK)) {
> +        return ret;
> +    }
> +
> +    /* Insert a node in the IOVA->GPA tree */
> +    map->translated_addr = gpa;
> +    return iova_tree_insert(tree->iova_gpa_map, map);
> +}
> +
> +/**
> + * Remove existing mappings from the IOVA->HVA & IOVA->GPA trees
> + *
> + * @iova_tree: The VhostIOVATree
> + * @map: The map to remove
> + */
> +void vhost_iova_tree_remove_gpa(VhostIOVATree *iova_tree, DMAMap map)
> +{
> +    /* Remove the existing mapping from the IOVA->GPA tree */
> +    iova_tree_remove(iova_tree->iova_gpa_map, map);
> +
> +    /* Remove the corresponding mapping from the IOVA->HVA tree */
> +    iova_tree_remove(iova_tree->iova_taddr_map, map);

If we remove it blindly from both trees, we are keeping the bug, isn't it?

I think the remove should receive the "gpa" as a parameter, same as
alloc_gpa. After that, vhost_iova_tree_remove_gpa looks the right iova
into iova_gpa_map. And only after that, it removes that iova from
iova_tree_remove.

If it makes things easier it could receive (hwaddr gpa, size_t len) or
all of the info in a DMAMap. What do you think?

> +}
> diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h
> index 4adfd79ff0..511c6d18ae 100644
> --- a/hw/virtio/vhost-iova-tree.h
> +++ b/hw/virtio/vhost-iova-tree.h
> @@ -23,5 +23,10 @@ const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree,
>                                          const DMAMap *map);
>  int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map);
>  void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map);
> +const DMAMap *vhost_iova_gpa_tree_find_iova(const VhostIOVATree *iova_tree,
> +                                            const DMAMap *map);
> +int vhost_iova_tree_map_alloc_gpa(VhostIOVATree *iova_tree, DMAMap *map,
> +                                  hwaddr gpa);
> +void vhost_iova_tree_remove_gpa(VhostIOVATree *iova_tree, DMAMap map);
>
>  #endif
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 3cdaa12ed5..591ff426e7 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -365,9 +365,16 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
>          mem_region.size = int128_get64(llsize) - 1,
>          mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly),
>
> -        r = vhost_iova_tree_map_alloc(s->iova_tree, &mem_region);
> +        r = vhost_iova_tree_map_alloc_gpa(s->iova_tree, &mem_region,
> +                                          section->offset_within_address_space);
>          if (unlikely(r != IOVA_OK)) {
>              error_report("Can't allocate a mapping (%d)", r);
> +
> +            /* Insertion to IOVA->GPA tree failed */
> +            if (mem_region.translated_addr ==
> +                section->offset_within_address_space) {
> +                goto fail_map;
> +            }

We can move this cleanup code into vhost_iova_tree_map_alloc_gpa, isn't it?

>              goto fail;
>          }
>
> @@ -386,7 +393,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
>
>  fail_map:
>      if (s->shadow_data) {
> -        vhost_iova_tree_remove(s->iova_tree, mem_region);
> +        vhost_iova_tree_remove_gpa(s->iova_tree, mem_region);
>      }
>
>  fail:
> @@ -440,21 +447,18 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
>
>      if (s->shadow_data) {
>          const DMAMap *result;
> -        const void *vaddr = memory_region_get_ram_ptr(section->mr) +
> -            section->offset_within_region +
> -            (iova - section->offset_within_address_space);
>          DMAMap mem_region = {
> -            .translated_addr = (hwaddr)(uintptr_t)vaddr,
> +            .translated_addr = section->offset_within_address_space,
>              .size = int128_get64(llsize) - 1,
>          };
>
> -        result = vhost_iova_tree_find_iova(s->iova_tree, &mem_region);
> +        result = vhost_iova_gpa_tree_find_iova(s->iova_tree, &mem_region);
>          if (!result) {
>              /* The memory listener map wasn't mapped */
>              return;
>          }
>          iova = result->iova;
> -        vhost_iova_tree_remove(s->iova_tree, *result);
> +        vhost_iova_tree_remove_gpa(s->iova_tree, *result);
>      }
>      vhost_vdpa_iotlb_batch_begin_once(s);
>      /*
> --
> 2.43.5
>
Jonah Palmer Oct. 4, 2024, 6:48 p.m. UTC | #2
On 10/4/24 11:17 AM, Eugenio Perez Martin wrote:
> On Fri, Oct 4, 2024 at 2:45 PM Jonah Palmer <jonah.palmer@oracle.com> wrote:
>>
>> Implements the IOVA->GPA tree for handling mapping, unmapping, and
>> translations for guest memory regions.
>>
>> When the guest has overlapping memory regions, an HVA to IOVA translation
>> may return an incorrect IOVA when searching the IOVA->HVA tree. This is
>> due to one HVA range being contained (overlapping) in another HVA range
>> in the IOVA->HVA tree. By creating an IOVA->GPA tree, we can use GPAs to
>> translate and find the correct IOVA for guest memory regions.
>>
> 
> Yes, this first patch is super close to what I meant, just one issue
> and a pair of nits here and there.
> 
> I'd leave the second patch as an optimization on top, if the numbers
> prove that adding the code is worth it.
> 

Ah okay, gotcha. I also wasn't sure if what you mentioned below on the 
previous series you also wanted implemented or if these would also be 
optimizations on top.

[Adding code to the vhost_iova_tree layer for handling multiple buffers 
returned from translation for the memory area where each iovec covers]:
-----------------------------------------------------------------------
"Let's say that SVQ wants to translate the HVA range
0xfeda0000-0xfedd0000. So it makes available for the device two
chained buffers: One with addr=0x1000 len=0x20000 and the other one
with addr=(0x20000c1000 len=0x10000).

The VirtIO device should be able to translate these two buffers in
isolation and chain them. Not optimal but it helps to keep QEMU source
clean, as the device already must support it."

[Adding a permission check to iova_tree_find_address_iterator and match 
the range by permissions]:
-----------------------------------------------------------------------
"About the permissions, maybe we can make the permissions to be part of
the lookup? Instead of returning them at iova_tree_find_iova, make
them match at iova_tree_find_address_iterator."

But I understand that the problems with this is that we're assuming the 
SVQ translation will always be done in a transient manner.

>> Signed-off-by: Jonah Palmer <jonah.palmer@oracle.com>
>> ---
>>   hw/virtio/vhost-iova-tree.c | 78 +++++++++++++++++++++++++++++++++++--
>>   hw/virtio/vhost-iova-tree.h |  5 +++
>>   hw/virtio/vhost-vdpa.c      | 20 ++++++----
>>   3 files changed, 92 insertions(+), 11 deletions(-)
>>
>> diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c
>> index 3d03395a77..e33fd56225 100644
>> --- a/hw/virtio/vhost-iova-tree.c
>> +++ b/hw/virtio/vhost-iova-tree.c
>> @@ -28,12 +28,15 @@ struct VhostIOVATree {
>>
>>       /* IOVA address to qemu memory maps. */
>>       IOVATree *iova_taddr_map;
>> +
>> +    /* IOVA address to guest memory maps. */
>> +    IOVATree *iova_gpa_map;
>>   };
>>
>>   /**
>> - * Create a new IOVA tree
>> + * Create a new VhostIOVATree
>>    *
>> - * Returns the new IOVA tree
>> + * Returns the new VhostIOVATree
>>    */
>>   VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
>>   {
>> @@ -44,6 +47,7 @@ VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
>>       tree->iova_last = iova_last;
>>
>>       tree->iova_taddr_map = iova_tree_new();
>> +    tree->iova_gpa_map = iova_tree_new();
>>       return tree;
>>   }
>>
>> @@ -53,6 +57,7 @@ VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
>>   void vhost_iova_tree_delete(VhostIOVATree *iova_tree)
>>   {
>>       iova_tree_destroy(iova_tree->iova_taddr_map);
>> +    iova_tree_destroy(iova_tree->iova_gpa_map);
>>       g_free(iova_tree);
>>   }
>>
>> @@ -71,7 +76,7 @@ const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *tree,
>>   }
>>
>>   /**
>> - * Allocate a new mapping
>> + * Allocate a new mapping in the IOVA->HVA tree
>>    *
>>    * @tree: The iova tree
>>    * @map: The iova map
>> @@ -108,3 +113,70 @@ void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map)
>>   {
>>       iova_tree_remove(iova_tree->iova_taddr_map, map);
>>   }
>> +
>> +/**
>> + * Find the IOVA address stored from a guest memory address
>> + *
>> + * @tree: The VhostIOVATree
>> + * @map: The map with the guest memory address
>> + *
>> + * Return the stored mapping, or NULL if not found.
>> + */
>> +const DMAMap *vhost_iova_gpa_tree_find_iova(const VhostIOVATree *tree,
>> +                                            const DMAMap *map)
> 
> Nit: Not an english native, but I find vhost_iova_tree should not be
> broken for coherency with the rest of the functions. What about
> vhost_iova_tree_find_iova_gpa, like _gpa variant?
> 

Yea, I totally understand what you mean here and I have *no problem* 
making it into vhost_iova_tree_find_iova_gpa.

Just to add my two cents on this, for what it's worth, now that we have 
both an IOVA->HVA tree and a IOVA->GPA tree, coming up with function 
names that operate on this new tree while conforming to the 
vhost_iova_tree convention and being descriptive in the naming is a bit 
difficult.

For example, to me, vhost_iova_tree_find_iova_gpa would seem a bit 
misleading to me if I didn't know about it beforehand (or was just 
seeing it for the first time). Like, are we finding the IOVA or GPA or 
both? And what tree are we operating on?

If this was some personal code I was writing and I had free reign over 
it, I personally would go with a format like:

vhost_<tree this function concerns>_tree_<action>

So a name like vhost_iova_gpa_tree_find_iova communicates to me that 
we're operating on the iova_gpa (IOVA->GPA) tree and our action is to 
find_iova (find the IOVA).

Similarly for something like vhost_iova_gpa_tree_remove or 
vhost_iova_hva_tree_remove, etc.

But obviously this is the complete opposite of personal code and 
certainly not something that's needed so I'm totally okay with renaming 
it to vhost_iova_tree_find_iova_gpa :)

>> +{
>> +    return iova_tree_find_iova(tree->iova_gpa_map, map);
>> +}
>> +
>> +/**
>> + * Allocate new mappings in the IOVA->HVA & IOVA->GPA trees
>> + *
>> + * @tree: The VhostIOVATree
>> + * @map: The iova map
>> + * @gpa: The guest physical address (GPA)
>> + *
>> + * Returns:
>> + * - IOVA_OK if the map fits both containers
>> + * - IOVA_ERR_INVALID if the map does not make sense (like size overflow)
>> + * - IOVA_ERR_NOMEM if the IOVA->HVA tree cannot allocate more space
>> + *
>> + * It returns an assigned iova in map->iova if return value is IOVA_OK.
>> + */
>> +int vhost_iova_tree_map_alloc_gpa(VhostIOVATree *tree, DMAMap *map, hwaddr gpa)
>> +{
>> +    int ret;
>> +
>> +    /* Some vhost devices don't like addr 0. Skip first page */
>> +    hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size();
>> +
>> +    if (map->translated_addr + map->size < map->translated_addr ||
>> +        map->perm == IOMMU_NONE) {
>> +        return IOVA_ERR_INVALID;
>> +    }
>> +
>> +    /* Allocate a node in the IOVA->HVA tree */
>> +    ret = iova_tree_alloc_map(tree->iova_taddr_map, map, iova_first,
>> +                              tree->iova_last);
> 
> Why not call vhost_iova_tree_map_alloc instead of duplicating it here?
> 

Great question with no good answer! For some reason I thought against 
putting it in there but will do that in the next series.

>> +    if (unlikely(ret != IOVA_OK)) {
>> +        return ret;
>> +    }
>> +
>> +    /* Insert a node in the IOVA->GPA tree */
>> +    map->translated_addr = gpa;
>> +    return iova_tree_insert(tree->iova_gpa_map, map);
>> +}
>> +
>> +/**
>> + * Remove existing mappings from the IOVA->HVA & IOVA->GPA trees
>> + *
>> + * @iova_tree: The VhostIOVATree
>> + * @map: The map to remove
>> + */
>> +void vhost_iova_tree_remove_gpa(VhostIOVATree *iova_tree, DMAMap map)
>> +{
>> +    /* Remove the existing mapping from the IOVA->GPA tree */
>> +    iova_tree_remove(iova_tree->iova_gpa_map, map);
>> +
>> +    /* Remove the corresponding mapping from the IOVA->HVA tree */
>> +    iova_tree_remove(iova_tree->iova_taddr_map, map);
> 
> If we remove it blindly from both trees, we are keeping the bug, isn't it?
> 
> I think the remove should receive the "gpa" as a parameter, same as
> alloc_gpa. After that, vhost_iova_tree_remove_gpa looks the right iova
> into iova_gpa_map. And only after that, it removes that iova from
> iova_tree_remove.
> 
> If it makes things easier it could receive (hwaddr gpa, size_t len) or
> all of the info in a DMAMap. What do you think?
> 

Initially that was my plan but this only gets called in 
vhost_vdpa_listener_region_add/del and in both functions, by the time 
this removal function is called, we already have the correct IOVA.

In vhost_vdpa_listener_region_add, we just allocated that IOVA and saved 
it in mem_region. In vhost_vdpa_listener_region_del, we already found 
the IOVA via vhost_iova_gpa_tree_find_iova prior to calling the removal 
function.

But I could be misunderstanding something here. Let me know if I am.

>> +}
>> diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h
>> index 4adfd79ff0..511c6d18ae 100644
>> --- a/hw/virtio/vhost-iova-tree.h
>> +++ b/hw/virtio/vhost-iova-tree.h
>> @@ -23,5 +23,10 @@ const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree,
>>                                           const DMAMap *map);
>>   int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map);
>>   void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map);
>> +const DMAMap *vhost_iova_gpa_tree_find_iova(const VhostIOVATree *iova_tree,
>> +                                            const DMAMap *map);
>> +int vhost_iova_tree_map_alloc_gpa(VhostIOVATree *iova_tree, DMAMap *map,
>> +                                  hwaddr gpa);
>> +void vhost_iova_tree_remove_gpa(VhostIOVATree *iova_tree, DMAMap map);
>>
>>   #endif
>> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
>> index 3cdaa12ed5..591ff426e7 100644
>> --- a/hw/virtio/vhost-vdpa.c
>> +++ b/hw/virtio/vhost-vdpa.c
>> @@ -365,9 +365,16 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
>>           mem_region.size = int128_get64(llsize) - 1,
>>           mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly),
>>
>> -        r = vhost_iova_tree_map_alloc(s->iova_tree, &mem_region);
>> +        r = vhost_iova_tree_map_alloc_gpa(s->iova_tree, &mem_region,
>> +                                          section->offset_within_address_space);
>>           if (unlikely(r != IOVA_OK)) {
>>               error_report("Can't allocate a mapping (%d)", r);
>> +
>> +            /* Insertion to IOVA->GPA tree failed */
>> +            if (mem_region.translated_addr ==
>> +                section->offset_within_address_space) {
>> +                goto fail_map;
>> +            }
> 
> We can move this cleanup code into vhost_iova_tree_map_alloc_gpa, isn't it?
> 

Sure can. We'd still need to check if vhost_iova_tree_map_alloc_gpa 
returned IOVA_OK though and goto the fail label.

>>               goto fail;
>>           }
>>
>> @@ -386,7 +393,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
>>
>>   fail_map:
>>       if (s->shadow_data) {
>> -        vhost_iova_tree_remove(s->iova_tree, mem_region);
>> +        vhost_iova_tree_remove_gpa(s->iova_tree, mem_region);
>>       }
>>
>>   fail:
>> @@ -440,21 +447,18 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
>>
>>       if (s->shadow_data) {
>>           const DMAMap *result;
>> -        const void *vaddr = memory_region_get_ram_ptr(section->mr) +
>> -            section->offset_within_region +
>> -            (iova - section->offset_within_address_space);
>>           DMAMap mem_region = {
>> -            .translated_addr = (hwaddr)(uintptr_t)vaddr,
>> +            .translated_addr = section->offset_within_address_space,
>>               .size = int128_get64(llsize) - 1,
>>           };
>>
>> -        result = vhost_iova_tree_find_iova(s->iova_tree, &mem_region);
>> +        result = vhost_iova_gpa_tree_find_iova(s->iova_tree, &mem_region);
>>           if (!result) {
>>               /* The memory listener map wasn't mapped */
>>               return;
>>           }
>>           iova = result->iova;
>> -        vhost_iova_tree_remove(s->iova_tree, *result);
>> +        vhost_iova_tree_remove_gpa(s->iova_tree, *result);
>>       }
>>       vhost_vdpa_iotlb_batch_begin_once(s);
>>       /*
>> --
>> 2.43.5
>>
>
diff mbox series

Patch

diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c
index 3d03395a77..e33fd56225 100644
--- a/hw/virtio/vhost-iova-tree.c
+++ b/hw/virtio/vhost-iova-tree.c
@@ -28,12 +28,15 @@  struct VhostIOVATree {
 
     /* IOVA address to qemu memory maps. */
     IOVATree *iova_taddr_map;
+
+    /* IOVA address to guest memory maps. */
+    IOVATree *iova_gpa_map;
 };
 
 /**
- * Create a new IOVA tree
+ * Create a new VhostIOVATree
  *
- * Returns the new IOVA tree
+ * Returns the new VhostIOVATree
  */
 VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
 {
@@ -44,6 +47,7 @@  VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
     tree->iova_last = iova_last;
 
     tree->iova_taddr_map = iova_tree_new();
+    tree->iova_gpa_map = iova_tree_new();
     return tree;
 }
 
@@ -53,6 +57,7 @@  VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
 void vhost_iova_tree_delete(VhostIOVATree *iova_tree)
 {
     iova_tree_destroy(iova_tree->iova_taddr_map);
+    iova_tree_destroy(iova_tree->iova_gpa_map);
     g_free(iova_tree);
 }
 
@@ -71,7 +76,7 @@  const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *tree,
 }
 
 /**
- * Allocate a new mapping
+ * Allocate a new mapping in the IOVA->HVA tree
  *
  * @tree: The iova tree
  * @map: The iova map
@@ -108,3 +113,70 @@  void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map)
 {
     iova_tree_remove(iova_tree->iova_taddr_map, map);
 }
+
+/**
+ * Find the IOVA address stored from a guest memory address
+ *
+ * @tree: The VhostIOVATree
+ * @map: The map with the guest memory address
+ *
+ * Return the stored mapping, or NULL if not found.
+ */
+const DMAMap *vhost_iova_gpa_tree_find_iova(const VhostIOVATree *tree,
+                                            const DMAMap *map)
+{
+    return iova_tree_find_iova(tree->iova_gpa_map, map);
+}
+
+/**
+ * Allocate new mappings in the IOVA->HVA & IOVA->GPA trees
+ *
+ * @tree: The VhostIOVATree
+ * @map: The iova map
+ * @gpa: The guest physical address (GPA)
+ *
+ * Returns:
+ * - IOVA_OK if the map fits both containers
+ * - IOVA_ERR_INVALID if the map does not make sense (like size overflow)
+ * - IOVA_ERR_NOMEM if the IOVA->HVA tree cannot allocate more space
+ *
+ * It returns an assigned iova in map->iova if return value is IOVA_OK.
+ */
+int vhost_iova_tree_map_alloc_gpa(VhostIOVATree *tree, DMAMap *map, hwaddr gpa)
+{
+    int ret;
+
+    /* Some vhost devices don't like addr 0. Skip first page */
+    hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size();
+
+    if (map->translated_addr + map->size < map->translated_addr ||
+        map->perm == IOMMU_NONE) {
+        return IOVA_ERR_INVALID;
+    }
+
+    /* Allocate a node in the IOVA->HVA tree */
+    ret = iova_tree_alloc_map(tree->iova_taddr_map, map, iova_first,
+                              tree->iova_last);
+    if (unlikely(ret != IOVA_OK)) {
+        return ret;
+    }
+
+    /* Insert a node in the IOVA->GPA tree */
+    map->translated_addr = gpa;
+    return iova_tree_insert(tree->iova_gpa_map, map);
+}
+
+/**
+ * Remove existing mappings from the IOVA->HVA & IOVA->GPA trees
+ *
+ * @iova_tree: The VhostIOVATree
+ * @map: The map to remove
+ */
+void vhost_iova_tree_remove_gpa(VhostIOVATree *iova_tree, DMAMap map)
+{
+    /* Remove the existing mapping from the IOVA->GPA tree */
+    iova_tree_remove(iova_tree->iova_gpa_map, map);
+
+    /* Remove the corresponding mapping from the IOVA->HVA tree */
+    iova_tree_remove(iova_tree->iova_taddr_map, map);
+}
diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h
index 4adfd79ff0..511c6d18ae 100644
--- a/hw/virtio/vhost-iova-tree.h
+++ b/hw/virtio/vhost-iova-tree.h
@@ -23,5 +23,10 @@  const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree,
                                         const DMAMap *map);
 int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map);
 void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map);
+const DMAMap *vhost_iova_gpa_tree_find_iova(const VhostIOVATree *iova_tree,
+                                            const DMAMap *map);
+int vhost_iova_tree_map_alloc_gpa(VhostIOVATree *iova_tree, DMAMap *map,
+                                  hwaddr gpa);
+void vhost_iova_tree_remove_gpa(VhostIOVATree *iova_tree, DMAMap map);
 
 #endif
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 3cdaa12ed5..591ff426e7 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -365,9 +365,16 @@  static void vhost_vdpa_listener_region_add(MemoryListener *listener,
         mem_region.size = int128_get64(llsize) - 1,
         mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly),
 
-        r = vhost_iova_tree_map_alloc(s->iova_tree, &mem_region);
+        r = vhost_iova_tree_map_alloc_gpa(s->iova_tree, &mem_region,
+                                          section->offset_within_address_space);
         if (unlikely(r != IOVA_OK)) {
             error_report("Can't allocate a mapping (%d)", r);
+
+            /* Insertion to IOVA->GPA tree failed */
+            if (mem_region.translated_addr ==
+                section->offset_within_address_space) {
+                goto fail_map;
+            }
             goto fail;
         }
 
@@ -386,7 +393,7 @@  static void vhost_vdpa_listener_region_add(MemoryListener *listener,
 
 fail_map:
     if (s->shadow_data) {
-        vhost_iova_tree_remove(s->iova_tree, mem_region);
+        vhost_iova_tree_remove_gpa(s->iova_tree, mem_region);
     }
 
 fail:
@@ -440,21 +447,18 @@  static void vhost_vdpa_listener_region_del(MemoryListener *listener,
 
     if (s->shadow_data) {
         const DMAMap *result;
-        const void *vaddr = memory_region_get_ram_ptr(section->mr) +
-            section->offset_within_region +
-            (iova - section->offset_within_address_space);
         DMAMap mem_region = {
-            .translated_addr = (hwaddr)(uintptr_t)vaddr,
+            .translated_addr = section->offset_within_address_space,
             .size = int128_get64(llsize) - 1,
         };
 
-        result = vhost_iova_tree_find_iova(s->iova_tree, &mem_region);
+        result = vhost_iova_gpa_tree_find_iova(s->iova_tree, &mem_region);
         if (!result) {
             /* The memory listener map wasn't mapped */
             return;
         }
         iova = result->iova;
-        vhost_iova_tree_remove(s->iova_tree, *result);
+        vhost_iova_tree_remove_gpa(s->iova_tree, *result);
     }
     vhost_vdpa_iotlb_batch_begin_once(s);
     /*