diff mbox series

[22/31] vhost: Add VhostIOVATree

Message ID 20220121202733.404989-23-eperezma@redhat.com (mailing list archive)
State New, archived
Headers show
Series vDPA shadow virtqueue | expand

Commit Message

Eugenio Perez Martin Jan. 21, 2022, 8:27 p.m. UTC
This tree is able to look for a translated address from an IOVA address.

At first glance it is similar to util/iova-tree. However, SVQ working on
devices with limited IOVA space needs more capabilities, like allocating
IOVA chunks or performing reverse translations (qemu addresses to iova).

The allocation capability ("assign a free IOVA address to this chunk
of memory in qemu's address space") allows the shadow virtqueue to create a
new address space that is not restricted by the guest's addressable one, so
we can allocate the shadow virtqueues' vrings outside of it.

It duplicates the tree so it can search efficiently in both directions,
and it will signal overlap if the iova or the translated address is
already present in either tree.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
 hw/virtio/vhost-iova-tree.h |  27 +++++++
 hw/virtio/vhost-iova-tree.c | 157 ++++++++++++++++++++++++++++++++++++
 hw/virtio/meson.build       |   2 +-
 3 files changed, 185 insertions(+), 1 deletion(-)
 create mode 100644 hw/virtio/vhost-iova-tree.h
 create mode 100644 hw/virtio/vhost-iova-tree.c

Comments

Jason Wang Jan. 30, 2022, 5:21 a.m. UTC | #1
在 2022/1/22 上午4:27, Eugenio Pérez 写道:
> This tree is able to look for a translated address from an IOVA address.
>
> At first glance it is similar to util/iova-tree. However, SVQ working on
> devices with limited IOVA space need more capabilities,


So did the IOVA tree (e.g l2 vtd can only work in the range of GAW and 
without RMRRs).


>   like allocating
> IOVA chunks or performing reverse translations (qemu addresses to iova).


This looks like a general request as well. So I wonder if we can simply 
extend iova tree instead.

Thanks


>
> The allocation capability, as "assign a free IOVA address to this chunk
> of memory in qemu's address space" allows shadow virtqueue to create a
> new address space that is not restricted by guest's addressable one, so
> we can allocate shadow vqs vrings outside of it.
>
> It duplicates the tree so it can search efficiently both directions,
> and it will signal overlap if iova or the translated address is
> present in any tree.
>
> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> ---
>   hw/virtio/vhost-iova-tree.h |  27 +++++++
>   hw/virtio/vhost-iova-tree.c | 157 ++++++++++++++++++++++++++++++++++++
>   hw/virtio/meson.build       |   2 +-
>   3 files changed, 185 insertions(+), 1 deletion(-)
>   create mode 100644 hw/virtio/vhost-iova-tree.h
>   create mode 100644 hw/virtio/vhost-iova-tree.c
>
> diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h
> new file mode 100644
> index 0000000000..610394eaf1
> --- /dev/null
> +++ b/hw/virtio/vhost-iova-tree.h
> @@ -0,0 +1,27 @@
> +/*
> + * vhost software live migration ring
> + *
> + * SPDX-FileCopyrightText: Red Hat, Inc. 2021
> + * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#ifndef HW_VIRTIO_VHOST_IOVA_TREE_H
> +#define HW_VIRTIO_VHOST_IOVA_TREE_H
> +
> +#include "qemu/iova-tree.h"
> +#include "exec/memory.h"
> +
> +typedef struct VhostIOVATree VhostIOVATree;
> +
> +VhostIOVATree *vhost_iova_tree_new(uint64_t iova_first, uint64_t iova_last);
> +void vhost_iova_tree_delete(VhostIOVATree *iova_tree);
> +G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostIOVATree, vhost_iova_tree_delete);
> +
> +const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree,
> +                                        const DMAMap *map);
> +int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map);
> +void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map);
> +
> +#endif
> diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c
> new file mode 100644
> index 0000000000..0021dbaf54
> --- /dev/null
> +++ b/hw/virtio/vhost-iova-tree.c
> @@ -0,0 +1,157 @@
> +/*
> + * vhost software live migration ring
> + *
> + * SPDX-FileCopyrightText: Red Hat, Inc. 2021
> + * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu/iova-tree.h"
> +#include "vhost-iova-tree.h"
> +
> +#define iova_min_addr qemu_real_host_page_size
> +
> +/**
> + * VhostIOVATree, able to:
> + * - Translate iova address
> + * - Reverse translate iova address (from translated to iova)
> + * - Allocate IOVA regions for translated range (potentially slow operation)
> + *
> + * Note that it cannot remove nodes.
> + */
> +struct VhostIOVATree {
> +    /* First addresable iova address in the device */
> +    uint64_t iova_first;
> +
> +    /* Last addressable iova address in the device */
> +    uint64_t iova_last;
> +
> +    /* IOVA address to qemu memory maps. */
> +    IOVATree *iova_taddr_map;
> +
> +    /* QEMU virtual memory address to iova maps */
> +    GTree *taddr_iova_map;
> +};
> +
> +static gint vhost_iova_tree_cmp_taddr(gconstpointer a, gconstpointer b,
> +                                      gpointer data)
> +{
> +    const DMAMap *m1 = a, *m2 = b;
> +
> +    if (m1->translated_addr > m2->translated_addr + m2->size) {
> +        return 1;
> +    }
> +
> +    if (m1->translated_addr + m1->size < m2->translated_addr) {
> +        return -1;
> +    }
> +
> +    /* Overlapped */
> +    return 0;
> +}
> +
> +/**
> + * Create a new IOVA tree
> + *
> + * Returns the new IOVA tree
> + */
> +VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
> +{
> +    VhostIOVATree *tree = g_new(VhostIOVATree, 1);
> +
> +    /* Some devices does not like 0 addresses */
> +    tree->iova_first = MAX(iova_first, iova_min_addr);
> +    tree->iova_last = iova_last;
> +
> +    tree->iova_taddr_map = iova_tree_new();
> +    tree->taddr_iova_map = g_tree_new_full(vhost_iova_tree_cmp_taddr, NULL,
> +                                           NULL, g_free);
> +    return tree;
> +}
> +
> +/**
> + * Delete an iova tree
> + */
> +void vhost_iova_tree_delete(VhostIOVATree *iova_tree)
> +{
> +    iova_tree_destroy(iova_tree->iova_taddr_map);
> +    g_tree_unref(iova_tree->taddr_iova_map);
> +    g_free(iova_tree);
> +}
> +
> +/**
> + * Find the IOVA address stored from a memory address
> + *
> + * @tree     The iova tree
> + * @map      The map with the memory address
> + *
> + * Return the stored mapping, or NULL if not found.
> + */
> +const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *tree,
> +                                        const DMAMap *map)
> +{
> +    return g_tree_lookup(tree->taddr_iova_map, map);
> +}
> +
> +/**
> + * Allocate a new mapping
> + *
> + * @tree  The iova tree
> + * @map   The iova map
> + *
> + * Returns:
> + * - IOVA_OK if the map fits in the container
> + * - IOVA_ERR_INVALID if the map does not make sense (like size overflow)
> + * - IOVA_ERR_OVERLAP if the tree already contains that map
> + * - IOVA_ERR_NOMEM if tree cannot allocate more space.
> + *
> + * It returns assignated iova in map->iova if return value is VHOST_DMA_MAP_OK.
> + */
> +int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map)
> +{
> +    /* Some vhost devices does not like addr 0. Skip first page */
> +    hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size;
> +    DMAMap *new;
> +    int r;
> +
> +    if (map->translated_addr + map->size < map->translated_addr ||
> +        map->perm == IOMMU_NONE) {
> +        return IOVA_ERR_INVALID;
> +    }
> +
> +    /* Check for collisions in translated addresses */
> +    if (vhost_iova_tree_find_iova(tree, map)) {
> +        return IOVA_ERR_OVERLAP;
> +    }
> +
> +    /* Allocate a node in IOVA address */
> +    r = iova_tree_alloc(tree->iova_taddr_map, map, iova_first,
> +                        tree->iova_last);
> +    if (r != IOVA_OK) {
> +        return r;
> +    }
> +
> +    /* Allocate node in qemu -> iova translations */
> +    new = g_malloc(sizeof(*new));
> +    memcpy(new, map, sizeof(*new));
> +    g_tree_insert(tree->taddr_iova_map, new, new);
> +    return IOVA_OK;
> +}
> +
> +/**
> + * Remove existing mappings from iova tree
> + *
> + * @param  iova_tree  The vhost iova tree
> + * @param  map        The map to remove
> + */
> +void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map)
> +{
> +    const DMAMap *overlap;
> +
> +    iova_tree_remove(iova_tree->iova_taddr_map, map);
> +    while ((overlap = vhost_iova_tree_find_iova(iova_tree, map))) {
> +        g_tree_remove(iova_tree->taddr_iova_map, overlap);
> +    }
> +}
> diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
> index 2dc87613bc..6047670804 100644
> --- a/hw/virtio/meson.build
> +++ b/hw/virtio/meson.build
> @@ -11,7 +11,7 @@ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c'))
>   
>   virtio_ss = ss.source_set()
>   virtio_ss.add(files('virtio.c'))
> -virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c'))
> +virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c', 'vhost-iova-tree.c'))
>   virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c'))
>   virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-vdpa.c'))
>   virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c'))
Eugenio Perez Martin Feb. 1, 2022, 5:27 p.m. UTC | #2
On Sun, Jan 30, 2022 at 6:21 AM Jason Wang <jasowang@redhat.com> wrote:
>
>
> 在 2022/1/22 上午4:27, Eugenio Pérez 写道:
> > This tree is able to look for a translated address from an IOVA address.
> >
> > At first glance it is similar to util/iova-tree. However, SVQ working on
> > devices with limited IOVA space need more capabilities,
>
>
> So did the IOVA tree (e.g l2 vtd can only work in the range of GAW and
> without RMRRs).
>
>
> >   like allocating
> > IOVA chunks or performing reverse translations (qemu addresses to iova).
>
>
> This looks like a general request as well. So I wonder if we can simply
> extend iova tree instead.
>

While both are true, I don't see code that performs allocations or
qemu vaddr to iova translations. But if the changes can be integrated
into iova-tree that would be great for sure.

The main drawback I see is the need to maintain two trees instead of
one for users of iova-tree. While complexity does not grow, it
doubles the amount of work needed.

Thanks!

> Thanks
>
>
> >
> > The allocation capability, as "assign a free IOVA address to this chunk
> > of memory in qemu's address space" allows shadow virtqueue to create a
> > new address space that is not restricted by guest's addressable one, so
> > we can allocate shadow vqs vrings outside of it.
> >
> > It duplicates the tree so it can search efficiently both directions,
> > and it will signal overlap if iova or the translated address is
> > present in any tree.
> >
> > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > ---
> >   hw/virtio/vhost-iova-tree.h |  27 +++++++
> >   hw/virtio/vhost-iova-tree.c | 157 ++++++++++++++++++++++++++++++++++++
> >   hw/virtio/meson.build       |   2 +-
> >   3 files changed, 185 insertions(+), 1 deletion(-)
> >   create mode 100644 hw/virtio/vhost-iova-tree.h
> >   create mode 100644 hw/virtio/vhost-iova-tree.c
> >
> > diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h
> > new file mode 100644
> > index 0000000000..610394eaf1
> > --- /dev/null
> > +++ b/hw/virtio/vhost-iova-tree.h
> > @@ -0,0 +1,27 @@
> > +/*
> > + * vhost software live migration ring
> > + *
> > + * SPDX-FileCopyrightText: Red Hat, Inc. 2021
> > + * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
> > + *
> > + * SPDX-License-Identifier: GPL-2.0-or-later
> > + */
> > +
> > +#ifndef HW_VIRTIO_VHOST_IOVA_TREE_H
> > +#define HW_VIRTIO_VHOST_IOVA_TREE_H
> > +
> > +#include "qemu/iova-tree.h"
> > +#include "exec/memory.h"
> > +
> > +typedef struct VhostIOVATree VhostIOVATree;
> > +
> > +VhostIOVATree *vhost_iova_tree_new(uint64_t iova_first, uint64_t iova_last);
> > +void vhost_iova_tree_delete(VhostIOVATree *iova_tree);
> > +G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostIOVATree, vhost_iova_tree_delete);
> > +
> > +const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree,
> > +                                        const DMAMap *map);
> > +int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map);
> > +void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map);
> > +
> > +#endif
> > diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c
> > new file mode 100644
> > index 0000000000..0021dbaf54
> > --- /dev/null
> > +++ b/hw/virtio/vhost-iova-tree.c
> > @@ -0,0 +1,157 @@
> > +/*
> > + * vhost software live migration ring
> > + *
> > + * SPDX-FileCopyrightText: Red Hat, Inc. 2021
> > + * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
> > + *
> > + * SPDX-License-Identifier: GPL-2.0-or-later
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include "qemu/iova-tree.h"
> > +#include "vhost-iova-tree.h"
> > +
> > +#define iova_min_addr qemu_real_host_page_size
> > +
> > +/**
> > + * VhostIOVATree, able to:
> > + * - Translate iova address
> > + * - Reverse translate iova address (from translated to iova)
> > + * - Allocate IOVA regions for translated range (potentially slow operation)
> > + *
> > + * Note that it cannot remove nodes.
> > + */
> > +struct VhostIOVATree {
> > +    /* First addresable iova address in the device */
> > +    uint64_t iova_first;
> > +
> > +    /* Last addressable iova address in the device */
> > +    uint64_t iova_last;
> > +
> > +    /* IOVA address to qemu memory maps. */
> > +    IOVATree *iova_taddr_map;
> > +
> > +    /* QEMU virtual memory address to iova maps */
> > +    GTree *taddr_iova_map;
> > +};
> > +
> > +static gint vhost_iova_tree_cmp_taddr(gconstpointer a, gconstpointer b,
> > +                                      gpointer data)
> > +{
> > +    const DMAMap *m1 = a, *m2 = b;
> > +
> > +    if (m1->translated_addr > m2->translated_addr + m2->size) {
> > +        return 1;
> > +    }
> > +
> > +    if (m1->translated_addr + m1->size < m2->translated_addr) {
> > +        return -1;
> > +    }
> > +
> > +    /* Overlapped */
> > +    return 0;
> > +}
> > +
> > +/**
> > + * Create a new IOVA tree
> > + *
> > + * Returns the new IOVA tree
> > + */
> > +VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
> > +{
> > +    VhostIOVATree *tree = g_new(VhostIOVATree, 1);
> > +
> > +    /* Some devices does not like 0 addresses */
> > +    tree->iova_first = MAX(iova_first, iova_min_addr);
> > +    tree->iova_last = iova_last;
> > +
> > +    tree->iova_taddr_map = iova_tree_new();
> > +    tree->taddr_iova_map = g_tree_new_full(vhost_iova_tree_cmp_taddr, NULL,
> > +                                           NULL, g_free);
> > +    return tree;
> > +}
> > +
> > +/**
> > + * Delete an iova tree
> > + */
> > +void vhost_iova_tree_delete(VhostIOVATree *iova_tree)
> > +{
> > +    iova_tree_destroy(iova_tree->iova_taddr_map);
> > +    g_tree_unref(iova_tree->taddr_iova_map);
> > +    g_free(iova_tree);
> > +}
> > +
> > +/**
> > + * Find the IOVA address stored from a memory address
> > + *
> > + * @tree     The iova tree
> > + * @map      The map with the memory address
> > + *
> > + * Return the stored mapping, or NULL if not found.
> > + */
> > +const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *tree,
> > +                                        const DMAMap *map)
> > +{
> > +    return g_tree_lookup(tree->taddr_iova_map, map);
> > +}
> > +
> > +/**
> > + * Allocate a new mapping
> > + *
> > + * @tree  The iova tree
> > + * @map   The iova map
> > + *
> > + * Returns:
> > + * - IOVA_OK if the map fits in the container
> > + * - IOVA_ERR_INVALID if the map does not make sense (like size overflow)
> > + * - IOVA_ERR_OVERLAP if the tree already contains that map
> > + * - IOVA_ERR_NOMEM if tree cannot allocate more space.
> > + *
> > + * It returns assignated iova in map->iova if return value is VHOST_DMA_MAP_OK.
> > + */
> > +int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map)
> > +{
> > +    /* Some vhost devices does not like addr 0. Skip first page */
> > +    hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size;
> > +    DMAMap *new;
> > +    int r;
> > +
> > +    if (map->translated_addr + map->size < map->translated_addr ||
> > +        map->perm == IOMMU_NONE) {
> > +        return IOVA_ERR_INVALID;
> > +    }
> > +
> > +    /* Check for collisions in translated addresses */
> > +    if (vhost_iova_tree_find_iova(tree, map)) {
> > +        return IOVA_ERR_OVERLAP;
> > +    }
> > +
> > +    /* Allocate a node in IOVA address */
> > +    r = iova_tree_alloc(tree->iova_taddr_map, map, iova_first,
> > +                        tree->iova_last);
> > +    if (r != IOVA_OK) {
> > +        return r;
> > +    }
> > +
> > +    /* Allocate node in qemu -> iova translations */
> > +    new = g_malloc(sizeof(*new));
> > +    memcpy(new, map, sizeof(*new));
> > +    g_tree_insert(tree->taddr_iova_map, new, new);
> > +    return IOVA_OK;
> > +}
> > +
> > +/**
> > + * Remove existing mappings from iova tree
> > + *
> > + * @param  iova_tree  The vhost iova tree
> > + * @param  map        The map to remove
> > + */
> > +void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map)
> > +{
> > +    const DMAMap *overlap;
> > +
> > +    iova_tree_remove(iova_tree->iova_taddr_map, map);
> > +    while ((overlap = vhost_iova_tree_find_iova(iova_tree, map))) {
> > +        g_tree_remove(iova_tree->taddr_iova_map, overlap);
> > +    }
> > +}
> > diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
> > index 2dc87613bc..6047670804 100644
> > --- a/hw/virtio/meson.build
> > +++ b/hw/virtio/meson.build
> > @@ -11,7 +11,7 @@ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c'))
> >
> >   virtio_ss = ss.source_set()
> >   virtio_ss.add(files('virtio.c'))
> > -virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c'))
> > +virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c', 'vhost-iova-tree.c'))
> >   virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c'))
> >   virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-vdpa.c'))
> >   virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c'))
>
Jason Wang Feb. 8, 2022, 8:17 a.m. UTC | #3
在 2022/2/2 上午1:27, Eugenio Perez Martin 写道:
> On Sun, Jan 30, 2022 at 6:21 AM Jason Wang <jasowang@redhat.com> wrote:
>>
>> 在 2022/1/22 上午4:27, Eugenio Pérez 写道:
>>> This tree is able to look for a translated address from an IOVA address.
>>>
>>> At first glance it is similar to util/iova-tree. However, SVQ working on
>>> devices with limited IOVA space need more capabilities,
>>
>> So did the IOVA tree (e.g l2 vtd can only work in the range of GAW and
>> without RMRRs).
>>
>>
>>>    like allocating
>>> IOVA chunks or performing reverse translations (qemu addresses to iova).
>>
>> This looks like a general request as well. So I wonder if we can simply
>> extend iova tree instead.
>>
> While both are true, I don't see code that performs allocations or
> qemu vaddr to iova translations. But if the changes can be integrated
> into iova-tree that would be great for sure.
>
> The main drawback I see is the need to maintain two trees instead of
> one for users of iova-tree. While complexity does not grow, it needs
> to double the amount of work needed.


If you care about the performance, we can disable the reverse mapping 
during the allocation. vIOMMU users won't notice any performance 
penalty.

Thanks


>
> Thanks!
>
>> Thanks
>>
>>
>>> The allocation capability, as "assign a free IOVA address to this chunk
>>> of memory in qemu's address space" allows shadow virtqueue to create a
>>> new address space that is not restricted by guest's addressable one, so
>>> we can allocate shadow vqs vrings outside of it.
>>>
>>> It duplicates the tree so it can search efficiently both directions,
>>> and it will signal overlap if iova or the translated address is
>>> present in any tree.
>>>
>>> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
>>> ---
>>>    hw/virtio/vhost-iova-tree.h |  27 +++++++
>>>    hw/virtio/vhost-iova-tree.c | 157 ++++++++++++++++++++++++++++++++++++
>>>    hw/virtio/meson.build       |   2 +-
>>>    3 files changed, 185 insertions(+), 1 deletion(-)
>>>    create mode 100644 hw/virtio/vhost-iova-tree.h
>>>    create mode 100644 hw/virtio/vhost-iova-tree.c
>>>
>>> diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h
>>> new file mode 100644
>>> index 0000000000..610394eaf1
>>> --- /dev/null
>>> +++ b/hw/virtio/vhost-iova-tree.h
>>> @@ -0,0 +1,27 @@
>>> +/*
>>> + * vhost software live migration ring
>>> + *
>>> + * SPDX-FileCopyrightText: Red Hat, Inc. 2021
>>> + * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
>>> + *
>>> + * SPDX-License-Identifier: GPL-2.0-or-later
>>> + */
>>> +
>>> +#ifndef HW_VIRTIO_VHOST_IOVA_TREE_H
>>> +#define HW_VIRTIO_VHOST_IOVA_TREE_H
>>> +
>>> +#include "qemu/iova-tree.h"
>>> +#include "exec/memory.h"
>>> +
>>> +typedef struct VhostIOVATree VhostIOVATree;
>>> +
>>> +VhostIOVATree *vhost_iova_tree_new(uint64_t iova_first, uint64_t iova_last);
>>> +void vhost_iova_tree_delete(VhostIOVATree *iova_tree);
>>> +G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostIOVATree, vhost_iova_tree_delete);
>>> +
>>> +const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree,
>>> +                                        const DMAMap *map);
>>> +int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map);
>>> +void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map);
>>> +
>>> +#endif
>>> diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c
>>> new file mode 100644
>>> index 0000000000..0021dbaf54
>>> --- /dev/null
>>> +++ b/hw/virtio/vhost-iova-tree.c
>>> @@ -0,0 +1,157 @@
>>> +/*
>>> + * vhost software live migration ring
>>> + *
>>> + * SPDX-FileCopyrightText: Red Hat, Inc. 2021
>>> + * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
>>> + *
>>> + * SPDX-License-Identifier: GPL-2.0-or-later
>>> + */
>>> +
>>> +#include "qemu/osdep.h"
>>> +#include "qemu/iova-tree.h"
>>> +#include "vhost-iova-tree.h"
>>> +
>>> +#define iova_min_addr qemu_real_host_page_size
>>> +
>>> +/**
>>> + * VhostIOVATree, able to:
>>> + * - Translate iova address
>>> + * - Reverse translate iova address (from translated to iova)
>>> + * - Allocate IOVA regions for translated range (potentially slow operation)
>>> + *
>>> + * Note that it cannot remove nodes.
>>> + */
>>> +struct VhostIOVATree {
>>> +    /* First addresable iova address in the device */
>>> +    uint64_t iova_first;
>>> +
>>> +    /* Last addressable iova address in the device */
>>> +    uint64_t iova_last;
>>> +
>>> +    /* IOVA address to qemu memory maps. */
>>> +    IOVATree *iova_taddr_map;
>>> +
>>> +    /* QEMU virtual memory address to iova maps */
>>> +    GTree *taddr_iova_map;
>>> +};
>>> +
>>> +static gint vhost_iova_tree_cmp_taddr(gconstpointer a, gconstpointer b,
>>> +                                      gpointer data)
>>> +{
>>> +    const DMAMap *m1 = a, *m2 = b;
>>> +
>>> +    if (m1->translated_addr > m2->translated_addr + m2->size) {
>>> +        return 1;
>>> +    }
>>> +
>>> +    if (m1->translated_addr + m1->size < m2->translated_addr) {
>>> +        return -1;
>>> +    }
>>> +
>>> +    /* Overlapped */
>>> +    return 0;
>>> +}
>>> +
>>> +/**
>>> + * Create a new IOVA tree
>>> + *
>>> + * Returns the new IOVA tree
>>> + */
>>> +VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
>>> +{
>>> +    VhostIOVATree *tree = g_new(VhostIOVATree, 1);
>>> +
>>> +    /* Some devices does not like 0 addresses */
>>> +    tree->iova_first = MAX(iova_first, iova_min_addr);
>>> +    tree->iova_last = iova_last;
>>> +
>>> +    tree->iova_taddr_map = iova_tree_new();
>>> +    tree->taddr_iova_map = g_tree_new_full(vhost_iova_tree_cmp_taddr, NULL,
>>> +                                           NULL, g_free);
>>> +    return tree;
>>> +}
>>> +
>>> +/**
>>> + * Delete an iova tree
>>> + */
>>> +void vhost_iova_tree_delete(VhostIOVATree *iova_tree)
>>> +{
>>> +    iova_tree_destroy(iova_tree->iova_taddr_map);
>>> +    g_tree_unref(iova_tree->taddr_iova_map);
>>> +    g_free(iova_tree);
>>> +}
>>> +
>>> +/**
>>> + * Find the IOVA address stored from a memory address
>>> + *
>>> + * @tree     The iova tree
>>> + * @map      The map with the memory address
>>> + *
>>> + * Return the stored mapping, or NULL if not found.
>>> + */
>>> +const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *tree,
>>> +                                        const DMAMap *map)
>>> +{
>>> +    return g_tree_lookup(tree->taddr_iova_map, map);
>>> +}
>>> +
>>> +/**
>>> + * Allocate a new mapping
>>> + *
>>> + * @tree  The iova tree
>>> + * @map   The iova map
>>> + *
>>> + * Returns:
>>> + * - IOVA_OK if the map fits in the container
>>> + * - IOVA_ERR_INVALID if the map does not make sense (like size overflow)
>>> + * - IOVA_ERR_OVERLAP if the tree already contains that map
>>> + * - IOVA_ERR_NOMEM if tree cannot allocate more space.
>>> + *
>>> + * It returns assignated iova in map->iova if return value is VHOST_DMA_MAP_OK.
>>> + */
>>> +int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map)
>>> +{
>>> +    /* Some vhost devices does not like addr 0. Skip first page */
>>> +    hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size;
>>> +    DMAMap *new;
>>> +    int r;
>>> +
>>> +    if (map->translated_addr + map->size < map->translated_addr ||
>>> +        map->perm == IOMMU_NONE) {
>>> +        return IOVA_ERR_INVALID;
>>> +    }
>>> +
>>> +    /* Check for collisions in translated addresses */
>>> +    if (vhost_iova_tree_find_iova(tree, map)) {
>>> +        return IOVA_ERR_OVERLAP;
>>> +    }
>>> +
>>> +    /* Allocate a node in IOVA address */
>>> +    r = iova_tree_alloc(tree->iova_taddr_map, map, iova_first,
>>> +                        tree->iova_last);
>>> +    if (r != IOVA_OK) {
>>> +        return r;
>>> +    }
>>> +
>>> +    /* Allocate node in qemu -> iova translations */
>>> +    new = g_malloc(sizeof(*new));
>>> +    memcpy(new, map, sizeof(*new));
>>> +    g_tree_insert(tree->taddr_iova_map, new, new);
>>> +    return IOVA_OK;
>>> +}
>>> +
>>> +/**
>>> + * Remove existing mappings from iova tree
>>> + *
>>> + * @param  iova_tree  The vhost iova tree
>>> + * @param  map        The map to remove
>>> + */
>>> +void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map)
>>> +{
>>> +    const DMAMap *overlap;
>>> +
>>> +    iova_tree_remove(iova_tree->iova_taddr_map, map);
>>> +    while ((overlap = vhost_iova_tree_find_iova(iova_tree, map))) {
>>> +        g_tree_remove(iova_tree->taddr_iova_map, overlap);
>>> +    }
>>> +}
>>> diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
>>> index 2dc87613bc..6047670804 100644
>>> --- a/hw/virtio/meson.build
>>> +++ b/hw/virtio/meson.build
>>> @@ -11,7 +11,7 @@ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c'))
>>>
>>>    virtio_ss = ss.source_set()
>>>    virtio_ss.add(files('virtio.c'))
>>> -virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c'))
>>> +virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c', 'vhost-iova-tree.c'))
>>>    virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c'))
>>>    virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-vdpa.c'))
>>>    virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c'))
diff mbox series

Patch

diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h
new file mode 100644
index 0000000000..610394eaf1
--- /dev/null
+++ b/hw/virtio/vhost-iova-tree.h
@@ -0,0 +1,27 @@ 
+/*
+ * vhost software live migration iova tree
+ *
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_VIRTIO_VHOST_IOVA_TREE_H
+#define HW_VIRTIO_VHOST_IOVA_TREE_H
+
+#include "qemu/iova-tree.h"
+#include "exec/memory.h"
+
+/* Opaque handle; the structure itself is defined in vhost-iova-tree.c */
+typedef struct VhostIOVATree VhostIOVATree;
+
+/* Create a tree that allocates IOVAs bounded by iova_first and iova_last */
+VhostIOVATree *vhost_iova_tree_new(uint64_t iova_first, uint64_t iova_last);
+/* Destroy a tree created with vhost_iova_tree_new() */
+void vhost_iova_tree_delete(VhostIOVATree *iova_tree);
+/* g_autoptr(VhostIOVATree) support; cleanup is vhost_iova_tree_delete() */
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostIOVATree, vhost_iova_tree_delete);
+
+/* Reverse lookup: find the stored mapping from map's translated address */
+const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree,
+                                        const DMAMap *map);
+/* Allocate an IOVA range for map; returns an IOVA_* status code */
+int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map);
+/* Remove map from both the iova and the translated address lookups */
+void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map);
+
+#endif
diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c
new file mode 100644
index 0000000000..0021dbaf54
--- /dev/null
+++ b/hw/virtio/vhost-iova-tree.c
@@ -0,0 +1,157 @@ 
+/*
+ * vhost software live migration iova tree
+ *
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/iova-tree.h"
+#include "vhost-iova-tree.h"
+
+#define iova_min_addr qemu_real_host_page_size
+
+/**
+ * VhostIOVATree, able to:
+ * - Translate iova address
+ * - Reverse translate iova address (from translated to iova)
+ * - Allocate IOVA regions for translated range (potentially slow operation)
+ *
+ * Mappings are dropped from both directions with vhost_iova_tree_remove().
+ */
+struct VhostIOVATree {
+    /*
+     * First addressable iova address in the device. vhost_iova_tree_new()
+     * never stores a value below iova_min_addr here.
+     */
+    uint64_t iova_first;
+
+    /* Last addressable iova address in the device */
+    uint64_t iova_last;
+
+    /* IOVA address to qemu memory maps. */
+    IOVATree *iova_taddr_map;
+
+    /*
+     * QEMU virtual memory address to iova maps. Each entry uses one
+     * heap-allocated DMAMap as both key and value; it is released through
+     * the tree's value destroy function (g_free).
+     */
+    GTree *taddr_iova_map;
+};
+
+/**
+ * Compare two DMAMap entries by their translated address ranges
+ *
+ * @a     First DMAMap
+ * @b     Second DMAMap
+ * @data  Unused
+ *
+ * Returns 1 if @a starts above translated_addr + size of @b, -1 if
+ * translated_addr + size of @a lies below the start of @b, and 0 in any
+ * other case, so that overlapping ranges compare as equal.
+ */
+static gint vhost_iova_tree_cmp_taddr(gconstpointer a, gconstpointer b,
+                                      gpointer data)
+{
+    const DMAMap *lhs = a;
+    const DMAMap *rhs = b;
+    gint order = 0; /* Overlapped unless one of the checks below says so */
+
+    if (lhs->translated_addr > rhs->translated_addr + rhs->size) {
+        order = 1;
+    } else if (lhs->translated_addr + lhs->size < rhs->translated_addr) {
+        order = -1;
+    }
+
+    return order;
+}
+
+/**
+ * Create a new IOVA tree
+ *
+ * @iova_first  First iova address the device can address
+ * @iova_last   Last iova address the device can address
+ *
+ * The stored lower bound is raised to iova_min_addr when @iova_first is
+ * smaller than that.
+ *
+ * Returns the new IOVA tree. Free it with vhost_iova_tree_delete().
+ */
+VhostIOVATree *vhost_iova_tree_new(uint64_t iova_first, uint64_t iova_last)
+{
+    VhostIOVATree *tree = g_new(VhostIOVATree, 1);
+
+    /* Some devices do not like 0 addresses */
+    tree->iova_first = MAX(iova_first, iova_min_addr);
+    tree->iova_last = iova_last;
+
+    tree->iova_taddr_map = iova_tree_new();
+    /*
+     * Key and value of each reverse entry are the same allocation, so only
+     * a value destroy function is set to avoid freeing it twice.
+     */
+    tree->taddr_iova_map = g_tree_new_full(vhost_iova_tree_cmp_taddr, NULL,
+                                           NULL, g_free);
+    return tree;
+}
+
+/**
+ * Delete an iova tree
+ *
+ * @iova_tree  The tree to delete
+ *
+ * Destroys the iova -> translated address map, drops the reference on the
+ * translated address -> iova map and finally frees the container itself.
+ */
+void vhost_iova_tree_delete(VhostIOVATree *iova_tree)
+{
+    IOVATree *iova_to_taddr = iova_tree->iova_taddr_map;
+    GTree *taddr_to_iova = iova_tree->taddr_iova_map;
+
+    iova_tree_destroy(iova_to_taddr);
+    g_tree_unref(taddr_to_iova);
+    g_free(iova_tree);
+}
+
+/**
+ * Look up the mapping stored for a translated (memory) address range
+ *
+ * @tree     The iova tree
+ * @map      The map with the memory address
+ *
+ * Return the stored mapping that compares equal to @map by translated
+ * address range, or NULL if not found.
+ */
+const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *tree,
+                                        const DMAMap *map)
+{
+    GTree *taddr_to_iova = tree->taddr_iova_map;
+
+    return g_tree_lookup(taddr_to_iova, map);
+}
+
+/**
+ * Allocate a new mapping
+ *
+ * @tree  The iova tree
+ * @map   The iova map
+ *
+ * Returns:
+ * - IOVA_OK if the map fits in the container
+ * - IOVA_ERR_INVALID if the map does not make sense (like size overflow)
+ * - IOVA_ERR_OVERLAP if the tree already contains that map
+ * - IOVA_ERR_NOMEM if tree cannot allocate more space.
+ *
+ * It returns the assigned iova in map->iova if the return value is IOVA_OK.
+ */
+int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map)
+{
+    DMAMap *new;
+    int r;
+
+    /* Reject ranges that wrap around and mappings without permissions */
+    if (map->translated_addr + map->size < map->translated_addr ||
+        map->perm == IOMMU_NONE) {
+        return IOVA_ERR_INVALID;
+    }
+
+    /* Check for collisions in translated addresses */
+    if (vhost_iova_tree_find_iova(tree, map)) {
+        return IOVA_ERR_OVERLAP;
+    }
+
+    /*
+     * Allocate a node in IOVA address. Some vhost devices do not like
+     * addr 0; vhost_iova_tree_new() already keeps tree->iova_first at or
+     * above iova_min_addr, so no extra fallback is needed here.
+     */
+    r = iova_tree_alloc(tree->iova_taddr_map, map, tree->iova_first,
+                        tree->iova_last);
+    if (r != IOVA_OK) {
+        return r;
+    }
+
+    /*
+     * Allocate node in qemu -> iova translations. The copy is taken after
+     * the allocation above so it carries whatever that call stored in map.
+     */
+    new = g_new(DMAMap, 1);
+    *new = *map;
+    g_tree_insert(tree->taddr_iova_map, new, new);
+    return IOVA_OK;
+}
+
+/**
+ * Remove existing mappings from iova tree
+ *
+ * @iova_tree  The vhost iova tree
+ * @map        The map to remove
+ *
+ * The iova -> translated address entry is removed first. After that, every
+ * translated address -> iova entry that the lookup still finds for @map is
+ * removed, one at a time, until none is left.
+ */
+void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map)
+{
+    GTree *taddr_to_iova = iova_tree->taddr_iova_map;
+
+    iova_tree_remove(iova_tree->iova_taddr_map, map);
+    for (;;) {
+        const DMAMap *hit = vhost_iova_tree_find_iova(iova_tree, map);
+
+        if (!hit) {
+            break;
+        }
+        g_tree_remove(taddr_to_iova, hit);
+    }
+}
diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
index 2dc87613bc..6047670804 100644
--- a/hw/virtio/meson.build
+++ b/hw/virtio/meson.build
@@ -11,7 +11,7 @@  softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c'))
 
 virtio_ss = ss.source_set()
 virtio_ss.add(files('virtio.c'))
-virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c'))
+virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c', 'vhost-iova-tree.c'))
 virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c'))
 virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-vdpa.c'))
 virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c'))