@@ -31,6 +31,9 @@ struct VhostIOVATree {
/* Allocated IOVA addresses */
IOVATree *iova_map;
+
+ /* GPA->IOVA address memory maps */
+ IOVATree *gpa_iova_map;
};
/**
@@ -48,6 +51,7 @@ VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
tree->iova_taddr_map = iova_tree_new();
tree->iova_map = iova_tree_new();
+ tree->gpa_iova_map = gpa_tree_new();
return tree;
}
@@ -58,6 +62,7 @@ void vhost_iova_tree_delete(VhostIOVATree *iova_tree)
{
iova_tree_destroy(iova_tree->iova_taddr_map);
iova_tree_destroy(iova_tree->iova_map);
+ iova_tree_destroy(iova_tree->gpa_iova_map);
g_free(iova_tree);
}
@@ -122,3 +127,65 @@ void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map)
iova_tree_remove(iova_tree->iova_taddr_map, map);
iova_tree_remove(iova_tree->iova_map, map);
}
+
+/**
+ * Find the GPA->IOVA mapping for a given guest memory address (GPA)
+ *
+ * @tree: The VhostIOVATree
+ * @map: The map with the guest memory address
+ *
+ * Returns the stored GPA->IOVA mapping, or NULL if not found.
+ */
+const DMAMap *vhost_iova_tree_find_gpa(const VhostIOVATree *tree,
+ const DMAMap *map)
+{
+ return iova_tree_find_iova(tree->gpa_iova_map, map);
+}
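+
+/*
+ * Illustrative usage sketch, not part of this patch; the GPA and size
+ * below are made up. Stored maps use an inclusive size (length - 1).
+ * Look up the IOVA for a 4 KiB guest buffer at GPA 0x8000:
+ *
+ * DMAMap needle = {
+ * .translated_addr = 0x8000,
+ * .size = 4096 - 1,
+ * };
+ * const DMAMap *map = vhost_iova_tree_find_gpa(tree, &needle);
+ * if (map) {
+ * hwaddr iova = map->iova + (0x8000 - map->translated_addr);
+ * }
+ */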
+
+/**
+ * Allocate a new IOVA range and add the mapping to the GPA->IOVA tree
+ *
+ * @tree: The VhostIOVATree
+ * @map: The IOVA mapping
+ * @taddr: The translated address (GPA)
+ *
+ * Returns:
+ * - IOVA_OK if the map fits both containers
+ * - IOVA_ERR_INVALID if the map does not make sense (like size overflow)
+ * - IOVA_ERR_NOMEM if the IOVA-only tree cannot allocate more space
+ * - IOVA_ERR_OVERLAP if the GPA range overlaps an existing mapping
+ *
+ * It returns an assigned IOVA in map->iova if the return value is IOVA_OK.
+ */
+int vhost_iova_tree_map_alloc_gpa(VhostIOVATree *tree, DMAMap *map, hwaddr taddr)
+{
+ int ret;
+
+ /* Some vhost devices don't like addr 0. Skip first page */
+ hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size();
+
+ if (taddr + map->size < taddr || map->perm == IOMMU_NONE) {
+ return IOVA_ERR_INVALID;
+ }
+
+ /* Allocate a node in the IOVA-only tree */
+ ret = iova_tree_alloc_map(tree->iova_map, map, iova_first, tree->iova_last);
+ if (unlikely(ret != IOVA_OK)) {
+ return ret;
+ }
+
+ /* Insert a node in the GPA->IOVA tree */
+ map->translated_addr = taddr;
+ return gpa_tree_insert(tree->gpa_iova_map, map);
+}
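+
+/*
+ * Minimal lifecycle sketch, illustrative only (the GPA and size are
+ * hypothetical): allocate an IOVA for a guest range, then tear the
+ * mapping down again with vhost_iova_tree_remove_gpa(). On success
+ * the assigned IOVA is written to map.iova.
+ *
+ * DMAMap map = {
+ * .size = 0x10000 - 1,
+ * .perm = IOMMU_RW,
+ * };
+ * int r = vhost_iova_tree_map_alloc_gpa(tree, &map, 0x100000);
+ * if (r == IOVA_OK) {
+ * ... program the device with map.iova ...
+ * vhost_iova_tree_remove_gpa(tree, map);
+ * }
+ */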
+
+/**
+ * Remove existing mappings from the IOVA-only and GPA->IOVA trees
+ *
+ * @iova_tree: The VhostIOVATree
+ * @map: The map to remove
+ */
+void vhost_iova_tree_remove_gpa(VhostIOVATree *iova_tree, DMAMap map)
+{
+ iova_tree_remove(iova_tree->gpa_iova_map, map);
+ iova_tree_remove(iova_tree->iova_map, map);
+}
@@ -24,5 +24,10 @@ const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree,
int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map,
hwaddr taddr);
void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map);
+const DMAMap *vhost_iova_tree_find_gpa(const VhostIOVATree *iova_tree,
+ const DMAMap *map);
+int vhost_iova_tree_map_alloc_gpa(VhostIOVATree *iova_tree, DMAMap *map,
+ hwaddr taddr);
+void vhost_iova_tree_remove_gpa(VhostIOVATree *iova_tree, DMAMap map);
#endif
@@ -16,6 +16,7 @@
#include "qemu/log.h"
#include "qemu/memalign.h"
#include "linux-headers/linux/vhost.h"
+#include "exec/ramblock.h"
/**
* Validate the transport device features that both guests can use with the SVQ
@@ -88,14 +89,31 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
}
for (size_t i = 0; i < num; ++i) {
- DMAMap needle = {
- .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
- .size = iovec[i].iov_len,
- };
Int128 needle_last, map_last;
size_t off;
+ RAMBlock *rb;
+ hwaddr gpa;
+ ram_addr_t offset;
+ const DMAMap *map;
+ DMAMap needle;
+
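+ /*
+ * Classify the HVA: if it falls inside a RAMBlock the buffer is
+ * guest-backed and its GPA can be derived, so the GPA->IOVA tree
+ * is searched; otherwise it is host-only memory (e.g. SVQ's own
+ * vrings) and the IOVA->HVA tree is searched instead.
+ */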
+ rb = qemu_ram_block_from_host(iovec[i].iov_base, false, &offset);
+ if (rb) {
+ gpa = rb->offset + offset;
+
+ needle = (DMAMap) {
+ .translated_addr = gpa,
+ .size = iovec[i].iov_len,
+ };
+ map = vhost_iova_tree_find_gpa(svq->iova_tree, &needle);
+ } else {
+ needle = (DMAMap) {
+ .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
+ .size = iovec[i].iov_len,
+ };
+ map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
+ }
- const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
/*
* Map cannot be NULL since iova map contains all guest space and
* qemu already has a physical address mapped
@@ -360,17 +360,17 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
llsize = int128_sub(llend, int128_make64(iova));
if (s->shadow_data) {
int r;
- hwaddr hw_vaddr = (hwaddr)(uintptr_t)vaddr;
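+ /*
+ * Shadow-data mappings are now keyed by GPA, so pass the
+ * section's offset within the guest address space rather than
+ * the section's HVA.
+ */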
+ hwaddr gpa = section->offset_within_address_space;
mem_region.size = int128_get64(llsize) - 1,
mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly),
- r = vhost_iova_tree_map_alloc(s->iova_tree, &mem_region, hw_vaddr);
+ r = vhost_iova_tree_map_alloc_gpa(s->iova_tree, &mem_region, gpa);
if (unlikely(r != IOVA_OK)) {
error_report("Can't allocate a mapping (%d)", r);
- if (mem_region.translated_addr == hw_vaddr) {
- error_report("Insertion to IOVA->HVA tree failed");
+ if (mem_region.translated_addr == gpa) {
+ error_report("Insertion to GPA->IOVA tree failed");
/* Remove the mapping from the IOVA-only tree */
goto fail_map;
}
@@ -392,7 +392,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
fail_map:
if (s->shadow_data) {
- vhost_iova_tree_remove(s->iova_tree, mem_region);
+ vhost_iova_tree_remove_gpa(s->iova_tree, mem_region);
}
fail:
@@ -446,21 +446,18 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
if (s->shadow_data) {
const DMAMap *result;
- const void *vaddr = memory_region_get_ram_ptr(section->mr) +
- section->offset_within_region +
- (iova - section->offset_within_address_space);
DMAMap mem_region = {
- .translated_addr = (hwaddr)(uintptr_t)vaddr,
+ .translated_addr = section->offset_within_address_space,
.size = int128_get64(llsize) - 1,
};
- result = vhost_iova_tree_find_iova(s->iova_tree, &mem_region);
+ result = vhost_iova_tree_find_gpa(s->iova_tree, &mem_region);
if (!result) {
/* The memory listener map wasn't mapped */
return;
}
iova = result->iova;
- vhost_iova_tree_remove(s->iova_tree, *result);
+ vhost_iova_tree_remove_gpa(s->iova_tree, *result);
}
vhost_vdpa_iotlb_batch_begin_once(s);
/*
@@ -40,6 +40,28 @@ typedef struct DMAMap {
} QEMU_PACKED DMAMap;
typedef gboolean (*iova_tree_iterator)(DMAMap *map);
+/**
+ * gpa_tree_new:
+ *
+ * Create a new GPA->IOVA tree.
+ *
+ * Returns: the tree pointer on success, or NULL otherwise.
+ */
+IOVATree *gpa_tree_new(void);
+
+/**
+ * gpa_tree_insert:
+ *
+ * @tree: The GPA->IOVA tree to insert the mapping into
+ * @map: The GPA->IOVA mapping to insert
+ *
+ * Inserts a GPA range into the GPA->IOVA tree. If the new range
+ * overlaps an existing one, IOVA_ERR_OVERLAP is returned.
+ *
+ * Return: 0 if successful, < 0 otherwise.
+ */
+int gpa_tree_insert(IOVATree *tree, const DMAMap *map);
+
/**
* iova_tree_new:
*
@@ -257,3 +257,49 @@ void iova_tree_destroy(IOVATree *tree)
g_tree_destroy(tree->tree);
g_free(tree);
}
+
+static int gpa_tree_compare(gconstpointer a, gconstpointer b, gpointer data)
+{
+ const DMAMap *m1 = a, *m2 = b;
+
+ if (m1->translated_addr > m2->translated_addr + m2->size) {
+ return 1;
+ }
+
+ if (m1->translated_addr + m1->size < m2->translated_addr) {
+ return -1;
+ }
+
+ /* Overlapped */
+ return 0;
+}
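+
+/*
+ * Example with hypothetical values: since DMAMap sizes are inclusive
+ * (length - 1), { .translated_addr = 0x1000, .size = 0xfff } covers
+ * GPAs [0x1000, 0x1fff]. A needle at .translated_addr = 0x2000 then
+ * compares greater, so the two ranges do not overlap.
+ */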
+
+IOVATree *gpa_tree_new(void)
+{
+ IOVATree *gpa_tree = g_new0(IOVATree, 1);
+
+ gpa_tree->tree = g_tree_new_full(gpa_tree_compare, NULL, g_free, NULL);
+
+ return gpa_tree;
+}
+
+int gpa_tree_insert(IOVATree *tree, const DMAMap *map)
+{
+ DMAMap *new;
+
+ if (map->translated_addr + map->size < map->translated_addr ||
+ map->perm == IOMMU_NONE) {
+ return IOVA_ERR_INVALID;
+ }
+
+ /* We don't allow inserting ranges that overlap with existing ones */
+ if (iova_tree_find(tree, map)) {
+ return IOVA_ERR_OVERLAP;
+ }
+
+ new = g_new0(DMAMap, 1);
+ memcpy(new, map, sizeof(*new));
+ iova_tree_insert_internal(tree->tree, new);
+
+ return IOVA_OK;
+}
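+
+/*
+ * Usage sketch, illustrative only (addresses are made up). The caller
+ * keeps ownership of the DMAMap; gpa_tree_insert() stores a copy:
+ *
+ * IOVATree *gpa_tree = gpa_tree_new();
+ * DMAMap map = {
+ * .iova = 0x3000,
+ * .translated_addr = 0x100000,
+ * .size = 0x1000 - 1,
+ * .perm = IOMMU_RW,
+ * };
+ * int r = gpa_tree_insert(gpa_tree, &map); returns IOVA_OK
+ * r = gpa_tree_insert(gpa_tree, &map); now returns IOVA_ERR_OVERLAP
+ */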
Creates and supports a GPA->IOVA tree and a partial IOVA->HVA tree by splitting up guest-backed memory maps and host-only memory maps from the full IOVA->HVA tree. That is, any guest-backed memory maps are now stored in the GPA->IOVA tree and host-only memory maps stay in the IOVA->HVA tree. The qemu_ram_block_from_host() function in vhost_svq_translate_addr() is used to determine if the HVA (from the iovec) is backed by guest memory by attempting to infer a RAM block from it. If a valid RAMBlock is returned, we derive the GPA from it and search the GPA->IOVA tree for the corresponding IOVA. If an invalid RAMBlock is returned, the HVA is used to search the IOVA->HVA tree for the corresponding IOVA. This method of determining if an HVA is backed by guest memory is far from optimal, especially for memory buffers backed by host-only memory. In the next patch, this method will be improved by utilizing the in_addr and out_addr members of a VirtQueueElement to determine if an incoming address for translation is backed by guest or host-only memory. Signed-off-by: Jonah Palmer <jonah.palmer@oracle.com> --- hw/virtio/vhost-iova-tree.c | 67 ++++++++++++++++++++++++++++++ hw/virtio/vhost-iova-tree.h | 5 +++ hw/virtio/vhost-shadow-virtqueue.c | 28 ++++++++++--- hw/virtio/vhost-vdpa.c | 19 ++++----- include/qemu/iova-tree.h | 22 ++++++++++ util/iova-tree.c | 46 ++++++++++++++++++++ 6 files changed, 171 insertions(+), 16 deletions(-)