diff mbox series

[v6,7/8] xen: mapcache: Add support for grant mappings

Message ID 20240516154804.1114245-8-edgar.iglesias@gmail.com (mailing list archive)
State New
Headers show
Series xen: Support grant mappings | expand

Commit Message

Edgar E. Iglesias May 16, 2024, 3:48 p.m. UTC
From: "Edgar E. Iglesias" <edgar.iglesias@amd.com>

Add a second mapcache for grant mappings. The mapcache for
grants needs to work with XC_PAGE_SIZE granularity since
we can't map larger ranges than what has been granted to us.

Like with foreign mappings (xen_memory), machines using grants
are expected to initialize the xen_grants MR and map it
into their address-map accordingly.

Signed-off-by: Edgar E. Iglesias <edgar.iglesias@amd.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
---
 hw/xen/xen-hvm-common.c         |  12 ++-
 hw/xen/xen-mapcache.c           | 163 ++++++++++++++++++++++++++------
 include/hw/xen/xen-hvm-common.h |   3 +
 include/sysemu/xen.h            |   7 ++
 4 files changed, 152 insertions(+), 33 deletions(-)

Comments

Manos Pitsidianakis May 23, 2024, 7:35 a.m. UTC | #1
On Thu, 16 May 2024 18:48, "Edgar E. Iglesias" <edgar.iglesias@gmail.com> wrote:
>From: "Edgar E. Iglesias" <edgar.iglesias@amd.com>
>
>Add a second mapcache for grant mappings. The mapcache for
>grants needs to work with XC_PAGE_SIZE granularity since
>we can't map larger ranges than what has been granted to us.
>
>Like with foreign mappings (xen_memory), machines using grants
>are expected to initialize the xen_grants MR and map it
>into their address-map accordingly.
>
>Signed-off-by: Edgar E. Iglesias <edgar.iglesias@amd.com>
>Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
>---
> hw/xen/xen-hvm-common.c         |  12 ++-
> hw/xen/xen-mapcache.c           | 163 ++++++++++++++++++++++++++------
> include/hw/xen/xen-hvm-common.h |   3 +
> include/sysemu/xen.h            |   7 ++
> 4 files changed, 152 insertions(+), 33 deletions(-)
>
>diff --git a/hw/xen/xen-hvm-common.c b/hw/xen/xen-hvm-common.c
>index a0a0252da0..b8ace1c368 100644
>--- a/hw/xen/xen-hvm-common.c
>+++ b/hw/xen/xen-hvm-common.c
>@@ -10,12 +10,18 @@
> #include "hw/boards.h"
> #include "hw/xen/arch_hvm.h"
> 
>-MemoryRegion xen_memory;
>+MemoryRegion xen_memory, xen_grants;
> 
>-/* Check for xen memory.  */
>+/* Check for any kind of xen memory, foreign mappings or grants.  */
> bool xen_mr_is_memory(MemoryRegion *mr)
> {
>-    return mr == &xen_memory;
>+    return mr == &xen_memory || mr == &xen_grants;
>+}
>+
>+/* Check specifically for grants.  */
>+bool xen_mr_is_grants(MemoryRegion *mr)
>+{
>+    return mr == &xen_grants;
> }
> 
> void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr,
>diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
>index a07c47b0b1..1cbc2aeaa9 100644
>--- a/hw/xen/xen-mapcache.c
>+++ b/hw/xen/xen-mapcache.c
>@@ -14,6 +14,7 @@
> 
> #include <sys/resource.h>
> 
>+#include "hw/xen/xen-hvm-common.h"
> #include "hw/xen/xen_native.h"
> #include "qemu/bitmap.h"
> 
>@@ -21,6 +22,8 @@
> #include "sysemu/xen-mapcache.h"
> #include "trace.h"
> 
>+#include <xenevtchn.h>
>+#include <xengnttab.h>
> 
> #if HOST_LONG_BITS == 32
> #  define MCACHE_MAX_SIZE     (1UL<<31) /* 2GB Cap */
>@@ -41,6 +44,7 @@ typedef struct MapCacheEntry {
>     unsigned long *valid_mapping;
>     uint32_t lock;
> #define XEN_MAPCACHE_ENTRY_DUMMY (1 << 0)
>+#define XEN_MAPCACHE_ENTRY_GRANT (1 << 1)

Might we get more entry kinds in the future? (for example foreign maps). 
Maybe this could be an enum.

>     uint8_t flags;
>     hwaddr size;
>     struct MapCacheEntry *next;
>@@ -71,6 +75,8 @@ typedef struct MapCache {
> } MapCache;
> 
> static MapCache *mapcache;
>+static MapCache *mapcache_grants;
>+static xengnttab_handle *xen_region_gnttabdev;
> 
> static inline void mapcache_lock(MapCache *mc)
> {
>@@ -131,6 +137,12 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque)
>     unsigned long max_mcache_size;
>     unsigned int bucket_shift;
> 
>+    xen_region_gnttabdev = xengnttab_open(NULL, 0);
>+    if (xen_region_gnttabdev == NULL) {
>+        error_report("mapcache: Failed to open gnttab device");
>+        exit(EXIT_FAILURE);
>+    }
>+
>     if (HOST_LONG_BITS == 32) {
>         bucket_shift = 16;
>     } else {
>@@ -159,6 +171,15 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque)
>     mapcache = xen_map_cache_init_single(f, opaque,
>                                          bucket_shift,
>                                          max_mcache_size);
>+
>+    /*
>+     * Grant mappings must use XC_PAGE_SIZE granularity since we can't
>+     * map anything beyond the number of pages granted to us.
>+     */
>+    mapcache_grants = xen_map_cache_init_single(f, opaque,
>+                                                XC_PAGE_SHIFT,
>+                                                max_mcache_size);
>+
>     setrlimit(RLIMIT_AS, &rlimit_as);
> }
> 
>@@ -168,17 +189,24 @@ static void xen_remap_bucket(MapCache *mc,
>                              hwaddr size,
>                              hwaddr address_index,
>                              bool dummy,
>+                             bool grant,
>+                             bool is_write,
>                              ram_addr_t ram_offset)
> {
>     uint8_t *vaddr_base;
>-    xen_pfn_t *pfns;
>+    uint32_t *refs = NULL;
>+    xen_pfn_t *pfns = NULL;
>     int *err;

You should use g_autofree to perform automatic cleanup on function exit 
instead of manually freeing, since the allocations should only live 
within the function call.

>     unsigned int i;
>     hwaddr nb_pfn = size >> XC_PAGE_SHIFT;
> 
>     trace_xen_remap_bucket(address_index);
> 
>-    pfns = g_new0(xen_pfn_t, nb_pfn);
>+    if (grant) {
>+        refs = g_new0(uint32_t, nb_pfn);
>+    } else {
>+        pfns = g_new0(xen_pfn_t, nb_pfn);
>+    }
>     err = g_new0(int, nb_pfn);
> 
>     if (entry->vaddr_base != NULL) {
>@@ -207,21 +235,51 @@ static void xen_remap_bucket(MapCache *mc,
>     g_free(entry->valid_mapping);
>     entry->valid_mapping = NULL;
> 
>-    for (i = 0; i < nb_pfn; i++) {
>-        pfns[i] = (address_index << (mc->bucket_shift - XC_PAGE_SHIFT)) + i;
>+    if (grant) {
>+        hwaddr grant_base = address_index - (ram_offset >> XC_PAGE_SHIFT);
>+
>+        for (i = 0; i < nb_pfn; i++) {
>+            refs[i] = grant_base + i;
>+        }
>+    } else {
>+        for (i = 0; i < nb_pfn; i++) {
>+            pfns[i] = (address_index << (mc->bucket_shift - XC_PAGE_SHIFT)) + i;
>+        }
>     }
> 
>-    /*
>-     * If the caller has requested the mapping at a specific address use
>-     * MAP_FIXED to make sure it's honored.
>-     */
>+    entry->flags &= ~XEN_MAPCACHE_ENTRY_GRANT;
>+
>     if (!dummy) {
>-        vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr,
>-                                           PROT_READ | PROT_WRITE,
>-                                           vaddr ? MAP_FIXED : 0,
>-                                           nb_pfn, pfns, err);

Since err is not NULL here, the function might return a valid pointer 
but individual frames might have failed.


>+        if (grant) {
>+            int prot = PROT_READ;
>+
>+            if (is_write) {
>+                prot |= PROT_WRITE;
>+            }
>+
>+            entry->flags |= XEN_MAPCACHE_ENTRY_GRANT;
>+            assert(vaddr == NULL);
>+            vaddr_base = xengnttab_map_domain_grant_refs(xen_region_gnttabdev,
>+                                                         nb_pfn,
>+                                                         xen_domid, refs,
>+                                                         prot);
>+        } else {
>+            /*
>+             * If the caller has requested the mapping at a specific address use
>+             * MAP_FIXED to make sure it's honored.
>+             *
>+             * We don't yet support upgrading mappings from RO to RW, to handle
>+             * models using ordinary address_space_rw(), foreign mappings ignore
>+             * is_write and are always mapped RW.
>+             */
>+            vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr,
>+                                               PROT_READ | PROT_WRITE,
>+                                               vaddr ? MAP_FIXED : 0,
>+                                               nb_pfn, pfns, err);
>+        }
>         if (vaddr_base == NULL) {
>-            perror("xenforeignmemory_map2");
>+            perror(grant ? "xengnttab_map_domain_grant_refs"
>+                           : "xenforeignmemory_map2");
>             exit(-1);
>         }
>     } else {
>@@ -261,6 +319,7 @@ static void xen_remap_bucket(MapCache *mc,
>         }
>     }
> 
>+    g_free(refs);
>     g_free(pfns);
>     g_free(err);
> }
>@@ -268,7 +327,8 @@ static void xen_remap_bucket(MapCache *mc,
> static uint8_t *xen_map_cache_unlocked(MapCache *mc,
>                                        hwaddr phys_addr, hwaddr size,
>                                        ram_addr_t ram_offset,
>-                                       uint8_t lock, bool dma, bool is_write)
>+                                       uint8_t lock, bool dma,
>+                                       bool grant, bool is_write)
> {
>     MapCacheEntry *entry, *pentry = NULL,
>                   *free_entry = NULL, *free_pentry = NULL;
>@@ -340,7 +400,7 @@ tryagain:
>         entry = g_new0(MapCacheEntry, 1);
>         pentry->next = entry;
>         xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy,
>-                         ram_offset);
>+                         grant, is_write, ram_offset);
>     } else if (!entry->lock) {
>         if (!entry->vaddr_base || entry->paddr_index != address_index ||
>                 entry->size != cache_size ||
>@@ -348,7 +408,7 @@ tryagain:
>                     test_bit_size >> XC_PAGE_SHIFT,
>                     entry->valid_mapping)) {
>             xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy,
>-                             ram_offset);
>+                             grant, is_write, ram_offset);
>         }
>     }
> 
>@@ -399,12 +459,28 @@ uint8_t *xen_map_cache(MemoryRegion *mr,
>                        uint8_t lock, bool dma,
>                        bool is_write)
> {
>+    bool grant = xen_mr_is_grants(mr);
>+    MapCache *mc = grant ? mapcache_grants : mapcache;
>     uint8_t *p;
> 
>-    mapcache_lock(mapcache);
>-    p = xen_map_cache_unlocked(mapcache, phys_addr, size, ram_addr_offset,
>-                               lock, dma, is_write);
>-    mapcache_unlock(mapcache);
>+    if (grant) {
>+        /*
>+         * Grants are only supported via address_space_map(). Anything
>+         * else is considered a user/guest error.
>+         *
>+         * QEMU generally doesn't expect these mappings to ever fail, so
>+         * if this happens we report an error message and abort().
>+         */
>+        if (!lock) {

Nested if conditions that can be flattened, i.e. this could be 

if (grant && !lock)

>+            error_report("Trying access a grant reference without mapping it.");

s/Trying access a grant/Tried to access a grant/

>+            abort();
>+        }
>+    }
>+
>+    mapcache_lock(mc);
>+    p = xen_map_cache_unlocked(mc, phys_addr, size, ram_addr_offset,
>+                               lock, dma, grant, is_write);
>+    mapcache_unlock(mc);
>     return p;
> }
> 
>@@ -449,7 +525,14 @@ static ram_addr_t xen_ram_addr_from_mapcache_single(MapCache *mc, void *ptr)
> 
> ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
> {
>-    return xen_ram_addr_from_mapcache_single(mapcache, ptr);
>+    ram_addr_t addr;
>+
>+    addr = xen_ram_addr_from_mapcache_single(mapcache, ptr);
>+    if (addr == RAM_ADDR_INVALID) {
>+        addr = xen_ram_addr_from_mapcache_single(mapcache_grants, ptr);
>+    }
>+
>+    return addr;
> }
> 
> static void xen_invalidate_map_cache_entry_unlocked(MapCache *mc,
>@@ -460,6 +543,7 @@ static void xen_invalidate_map_cache_entry_unlocked(MapCache *mc,
>     hwaddr paddr_index;
>     hwaddr size;
>     int found = 0;
>+    int rc;
> 
>     QTAILQ_FOREACH(reventry, &mc->locked_entries, next) {
>         if (reventry->vaddr_req == buffer) {
>@@ -502,7 +586,14 @@ static void xen_invalidate_map_cache_entry_unlocked(MapCache *mc,
>     }
> 
>     ram_block_notify_remove(entry->vaddr_base, entry->size, entry->size);
>-    if (munmap(entry->vaddr_base, entry->size) != 0) {
>+    if (entry->flags & XEN_MAPCACHE_ENTRY_GRANT) {
>+        rc = xengnttab_unmap(xen_region_gnttabdev, entry->vaddr_base,
>+                             entry->size >> mc->bucket_shift);
>+    } else {
>+        rc = munmap(entry->vaddr_base, entry->size);
>+    }
>+
>+    if (rc) {
>         perror("unmap fails");
>         exit(-1);
>     }
>@@ -521,14 +612,24 @@ typedef struct XenMapCacheData {
>     uint8_t *buffer;
> } XenMapCacheData;
> 
>+static void xen_invalidate_map_cache_entry_single(MapCache *mc, uint8_t *buffer)
>+{
>+    mapcache_lock(mc);
>+    xen_invalidate_map_cache_entry_unlocked(mc, buffer);
>+    mapcache_unlock(mc);
>+}
>+
>+static void xen_invalidate_map_cache_entry_all(uint8_t *buffer)
>+{
>+    xen_invalidate_map_cache_entry_single(mapcache, buffer);
>+    xen_invalidate_map_cache_entry_single(mapcache_grants, buffer);
>+}
>+
> static void xen_invalidate_map_cache_entry_bh(void *opaque)
> {
>     XenMapCacheData *data = opaque;
> 
>-    mapcache_lock(mapcache);
>-    xen_invalidate_map_cache_entry_unlocked(mapcache, data->buffer);
>-    mapcache_unlock(mapcache);
>-
>+    xen_invalidate_map_cache_entry_all(data->buffer);
>     aio_co_wake(data->co);
> }
> 
>@@ -543,9 +644,7 @@ void coroutine_mixed_fn xen_invalidate_map_cache_entry(uint8_t *buffer)
>                                 xen_invalidate_map_cache_entry_bh, &data);
>         qemu_coroutine_yield();
>     } else {
>-        mapcache_lock(mapcache);
>-        xen_invalidate_map_cache_entry_unlocked(mapcache, buffer);
>-        mapcache_unlock(mapcache);
>+        xen_invalidate_map_cache_entry_all(buffer);
>     }
> }
> 
>@@ -597,6 +696,7 @@ void xen_invalidate_map_cache(void)
>     bdrv_drain_all();
> 
>     xen_invalidate_map_cache_single(mapcache);
>+    xen_invalidate_map_cache_single(mapcache_grants);
> }
> 
> static uint8_t *xen_replace_cache_entry_unlocked(MapCache *mc,
>@@ -632,13 +732,16 @@ static uint8_t *xen_replace_cache_entry_unlocked(MapCache *mc,
>         return NULL;
>     }
> 
>+    assert((entry->flags & XEN_MAPCACHE_ENTRY_GRANT) == 0);
>+
>     address_index  = new_phys_addr >> mc->bucket_shift;
>     address_offset = new_phys_addr & (mc->bucket_size - 1);
> 
>     trace_xen_replace_cache_entry_dummy(old_phys_addr, new_phys_addr);
> 
>     xen_remap_bucket(mc, entry, entry->vaddr_base,
>-                     cache_size, address_index, false, old_phys_addr);
>+                     cache_size, address_index, false,
>+                     false, false, old_phys_addr);
>     if (!test_bits(address_offset >> XC_PAGE_SHIFT,
>                 test_bit_size >> XC_PAGE_SHIFT,
>                 entry->valid_mapping)) {
>diff --git a/include/hw/xen/xen-hvm-common.h b/include/hw/xen/xen-hvm-common.h
>index 65a51aac2e..3d796235dc 100644
>--- a/include/hw/xen/xen-hvm-common.h
>+++ b/include/hw/xen/xen-hvm-common.h
>@@ -16,6 +16,7 @@
> #include <xen/hvm/ioreq.h>
> 
> extern MemoryRegion xen_memory;
>+extern MemoryRegion xen_grants;
> extern MemoryListener xen_io_listener;
> extern DeviceListener xen_device_listener;
> 
>@@ -29,6 +30,8 @@ extern DeviceListener xen_device_listener;
>     do { } while (0)
> #endif
> 
>+#define XEN_GRANT_ADDR_OFF (1ULL << 63)
>+
> static inline uint32_t xen_vcpu_eport(shared_iopage_t *shared_page, int i)
> {
>     return shared_page->vcpu_ioreq[i].vp_eport;
>diff --git a/include/sysemu/xen.h b/include/sysemu/xen.h
>index dc72f83bcb..19dccf4d71 100644
>--- a/include/sysemu/xen.h
>+++ b/include/sysemu/xen.h
>@@ -35,6 +35,7 @@ void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size,
>                    struct MemoryRegion *mr, Error **errp);
> 
> bool xen_mr_is_memory(MemoryRegion *mr);
>+bool xen_mr_is_grants(MemoryRegion *mr);
> 
> #else /* !CONFIG_XEN_IS_POSSIBLE */
> 
>@@ -55,6 +56,12 @@ static inline bool xen_mr_is_memory(MemoryRegion *mr)
>     return false;
> }
> 
>+static inline bool xen_mr_is_grants(MemoryRegion *mr)
>+{
>+    g_assert_not_reached();
>+    return false;
>+}
>+
> #endif /* CONFIG_XEN_IS_POSSIBLE */
> 
> #endif
>-- 
>2.40.1
>
>
Edgar E. Iglesias May 23, 2024, 10:23 a.m. UTC | #2
On Thu, May 23, 2024 at 9:47 AM Manos Pitsidianakis <
manos.pitsidianakis@linaro.org> wrote:

> On Thu, 16 May 2024 18:48, "Edgar E. Iglesias" <edgar.iglesias@gmail.com>
> wrote:
> >From: "Edgar E. Iglesias" <edgar.iglesias@amd.com>
> >
> >Add a second mapcache for grant mappings. The mapcache for
> >grants needs to work with XC_PAGE_SIZE granularity since
> >we can't map larger ranges than what has been granted to us.
> >
> >Like with foreign mappings (xen_memory), machines using grants
> >are expected to initialize the xen_grants MR and map it
> >into their address-map accordingly.
> >
> >Signed-off-by: Edgar E. Iglesias <edgar.iglesias@amd.com>
> >Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
> >---
> > hw/xen/xen-hvm-common.c         |  12 ++-
> > hw/xen/xen-mapcache.c           | 163 ++++++++++++++++++++++++++------
> > include/hw/xen/xen-hvm-common.h |   3 +
> > include/sysemu/xen.h            |   7 ++
> > 4 files changed, 152 insertions(+), 33 deletions(-)
> >
> >diff --git a/hw/xen/xen-hvm-common.c b/hw/xen/xen-hvm-common.c
> >index a0a0252da0..b8ace1c368 100644
> >--- a/hw/xen/xen-hvm-common.c
> >+++ b/hw/xen/xen-hvm-common.c
> >@@ -10,12 +10,18 @@
> > #include "hw/boards.h"
> > #include "hw/xen/arch_hvm.h"
> >
> >-MemoryRegion xen_memory;
> >+MemoryRegion xen_memory, xen_grants;
> >
> >-/* Check for xen memory.  */
> >+/* Check for any kind of xen memory, foreign mappings or grants.  */
> > bool xen_mr_is_memory(MemoryRegion *mr)
> > {
> >-    return mr == &xen_memory;
> >+    return mr == &xen_memory || mr == &xen_grants;
> >+}
> >+
> >+/* Check specifically for grants.  */
> >+bool xen_mr_is_grants(MemoryRegion *mr)
> >+{
> >+    return mr == &xen_grants;
> > }
> >
> > void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion
> *mr,
> >diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
> >index a07c47b0b1..1cbc2aeaa9 100644
> >--- a/hw/xen/xen-mapcache.c
> >+++ b/hw/xen/xen-mapcache.c
> >@@ -14,6 +14,7 @@
> >
> > #include <sys/resource.h>
> >
> >+#include "hw/xen/xen-hvm-common.h"
> > #include "hw/xen/xen_native.h"
> > #include "qemu/bitmap.h"
> >
> >@@ -21,6 +22,8 @@
> > #include "sysemu/xen-mapcache.h"
> > #include "trace.h"
> >
> >+#include <xenevtchn.h>
> >+#include <xengnttab.h>
> >
> > #if HOST_LONG_BITS == 32
> > #  define MCACHE_MAX_SIZE     (1UL<<31) /* 2GB Cap */
> >@@ -41,6 +44,7 @@ typedef struct MapCacheEntry {
> >     unsigned long *valid_mapping;
> >     uint32_t lock;
> > #define XEN_MAPCACHE_ENTRY_DUMMY (1 << 0)
> >+#define XEN_MAPCACHE_ENTRY_GRANT (1 << 1)
>
> Might we get more entry kinds in the future? (for example foreign maps).
> Maybe this could be an enum.
>
>
Perhaps. Foreign mappings are already supported, this flag separates
ordinary foreign mappings from grant foreign mappings.
IMO, since this is not an external interface it's probably better to change
it once we have a concrete use-case at hand.



> >     uint8_t flags;
> >     hwaddr size;
> >     struct MapCacheEntry *next;
> >@@ -71,6 +75,8 @@ typedef struct MapCache {
> > } MapCache;
> >
> > static MapCache *mapcache;
> >+static MapCache *mapcache_grants;
> >+static xengnttab_handle *xen_region_gnttabdev;
> >
> > static inline void mapcache_lock(MapCache *mc)
> > {
> >@@ -131,6 +137,12 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f,
> void *opaque)
> >     unsigned long max_mcache_size;
> >     unsigned int bucket_shift;
> >
> >+    xen_region_gnttabdev = xengnttab_open(NULL, 0);
> >+    if (xen_region_gnttabdev == NULL) {
> >+        error_report("mapcache: Failed to open gnttab device");
> >+        exit(EXIT_FAILURE);
> >+    }
> >+
> >     if (HOST_LONG_BITS == 32) {
> >         bucket_shift = 16;
> >     } else {
> >@@ -159,6 +171,15 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f,
> void *opaque)
> >     mapcache = xen_map_cache_init_single(f, opaque,
> >                                          bucket_shift,
> >                                          max_mcache_size);
> >+
> >+    /*
> >+     * Grant mappings must use XC_PAGE_SIZE granularity since we can't
> >+     * map anything beyond the number of pages granted to us.
> >+     */
> >+    mapcache_grants = xen_map_cache_init_single(f, opaque,
> >+                                                XC_PAGE_SHIFT,
> >+                                                max_mcache_size);
> >+
> >     setrlimit(RLIMIT_AS, &rlimit_as);
> > }
> >
> >@@ -168,17 +189,24 @@ static void xen_remap_bucket(MapCache *mc,
> >                              hwaddr size,
> >                              hwaddr address_index,
> >                              bool dummy,
> >+                             bool grant,
> >+                             bool is_write,
> >                              ram_addr_t ram_offset)
> > {
> >     uint8_t *vaddr_base;
> >-    xen_pfn_t *pfns;
> >+    uint32_t *refs = NULL;
> >+    xen_pfn_t *pfns = NULL;
> >     int *err;
>
> You should use g_autofree to perform automatic cleanup on function exit
> instead of manually freeing, since the allocations should only live
> within the function call.
>
>
Sounds good, I'll do that in the next version.



> >     unsigned int i;
> >     hwaddr nb_pfn = size >> XC_PAGE_SHIFT;
> >
> >     trace_xen_remap_bucket(address_index);
> >
> >-    pfns = g_new0(xen_pfn_t, nb_pfn);
> >+    if (grant) {
> >+        refs = g_new0(uint32_t, nb_pfn);
> >+    } else {
> >+        pfns = g_new0(xen_pfn_t, nb_pfn);
> >+    }
> >     err = g_new0(int, nb_pfn);
> >
> >     if (entry->vaddr_base != NULL) {
> >@@ -207,21 +235,51 @@ static void xen_remap_bucket(MapCache *mc,
> >     g_free(entry->valid_mapping);
> >     entry->valid_mapping = NULL;
> >
> >-    for (i = 0; i < nb_pfn; i++) {
> >-        pfns[i] = (address_index << (mc->bucket_shift - XC_PAGE_SHIFT))
> + i;
> >+    if (grant) {
> >+        hwaddr grant_base = address_index - (ram_offset >>
> XC_PAGE_SHIFT);
> >+
> >+        for (i = 0; i < nb_pfn; i++) {
> >+            refs[i] = grant_base + i;
> >+        }
> >+    } else {
> >+        for (i = 0; i < nb_pfn; i++) {
> >+            pfns[i] = (address_index << (mc->bucket_shift -
> XC_PAGE_SHIFT)) + i;
> >+        }
> >     }
> >
> >-    /*
> >-     * If the caller has requested the mapping at a specific address use
> >-     * MAP_FIXED to make sure it's honored.
> >-     */
> >+    entry->flags &= ~XEN_MAPCACHE_ENTRY_GRANT;
> >+
> >     if (!dummy) {
> >-        vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr,
> >-                                           PROT_READ | PROT_WRITE,
> >-                                           vaddr ? MAP_FIXED : 0,
> >-                                           nb_pfn, pfns, err);
>
> Since err is not NULL here, the function might return a valid pointer
> but individual frames might have failed.
>
>
Yes but AFAICT, the case when some pages fail foreign mapping is handled
further down the function (see the valid_mappings bitmap).
Note that this series isn't really changing this existing behaviour for
foreign mappings. In any case, If we spot a bug in existing code, I'm happy
to fix it.



>
> >+        if (grant) {
> >+            int prot = PROT_READ;
> >+
> >+            if (is_write) {
> >+                prot |= PROT_WRITE;
> >+            }
> >+
> >+            entry->flags |= XEN_MAPCACHE_ENTRY_GRANT;
> >+            assert(vaddr == NULL);
> >+            vaddr_base =
> xengnttab_map_domain_grant_refs(xen_region_gnttabdev,
> >+                                                         nb_pfn,
> >+                                                         xen_domid, refs,
> >+                                                         prot);
> >+        } else {
> >+            /*
> >+             * If the caller has requested the mapping at a specific
> address use
> >+             * MAP_FIXED to make sure it's honored.
> >+             *
> >+             * We don't yet support upgrading mappings from RO to RW, to
> handle
> >+             * models using ordinary address_space_rw(), foreign
> mappings ignore
> >+             * is_write and are always mapped RW.
> >+             */
> >+            vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid,
> vaddr,
> >+                                               PROT_READ | PROT_WRITE,
> >+                                               vaddr ? MAP_FIXED : 0,
> >+                                               nb_pfn, pfns, err);
> >+        }
> >         if (vaddr_base == NULL) {
> >-            perror("xenforeignmemory_map2");
> >+            perror(grant ? "xengnttab_map_domain_grant_refs"
> >+                           : "xenforeignmemory_map2");
> >             exit(-1);
> >         }
> >     } else {
> >@@ -261,6 +319,7 @@ static void xen_remap_bucket(MapCache *mc,
> >         }
> >     }
> >
> >+    g_free(refs);
> >     g_free(pfns);
> >     g_free(err);
> > }
> >@@ -268,7 +327,8 @@ static void xen_remap_bucket(MapCache *mc,
> > static uint8_t *xen_map_cache_unlocked(MapCache *mc,
> >                                        hwaddr phys_addr, hwaddr size,
> >                                        ram_addr_t ram_offset,
> >-                                       uint8_t lock, bool dma, bool
> is_write)
> >+                                       uint8_t lock, bool dma,
> >+                                       bool grant, bool is_write)
> > {
> >     MapCacheEntry *entry, *pentry = NULL,
> >                   *free_entry = NULL, *free_pentry = NULL;
> >@@ -340,7 +400,7 @@ tryagain:
> >         entry = g_new0(MapCacheEntry, 1);
> >         pentry->next = entry;
> >         xen_remap_bucket(mc, entry, NULL, cache_size, address_index,
> dummy,
> >-                         ram_offset);
> >+                         grant, is_write, ram_offset);
> >     } else if (!entry->lock) {
> >         if (!entry->vaddr_base || entry->paddr_index != address_index ||
> >                 entry->size != cache_size ||
> >@@ -348,7 +408,7 @@ tryagain:
> >                     test_bit_size >> XC_PAGE_SHIFT,
> >                     entry->valid_mapping)) {
> >             xen_remap_bucket(mc, entry, NULL, cache_size, address_index,
> dummy,
> >-                             ram_offset);
> >+                             grant, is_write, ram_offset);
> >         }
> >     }
> >
> >@@ -399,12 +459,28 @@ uint8_t *xen_map_cache(MemoryRegion *mr,
> >                        uint8_t lock, bool dma,
> >                        bool is_write)
> > {
> >+    bool grant = xen_mr_is_grants(mr);
> >+    MapCache *mc = grant ? mapcache_grants : mapcache;
> >     uint8_t *p;
> >
> >-    mapcache_lock(mapcache);
> >-    p = xen_map_cache_unlocked(mapcache, phys_addr, size,
> ram_addr_offset,
> >-                               lock, dma, is_write);
> >-    mapcache_unlock(mapcache);
> >+    if (grant) {
> >+        /*
> >+         * Grants are only supported via address_space_map(). Anything
> >+         * else is considered a user/guest error.
> >+         *
> >+         * QEMU generally doesn't expect these mappings to ever fail, so
> >+         * if this happens we report an error message and abort().
> >+         */
> >+        if (!lock) {
>
> Nested if conditions that can be flattened, i.e. this could be
>
> if (grant && !lock)
>


Sounds good, will flatten this in the next version.


>
> >+            error_report("Trying access a grant reference without
> mapping it.");
>
> s/Trying access a grant/Tried to access a grant/
>
>
Will fix it, thanks!

Best regards,
Edgar



> >+            abort();
> >+        }
> >+    }
> >+
> >+    mapcache_lock(mc);
> >+    p = xen_map_cache_unlocked(mc, phys_addr, size, ram_addr_offset,
> >+                               lock, dma, grant, is_write);
> >+    mapcache_unlock(mc);
> >     return p;
> > }
> >
> >@@ -449,7 +525,14 @@ static ram_addr_t
> xen_ram_addr_from_mapcache_single(MapCache *mc, void *ptr)
> >
> > ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
> > {
> >-    return xen_ram_addr_from_mapcache_single(mapcache, ptr);
> >+    ram_addr_t addr;
> >+
> >+    addr = xen_ram_addr_from_mapcache_single(mapcache, ptr);
> >+    if (addr == RAM_ADDR_INVALID) {
> >+        addr = xen_ram_addr_from_mapcache_single(mapcache_grants, ptr);
> >+    }
> >+
> >+    return addr;
> > }
> >
> > static void xen_invalidate_map_cache_entry_unlocked(MapCache *mc,
> >@@ -460,6 +543,7 @@ static void
> xen_invalidate_map_cache_entry_unlocked(MapCache *mc,
> >     hwaddr paddr_index;
> >     hwaddr size;
> >     int found = 0;
> >+    int rc;
> >
> >     QTAILQ_FOREACH(reventry, &mc->locked_entries, next) {
> >         if (reventry->vaddr_req == buffer) {
> >@@ -502,7 +586,14 @@ static void
> xen_invalidate_map_cache_entry_unlocked(MapCache *mc,
> >     }
> >
> >     ram_block_notify_remove(entry->vaddr_base, entry->size, entry->size);
> >-    if (munmap(entry->vaddr_base, entry->size) != 0) {
> >+    if (entry->flags & XEN_MAPCACHE_ENTRY_GRANT) {
> >+        rc = xengnttab_unmap(xen_region_gnttabdev, entry->vaddr_base,
> >+                             entry->size >> mc->bucket_shift);
> >+    } else {
> >+        rc = munmap(entry->vaddr_base, entry->size);
> >+    }
> >+
> >+    if (rc) {
> >         perror("unmap fails");
> >         exit(-1);
> >     }
> >@@ -521,14 +612,24 @@ typedef struct XenMapCacheData {
> >     uint8_t *buffer;
> > } XenMapCacheData;
> >
> >+static void xen_invalidate_map_cache_entry_single(MapCache *mc, uint8_t
> *buffer)
> >+{
> >+    mapcache_lock(mc);
> >+    xen_invalidate_map_cache_entry_unlocked(mc, buffer);
> >+    mapcache_unlock(mc);
> >+}
> >+
> >+static void xen_invalidate_map_cache_entry_all(uint8_t *buffer)
> >+{
> >+    xen_invalidate_map_cache_entry_single(mapcache, buffer);
> >+    xen_invalidate_map_cache_entry_single(mapcache_grants, buffer);
> >+}
> >+
> > static void xen_invalidate_map_cache_entry_bh(void *opaque)
> > {
> >     XenMapCacheData *data = opaque;
> >
> >-    mapcache_lock(mapcache);
> >-    xen_invalidate_map_cache_entry_unlocked(mapcache, data->buffer);
> >-    mapcache_unlock(mapcache);
> >-
> >+    xen_invalidate_map_cache_entry_all(data->buffer);
> >     aio_co_wake(data->co);
> > }
> >
> >@@ -543,9 +644,7 @@ void coroutine_mixed_fn
> xen_invalidate_map_cache_entry(uint8_t *buffer)
> >                                 xen_invalidate_map_cache_entry_bh,
> &data);
> >         qemu_coroutine_yield();
> >     } else {
> >-        mapcache_lock(mapcache);
> >-        xen_invalidate_map_cache_entry_unlocked(mapcache, buffer);
> >-        mapcache_unlock(mapcache);
> >+        xen_invalidate_map_cache_entry_all(buffer);
> >     }
> > }
> >
> >@@ -597,6 +696,7 @@ void xen_invalidate_map_cache(void)
> >     bdrv_drain_all();
> >
> >     xen_invalidate_map_cache_single(mapcache);
> >+    xen_invalidate_map_cache_single(mapcache_grants);
> > }
> >
> > static uint8_t *xen_replace_cache_entry_unlocked(MapCache *mc,
> >@@ -632,13 +732,16 @@ static uint8_t
> *xen_replace_cache_entry_unlocked(MapCache *mc,
> >         return NULL;
> >     }
> >
> >+    assert((entry->flags & XEN_MAPCACHE_ENTRY_GRANT) == 0);
> >+
> >     address_index  = new_phys_addr >> mc->bucket_shift;
> >     address_offset = new_phys_addr & (mc->bucket_size - 1);
> >
> >     trace_xen_replace_cache_entry_dummy(old_phys_addr, new_phys_addr);
> >
> >     xen_remap_bucket(mc, entry, entry->vaddr_base,
> >-                     cache_size, address_index, false, old_phys_addr);
> >+                     cache_size, address_index, false,
> >+                     false, false, old_phys_addr);
> >     if (!test_bits(address_offset >> XC_PAGE_SHIFT,
> >                 test_bit_size >> XC_PAGE_SHIFT,
> >                 entry->valid_mapping)) {
> >diff --git a/include/hw/xen/xen-hvm-common.h
> b/include/hw/xen/xen-hvm-common.h
> >index 65a51aac2e..3d796235dc 100644
> >--- a/include/hw/xen/xen-hvm-common.h
> >+++ b/include/hw/xen/xen-hvm-common.h
> >@@ -16,6 +16,7 @@
> > #include <xen/hvm/ioreq.h>
> >
> > extern MemoryRegion xen_memory;
> >+extern MemoryRegion xen_grants;
> > extern MemoryListener xen_io_listener;
> > extern DeviceListener xen_device_listener;
> >
> >@@ -29,6 +30,8 @@ extern DeviceListener xen_device_listener;
> >     do { } while (0)
> > #endif
> >
> >+#define XEN_GRANT_ADDR_OFF (1ULL << 63)
> >+
> > static inline uint32_t xen_vcpu_eport(shared_iopage_t *shared_page, int
> i)
> > {
> >     return shared_page->vcpu_ioreq[i].vp_eport;
> >diff --git a/include/sysemu/xen.h b/include/sysemu/xen.h
> >index dc72f83bcb..19dccf4d71 100644
> >--- a/include/sysemu/xen.h
> >+++ b/include/sysemu/xen.h
> >@@ -35,6 +35,7 @@ void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size,
> >                    struct MemoryRegion *mr, Error **errp);
> >
> > bool xen_mr_is_memory(MemoryRegion *mr);
> >+bool xen_mr_is_grants(MemoryRegion *mr);
> >
> > #else /* !CONFIG_XEN_IS_POSSIBLE */
> >
> >@@ -55,6 +56,12 @@ static inline bool xen_mr_is_memory(MemoryRegion *mr)
> >     return false;
> > }
> >
> >+static inline bool xen_mr_is_grants(MemoryRegion *mr)
> >+{
> >+    g_assert_not_reached();
> >+    return false;
> >+}
> >+
> > #endif /* CONFIG_XEN_IS_POSSIBLE */
> >
> > #endif
> >--
> >2.40.1
> >
> >
>
Stefano Stabellini May 23, 2024, 6:30 p.m. UTC | #3
On Thu, 23 May 2024, Edgar E. Iglesias wrote:
> On Thu, May 23, 2024 at 9:47 AM Manos Pitsidianakis <manos.pitsidianakis@linaro.org> wrote:
>       On Thu, 16 May 2024 18:48, "Edgar E. Iglesias" <edgar.iglesias@gmail.com> wrote:
>       >From: "Edgar E. Iglesias" <edgar.iglesias@amd.com>
>       >
>       >Add a second mapcache for grant mappings. The mapcache for
>       >grants needs to work with XC_PAGE_SIZE granularity since
>       >we can't map larger ranges than what has been granted to us.
>       >
>       >Like with foreign mappings (xen_memory), machines using grants
>       >are expected to initialize the xen_grants MR and map it
>       >into their address-map accordingly.
>       >
>       >Signed-off-by: Edgar E. Iglesias <edgar.iglesias@amd.com>
>       >Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
>       >---
>       > hw/xen/xen-hvm-common.c         |  12 ++-
>       > hw/xen/xen-mapcache.c           | 163 ++++++++++++++++++++++++++------
>       > include/hw/xen/xen-hvm-common.h |   3 +
>       > include/sysemu/xen.h            |   7 ++
>       > 4 files changed, 152 insertions(+), 33 deletions(-)
>       >
>       >diff --git a/hw/xen/xen-hvm-common.c b/hw/xen/xen-hvm-common.c
>       >index a0a0252da0..b8ace1c368 100644
>       >--- a/hw/xen/xen-hvm-common.c
>       >+++ b/hw/xen/xen-hvm-common.c
>       >@@ -10,12 +10,18 @@
>       > #include "hw/boards.h"
>       > #include "hw/xen/arch_hvm.h"
>       >
>       >-MemoryRegion xen_memory;
>       >+MemoryRegion xen_memory, xen_grants;
>       >
>       >-/* Check for xen memory.  */
>       >+/* Check for any kind of xen memory, foreign mappings or grants.  */
>       > bool xen_mr_is_memory(MemoryRegion *mr)
>       > {
>       >-    return mr == &xen_memory;
>       >+    return mr == &xen_memory || mr == &xen_grants;
>       >+}
>       >+
>       >+/* Check specifically for grants.  */
>       >+bool xen_mr_is_grants(MemoryRegion *mr)
>       >+{
>       >+    return mr == &xen_grants;
>       > }
>       >
>       > void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr,
>       >diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
>       >index a07c47b0b1..1cbc2aeaa9 100644
>       >--- a/hw/xen/xen-mapcache.c
>       >+++ b/hw/xen/xen-mapcache.c
>       >@@ -14,6 +14,7 @@
>       >
>       > #include <sys/resource.h>
>       >
>       >+#include "hw/xen/xen-hvm-common.h"
>       > #include "hw/xen/xen_native.h"
>       > #include "qemu/bitmap.h"
>       >
>       >@@ -21,6 +22,8 @@
>       > #include "sysemu/xen-mapcache.h"
>       > #include "trace.h"
>       >
>       >+#include <xenevtchn.h>
>       >+#include <xengnttab.h>
>       >
>       > #if HOST_LONG_BITS == 32
>       > #  define MCACHE_MAX_SIZE     (1UL<<31) /* 2GB Cap */
>       >@@ -41,6 +44,7 @@ typedef struct MapCacheEntry {
>       >     unsigned long *valid_mapping;
>       >     uint32_t lock;
>       > #define XEN_MAPCACHE_ENTRY_DUMMY (1 << 0)
>       >+#define XEN_MAPCACHE_ENTRY_GRANT (1 << 1)
> 
>       Might we get more entry kinds in the future? (for example foreign maps).
>       Maybe this could be an enum.
> 
> 
> Perhaps. Foreign mappings are already supported, this flag separates ordinary foreign mappings from grant foreign mappings.
> IMO, since this is not an external interface it's probably better to change it once we have a concrete use-case at hand.
> 
>  
>       >     uint8_t flags;
>       >     hwaddr size;
>       >     struct MapCacheEntry *next;
>       >@@ -71,6 +75,8 @@ typedef struct MapCache {
>       > } MapCache;
>       >
>       > static MapCache *mapcache;
>       >+static MapCache *mapcache_grants;
>       >+static xengnttab_handle *xen_region_gnttabdev;
>       >
>       > static inline void mapcache_lock(MapCache *mc)
>       > {
>       >@@ -131,6 +137,12 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque)
>       >     unsigned long max_mcache_size;
>       >     unsigned int bucket_shift;
>       >
>       >+    xen_region_gnttabdev = xengnttab_open(NULL, 0);
>       >+    if (xen_region_gnttabdev == NULL) {
>       >+        error_report("mapcache: Failed to open gnttab device");
>       >+        exit(EXIT_FAILURE);
>       >+    }
>       >+
>       >     if (HOST_LONG_BITS == 32) {
>       >         bucket_shift = 16;
>       >     } else {
>       >@@ -159,6 +171,15 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque)
>       >     mapcache = xen_map_cache_init_single(f, opaque,
>       >                                          bucket_shift,
>       >                                          max_mcache_size);
>       >+
>       >+    /*
>       >+     * Grant mappings must use XC_PAGE_SIZE granularity since we can't
>       >+     * map anything beyond the number of pages granted to us.
>       >+     */
>       >+    mapcache_grants = xen_map_cache_init_single(f, opaque,
>       >+                                                XC_PAGE_SHIFT,
>       >+                                                max_mcache_size);
>       >+
>       >     setrlimit(RLIMIT_AS, &rlimit_as);
>       > }
>       >
>       >@@ -168,17 +189,24 @@ static void xen_remap_bucket(MapCache *mc,
>       >                              hwaddr size,
>       >                              hwaddr address_index,
>       >                              bool dummy,
>       >+                             bool grant,
>       >+                             bool is_write,
>       >                              ram_addr_t ram_offset)
>       > {
>       >     uint8_t *vaddr_base;
>       >-    xen_pfn_t *pfns;
>       >+    uint32_t *refs = NULL;
>       >+    xen_pfn_t *pfns = NULL;
>       >     int *err;
> 
>       You should use g_autofree to perform automatic cleanup on function exit
>       instead of manually freeing, since the allocations should only live
>       within the function call.
> 
> 
> Sounds good, I'll do that in the next version.
> 
>  
>       >     unsigned int i;
>       >     hwaddr nb_pfn = size >> XC_PAGE_SHIFT;
>       >
>       >     trace_xen_remap_bucket(address_index);
>       >
>       >-    pfns = g_new0(xen_pfn_t, nb_pfn);
>       >+    if (grant) {
>       >+        refs = g_new0(uint32_t, nb_pfn);
>       >+    } else {
>       >+        pfns = g_new0(xen_pfn_t, nb_pfn);
>       >+    }
>       >     err = g_new0(int, nb_pfn);
>       >
>       >     if (entry->vaddr_base != NULL) {
>       >@@ -207,21 +235,51 @@ static void xen_remap_bucket(MapCache *mc,
>       >     g_free(entry->valid_mapping);
>       >     entry->valid_mapping = NULL;
>       >
>       >-    for (i = 0; i < nb_pfn; i++) {
>       >-        pfns[i] = (address_index << (mc->bucket_shift - XC_PAGE_SHIFT)) + i;
>       >+    if (grant) {
>       >+        hwaddr grant_base = address_index - (ram_offset >> XC_PAGE_SHIFT);
>       >+
>       >+        for (i = 0; i < nb_pfn; i++) {
>       >+            refs[i] = grant_base + i;
>       >+        }
>       >+    } else {
>       >+        for (i = 0; i < nb_pfn; i++) {
>       >+            pfns[i] = (address_index << (mc->bucket_shift - XC_PAGE_SHIFT)) + i;
>       >+        }
>       >     }
>       >
>       >-    /*
>       >-     * If the caller has requested the mapping at a specific address use
>       >-     * MAP_FIXED to make sure it's honored.
>       >-     */
>       >+    entry->flags &= ~XEN_MAPCACHE_ENTRY_GRANT;
>       >+
>       >     if (!dummy) {
>       >-        vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr,
>       >-                                           PROT_READ | PROT_WRITE,
>       >-                                           vaddr ? MAP_FIXED : 0,
>       >-                                           nb_pfn, pfns, err);
> 
>       Since err is not NULL here, the function might return a valid pointer
>       but individual frames might have failed.
> 
> 
> Yes but AFAICT, the case when some pages fail foreign mapping is handled further down the function (see the valid_mappings bitmap).
> Note that this series isn't really changing this existing behaviour for foreign mappings. In any case, If we spot a bug in existing code,
> I'm happy to fix it.
> 
>  
> 
>       >+        if (grant) {
>       >+            int prot = PROT_READ;
>       >+
>       >+            if (is_write) {
>       >+                prot |= PROT_WRITE;
>       >+            }
>       >+
>       >+            entry->flags |= XEN_MAPCACHE_ENTRY_GRANT;
>       >+            assert(vaddr == NULL);
>       >+            vaddr_base = xengnttab_map_domain_grant_refs(xen_region_gnttabdev,
>       >+                                                         nb_pfn,
>       >+                                                         xen_domid, refs,
>       >+                                                         prot);
>       >+        } else {
>       >+            /*
>       >+             * If the caller has requested the mapping at a specific address use
>       >+             * MAP_FIXED to make sure it's honored.
>       >+             *
>       >+             * We don't yet support upgrading mappings from RO to RW, to handle
>       >+             * models using ordinary address_space_rw(), foreign mappings ignore
>       >+             * is_write and are always mapped RW.
>       >+             */
>       >+            vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr,
>       >+                                               PROT_READ | PROT_WRITE,
>       >+                                               vaddr ? MAP_FIXED : 0,
>       >+                                               nb_pfn, pfns, err);
>       >+        }
>       >         if (vaddr_base == NULL) {
>       >-            perror("xenforeignmemory_map2");
>       >+            perror(grant ? "xengnttab_map_domain_grant_refs"
>       >+                           : "xenforeignmemory_map2");
>       >             exit(-1);
>       >         }
>       >     } else {
>       >@@ -261,6 +319,7 @@ static void xen_remap_bucket(MapCache *mc,
>       >         }
>       >     }
>       >
>       >+    g_free(refs);
>       >     g_free(pfns);
>       >     g_free(err);
>       > }
>       >@@ -268,7 +327,8 @@ static void xen_remap_bucket(MapCache *mc,
>       > static uint8_t *xen_map_cache_unlocked(MapCache *mc,
>       >                                        hwaddr phys_addr, hwaddr size,
>       >                                        ram_addr_t ram_offset,
>       >-                                       uint8_t lock, bool dma, bool is_write)
>       >+                                       uint8_t lock, bool dma,
>       >+                                       bool grant, bool is_write)
>       > {
>       >     MapCacheEntry *entry, *pentry = NULL,
>       >                   *free_entry = NULL, *free_pentry = NULL;
>       >@@ -340,7 +400,7 @@ tryagain:
>       >         entry = g_new0(MapCacheEntry, 1);
>       >         pentry->next = entry;
>       >         xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy,
>       >-                         ram_offset);
>       >+                         grant, is_write, ram_offset);
>       >     } else if (!entry->lock) {
>       >         if (!entry->vaddr_base || entry->paddr_index != address_index ||
>       >                 entry->size != cache_size ||
>       >@@ -348,7 +408,7 @@ tryagain:
>       >                     test_bit_size >> XC_PAGE_SHIFT,
>       >                     entry->valid_mapping)) {
>       >             xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy,
>       >-                             ram_offset);
>       >+                             grant, is_write, ram_offset);
>       >         }
>       >     }
>       >
>       >@@ -399,12 +459,28 @@ uint8_t *xen_map_cache(MemoryRegion *mr,
>       >                        uint8_t lock, bool dma,
>       >                        bool is_write)
>       > {
>       >+    bool grant = xen_mr_is_grants(mr);
>       >+    MapCache *mc = grant ? mapcache_grants : mapcache;
>       >     uint8_t *p;
>       >
>       >-    mapcache_lock(mapcache);
>       >-    p = xen_map_cache_unlocked(mapcache, phys_addr, size, ram_addr_offset,
>       >-                               lock, dma, is_write);
>       >-    mapcache_unlock(mapcache);
>       >+    if (grant) {
>       >+        /*
>       >+         * Grants are only supported via address_space_map(). Anything
>       >+         * else is considered a user/guest error.
>       >+         *
>       >+         * QEMU generally doesn't expect these mappings to ever fail, so
>       >+         * if this happens we report an error message and abort().
>       >+         */
>       >+        if (!lock) {
> 
>       Nested if conditions that can be flattened, i.e. this could be
> 
>       if (grant && !lock)
> 
> 
> 
> Sounds good, will flatten this in the next version.
>  
> 
>       >+            error_report("Trying access a grant reference without mapping it.");
> 
>       s/Trying access a grant/Tried to access a grant/
> 
> 
> Will fix it, thanks!

Please retain my ack
diff mbox series

Patch

diff --git a/hw/xen/xen-hvm-common.c b/hw/xen/xen-hvm-common.c
index a0a0252da0..b8ace1c368 100644
--- a/hw/xen/xen-hvm-common.c
+++ b/hw/xen/xen-hvm-common.c
@@ -10,12 +10,18 @@ 
 #include "hw/boards.h"
 #include "hw/xen/arch_hvm.h"
 
-MemoryRegion xen_memory;
+MemoryRegion xen_memory, xen_grants;
 
-/* Check for xen memory.  */
+/* Check for any kind of xen memory, foreign mappings or grants.  */
 bool xen_mr_is_memory(MemoryRegion *mr)
 {
-    return mr == &xen_memory;
+    return mr == &xen_memory || mr == &xen_grants;
+}
+
+/* Check specifically for grants.  */
+bool xen_mr_is_grants(MemoryRegion *mr)
+{
+    return mr == &xen_grants;
 }
 
 void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr,
diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
index a07c47b0b1..1cbc2aeaa9 100644
--- a/hw/xen/xen-mapcache.c
+++ b/hw/xen/xen-mapcache.c
@@ -14,6 +14,7 @@ 
 
 #include <sys/resource.h>
 
+#include "hw/xen/xen-hvm-common.h"
 #include "hw/xen/xen_native.h"
 #include "qemu/bitmap.h"
 
@@ -21,6 +22,8 @@ 
 #include "sysemu/xen-mapcache.h"
 #include "trace.h"
 
+#include <xenevtchn.h>
+#include <xengnttab.h>
 
 #if HOST_LONG_BITS == 32
 #  define MCACHE_MAX_SIZE     (1UL<<31) /* 2GB Cap */
@@ -41,6 +44,7 @@  typedef struct MapCacheEntry {
     unsigned long *valid_mapping;
     uint32_t lock;
 #define XEN_MAPCACHE_ENTRY_DUMMY (1 << 0)
+#define XEN_MAPCACHE_ENTRY_GRANT (1 << 1)
     uint8_t flags;
     hwaddr size;
     struct MapCacheEntry *next;
@@ -71,6 +75,8 @@  typedef struct MapCache {
 } MapCache;
 
 static MapCache *mapcache;
+static MapCache *mapcache_grants;
+static xengnttab_handle *xen_region_gnttabdev;
 
 static inline void mapcache_lock(MapCache *mc)
 {
@@ -131,6 +137,12 @@  void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque)
     unsigned long max_mcache_size;
     unsigned int bucket_shift;
 
+    xen_region_gnttabdev = xengnttab_open(NULL, 0);
+    if (xen_region_gnttabdev == NULL) {
+        error_report("mapcache: Failed to open gnttab device");
+        exit(EXIT_FAILURE);
+    }
+
     if (HOST_LONG_BITS == 32) {
         bucket_shift = 16;
     } else {
@@ -159,6 +171,15 @@  void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque)
     mapcache = xen_map_cache_init_single(f, opaque,
                                          bucket_shift,
                                          max_mcache_size);
+
+    /*
+     * Grant mappings must use XC_PAGE_SIZE granularity since we can't
+     * map anything beyond the number of pages granted to us.
+     */
+    mapcache_grants = xen_map_cache_init_single(f, opaque,
+                                                XC_PAGE_SHIFT,
+                                                max_mcache_size);
+
     setrlimit(RLIMIT_AS, &rlimit_as);
 }
 
@@ -168,17 +189,24 @@  static void xen_remap_bucket(MapCache *mc,
                              hwaddr size,
                              hwaddr address_index,
                              bool dummy,
+                             bool grant,
+                             bool is_write,
                              ram_addr_t ram_offset)
 {
     uint8_t *vaddr_base;
-    xen_pfn_t *pfns;
+    uint32_t *refs = NULL;
+    xen_pfn_t *pfns = NULL;
     int *err;
     unsigned int i;
     hwaddr nb_pfn = size >> XC_PAGE_SHIFT;
 
     trace_xen_remap_bucket(address_index);
 
-    pfns = g_new0(xen_pfn_t, nb_pfn);
+    if (grant) {
+        refs = g_new0(uint32_t, nb_pfn);
+    } else {
+        pfns = g_new0(xen_pfn_t, nb_pfn);
+    }
     err = g_new0(int, nb_pfn);
 
     if (entry->vaddr_base != NULL) {
@@ -207,21 +235,51 @@  static void xen_remap_bucket(MapCache *mc,
     g_free(entry->valid_mapping);
     entry->valid_mapping = NULL;
 
-    for (i = 0; i < nb_pfn; i++) {
-        pfns[i] = (address_index << (mc->bucket_shift - XC_PAGE_SHIFT)) + i;
+    if (grant) {
+        hwaddr grant_base = address_index - (ram_offset >> XC_PAGE_SHIFT);
+
+        for (i = 0; i < nb_pfn; i++) {
+            refs[i] = grant_base + i;
+        }
+    } else {
+        for (i = 0; i < nb_pfn; i++) {
+            pfns[i] = (address_index << (mc->bucket_shift - XC_PAGE_SHIFT)) + i;
+        }
     }
 
-    /*
-     * If the caller has requested the mapping at a specific address use
-     * MAP_FIXED to make sure it's honored.
-     */
+    entry->flags &= ~XEN_MAPCACHE_ENTRY_GRANT;
+
     if (!dummy) {
-        vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr,
-                                           PROT_READ | PROT_WRITE,
-                                           vaddr ? MAP_FIXED : 0,
-                                           nb_pfn, pfns, err);
+        if (grant) {
+            int prot = PROT_READ;
+
+            if (is_write) {
+                prot |= PROT_WRITE;
+            }
+
+            entry->flags |= XEN_MAPCACHE_ENTRY_GRANT;
+            assert(vaddr == NULL);
+            vaddr_base = xengnttab_map_domain_grant_refs(xen_region_gnttabdev,
+                                                         nb_pfn,
+                                                         xen_domid, refs,
+                                                         prot);
+        } else {
+            /*
+             * If the caller has requested the mapping at a specific address use
+             * MAP_FIXED to make sure it's honored.
+             *
+             * We don't yet support upgrading mappings from RO to RW, to handle
+             * models using ordinary address_space_rw(), foreign mappings ignore
+             * is_write and are always mapped RW.
+             */
+            vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr,
+                                               PROT_READ | PROT_WRITE,
+                                               vaddr ? MAP_FIXED : 0,
+                                               nb_pfn, pfns, err);
+        }
         if (vaddr_base == NULL) {
-            perror("xenforeignmemory_map2");
+            perror(grant ? "xengnttab_map_domain_grant_refs"
+                           : "xenforeignmemory_map2");
             exit(-1);
         }
     } else {
@@ -261,6 +319,7 @@  static void xen_remap_bucket(MapCache *mc,
         }
     }
 
+    g_free(refs);
     g_free(pfns);
     g_free(err);
 }
@@ -268,7 +327,8 @@  static void xen_remap_bucket(MapCache *mc,
 static uint8_t *xen_map_cache_unlocked(MapCache *mc,
                                        hwaddr phys_addr, hwaddr size,
                                        ram_addr_t ram_offset,
-                                       uint8_t lock, bool dma, bool is_write)
+                                       uint8_t lock, bool dma,
+                                       bool grant, bool is_write)
 {
     MapCacheEntry *entry, *pentry = NULL,
                   *free_entry = NULL, *free_pentry = NULL;
@@ -340,7 +400,7 @@  tryagain:
         entry = g_new0(MapCacheEntry, 1);
         pentry->next = entry;
         xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy,
-                         ram_offset);
+                         grant, is_write, ram_offset);
     } else if (!entry->lock) {
         if (!entry->vaddr_base || entry->paddr_index != address_index ||
                 entry->size != cache_size ||
@@ -348,7 +408,7 @@  tryagain:
                     test_bit_size >> XC_PAGE_SHIFT,
                     entry->valid_mapping)) {
             xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy,
-                             ram_offset);
+                             grant, is_write, ram_offset);
         }
     }
 
@@ -399,12 +459,28 @@  uint8_t *xen_map_cache(MemoryRegion *mr,
                        uint8_t lock, bool dma,
                        bool is_write)
 {
+    bool grant = xen_mr_is_grants(mr);
+    MapCache *mc = grant ? mapcache_grants : mapcache;
     uint8_t *p;
 
-    mapcache_lock(mapcache);
-    p = xen_map_cache_unlocked(mapcache, phys_addr, size, ram_addr_offset,
-                               lock, dma, is_write);
-    mapcache_unlock(mapcache);
+    if (grant) {
+        /*
+         * Grants are only supported via address_space_map(). Anything
+         * else is considered a user/guest error.
+         *
+         * QEMU generally doesn't expect these mappings to ever fail, so
+         * if this happens we report an error message and abort().
+         */
+        if (!lock) {
+            error_report("Trying access a grant reference without mapping it.");
+            abort();
+        }
+    }
+
+    mapcache_lock(mc);
+    p = xen_map_cache_unlocked(mc, phys_addr, size, ram_addr_offset,
+                               lock, dma, grant, is_write);
+    mapcache_unlock(mc);
     return p;
 }
 
@@ -449,7 +525,14 @@  static ram_addr_t xen_ram_addr_from_mapcache_single(MapCache *mc, void *ptr)
 
 ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
 {
-    return xen_ram_addr_from_mapcache_single(mapcache, ptr);
+    ram_addr_t addr;
+
+    addr = xen_ram_addr_from_mapcache_single(mapcache, ptr);
+    if (addr == RAM_ADDR_INVALID) {
+        addr = xen_ram_addr_from_mapcache_single(mapcache_grants, ptr);
+    }
+
+    return addr;
 }
 
 static void xen_invalidate_map_cache_entry_unlocked(MapCache *mc,
@@ -460,6 +543,7 @@  static void xen_invalidate_map_cache_entry_unlocked(MapCache *mc,
     hwaddr paddr_index;
     hwaddr size;
     int found = 0;
+    int rc;
 
     QTAILQ_FOREACH(reventry, &mc->locked_entries, next) {
         if (reventry->vaddr_req == buffer) {
@@ -502,7 +586,14 @@  static void xen_invalidate_map_cache_entry_unlocked(MapCache *mc,
     }
 
     ram_block_notify_remove(entry->vaddr_base, entry->size, entry->size);
-    if (munmap(entry->vaddr_base, entry->size) != 0) {
+    if (entry->flags & XEN_MAPCACHE_ENTRY_GRANT) {
+        rc = xengnttab_unmap(xen_region_gnttabdev, entry->vaddr_base,
+                             entry->size >> mc->bucket_shift);
+    } else {
+        rc = munmap(entry->vaddr_base, entry->size);
+    }
+
+    if (rc) {
         perror("unmap fails");
         exit(-1);
     }
@@ -521,14 +612,24 @@  typedef struct XenMapCacheData {
     uint8_t *buffer;
 } XenMapCacheData;
 
+static void xen_invalidate_map_cache_entry_single(MapCache *mc, uint8_t *buffer)
+{
+    mapcache_lock(mc);
+    xen_invalidate_map_cache_entry_unlocked(mc, buffer);
+    mapcache_unlock(mc);
+}
+
+static void xen_invalidate_map_cache_entry_all(uint8_t *buffer)
+{
+    xen_invalidate_map_cache_entry_single(mapcache, buffer);
+    xen_invalidate_map_cache_entry_single(mapcache_grants, buffer);
+}
+
 static void xen_invalidate_map_cache_entry_bh(void *opaque)
 {
     XenMapCacheData *data = opaque;
 
-    mapcache_lock(mapcache);
-    xen_invalidate_map_cache_entry_unlocked(mapcache, data->buffer);
-    mapcache_unlock(mapcache);
-
+    xen_invalidate_map_cache_entry_all(data->buffer);
     aio_co_wake(data->co);
 }
 
@@ -543,9 +644,7 @@  void coroutine_mixed_fn xen_invalidate_map_cache_entry(uint8_t *buffer)
                                 xen_invalidate_map_cache_entry_bh, &data);
         qemu_coroutine_yield();
     } else {
-        mapcache_lock(mapcache);
-        xen_invalidate_map_cache_entry_unlocked(mapcache, buffer);
-        mapcache_unlock(mapcache);
+        xen_invalidate_map_cache_entry_all(buffer);
     }
 }
 
@@ -597,6 +696,7 @@  void xen_invalidate_map_cache(void)
     bdrv_drain_all();
 
     xen_invalidate_map_cache_single(mapcache);
+    xen_invalidate_map_cache_single(mapcache_grants);
 }
 
 static uint8_t *xen_replace_cache_entry_unlocked(MapCache *mc,
@@ -632,13 +732,16 @@  static uint8_t *xen_replace_cache_entry_unlocked(MapCache *mc,
         return NULL;
     }
 
+    assert((entry->flags & XEN_MAPCACHE_ENTRY_GRANT) == 0);
+
     address_index  = new_phys_addr >> mc->bucket_shift;
     address_offset = new_phys_addr & (mc->bucket_size - 1);
 
     trace_xen_replace_cache_entry_dummy(old_phys_addr, new_phys_addr);
 
     xen_remap_bucket(mc, entry, entry->vaddr_base,
-                     cache_size, address_index, false, old_phys_addr);
+                     cache_size, address_index, false,
+                     false, false, old_phys_addr);
     if (!test_bits(address_offset >> XC_PAGE_SHIFT,
                 test_bit_size >> XC_PAGE_SHIFT,
                 entry->valid_mapping)) {
diff --git a/include/hw/xen/xen-hvm-common.h b/include/hw/xen/xen-hvm-common.h
index 65a51aac2e..3d796235dc 100644
--- a/include/hw/xen/xen-hvm-common.h
+++ b/include/hw/xen/xen-hvm-common.h
@@ -16,6 +16,7 @@ 
 #include <xen/hvm/ioreq.h>
 
 extern MemoryRegion xen_memory;
+extern MemoryRegion xen_grants;
 extern MemoryListener xen_io_listener;
 extern DeviceListener xen_device_listener;
 
@@ -29,6 +30,8 @@  extern DeviceListener xen_device_listener;
     do { } while (0)
 #endif
 
+#define XEN_GRANT_ADDR_OFF (1ULL << 63)
+
 static inline uint32_t xen_vcpu_eport(shared_iopage_t *shared_page, int i)
 {
     return shared_page->vcpu_ioreq[i].vp_eport;
diff --git a/include/sysemu/xen.h b/include/sysemu/xen.h
index dc72f83bcb..19dccf4d71 100644
--- a/include/sysemu/xen.h
+++ b/include/sysemu/xen.h
@@ -35,6 +35,7 @@  void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size,
                    struct MemoryRegion *mr, Error **errp);
 
 bool xen_mr_is_memory(MemoryRegion *mr);
+bool xen_mr_is_grants(MemoryRegion *mr);
 
 #else /* !CONFIG_XEN_IS_POSSIBLE */
 
@@ -55,6 +56,12 @@  static inline bool xen_mr_is_memory(MemoryRegion *mr)
     return false;
 }
 
+static inline bool xen_mr_is_grants(MemoryRegion *mr)
+{
+    g_assert_not_reached();
+    return false;
+}
+
 #endif /* CONFIG_XEN_IS_POSSIBLE */
 
 #endif