
[v2,15/18] x86/mm: introduce a per-vCPU mapcache when using ASI

Message ID 20250108142659.99490-16-roger.pau@citrix.com
State New
Series x86: adventures in Address Space Isolation

Commit Message

Roger Pau Monné Jan. 8, 2025, 2:26 p.m. UTC
When using a unique per-vCPU root page table the per-domain region becomes
per-vCPU, and hence the mapcache is no longer shared between all vCPUs of a
domain.  Introduce per-vCPU mapcache structures, and modify map_domain_page()
to create per-vCPU mappings when possible.  Note the lock is also not needed
when using per-vCPU mapcaches, as the structure is no longer shared.

This introduces some duplication in the domain and vcpu structures, as both
contain a mapcache field to support running with and without per-vCPU
page-tables.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/arch/x86/domain_page.c        | 90 ++++++++++++++++++++-----------
 xen/arch/x86/include/asm/domain.h | 20 ++++---
 2 files changed, 71 insertions(+), 39 deletions(-)
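
In essence, the patch factors the allocation state (entries, cursor, and the
inuse/garbage bitmaps) out of struct mapcache_domain into a common struct
mapcache, embeds one instance in both the per-domain and the per-vCPU mapcache
structures, and has map_domain_page() operate on whichever instance applies.
A condensed sketch of the idea (the mapcache_of() helper is purely
illustrative and does not exist in the patch):

    struct mapcache {
        /* The number of array entries, and a cursor into the array. */
        unsigned int entries, cursor;
        /* Which mappings are in use, and which are garbage to reap next epoch? */
        unsigned long *inuse, *garbage;
    };

    static struct mapcache *mapcache_of(struct vcpu *v)
    {
        struct domain *d = v->domain;

        /*
         * With per-vCPU page tables (ASI) the per-domain region is private
         * to each vCPU, so every vCPU carries its own cache and the
         * per-domain lock is no longer needed on this path.
         */
        return d->arch.vcpu_pt ? &v->arch.pv.mapcache.cache
                               : &d->arch.pv.mapcache.cache;
    }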

Comments

Alejandro Vallejo Jan. 9, 2025, 3:08 p.m. UTC | #1
On Wed Jan 8, 2025 at 2:26 PM GMT, Roger Pau Monne wrote:
> When using a unique per-vCPU root page table the per-domain region becomes
> per-vCPU, and hence the mapcache is no longer shared between all vCPUs of a
> domain.  Introduce per-vCPU mapcache structures, and modify map_domain_page()
> to create per-vCPU mappings when possible.  Note the lock is also not needed
> with using per-vCPU map caches, as the structure is no longer shared.
>
> This introduces some duplication in the domain and vcpu structures, as both
> contain a mapcache field to support running with and without per-vCPU
> page-tables.
>
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> ---
>  xen/arch/x86/domain_page.c        | 90 ++++++++++++++++++++-----------
>  xen/arch/x86/include/asm/domain.h | 20 ++++---
>  2 files changed, 71 insertions(+), 39 deletions(-)
>
> diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c
> index 1372be20224e..65900d6218f8 100644
> --- a/xen/arch/x86/domain_page.c
> +++ b/xen/arch/x86/domain_page.c
> @@ -74,7 +74,9 @@ void *map_domain_page(mfn_t mfn)
>      struct vcpu *v;
>      struct mapcache_domain *dcache;
>      struct mapcache_vcpu *vcache;
> +    struct mapcache *cache;
>      struct vcpu_maphash_entry *hashent;
> +    struct domain *d;
>  
>  #ifdef NDEBUG
>      if ( mfn_x(mfn) <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
> @@ -85,9 +87,12 @@ void *map_domain_page(mfn_t mfn)
>      if ( !v || !is_pv_vcpu(v) )
>          return mfn_to_virt(mfn_x(mfn));
>  
> -    dcache = &v->domain->arch.pv.mapcache;
> +    d = v->domain;
> +    dcache = &d->arch.pv.mapcache;
>      vcache = &v->arch.pv.mapcache;
> -    if ( !dcache->inuse )
> +    cache = d->arch.vcpu_pt ? &v->arch.pv.mapcache.cache
> +                            : &d->arch.pv.mapcache.cache;
> +    if ( !cache->inuse )
>          return mfn_to_virt(mfn_x(mfn));
>  
>      perfc_incr(map_domain_page_count);
> @@ -98,17 +103,18 @@ void *map_domain_page(mfn_t mfn)
>      if ( hashent->mfn == mfn_x(mfn) )
>      {
>          idx = hashent->idx;
> -        ASSERT(idx < dcache->entries);
> +        ASSERT(idx < cache->entries);
>          hashent->refcnt++;
>          ASSERT(hashent->refcnt);
>          ASSERT(mfn_eq(l1e_get_mfn(MAPCACHE_L1ENT(idx)), mfn));
>          goto out;
>      }
>  
> -    spin_lock(&dcache->lock);
> +    if ( !d->arch.vcpu_pt )
> +        spin_lock(&dcache->lock);

Hmmm. I wonder whether we might not want a nospec here...

>  
>      /* Has some other CPU caused a wrap? We must flush if so. */
> -    if ( unlikely(dcache->epoch != vcache->shadow_epoch) )
> +    if ( unlikely(!d->arch.vcpu_pt && dcache->epoch != vcache->shadow_epoch) )
>      {
>          vcache->shadow_epoch = dcache->epoch;
>          if ( NEED_FLUSH(this_cpu(tlbflush_time), dcache->tlbflush_timestamp) )
> @@ -118,21 +124,21 @@ void *map_domain_page(mfn_t mfn)
>          }
>      }
>  
> -    idx = find_next_zero_bit(dcache->inuse, dcache->entries, dcache->cursor);
> -    if ( unlikely(idx >= dcache->entries) )
> +    idx = find_next_zero_bit(cache->inuse, cache->entries, cache->cursor);
> +    if ( unlikely(idx >= cache->entries) )
>      {
>          unsigned long accum = 0, prev = 0;
>  
>          /* /First/, clean the garbage map and update the inuse list. */
> -        for ( i = 0; i < BITS_TO_LONGS(dcache->entries); i++ )
> +        for ( i = 0; i < BITS_TO_LONGS(cache->entries); i++ )
>          {
>              accum |= prev;
> -            dcache->inuse[i] &= ~xchg(&dcache->garbage[i], 0);
> -            prev = ~dcache->inuse[i];
> +            cache->inuse[i] &= ~xchg(&cache->garbage[i], 0);
> +            prev = ~cache->inuse[i];
>          }
>  
> -        if ( accum | (prev & BITMAP_LAST_WORD_MASK(dcache->entries)) )
> -            idx = find_first_zero_bit(dcache->inuse, dcache->entries);
> +        if ( accum | (prev & BITMAP_LAST_WORD_MASK(cache->entries)) )
> +            idx = find_first_zero_bit(cache->inuse, cache->entries);
>          else
>          {
>              /* Replace a hash entry instead. */
> @@ -152,19 +158,23 @@ void *map_domain_page(mfn_t mfn)
>                      i = 0;
>              } while ( i != MAPHASH_HASHFN(mfn_x(mfn)) );
>          }
> -        BUG_ON(idx >= dcache->entries);
> +        BUG_ON(idx >= cache->entries);
>  
>          /* /Second/, flush TLBs. */
>          perfc_incr(domain_page_tlb_flush);
>          flush_tlb_local();
> -        vcache->shadow_epoch = ++dcache->epoch;
> -        dcache->tlbflush_timestamp = tlbflush_current_time();
> +        if ( !d->arch.vcpu_pt )
> +        {
> +            vcache->shadow_epoch = ++dcache->epoch;
> +            dcache->tlbflush_timestamp = tlbflush_current_time();
> +        }
>      }
>  
> -    set_bit(idx, dcache->inuse);
> -    dcache->cursor = idx + 1;
> +    set_bit(idx, cache->inuse);
> +    cache->cursor = idx + 1;
>  
> -    spin_unlock(&dcache->lock);
> +    if ( !d->arch.vcpu_pt )
> +        spin_unlock(&dcache->lock);

... and here.

>  
>      l1e_write(&MAPCACHE_L1ENT(idx), l1e_from_mfn(mfn, __PAGE_HYPERVISOR_RW));
>  
> @@ -178,6 +188,7 @@ void unmap_domain_page(const void *ptr)
>      unsigned int idx;
>      struct vcpu *v;
>      struct mapcache_domain *dcache;
> +    struct mapcache *cache;
>      unsigned long va = (unsigned long)ptr, mfn, flags;
>      struct vcpu_maphash_entry *hashent;
>  
> @@ -190,7 +201,9 @@ void unmap_domain_page(const void *ptr)
>      ASSERT(v && is_pv_vcpu(v));
>  
>      dcache = &v->domain->arch.pv.mapcache;
> -    ASSERT(dcache->inuse);
> +    cache = v->domain->arch.vcpu_pt ? &v->arch.pv.mapcache.cache
> +                                    : &v->domain->arch.pv.mapcache.cache;
> +    ASSERT(cache->inuse);
>  
>      idx = PFN_DOWN(va - MAPCACHE_VIRT_START);
>      mfn = l1e_get_pfn(MAPCACHE_L1ENT(idx));
> @@ -213,7 +226,7 @@ void unmap_domain_page(const void *ptr)
>                     hashent->mfn);
>              l1e_write(&MAPCACHE_L1ENT(hashent->idx), l1e_empty());
>              /* /Second/, mark as garbage. */
> -            set_bit(hashent->idx, dcache->garbage);
> +            set_bit(hashent->idx, cache->garbage);
>          }
>  
>          /* Add newly-freed mapping to the maphash. */
> @@ -225,7 +238,7 @@ void unmap_domain_page(const void *ptr)
>          /* /First/, zap the PTE. */
>          l1e_write(&MAPCACHE_L1ENT(idx), l1e_empty());
>          /* /Second/, mark as garbage. */
> -        set_bit(idx, dcache->garbage);
> +        set_bit(idx, cache->garbage);
>      }
>  
>      local_irq_restore(flags);
> @@ -234,7 +247,6 @@ void unmap_domain_page(const void *ptr)
>  void mapcache_domain_init(struct domain *d)
>  {
>      struct mapcache_domain *dcache = &d->arch.pv.mapcache;
> -    unsigned int bitmap_pages;
>  
>      ASSERT(is_pv_domain(d));
>  
> @@ -243,13 +255,12 @@ void mapcache_domain_init(struct domain *d)
>          return;
>  #endif
>  
> +    if ( d->arch.vcpu_pt )
> +        return;
> +
>      BUILD_BUG_ON(MAPCACHE_VIRT_END + PAGE_SIZE * (3 +
>                   2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long))) >
>                   MAPCACHE_VIRT_START + (PERDOMAIN_SLOT_MBYTES << 20));
> -    bitmap_pages = PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long));
> -    dcache->inuse = (void *)MAPCACHE_VIRT_END + PAGE_SIZE;
> -    dcache->garbage = dcache->inuse +
> -                      (bitmap_pages + 1) * PAGE_SIZE / sizeof(long);
>  
>      spin_lock_init(&dcache->lock);
>  }
> @@ -258,30 +269,45 @@ int mapcache_vcpu_init(struct vcpu *v)
>  {
>      struct domain *d = v->domain;
>      struct mapcache_domain *dcache = &d->arch.pv.mapcache;
> +    struct mapcache *cache;
>      unsigned long i;
> -    unsigned int ents = d->max_vcpus * MAPCACHE_VCPU_ENTRIES;
> +    unsigned int ents = (d->arch.vcpu_pt ? 1 : d->max_vcpus) *
> +                        MAPCACHE_VCPU_ENTRIES;
>      unsigned int nr = PFN_UP(BITS_TO_LONGS(ents) * sizeof(long));
>  
> -    if ( !is_pv_vcpu(v) || !dcache->inuse )
> +    if ( !is_pv_vcpu(v) )
>          return 0;
>  
> -    if ( ents > dcache->entries )
> +    cache = d->arch.vcpu_pt ? &v->arch.pv.mapcache.cache
> +                            : &dcache->cache;
> +
> +    if ( !cache->inuse )
> +        return 0;
> +
> +    if ( ents > cache->entries )
>      {
>          /* Populate page tables. */
>          int rc = create_perdomain_mapping(v, MAPCACHE_VIRT_START, ents, false);
> +        const unsigned int bitmap_pages =
> +            PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long));
> +
> +        cache->inuse = (void *)MAPCACHE_VIRT_END + PAGE_SIZE;
> +        cache->garbage = cache->inuse +
> +                         (bitmap_pages + 1) * PAGE_SIZE / sizeof(long);
> +
>  
>          /* Populate bit maps. */
>          if ( !rc )
> -            rc = create_perdomain_mapping(v, (unsigned long)dcache->inuse,
> +            rc = create_perdomain_mapping(v, (unsigned long)cache->inuse,
>                                            nr, true);
>          if ( !rc )
> -            rc = create_perdomain_mapping(v, (unsigned long)dcache->garbage,
> +            rc = create_perdomain_mapping(v, (unsigned long)cache->garbage,
>                                            nr, true);
>  
>          if ( rc )
>              return rc;
>  
> -        dcache->entries = ents;
> +        cache->entries = ents;
>      }
>  
>      /* Mark all maphash entries as not in use. */

Cheers,
Alejandro
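
On the two "nospec" remarks above: the concern is presumably that a
mispredicted d->arch.vcpu_pt check could let a speculative path skip (or take)
the per-domain lock for the wrong kind of domain.  A minimal sketch of one way
the check could be hardened, assuming Xen's evaluate_nospec() from
xen/nospec.h — this is an illustration of the review comment, not part of the
patch:

    /*
     * Illustrative only: force the vcpu_pt check to be architecturally
     * resolved before acting on it, so a speculatively wrong outcome
     * cannot bypass the locking.
     */
    if ( !evaluate_nospec(d->arch.vcpu_pt) )
        spin_lock(&dcache->lock);

    /* ... allocate and install the mapcache entry ... */

    if ( !evaluate_nospec(d->arch.vcpu_pt) )
        spin_unlock(&dcache->lock);

The thread leaves open whether a barrier is actually warranted here.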

Patch

diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c
index 1372be20224e..65900d6218f8 100644
--- a/xen/arch/x86/domain_page.c
+++ b/xen/arch/x86/domain_page.c
@@ -74,7 +74,9 @@  void *map_domain_page(mfn_t mfn)
     struct vcpu *v;
     struct mapcache_domain *dcache;
     struct mapcache_vcpu *vcache;
+    struct mapcache *cache;
     struct vcpu_maphash_entry *hashent;
+    struct domain *d;
 
 #ifdef NDEBUG
     if ( mfn_x(mfn) <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
@@ -85,9 +87,12 @@  void *map_domain_page(mfn_t mfn)
     if ( !v || !is_pv_vcpu(v) )
         return mfn_to_virt(mfn_x(mfn));
 
-    dcache = &v->domain->arch.pv.mapcache;
+    d = v->domain;
+    dcache = &d->arch.pv.mapcache;
     vcache = &v->arch.pv.mapcache;
-    if ( !dcache->inuse )
+    cache = d->arch.vcpu_pt ? &v->arch.pv.mapcache.cache
+                            : &d->arch.pv.mapcache.cache;
+    if ( !cache->inuse )
         return mfn_to_virt(mfn_x(mfn));
 
     perfc_incr(map_domain_page_count);
@@ -98,17 +103,18 @@  void *map_domain_page(mfn_t mfn)
     if ( hashent->mfn == mfn_x(mfn) )
     {
         idx = hashent->idx;
-        ASSERT(idx < dcache->entries);
+        ASSERT(idx < cache->entries);
         hashent->refcnt++;
         ASSERT(hashent->refcnt);
         ASSERT(mfn_eq(l1e_get_mfn(MAPCACHE_L1ENT(idx)), mfn));
         goto out;
     }
 
-    spin_lock(&dcache->lock);
+    if ( !d->arch.vcpu_pt )
+        spin_lock(&dcache->lock);
 
     /* Has some other CPU caused a wrap? We must flush if so. */
-    if ( unlikely(dcache->epoch != vcache->shadow_epoch) )
+    if ( unlikely(!d->arch.vcpu_pt && dcache->epoch != vcache->shadow_epoch) )
     {
         vcache->shadow_epoch = dcache->epoch;
         if ( NEED_FLUSH(this_cpu(tlbflush_time), dcache->tlbflush_timestamp) )
@@ -118,21 +124,21 @@  void *map_domain_page(mfn_t mfn)
         }
     }
 
-    idx = find_next_zero_bit(dcache->inuse, dcache->entries, dcache->cursor);
-    if ( unlikely(idx >= dcache->entries) )
+    idx = find_next_zero_bit(cache->inuse, cache->entries, cache->cursor);
+    if ( unlikely(idx >= cache->entries) )
     {
         unsigned long accum = 0, prev = 0;
 
         /* /First/, clean the garbage map and update the inuse list. */
-        for ( i = 0; i < BITS_TO_LONGS(dcache->entries); i++ )
+        for ( i = 0; i < BITS_TO_LONGS(cache->entries); i++ )
         {
             accum |= prev;
-            dcache->inuse[i] &= ~xchg(&dcache->garbage[i], 0);
-            prev = ~dcache->inuse[i];
+            cache->inuse[i] &= ~xchg(&cache->garbage[i], 0);
+            prev = ~cache->inuse[i];
         }
 
-        if ( accum | (prev & BITMAP_LAST_WORD_MASK(dcache->entries)) )
-            idx = find_first_zero_bit(dcache->inuse, dcache->entries);
+        if ( accum | (prev & BITMAP_LAST_WORD_MASK(cache->entries)) )
+            idx = find_first_zero_bit(cache->inuse, cache->entries);
         else
         {
             /* Replace a hash entry instead. */
@@ -152,19 +158,23 @@  void *map_domain_page(mfn_t mfn)
                     i = 0;
             } while ( i != MAPHASH_HASHFN(mfn_x(mfn)) );
         }
-        BUG_ON(idx >= dcache->entries);
+        BUG_ON(idx >= cache->entries);
 
         /* /Second/, flush TLBs. */
         perfc_incr(domain_page_tlb_flush);
         flush_tlb_local();
-        vcache->shadow_epoch = ++dcache->epoch;
-        dcache->tlbflush_timestamp = tlbflush_current_time();
+        if ( !d->arch.vcpu_pt )
+        {
+            vcache->shadow_epoch = ++dcache->epoch;
+            dcache->tlbflush_timestamp = tlbflush_current_time();
+        }
     }
 
-    set_bit(idx, dcache->inuse);
-    dcache->cursor = idx + 1;
+    set_bit(idx, cache->inuse);
+    cache->cursor = idx + 1;
 
-    spin_unlock(&dcache->lock);
+    if ( !d->arch.vcpu_pt )
+        spin_unlock(&dcache->lock);
 
     l1e_write(&MAPCACHE_L1ENT(idx), l1e_from_mfn(mfn, __PAGE_HYPERVISOR_RW));
 
@@ -178,6 +188,7 @@  void unmap_domain_page(const void *ptr)
     unsigned int idx;
     struct vcpu *v;
     struct mapcache_domain *dcache;
+    struct mapcache *cache;
     unsigned long va = (unsigned long)ptr, mfn, flags;
     struct vcpu_maphash_entry *hashent;
 
@@ -190,7 +201,9 @@  void unmap_domain_page(const void *ptr)
     ASSERT(v && is_pv_vcpu(v));
 
     dcache = &v->domain->arch.pv.mapcache;
-    ASSERT(dcache->inuse);
+    cache = v->domain->arch.vcpu_pt ? &v->arch.pv.mapcache.cache
+                                    : &v->domain->arch.pv.mapcache.cache;
+    ASSERT(cache->inuse);
 
     idx = PFN_DOWN(va - MAPCACHE_VIRT_START);
     mfn = l1e_get_pfn(MAPCACHE_L1ENT(idx));
@@ -213,7 +226,7 @@  void unmap_domain_page(const void *ptr)
                    hashent->mfn);
             l1e_write(&MAPCACHE_L1ENT(hashent->idx), l1e_empty());
             /* /Second/, mark as garbage. */
-            set_bit(hashent->idx, dcache->garbage);
+            set_bit(hashent->idx, cache->garbage);
         }
 
         /* Add newly-freed mapping to the maphash. */
@@ -225,7 +238,7 @@  void unmap_domain_page(const void *ptr)
         /* /First/, zap the PTE. */
         l1e_write(&MAPCACHE_L1ENT(idx), l1e_empty());
         /* /Second/, mark as garbage. */
-        set_bit(idx, dcache->garbage);
+        set_bit(idx, cache->garbage);
     }
 
     local_irq_restore(flags);
@@ -234,7 +247,6 @@  void unmap_domain_page(const void *ptr)
 void mapcache_domain_init(struct domain *d)
 {
     struct mapcache_domain *dcache = &d->arch.pv.mapcache;
-    unsigned int bitmap_pages;
 
     ASSERT(is_pv_domain(d));
 
@@ -243,13 +255,12 @@  void mapcache_domain_init(struct domain *d)
         return;
 #endif
 
+    if ( d->arch.vcpu_pt )
+        return;
+
     BUILD_BUG_ON(MAPCACHE_VIRT_END + PAGE_SIZE * (3 +
                  2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long))) >
                  MAPCACHE_VIRT_START + (PERDOMAIN_SLOT_MBYTES << 20));
-    bitmap_pages = PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long));
-    dcache->inuse = (void *)MAPCACHE_VIRT_END + PAGE_SIZE;
-    dcache->garbage = dcache->inuse +
-                      (bitmap_pages + 1) * PAGE_SIZE / sizeof(long);
 
     spin_lock_init(&dcache->lock);
 }
@@ -258,30 +269,45 @@  int mapcache_vcpu_init(struct vcpu *v)
 {
     struct domain *d = v->domain;
     struct mapcache_domain *dcache = &d->arch.pv.mapcache;
+    struct mapcache *cache;
     unsigned long i;
-    unsigned int ents = d->max_vcpus * MAPCACHE_VCPU_ENTRIES;
+    unsigned int ents = (d->arch.vcpu_pt ? 1 : d->max_vcpus) *
+                        MAPCACHE_VCPU_ENTRIES;
     unsigned int nr = PFN_UP(BITS_TO_LONGS(ents) * sizeof(long));
 
-    if ( !is_pv_vcpu(v) || !dcache->inuse )
+    if ( !is_pv_vcpu(v) )
         return 0;
 
-    if ( ents > dcache->entries )
+    cache = d->arch.vcpu_pt ? &v->arch.pv.mapcache.cache
+                            : &dcache->cache;
+
+    if ( !cache->inuse )
+        return 0;
+
+    if ( ents > cache->entries )
     {
         /* Populate page tables. */
         int rc = create_perdomain_mapping(v, MAPCACHE_VIRT_START, ents, false);
+        const unsigned int bitmap_pages =
+            PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long));
+
+        cache->inuse = (void *)MAPCACHE_VIRT_END + PAGE_SIZE;
+        cache->garbage = cache->inuse +
+                         (bitmap_pages + 1) * PAGE_SIZE / sizeof(long);
+
 
         /* Populate bit maps. */
         if ( !rc )
-            rc = create_perdomain_mapping(v, (unsigned long)dcache->inuse,
+            rc = create_perdomain_mapping(v, (unsigned long)cache->inuse,
                                           nr, true);
         if ( !rc )
-            rc = create_perdomain_mapping(v, (unsigned long)dcache->garbage,
+            rc = create_perdomain_mapping(v, (unsigned long)cache->garbage,
                                           nr, true);
 
         if ( rc )
             return rc;
 
-        dcache->entries = ents;
+        cache->entries = ents;
     }
 
     /* Mark all maphash entries as not in use. */
diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
index 5bf0ad3fdcf7..ba5440099d90 100644
--- a/xen/arch/x86/include/asm/domain.h
+++ b/xen/arch/x86/include/asm/domain.h
@@ -41,6 +41,16 @@  struct trap_bounce {
     unsigned long eip;
 };
 
+struct mapcache {
+    /* The number of array entries, and a cursor into the array. */
+    unsigned int entries;
+    unsigned int cursor;
+
+    /* Which mappings are in use, and which are garbage to reap next epoch? */
+    unsigned long *inuse;
+    unsigned long *garbage;
+};
+
 #define MAPHASH_ENTRIES 8
 #define MAPHASH_HASHFN(pfn) ((pfn) & (MAPHASH_ENTRIES-1))
 #define MAPHASHENT_NOTINUSE ((u32)~0U)
@@ -54,13 +64,11 @@  struct mapcache_vcpu {
         uint32_t      idx;
         uint32_t      refcnt;
     } hash[MAPHASH_ENTRIES];
+
+    struct mapcache cache;
 };
 
 struct mapcache_domain {
-    /* The number of array entries, and a cursor into the array. */
-    unsigned int entries;
-    unsigned int cursor;
-
     /* Protects map_domain_page(). */
     spinlock_t lock;
 
@@ -68,9 +76,7 @@  struct mapcache_domain {
     unsigned int epoch;
     u32 tlbflush_timestamp;
 
-    /* Which mappings are in use, and which are garbage to reap next epoch? */
-    unsigned long *inuse;
-    unsigned long *garbage;
+    struct mapcache cache;
 };
 
 void mapcache_domain_init(struct domain *d);