
[v2,14/18] x86/mm: introduce per-vCPU L3 page-table

Message ID 20250108142659.99490-15-roger.pau@citrix.com
State New
Series x86: adventures in Address Space Isolation

Commit Message

Roger Pau Monné Jan. 8, 2025, 2:26 p.m. UTC
Such a table is to be used in the L4 per-domain slot when running with Address
Space Isolation enabled for the domain.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
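Editor's note: every per-domain table access touched by this patch repeats the
same selection pattern, which boils down to the sketch below (illustrative
only: the perdomain_l3() helper is hypothetical and does not exist in the
patch; d->arch.vcpu_pt is the per-vCPU page-table flag introduced earlier in
the series):

    /*
     * Hypothetical helper, not part of the patch: pick the L3 backing the
     * per-domain slot.  With ASI enabled each vCPU carries its own L3;
     * otherwise the single domain-wide L3 is shared by all vCPUs.
     */
    static struct page_info *perdomain_l3(const struct vcpu *v)
    {
        const struct domain *d = v->domain;

        return d->arch.vcpu_pt ? v->arch.pervcpu_l3_pg
                               : d->arch.perdomain_l3_pg;
    }

The patch open-codes this ternary at each access site rather than introducing
such a helper.
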
 xen/arch/x86/include/asm/domain.h |  3 +++
 xen/arch/x86/include/asm/mm.h     |  2 +-
 xen/arch/x86/mm.c                 | 45 ++++++++++++++++++++++---------
 xen/arch/x86/mm/hap/hap.c         |  2 +-
 xen/arch/x86/mm/shadow/hvm.c      |  2 +-
 xen/arch/x86/mm/shadow/multi.c    |  2 +-
 xen/arch/x86/pv/dom0_build.c      |  2 +-
 xen/arch/x86/pv/domain.c          |  2 +-
 8 files changed, 41 insertions(+), 19 deletions(-)

Patch

diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
index fb92a10bf3b7..5bf0ad3fdcf7 100644
--- a/xen/arch/x86/include/asm/domain.h
+++ b/xen/arch/x86/include/asm/domain.h
@@ -666,6 +666,9 @@  struct arch_vcpu
 
     struct vcpu_msrs *msrs;
 
+    /* ASI: per-vCPU L3 table to use in the L4 per-domain slot. */
+    struct page_info *pervcpu_l3_pg;
+
     struct {
         bool next_interrupt_enabled;
     } monitor;
diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h
index f501e5e115ff..f79d1594fde4 100644
--- a/xen/arch/x86/include/asm/mm.h
+++ b/xen/arch/x86/include/asm/mm.h
@@ -375,7 +375,7 @@  int devalidate_page(struct page_info *page, unsigned long type,
 
 void init_xen_pae_l2_slots(l2_pgentry_t *l2t, const struct domain *d);
 void init_xen_l4_slots(l4_pgentry_t *l4t, mfn_t l4mfn,
-                       const struct domain *d, mfn_t sl4mfn, bool ro_mpt);
+                       const struct vcpu *v, mfn_t sl4mfn, bool ro_mpt);
 bool fill_ro_mpt(mfn_t mfn);
 void zap_ro_mpt(mfn_t mfn);
 
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 49403196d56e..583bf4c58bf9 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1658,8 +1658,9 @@  static int promote_l3_table(struct page_info *page)
  * extended directmap.
  */
 void init_xen_l4_slots(l4_pgentry_t *l4t, mfn_t l4mfn,
-                       const struct domain *d, mfn_t sl4mfn, bool ro_mpt)
+                       const struct vcpu *v, mfn_t sl4mfn, bool ro_mpt)
 {
+    const struct domain *d = v->domain;
     /*
      * PV vcpus need a shortened directmap.  HVM and Idle vcpus get the full
      * directmap.
@@ -1687,7 +1688,9 @@  void init_xen_l4_slots(l4_pgentry_t *l4t, mfn_t l4mfn,
 
     /* Slot 260: Per-domain mappings. */
     l4t[l4_table_offset(PERDOMAIN_VIRT_START)] =
-        l4e_from_page(d->arch.perdomain_l3_pg, __PAGE_HYPERVISOR_RW);
+        l4e_from_page(d->arch.vcpu_pt ? v->arch.pervcpu_l3_pg
+                                      : d->arch.perdomain_l3_pg,
+                      __PAGE_HYPERVISOR_RW);
 
     /* Slot 4: Per-domain mappings mirror. */
     BUILD_BUG_ON(IS_ENABLED(CONFIG_PV32) &&
@@ -1842,8 +1845,15 @@  static int promote_l4_table(struct page_info *page)
 
     if ( !rc )
     {
+        /*
+         * Use vCPU#0 unconditionally.  When not running with ASI enabled the
+         * per-domain table is shared between all vCPUs, so it doesn't matter
+         * which vCPU gets passed to init_xen_l4_slots().  When running with
+         * ASI enabled this L4 will not be used, as a shadow per-vCPU L4 is
+         * used instead.
+         */
         init_xen_l4_slots(pl4e, l4mfn,
-                          d, INVALID_MFN, VM_ASSIST(d, m2p_strict));
+                          d->vcpu[0], INVALID_MFN, VM_ASSIST(d, m2p_strict));
         atomic_inc(&d->arch.pv.nr_l4_pages);
     }
     unmap_domain_page(pl4e);
@@ -6313,14 +6323,17 @@  int create_perdomain_mapping(struct vcpu *v, unsigned long va,
     ASSERT(va >= PERDOMAIN_VIRT_START &&
            va < PERDOMAIN_VIRT_SLOT(PERDOMAIN_SLOTS));
 
-    if ( !d->arch.perdomain_l3_pg )
+    if ( !v->arch.pervcpu_l3_pg && !d->arch.perdomain_l3_pg )
     {
         pg = alloc_domheap_page(d, MEMF_no_owner);
         if ( !pg )
             return -ENOMEM;
         l3tab = __map_domain_page(pg);
         clear_page(l3tab);
-        d->arch.perdomain_l3_pg = pg;
+        if ( d->arch.vcpu_pt )
+            v->arch.pervcpu_l3_pg = pg;
+        else
+            d->arch.perdomain_l3_pg = pg;
         if ( !nr )
         {
             unmap_domain_page(l3tab);
@@ -6330,7 +6343,8 @@  int create_perdomain_mapping(struct vcpu *v, unsigned long va,
     else if ( !nr )
         return 0;
     else
-        l3tab = __map_domain_page(d->arch.perdomain_l3_pg);
+        l3tab = __map_domain_page(d->arch.vcpu_pt ? v->arch.pervcpu_l3_pg
+                                                  : d->arch.perdomain_l3_pg);
 
     ASSERT(!l3_table_offset(va ^ (va + nr * PAGE_SIZE - 1)));
 
@@ -6436,8 +6450,9 @@  void populate_perdomain_mapping(const struct vcpu *v, unsigned long va,
         return;
     }
 
-    ASSERT(d->arch.perdomain_l3_pg);
-    l3tab = __map_domain_page(d->arch.perdomain_l3_pg);
+    ASSERT(d->arch.perdomain_l3_pg || v->arch.pervcpu_l3_pg);
+    l3tab = __map_domain_page(d->arch.vcpu_pt ? v->arch.pervcpu_l3_pg
+                                              : d->arch.perdomain_l3_pg);
 
     if ( unlikely(!(l3e_get_flags(l3tab[l3_table_offset(va)]) &
                     _PAGE_PRESENT)) )
@@ -6498,7 +6513,7 @@  void destroy_perdomain_mapping(const struct vcpu *v, unsigned long va,
            va < PERDOMAIN_VIRT_SLOT(PERDOMAIN_SLOTS));
     ASSERT(!nr || !l3_table_offset(va ^ (va + nr * PAGE_SIZE - 1)));
 
-    if ( !d->arch.perdomain_l3_pg )
+    if ( !d->arch.perdomain_l3_pg && !v->arch.pervcpu_l3_pg )
         return;
 
     /* Use likely to force the optimization for the fast path. */
@@ -6522,7 +6537,8 @@  void destroy_perdomain_mapping(const struct vcpu *v, unsigned long va,
         return;
     }
 
-    l3tab = __map_domain_page(d->arch.perdomain_l3_pg);
+    l3tab = __map_domain_page(d->arch.vcpu_pt ? v->arch.pervcpu_l3_pg
+                                              : d->arch.perdomain_l3_pg);
     pl3e = l3tab + l3_table_offset(va);
 
     if ( l3e_get_flags(*pl3e) & _PAGE_PRESENT )
@@ -6567,10 +6583,11 @@  void free_perdomain_mappings(struct vcpu *v)
     l3_pgentry_t *l3tab;
     unsigned int i;
 
-    if ( !d->arch.perdomain_l3_pg )
+    if ( !v->arch.pervcpu_l3_pg && !d->arch.perdomain_l3_pg )
         return;
 
-    l3tab = __map_domain_page(d->arch.perdomain_l3_pg);
+    l3tab = __map_domain_page(d->arch.vcpu_pt ? v->arch.pervcpu_l3_pg
+                                              : d->arch.perdomain_l3_pg);
 
     for ( i = 0; i < PERDOMAIN_SLOTS; ++i)
         if ( l3e_get_flags(l3tab[i]) & _PAGE_PRESENT )
@@ -6604,8 +6621,10 @@  void free_perdomain_mappings(struct vcpu *v)
         }
 
     unmap_domain_page(l3tab);
-    free_domheap_page(d->arch.perdomain_l3_pg);
+    free_domheap_page(d->arch.vcpu_pt ? v->arch.pervcpu_l3_pg
+                                      : d->arch.perdomain_l3_pg);
     d->arch.perdomain_l3_pg = NULL;
+    v->arch.pervcpu_l3_pg = NULL;
 }
 
 static void write_sss_token(unsigned long *ptr)
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index ec5043a8aa9e..c7d9bf7c71bf 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -402,7 +402,7 @@  static mfn_t hap_make_monitor_table(struct vcpu *v)
     m4mfn = page_to_mfn(pg);
     l4e = map_domain_page(m4mfn);
 
-    init_xen_l4_slots(l4e, m4mfn, d, INVALID_MFN, false);
+    init_xen_l4_slots(l4e, m4mfn, v, INVALID_MFN, false);
     unmap_domain_page(l4e);
 
     return m4mfn;
diff --git a/xen/arch/x86/mm/shadow/hvm.c b/xen/arch/x86/mm/shadow/hvm.c
index 114957a3e1ec..d588dbbae003 100644
--- a/xen/arch/x86/mm/shadow/hvm.c
+++ b/xen/arch/x86/mm/shadow/hvm.c
@@ -776,7 +776,7 @@  mfn_t sh_make_monitor_table(const struct vcpu *v, unsigned int shadow_levels)
      * shadow-linear mapping will either be inserted below when creating
      * lower level monitor tables, or later in sh_update_cr3().
      */
-    init_xen_l4_slots(l4e, m4mfn, d, INVALID_MFN, false);
+    init_xen_l4_slots(l4e, m4mfn, v, INVALID_MFN, false);
 
     if ( shadow_levels < 4 )
     {
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 10ddc408ff73..a1f8147e197a 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -973,7 +973,7 @@  sh_make_shadow(struct vcpu *v, mfn_t gmfn, u32 shadow_type)
 
             BUILD_BUG_ON(sizeof(l4_pgentry_t) != sizeof(shadow_l4e_t));
 
-            init_xen_l4_slots(l4t, gmfn, d, smfn, (!is_pv_32bit_domain(d) &&
+            init_xen_l4_slots(l4t, gmfn, v, smfn, (!is_pv_32bit_domain(d) &&
                                                    VM_ASSIST(d, m2p_strict)));
             unmap_domain_page(l4t);
         }
diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
index f54d1da5c6f4..5081c19b9a9a 100644
--- a/xen/arch/x86/pv/dom0_build.c
+++ b/xen/arch/x86/pv/dom0_build.c
@@ -737,7 +737,7 @@  static int __init dom0_construct(struct boot_info *bi, struct domain *d)
         l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
         clear_page(l4tab);
         init_xen_l4_slots(l4tab, _mfn(virt_to_mfn(l4start)),
-                          d, INVALID_MFN, true);
+                          d->vcpu[0], INVALID_MFN, true);
         v->arch.guest_table = pagetable_from_paddr(__pa(l4start));
     }
     else
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index 5bda168eadff..8d2428051607 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -125,7 +125,7 @@  static int setup_compat_l4(struct vcpu *v)
     mfn = page_to_mfn(pg);
     l4tab = map_domain_page(mfn);
     clear_page(l4tab);
-    init_xen_l4_slots(l4tab, mfn, v->domain, INVALID_MFN, false);
+    init_xen_l4_slots(l4tab, mfn, v, INVALID_MFN, false);
     unmap_domain_page(l4tab);
 
     /* This page needs to look like a pagetable so that it can be shadowed */