@@ -17,6 +17,7 @@
#include <asm/nops.h>
#include <asm/page.h>
#include <asm/pv/domain.h>
+#include <asm/pv/mm.h>
#include <asm/spec_ctrl.h>
/* Debug builds: Wrap frequently to stress-test the wrap logic. */
@@ -192,7 +193,28 @@ unsigned int flush_area_local(const void *va, unsigned int flags)
unsigned int order = (flags - 1) & FLUSH_ORDER_MASK;
if ( flags & FLUSH_ROOT_PGTBL )
+ {
get_cpu_info()->root_pgt_changed = true;
+ /*
+  * Use opt_vcpu_pt_pv instead of current->domain->arch.vcpu_pt to avoid
+  * doing a sync_local_execstate() when per-vCPU page-tables are not
+  * enabled for PV.
+  */
+ if ( opt_vcpu_pt_pv )
+ {
+ const struct vcpu *curr;
+ const struct domain *curr_d;
+
+ sync_local_execstate();
+
+ curr = current;
+ curr_d = curr->domain;
+
+ if ( is_pv_domain(curr_d) && curr_d->arch.vcpu_pt )
+ /* Update shadow root page-table ahead of doing TLB flush. */
+ pv_asi_update_shadow_l4(curr);
+ }
+ }
if ( flags & (FLUSH_TLB|FLUSH_TLB_GLOBAL) )
{
@@ -265,6 +265,12 @@ extern unsigned long xen_phys_start;
/* The address of a particular VCPU's GDT or LDT. */
#define GDT_VIRT_START(v) \
(PERDOMAIN_VIRT_START + ((v)->vcpu_id << GDT_LDT_VCPU_VA_SHIFT))
+/*
+ * There are 2 GDT pages reserved for Xen, but only one is used. Use the
+ * remaining one to map the guest L4 when running with ASI enabled.
+ */
+#define L4_SHADOW(v) \
+ (GDT_VIRT_START(v) + ((FIRST_RESERVED_GDT_PAGE + 1) << PAGE_SHIFT))
#define LDT_VIRT_START(v) \
(GDT_VIRT_START(v) + (64*1024))
@@ -591,6 +591,9 @@ struct pv_vcpu
/* Deferred VA-based update state. */
bool need_update_runstate_area;
struct vcpu_time_info pending_system_time;
+
+ /* For ASI: page to use as L4 shadow of the guest selected L4. */
+ root_pgentry_t *root_pgt;
};
struct arch_vcpu
@@ -23,6 +23,8 @@ bool pv_destroy_ldt(struct vcpu *v);
int validate_segdesc_page(struct page_info *page);
+void pv_asi_update_shadow_l4(const struct vcpu *v);
+
#else
#include <xen/errno.h>
@@ -44,6 +46,9 @@ static inline bool pv_map_ldt_shadow_page(unsigned int off) { return false; }
static inline bool pv_destroy_ldt(struct vcpu *v)
{ ASSERT_UNREACHABLE(); return false; }
+static inline void pv_asi_update_shadow_l4(const struct vcpu *v)
+{ ASSERT_UNREACHABLE(); }
+
#endif
#endif /* __X86_PV_MM_H__ */
@@ -546,6 +546,8 @@ void write_ptbase(struct vcpu *v)
}
else
{
+ if ( is_pv_domain(d) && d->arch.vcpu_pt )
+ pv_asi_update_shadow_l4(v);
/* Make sure to clear use_pv_cr3 and xen_cr3 before pv_cr3. */
cpu_info->use_pv_cr3 = false;
cpu_info->xen_cr3 = 0;
@@ -565,6 +567,7 @@ void write_ptbase(struct vcpu *v)
*/
pagetable_t update_cr3(struct vcpu *v)
{
+ const struct domain *d = v->domain;
mfn_t cr3_mfn;
if ( paging_mode_enabled(v->domain) )
@@ -575,7 +578,14 @@ pagetable_t update_cr3(struct vcpu *v)
else
cr3_mfn = pagetable_get_mfn(v->arch.guest_table);
- make_cr3(v, cr3_mfn);
+ make_cr3(v, d->arch.vcpu_pt ? virt_to_mfn(v->arch.pv.root_pgt) : cr3_mfn);
+
+ if ( d->arch.vcpu_pt )
+ {
+ populate_perdomain_mapping(v, L4_SHADOW(v), &cr3_mfn, 1);
+ if ( v == this_cpu(curr_vcpu) )
+ flush_tlb_one_local(L4_SHADOW(v));
+ }
return pagetable_null();
}
@@ -695,6 +695,12 @@ int paging_domctl(struct domain *d, struct xen_domctl_shadow_op *sc,
return -EINVAL;
}
+ if ( is_pv_domain(d) && d->arch.vcpu_pt )
+ {
+ gprintk(XENLOG_ERR, "Paging not supported on PV domains with ASI\n");
+ return -EOPNOTSUPP;
+ }
+
if ( resuming
? (d->arch.paging.preempt.dom != current->domain ||
d->arch.paging.preempt.op != sc->op)
@@ -838,8 +838,11 @@ static int __init dom0_construct(struct boot_info *bi, struct domain *d)
d->arch.paging.mode = 0;
- /* Set up CR3 value for switch_cr3_cr4(). */
- update_cr3(v);
+ /*
+ * Set up CR3 value for switch_cr3_cr4(). Use make_cr3() instead of
+ * update_cr3() to avoid using an ASI page-table for dom0 building.
+ */
+ make_cr3(v, pagetable_get_mfn(v->arch.guest_table));
/* We run on dom0's page tables for the final part of the build process. */
switch_cr3_cr4(cr3_pa(v->arch.cr3), read_cr4());
@@ -1068,6 +1071,9 @@ static int __init dom0_construct(struct boot_info *bi, struct domain *d)
}
#endif
+ /* Must be called in case ASI is enabled. */
+ update_cr3(v);
+
v->is_initialised = 1;
clear_bit(_VPF_down, &v->pause_flags);
@@ -15,6 +15,7 @@
#include <asm/invpcid.h>
#include <asm/spec_ctrl.h>
#include <asm/pv/domain.h>
+#include <asm/pv/mm.h>
#include <asm/shadow.h>
#ifdef CONFIG_PV32
@@ -296,6 +297,7 @@ void pv_vcpu_destroy(struct vcpu *v)
pv_destroy_gdt_ldt_l1tab(v);
XFREE(v->arch.pv.trap_ctxt);
+ FREE_XENHEAP_PAGE(v->arch.pv.root_pgt);
}
int pv_vcpu_initialise(struct vcpu *v)
@@ -336,6 +338,24 @@ int pv_vcpu_initialise(struct vcpu *v)
goto done;
}
+ if ( d->arch.vcpu_pt )
+ {
+ v->arch.pv.root_pgt = alloc_xenheap_page();
+ if ( !v->arch.pv.root_pgt )
+ {
+ rc = -ENOMEM;
+ goto done;
+ }
+
+ /*
+ * VM assists are not yet known, RO machine-to-phys slot will be copied
+ * from the guest L4.
+ */
+ init_xen_l4_slots(v->arch.pv.root_pgt,
+ _mfn(virt_to_mfn(v->arch.pv.root_pgt)),
+ v, INVALID_MFN, false);
+ }
+
done:
if ( rc )
pv_vcpu_destroy(v);
@@ -368,7 +388,7 @@ int pv_domain_initialise(struct domain *d)
d->arch.ctxt_switch = &pv_csw;
- d->arch.pv.flush_root_pt = d->arch.pv.xpti;
+ d->arch.pv.flush_root_pt = d->arch.pv.xpti || d->arch.vcpu_pt;
if ( !is_pv_32bit_domain(d) && use_invpcid && cpu_has_pcid )
switch ( ACCESS_ONCE(opt_pcid) )
@@ -409,6 +429,7 @@ bool __init xpti_pcid_enabled(void)
static void _toggle_guest_pt(struct vcpu *v)
{
+ const struct domain *d = v->domain;
bool guest_update;
pagetable_t old_shadow;
unsigned long cr3;
@@ -417,6 +438,14 @@ static void _toggle_guest_pt(struct vcpu *v)
guest_update = v->arch.flags & TF_kernel_mode;
old_shadow = update_cr3(v);
+ if ( d->arch.vcpu_pt )
+ /*
+ * _toggle_guest_pt() might switch between user and kernel page tables,
+ * but doesn't use write_ptbase(), and hence needs an explicit call to
+ * sync the shadow L4.
+ */
+ pv_asi_update_shadow_l4(v);
+
/*
* Don't flush user global mappings from the TLB. Don't tick TLB clock.
*
@@ -12,6 +12,7 @@
#include <asm/current.h>
#include <asm/p2m.h>
+#include <asm/pv/domain.h>
#include "mm.h"
@@ -104,6 +105,45 @@ void init_xen_pae_l2_slots(l2_pgentry_t *l2t, const struct domain *d)
}
#endif
+void pv_asi_update_shadow_l4(const struct vcpu *v)
+{
+ const root_pgentry_t *guest_pgt;
+ root_pgentry_t *root_pgt = v->arch.pv.root_pgt;
+ const struct domain *d = v->domain;
+
+ ASSERT(!d->arch.pv.xpti);
+ ASSERT(is_pv_domain(d));
+ ASSERT(!is_idle_domain(d));
+ ASSERT(current == this_cpu(curr_vcpu));
+
+ if ( likely(v == current) )
+ guest_pgt = (void *)L4_SHADOW(v);
+ else if ( !(v->arch.flags & TF_kernel_mode) )
+ guest_pgt =
+ map_domain_page(pagetable_get_mfn(v->arch.guest_table_user));
+ else
+ guest_pgt = map_domain_page(pagetable_get_mfn(v->arch.guest_table));
+
+ if ( is_pv_64bit_domain(d) )
+ {
+ unsigned int i;
+
+ for ( i = 0; i < ROOT_PAGETABLE_FIRST_XEN_SLOT; i++ )
+ l4e_write(&root_pgt[i], guest_pgt[i]);
+ for ( i = ROOT_PAGETABLE_LAST_XEN_SLOT + 1;
+ i < L4_PAGETABLE_ENTRIES; i++ )
+ l4e_write(&root_pgt[i], guest_pgt[i]);
+
+ l4e_write(&root_pgt[l4_table_offset(RO_MPT_VIRT_START)],
+ guest_pgt[l4_table_offset(RO_MPT_VIRT_START)]);
+ }
+ else
+ l4e_write(&root_pgt[0], guest_pgt[0]);
+
+ if ( v != this_cpu(curr_vcpu) )
+ unmap_domain_page(guest_pgt);
+}
+
/*
* Local variables:
* mode: C
When running PV guests it's possible for the guest to use the same root
page table (L4) for all vCPUs, which in turn will result in Xen also
using the same root page table on all pCPUs that are running any domain
vCPU.

When using XPTI Xen switches to a per-CPU shadow L4 when running in
guest context, switching to the fully populated L4 when in Xen context.

Take advantage of this existing shadowing and force the usage of a
per-CPU L4 that shadows the guest-selected L4 when Address Space
Isolation is requested for PV guests.

The mapping of the guest L4 is done with a per-vCPU slot in the
per-domain area (reusing the unused second GDT page), which however
requires that the currently loaded L4 has the per-domain slot set up.
In order to ensure this, switch to the shadow per-CPU L4 with just the
Xen slots populated, and then map the guest L4 and copy the contents of
the guest-controlled slots.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/arch/x86/flushtlb.c           | 22 +++++++++++++++++
 xen/arch/x86/include/asm/config.h | 6 +++++
 xen/arch/x86/include/asm/domain.h | 3 +++
 xen/arch/x86/include/asm/pv/mm.h  | 5 ++++
 xen/arch/x86/mm.c                 | 12 +++++++++-
 xen/arch/x86/mm/paging.c          | 6 +++++
 xen/arch/x86/pv/dom0_build.c      | 10 ++++++--
 xen/arch/x86/pv/domain.c          | 31 +++++++++++++++++++++++-
 xen/arch/x86/pv/mm.c              | 40 +++++++++++++++++++++++++++++++
 9 files changed, 131 insertions(+), 4 deletions(-)