@@ -168,7 +168,7 @@
/* Slot 260: per-domain mappings (including map cache). */
#define PERDOMAIN_VIRT_START (PML4_ADDR(260))
#define PERDOMAIN_SLOT_MBYTES (PML4_ENTRY_BYTES >> (20 + PAGETABLE_ORDER))
-#define PERDOMAIN_SLOTS 3
+#define PERDOMAIN_SLOTS 4
#define PERDOMAIN_VIRT_SLOT(s) (PERDOMAIN_VIRT_START + (s) * \
(PERDOMAIN_SLOT_MBYTES << 20))
/* Slot 4: mirror of per-domain mappings (for compat xlat area accesses). */
@@ -282,6 +282,14 @@ extern unsigned long xen_phys_start;
#define ARG_XLAT_START(v) \
(ARG_XLAT_VIRT_START + ((v)->vcpu_id << ARG_XLAT_VA_SHIFT))
+/* pv_root_pt mapping area. The fourth per-domain-mapping sub-area */
+#define PV_ROOT_PT_MAPPING_VIRT_START PERDOMAIN_VIRT_SLOT(3)
+#define PV_ROOT_PT_MAPPING_ENTRIES MAX_VIRT_CPUS
+
+/* The address of a particular VCPU's PV_ROOT_PT */
+#define PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v) \
+ (PV_ROOT_PT_MAPPING_VIRT_START + ((v)->vcpu_id * PAGE_SIZE))
+
#define ELFSIZE 64
#define ARCH_CRASH_SAVE_VMCOREINFO
@@ -273,6 +273,9 @@ struct pv_domain
{
l1_pgentry_t **gdt_ldt_l1tab;
+ /* Array of pointers to the l1 PTs holding PV root PTs of each vCPU */
+ l1_pgentry_t **root_pt_l1tab;
+
atomic_t nr_l4_pages;
/* Is a 32-bit PV guest? */
@@ -527,6 +527,14 @@ void make_cr3(struct vcpu *v, mfn_t mfn)
v->arch.cr3 |= get_pcid_bits(v, false);
}
+/* Index of the l1 PT that maps v's root PT */
+#define pv_root_pt_idx(v) ((v)->vcpu_id / L1_PAGETABLE_ENTRIES)
+
+/* Pointer to the PTE that maps v's root PT in the perdomain area */
+#define pv_root_pt_pte(v) \
+ ((v)->domain->arch.pv.root_pt_l1tab[pv_root_pt_idx(v)] + \
+ ((v)->vcpu_id & (L1_PAGETABLE_ENTRIES - 1)))
+
void write_ptbase(struct vcpu *v)
{
const struct domain *d = v->domain;
@@ -538,11 +546,16 @@ void write_ptbase(struct vcpu *v)
if ( is_pv_domain(d) && d->arch.pv.xpti )
{
+ mfn_t guest_root_pt = _mfn(MASK_EXTR(v->arch.cr3, X86_CR3_ADDR_MASK));
+ l1_pgentry_t *pte = pv_root_pt_pte(v);
+
cpu_info->root_pgt_changed = true;
cpu_info->pv_cr3 = __pa(this_cpu(root_pgt));
if ( new_cr4 & X86_CR4_PCIDE )
cpu_info->pv_cr3 |= get_pcid_bits(v, true);
switch_cr3_cr4(v->arch.cr3, new_cr4);
+
+ l1e_write(pte, l1e_from_mfn(guest_root_pt, __PAGE_HYPERVISOR_RO));
}
else
{
@@ -289,6 +289,20 @@ static void pv_destroy_gdt_ldt_l1tab(struct vcpu *v)
1U << GDT_LDT_VCPU_SHIFT);
}
+static int pv_create_root_pt_l1tab(const struct vcpu *v)
+{
+ return create_perdomain_mapping(v->domain,
+ PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v),
+ 1, v->domain->arch.pv.root_pt_l1tab,
+ NULL);
+}
+
+static void pv_destroy_root_pt_l1tab(const struct vcpu *v)
+{
+ destroy_perdomain_mapping(v->domain,
+ PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v), 1);
+}
+
void pv_vcpu_destroy(struct vcpu *v)
{
if ( is_pv_32bit_vcpu(v) )
@@ -298,6 +312,7 @@ void pv_vcpu_destroy(struct vcpu *v)
}
pv_destroy_gdt_ldt_l1tab(v);
+ pv_destroy_root_pt_l1tab(v);
XFREE(v->arch.pv.trap_ctxt);
}
@@ -312,6 +327,13 @@ int pv_vcpu_initialise(struct vcpu *v)
if ( rc )
return rc;
+ if ( v->domain->arch.pv.xpti )
+ {
+ rc = pv_create_root_pt_l1tab(v);
+ if ( rc )
+ goto done;
+ }
+
BUILD_BUG_ON(X86_NR_VECTORS * sizeof(*v->arch.pv.trap_ctxt) >
PAGE_SIZE);
v->arch.pv.trap_ctxt = xzalloc_array(struct trap_info, X86_NR_VECTORS);
@@ -347,10 +369,12 @@ void pv_domain_destroy(struct domain *d)
destroy_perdomain_mapping(d, GDT_LDT_VIRT_START,
GDT_LDT_MBYTES << (20 - PAGE_SHIFT));
+ destroy_perdomain_mapping(d, PV_ROOT_PT_MAPPING_VIRT_START, d->max_vcpus);
XFREE(d->arch.pv.cpuidmasks);
FREE_XENHEAP_PAGE(d->arch.pv.gdt_ldt_l1tab);
+ XFREE(d->arch.pv.root_pt_l1tab);
}
void noreturn cf_check continue_pv_domain(void);
@@ -376,8 +400,22 @@ int pv_domain_initialise(struct domain *d)
(d->arch.pv.cpuidmasks = xmemdup(&cpuidmask_defaults)) == NULL )
goto fail;
+ rc = create_perdomain_mapping(d, PV_ROOT_PT_MAPPING_VIRT_START,
+ d->max_vcpus, NULL, NULL);
+ if ( rc )
+ goto fail;
+
d->arch.ctxt_switch = &pv_csw;
+ if ( d->arch.pv.xpti )
+ {
+ d->arch.pv.root_pt_l1tab =
+ xzalloc_array(l1_pgentry_t *,
+ DIV_ROUND_UP(d->max_vcpus, L1_PAGETABLE_ENTRIES));
+ if ( !d->arch.pv.root_pt_l1tab )
+ goto fail;
+ }
+
if ( !is_pv_32bit_domain(d) && use_invpcid && cpu_has_pcid )
switch ( ACCESS_ONCE(opt_pcid) )
{
@@ -451,7 +489,8 @@ static void _toggle_guest_pt(struct vcpu *v)
guest_update = false;
}
}
- write_cr3(cr3);
+
+ write_ptbase(v);
if ( !pagetable_is_null(old_shadow) )
shadow_put_top_level(v->domain, old_shadow);
@@ -491,9 +530,6 @@ void toggle_guest_mode(struct vcpu *v)
{
struct cpu_info *cpu_info = get_cpu_info();
- cpu_info->root_pgt_changed = true;
- cpu_info->pv_cr3 = __pa(this_cpu(root_pgt)) |
- (d->arch.pv.pcid ? get_pcid_bits(v, true) : 0);
/*
* As in _toggle_guest_pt() the XPTI CR3 write needs to be a TLB-
* flushing one too for shadow mode guests.
@@ -80,6 +80,7 @@ void __dummy__(void)
#undef OFFSET_EF
+ OFFSET(VCPU_id, struct vcpu, vcpu_id);
OFFSET(VCPU_processor, struct vcpu, processor);
OFFSET(VCPU_domain, struct vcpu, domain);
OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info_area.map);
@@ -170,9 +170,16 @@ FUNC_LOCAL(restore_all_guest)
movabs $PADDR_MASK & PAGE_MASK, %rsi
movabs $DIRECTMAP_VIRT_START, %rcx
and %rsi, %rdi
- and %r9, %rsi
add %rcx, %rdi
+
+ /*
+ * The address in the vCPU cr3 is always mapped in the per-domain
+ * pv_root_pt virt area.
+ */
+ imul $PAGE_SIZE, VCPU_id(%rbx), %esi
+ movabs $PV_ROOT_PT_MAPPING_VIRT_START, %rcx
add %rcx, %rsi
+
mov $ROOT_PAGETABLE_FIRST_XEN_SLOT, %ecx
mov root_table_offset(SH_LINEAR_PT_VIRT_START)*8(%rsi), %r8
mov %r8, root_table_offset(SH_LINEAR_PT_VIRT_START)*8(%rdi)