[v3,5/5] x86/pvh: Add 64bit relocation page tables

Message ID 20240823193630.2583107-6-jason.andryuk@amd.com
State Accepted
Commit c727a18ca391d8d9bac9d39caea6dac9c4391c59
Series x86/pvh: Make 64bit PVH entry relocatable

Commit Message

Jason Andryuk Aug. 23, 2024, 7:36 p.m. UTC
The PVH entry point is 32bit.  For a 64bit kernel, the entry point must
switch to 64bit mode, which requires a set of page tables.  In the past,
PVH used init_top_pgt.

This works fine when the kernel is loaded at LOAD_PHYSICAL_ADDR, as the
page tables are prebuilt for this address.  If the kernel is loaded at a
different address, they need to be adjusted.
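
As an illustration only (a C sketch, not code from this patch), the
adjustment adds the load offset to every present entry.  The offset is
CONFIG_PHYSICAL_ALIGN aligned (at least 2 MiB), so the flag bits in the
low bits of an entry are unaffected by the addition:

#include <stdint.h>

#define _PAGE_PRESENT	0x001UL

/* Shift the physical address embedded in each present entry of one
 * page-table page by offset = actual_load_addr - assumed_load_addr. */
static void fixup_pgtable(uint64_t *table, unsigned int nr_entries,
			  uint64_t offset)
{
	unsigned int i;

	for (i = 0; i < nr_entries; i++) {
		if (table[i] & _PAGE_PRESENT)
			table[i] += offset;
	}
}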

__startup_64() adjusts the prebuilt page tables for the physical load
address, but it is 64bit code.  The 32bit PVH entry code can't call it
to adjust the page tables, so it can't readily be re-used.

64bit PVH entry needs page tables set up for the identity map, the
kernel high map and the direct map.  pvh_start_xen() begins executing
identity mapped.  Inside xen_prepare_pvh(), it jumps through a pv_ops
function pointer into the high map.  The direct map is used for __va()
on the initramfs and other guest physical addresses.
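
For orientation, a sketch of the three views of the same physical
address (the constants are the usual x86-64 4-level defaults, used here
purely for illustration and not taken from this patch):

#include <stdint.h>

#define PAGE_OFFSET		0xffff888000000000UL	/* direct map base */
#define __START_KERNEL_map	0xffffffff80000000UL	/* high map base */

static inline uint64_t ident_va(uint64_t pa)  { return pa; }
static inline uint64_t direct_va(uint64_t pa) { return PAGE_OFFSET + pa; }
static inline uint64_t highmap_va(uint64_t pa, uint64_t phys_base)
{
	return __START_KERNEL_map + (pa - phys_base);
}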

Add a dedicated set of prebuilt page tables for PVH entry.  They are
adjusted in assembly before being loaded.

Add XEN_ELFNOTE_PHYS32_RELOC to indicate support for relocation
along with the kernel's loading constraints.  The maximum load address,
KERNEL_IMAGE_SIZE - 1, is determined by a single pvh_level2_ident_pgt
page.  It could be larger with more pages.
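
The limit follows from the size of one PMD page; a worked check in C
(an illustration, not code from this patch):

#include <stdio.h>

int main(void)
{
	unsigned long entries  = 512;		/* PTRS_PER_PMD */
	unsigned long pmd_size = 2UL << 20;	/* one 2 MiB large page */

	/* One pvh_level2_ident_pgt page identity maps 512 * 2 MiB =
	 * 1 GiB, so the loaded image must fit below 1 GiB and the
	 * highest usable load address is KERNEL_IMAGE_SIZE - 1. */
	printf("identity map coverage: %lu MiB\n",
	       (entries * pmd_size) >> 20);
	return 0;
}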

Signed-off-by: Jason Andryuk <jason.andryuk@amd.com>
---
v3:
Include asm/pgtable.h to avoid 32bit build failure

v2:
Use some defines: PTRS_PER_PGD, PTRS_PER_PMD, PAGE_SIZE
Add some spaces around operators and after commas
Include asm/pgtable_64.h
s/LOAD_PHYSICAL_ADDR/_pa(pvh_start_xen)/ in case they differ
---
 arch/x86/platform/pvh/head.S | 104 ++++++++++++++++++++++++++++++++++-
 1 file changed, 103 insertions(+), 1 deletion(-)

Comments

Jürgen Groß Sept. 16, 2024, 6:33 a.m. UTC | #1
On 23.08.24 21:36, Jason Andryuk wrote:
> Signed-off-by: Jason Andryuk <jason.andryuk@amd.com>

Reviewed-by: Juergen Gross <jgross@suse.com>


Juergen

Patch

diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
index 14b4345d9bae..64fca49cd88f 100644
--- a/arch/x86/platform/pvh/head.S
+++ b/arch/x86/platform/pvh/head.S
@@ -16,6 +16,7 @@ 
 #include <asm/segment.h>
 #include <asm/asm.h>
 #include <asm/boot.h>
+#include <asm/pgtable.h>
 #include <asm/processor-flags.h>
 #include <asm/msr.h>
 #include <asm/nospec-branch.h>
@@ -102,8 +103,47 @@  SYM_CODE_START_LOCAL(pvh_start_xen)
 	btsl $_EFER_LME, %eax
 	wrmsr
 
+	mov %ebp, %ebx
+	subl $_pa(pvh_start_xen), %ebx /* offset */
+	jz .Lpagetable_done
+
+	/* Fixup page-tables for relocation. */
+	leal rva(pvh_init_top_pgt)(%ebp), %edi
+	movl $PTRS_PER_PGD, %ecx
+2:
+	testl $_PAGE_PRESENT, 0x00(%edi)
+	jz 1f
+	addl %ebx, 0x00(%edi)
+1:
+	addl $8, %edi
+	decl %ecx
+	jnz 2b
+
+	/* L3 ident has a single entry. */
+	leal rva(pvh_level3_ident_pgt)(%ebp), %edi
+	addl %ebx, 0x00(%edi)
+
+	leal rva(pvh_level3_kernel_pgt)(%ebp), %edi
+	addl %ebx, (PAGE_SIZE - 16)(%edi)
+	addl %ebx, (PAGE_SIZE - 8)(%edi)
+
+	/* pvh_level2_ident_pgt is fine - large pages */
+
+	/* pvh_level2_kernel_pgt needs adjustment - large pages */
+	leal rva(pvh_level2_kernel_pgt)(%ebp), %edi
+	movl $PTRS_PER_PMD, %ecx
+2:
+	testl $_PAGE_PRESENT, 0x00(%edi)
+	jz 1f
+	addl %ebx, 0x00(%edi)
+1:
+	addl $8, %edi
+	decl %ecx
+	jnz 2b
+
+.Lpagetable_done:
 	/* Enable pre-constructed page tables. */
-	leal rva(init_top_pgt)(%ebp), %eax
+	leal rva(pvh_init_top_pgt)(%ebp), %eax
 	mov %eax, %cr3
 	mov $(X86_CR0_PG | X86_CR0_PE), %eax
 	mov %eax, %cr0
@@ -198,5 +238,67 @@  SYM_DATA_START_LOCAL(early_stack)
 	.fill BOOT_STACK_SIZE, 1, 0
 SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end)
 
+#ifdef CONFIG_X86_64
+/*
+ * Xen PVH needs a set of identity mapped and kernel high mapping
+ * page tables.  pvh_start_xen starts running on the identity mapped
+ * page tables, but xen_prepare_pvh calls into the high mapping.
+ * These page tables need to be relocatable and are only used until
+ * startup_64 transitions to init_top_pgt.
+ */
+SYM_DATA_START_PAGE_ALIGNED(pvh_init_top_pgt)
+	.quad   pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+	.org    pvh_init_top_pgt + L4_PAGE_OFFSET * 8, 0
+	.quad   pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+	.org    pvh_init_top_pgt + L4_START_KERNEL * 8, 0
+	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+	.quad   pvh_level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
+SYM_DATA_END(pvh_init_top_pgt)
+
+SYM_DATA_START_PAGE_ALIGNED(pvh_level3_ident_pgt)
+	.quad	pvh_level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+	.fill	511, 8, 0
+SYM_DATA_END(pvh_level3_ident_pgt)
+SYM_DATA_START_PAGE_ALIGNED(pvh_level2_ident_pgt)
+	/*
+	 * Map the first 1G, since we easily can.
+	 * Don't set NX because code runs from these pages.
+	 *
+	 * Note: This sets _PAGE_GLOBAL regardless of whether
+	 * the CPU supports it or whether it is enabled.  But,
+	 * the CPU should ignore the bit.
+	 */
+	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
+SYM_DATA_END(pvh_level2_ident_pgt)
+SYM_DATA_START_PAGE_ALIGNED(pvh_level3_kernel_pgt)
+	.fill	L3_START_KERNEL, 8, 0
+	/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
+	.quad	pvh_level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+	.quad	0 /* no fixmap */
+SYM_DATA_END(pvh_level3_kernel_pgt)
+
+SYM_DATA_START_PAGE_ALIGNED(pvh_level2_kernel_pgt)
+	/*
+	 * Kernel high mapping.
+	 *
+	 * The kernel code+data+bss must be located below KERNEL_IMAGE_SIZE in
+	 * virtual address space, which is 1 GiB if RANDOMIZE_BASE is enabled,
+	 * 512 MiB otherwise.
+	 *
+	 * (NOTE: after that starts the module area, see MODULES_VADDR.)
+	 *
+	 * This table is eventually used by the kernel during normal runtime.
+	 * Care must be taken to clear out undesired bits later, like _PAGE_RW
+	 * or _PAGE_GLOBAL in some cases.
+	 */
+	PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE / PMD_SIZE)
+SYM_DATA_END(pvh_level2_kernel_pgt)
+
+	ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_RELOC,
+		     .long CONFIG_PHYSICAL_ALIGN;
+		     .long LOAD_PHYSICAL_ADDR;
+		     .long KERNEL_IMAGE_SIZE - 1)
+#endif
+
 	ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,
 	             _ASM_PTR (pvh_start_xen - __START_KERNEL_map))