Message ID | c0e531fc665c9ad7595d853e2ce631a13974c022.camel@infradead.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v2,1/6] x86/boot: Remove gratuitous call back into low-memory code | expand |
On 09.08.2019 17:02, David Woodhouse wrote: > From: David Woodhouse <dwmw@amazon.co.uk> > > Where booted from EFI or with no-real-mode, there is no need to stomp > on low memory with the 16-boot code. Instead, just go straight to > trampoline_protmode_entry() at its physical location within the Xen > image. > > For now, the boot code (including the EFI loader path) still determines > what the trampoline_phys address should be. The trampoline is actually > relocated for that address and copied into low memory, from a > relocate_trampoline() call made from __start_xen(). I assume this talks about the real mode part of the trampoline, as opposed to the next paragraph? Would be nice if you made this explicit. > For subsequent AP startup and wakeup, the 32-bit trampoline can't > trivially be used in-place as that region isn't mapped. So copy it > down to low memory too, having relocated it (again) to work from > there. trampoline_protmode_entry gets entered with CR0.PG=0, i.e. at that point there's not even the question yet of there being a mapping. Subsequently idle_pg_table gets loaded into CR3. I wonder if, rather than relocating the 32-bit part of the trampoline, it wouldn't be better to install a 1:1 mapping into idle_pg_table. Such a mapping would need to have the G bits clear in order to not conflict with PV guest mappings of the same linear addresses. > --- a/xen/arch/x86/acpi/power.c > +++ b/xen/arch/x86/acpi/power.c > @@ -152,9 +152,9 @@ static void acpi_sleep_prepare(u32 state) > return; > > if ( acpi_sinfo.vector_width == 32 ) > - *(uint32_t *)wakeup_vector_va = bootsym_phys(wakeup_start); > + *(uint32_t *)wakeup_vector_va = trampsym_phys(wakeup_start); > else > - *(uint64_t *)wakeup_vector_va = bootsym_phys(wakeup_start); > + *(uint64_t *)wakeup_vector_va = trampsym_phys(wakeup_start); > } > > static void acpi_sleep_post(u32 state) {} > @@ -388,7 +388,7 @@ static void tboot_sleep(u8 sleep_state) > g_tboot_shared->acpi_sinfo.wakeup_vector = acpi_sinfo.wakeup_vector; > g_tboot_shared->acpi_sinfo.vector_width = acpi_sinfo.vector_width; > g_tboot_shared->acpi_sinfo.kernel_s3_resume_vector = > - bootsym_phys(wakeup_start); > + trampsym_phys(wakeup_start); Shouldn't changes like these have happened earlier, when you introduce the (logical only at that point) distinction between trampoline pieces? > @@ -97,7 +100,7 @@ GLOBAL(trampoline_realmode_entry) > cld > cli > lidt trampsym(idt_48) > - lgdt trampsym(gdt_48) > + lgdtl trampsym(gdt_48) Stray / unrelated change (and if needed, then also for lidt)? > @@ -236,11 +239,23 @@ gdt_48: .word 7*8-1 > > /* The first page of trampoline is permanent, the rest boot-time only. */ > /* Reuse the boot trampoline on the 1st trampoline page as stack for wakeup. */ > - .equ wakeup_stack, boot_trampoline_start + PAGE_SIZE > + .equ wakeup_stack, perm_trampoline_start + PAGE_SIZE > .global wakeup_stack > > +ENTRY(perm_trampoline_end) > + > /* From here on early boot only. */ > > +ENTRY(boot_trampoline_start) > + > + .word 0 > +boot16_idt: > + .word 0, 0, 0 # base = limit = 0 > + .word 0 > +boot16_gdt: > + .word 7*8-1 > + .long tramp32sym_rel(trampoline_gdt,4) Can we really not get away without a second copy of these? > @@ -304,8 +319,8 @@ trampoline_boot_cpu_entry: > cli > > /* Reset GDT and IDT. Some BIOSes clobber GDTR. */ > - lidt bootsym(idt_48) > - lgdt bootsym(gdt_48) > + lidt bootsym(boot16_idt) > + lgdtl bootsym(boot16_gdt) As above - either both should gain a suffix, or neither of them. > --- a/xen/arch/x86/setup.c > +++ b/xen/arch/x86/setup.c > @@ -682,6 +682,42 @@ static unsigned int __init copy_bios_e820(struct e820entry *map, unsigned int li > return n; > } > > +extern const s32 __trampoline_rel_start[], __trampoline_rel_stop[]; > +extern const s32 __trampoline32_rel_start[], __trampoline32_rel_stop[]; > + > +static void __init relocate_trampoline(unsigned long phys) > +{ > + const s32 *trampoline_ptr; > + uint32_t tramp32_delta = 0; > + > + /* Apply relocations to trampoline. */ > + for ( trampoline_ptr = __trampoline_rel_start; > + trampoline_ptr < __trampoline_rel_stop; > + ++trampoline_ptr ) > + *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) += phys; > + > + tramp32_delta = phys; Any reason this can't be the initializer of the variable, or the zero initializer above can't be dropped? > + if (!efi_enabled(EFI_LOADER)) { Style (missing blanks inside the parentheses, and brace to go on its own line). > --- a/xen/include/asm-x86/config.h > +++ b/xen/include/asm-x86/config.h > @@ -89,12 +89,12 @@ > > #ifndef __ASSEMBLY__ > extern unsigned long trampoline_phys; > -#define bootsym_phys(sym) \ > - (((unsigned long)&(sym)-(unsigned long)&boot_trampoline_start)+trampoline_phys) > -#define bootsym(sym) \ > +#define trampsym_phys(sym) \ > + (((unsigned long)&(sym)-(unsigned long)&perm_trampoline_start)+trampoline_phys) > +#define trampsym(sym) \ > (*RELOC_HIDE((typeof(&(sym)))__va(__pa(&(sym))), \ > - trampoline_phys-__pa(boot_trampoline_start))) > -extern char boot_trampoline_start[], boot_trampoline_end[]; > + trampoline_phys-__pa(perm_trampoline_start))) As you're touching these, could you please also insert the missing blanks around the binary + and - ? Jan
On Mon, 2019-08-12 at 12:55 +0200, Jan Beulich wrote: > On 09.08.2019 17:02, David Woodhouse wrote: > > From: David Woodhouse <dwmw@amazon.co.uk> > > > > Where booted from EFI or with no-real-mode, there is no need to stomp > > on low memory with the 16-boot code. Instead, just go straight to > > trampoline_protmode_entry() at its physical location within the Xen > > image. > > > > For now, the boot code (including the EFI loader path) still determines > > what the trampoline_phys address should be. The trampoline is actually > > relocated for that address and copied into low memory, from a > > relocate_trampoline() call made from __start_xen(). > > I assume this talks about the real mode part of the trampoline, as > opposed to the next paragraph? Would be nice if you made this > explicit. This is the permanent real-mode trampoline used for AP startup and wakeup, not the real-mode boot code (which the boot code has to have put there for itself it it wanted it). I will try to make the commit message clearer; thanks for pointing it out. > > For subsequent AP startup and wakeup, the 32-bit trampoline can't > > trivially be used in-place as that region isn't mapped. So copy it > > down to low memory too, having relocated it (again) to work from > > there. > > trampoline_protmode_entry gets entered with CR0.PG=0, i.e. at > that point there's not even the question yet of there being a > mapping. Subsequently idle_pg_table gets loaded into CR3. I wonder > if, rather than relocating the 32-bit part of the trampoline, it > wouldn't be better to install a 1:1 mapping into idle_pg_table. > Such a mapping would need to have the G bits clear in order to > not conflict with PV guest mappings of the same linear addresses. Yeah, I tried making that happen. It made me sad. This seemed to be simpler and less fragile. > > --- a/xen/arch/x86/acpi/power.c > > +++ b/xen/arch/x86/acpi/power.c > > @@ -152,9 +152,9 @@ static void acpi_sleep_prepare(u32 state) > > return; > > > > if ( acpi_sinfo.vector_width == 32 ) > > - *(uint32_t *)wakeup_vector_va = bootsym_phys(wakeup_start); > > + *(uint32_t *)wakeup_vector_va = trampsym_phys(wakeup_start); > > else > > - *(uint64_t *)wakeup_vector_va = bootsym_phys(wakeup_start); > > + *(uint64_t *)wakeup_vector_va = trampsym_phys(wakeup_start); > > } > > > > static void acpi_sleep_post(u32 state) {} > > @@ -388,7 +388,7 @@ static void tboot_sleep(u8 sleep_state) > > g_tboot_shared->acpi_sinfo.wakeup_vector = acpi_sinfo.wakeup_vector; > > g_tboot_shared->acpi_sinfo.vector_width = acpi_sinfo.vector_width; > > g_tboot_shared->acpi_sinfo.kernel_s3_resume_vector = > > - bootsym_phys(wakeup_start); > > + trampsym_phys(wakeup_start); > > Shouldn't changes like these have happened earlier, when you > introduce the (logical only at that point) distinction between > trampoline pieces? That was in assembler code. This is C, which never had to be that involved with the distinction. But now that all the dust has settled, I'm making it consistent, using 'trampsym' for stuff in the permanent trampoline just like the asm code does. > > @@ -97,7 +100,7 @@ GLOBAL(trampoline_realmode_entry) > > cld > > cli > > lidt trampsym(idt_48) > > - lgdt trampsym(gdt_48) > > + lgdtl trampsym(gdt_48) > > Stray / unrelated change (and if needed, then also for lidt)? The difference between 16bit l.dt and 32-bit l.dtl is that the former only loads 24 bits of the actual table address (trampoline_gdt in this case). Thus, when trampoline_gdt is being used in-place, as it is during early boot, and *if* the Xen image is loaded higher than 16MiB, lgdt doesn't work. That's half a day of my life I want back. It doesn't matter for lidt because we're just loading an empty limit and pointer there, and we don't care about bits 24-31 of a zero value. > > @@ -236,11 +239,23 @@ gdt_48: .word 7*8-1 > > > > /* The first page of trampoline is permanent, the rest boot-time only. */ > > /* Reuse the boot trampoline on the 1st trampoline page as stack for wakeup. */ > > - .equ wakeup_stack, boot_trampoline_start + PAGE_SIZE > > + .equ wakeup_stack, perm_trampoline_start + PAGE_SIZE > > .global wakeup_stack > > > > +ENTRY(perm_trampoline_end) > > + > > /* From here on early boot only. */ > > > > +ENTRY(boot_trampoline_start) > > + > > + .word 0 > > +boot16_idt: > > + .word 0, 0, 0 # base = limit = 0 > > + .word 0 > > +boot16_gdt: > > + .word 7*8-1 > > + .long tramp32sym_rel(trampoline_gdt,4) > > Can we really not get away without a second copy of these? Probably, but my judgement was that the complexity and the pain of doing so would exceed the benefit. I'll take another look at doing so. > > @@ -304,8 +319,8 @@ trampoline_boot_cpu_entry: > > cli > > > > /* Reset GDT and IDT. Some BIOSes clobber GDTR. */ > > - lidt bootsym(idt_48) > > - lgdt bootsym(gdt_48) > > + lidt bootsym(boot16_idt) > > + lgdtl bootsym(boot16_gdt) > > As above - either both should gain a suffix, or neither of them. > > > --- a/xen/arch/x86/setup.c > > +++ b/xen/arch/x86/setup.c > > @@ -682,6 +682,42 @@ static unsigned int __init copy_bios_e820(struct e820entry *map, unsigned int li > > return n; > > } > > > > +extern const s32 __trampoline_rel_start[], __trampoline_rel_stop[]; > > +extern const s32 __trampoline32_rel_start[], __trampoline32_rel_stop[]; > > + > > +static void __init relocate_trampoline(unsigned long phys) > > +{ > > + const s32 *trampoline_ptr; > > + uint32_t tramp32_delta = 0; > > + > > + /* Apply relocations to trampoline. */ > > + for ( trampoline_ptr = __trampoline_rel_start; > > + trampoline_ptr < __trampoline_rel_stop; > > + ++trampoline_ptr ) > > + *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) += phys; > > + > > + tramp32_delta = phys; > > Any reason this can't be the initializer of the variable, or the > zero initializer above can't be dropped? I can't think of one. I think I quite like the initial setting of tramp32_delta=phys to live *right* above the subsequent if(something) tramp32_delta-=something, to make it very clear what that calculation is. So maybe I'll just drop the pointless =0 initialiser. > > + if (!efi_enabled(EFI_LOADER)) { > > Style (missing blanks inside the parentheses, and brace to go on > its own line). Ack. You can take the Linux hacker out of the Linux kernel but... > > --- a/xen/include/asm-x86/config.h > > +++ b/xen/include/asm-x86/config.h > > @@ -89,12 +89,12 @@ > > > > #ifndef __ASSEMBLY__ > > extern unsigned long trampoline_phys; > > -#define bootsym_phys(sym) \ > > - (((unsigned long)&(sym)-(unsigned long)&boot_trampoline_start)+trampoline_phys) > > -#define bootsym(sym) \ > > +#define trampsym_phys(sym) \ > > + (((unsigned long)&(sym)-(unsigned long)&perm_trampoline_start)+trampoline_phys) > > +#define trampsym(sym) \ > > (*RELOC_HIDE((typeof(&(sym)))__va(__pa(&(sym))), \ > > - trampoline_phys-__pa(boot_trampoline_start))) > > -extern char boot_trampoline_start[], boot_trampoline_end[]; > > + trampoline_phys-__pa(perm_trampoline_start))) > > As you're touching these, could you please also insert the missing > blanks around the binary + and - ? Will do. Thanks.
On 19.08.2019 17:25, David Woodhouse wrote: > On Mon, 2019-08-12 at 12:55 +0200, Jan Beulich wrote: >> On 09.08.2019 17:02, David Woodhouse wrote: >>> @@ -97,7 +100,7 @@ GLOBAL(trampoline_realmode_entry) >>> cld >>> cli >>> lidt trampsym(idt_48) >>> - lgdt trampsym(gdt_48) >>> + lgdtl trampsym(gdt_48) >> >> Stray / unrelated change (and if needed, then also for lidt)? > > The difference between 16bit l.dt and 32-bit l.dtl is that the former > only loads 24 bits of the actual table address (trampoline_gdt in this > case). > > Thus, when trampoline_gdt is being used in-place, as it is during early > boot, and *if* the Xen image is loaded higher than 16MiB, lgdt doesn't > work. That's half a day of my life I want back. But isn't this an issue even independent of your series? I.e. doesn't this want to be fixed in a separate (to be backported) patch? Jan
> On 19.08.2019 17:25, David Woodhouse wrote: >> On Mon, 2019-08-12 at 12:55 +0200, Jan Beulich wrote: >>> On 09.08.2019 17:02, David Woodhouse wrote: >>>> @@ -97,7 +100,7 @@ GLOBAL(trampoline_realmode_entry) >>>> cld >>>> cli >>>> lidt trampsym(idt_48) >>>> - lgdt trampsym(gdt_48) >>>> + lgdtl trampsym(gdt_48) >>> >>> Stray / unrelated change (and if needed, then also for lidt)? >> >> The difference between 16bit l.dt and 32-bit l.dtl is that the former >> only loads 24 bits of the actual table address (trampoline_gdt in this >> case). >> >> Thus, when trampoline_gdt is being used in-place, as it is during early >> boot, and *if* the Xen image is loaded higher than 16MiB, lgdt doesn't >> work. That's half a day of my life I want back. > > But isn't this an issue even independent of your series? I.e. doesn't > this want to be fixed in a separate (to be backported) patch? Before my series it isn't used in place in the Xen image; it's also (mostly gratuitously) copied to low memory.
diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c index aecc754fdb..d2a355429e 100644 --- a/xen/arch/x86/acpi/power.c +++ b/xen/arch/x86/acpi/power.c @@ -152,9 +152,9 @@ static void acpi_sleep_prepare(u32 state) return; if ( acpi_sinfo.vector_width == 32 ) - *(uint32_t *)wakeup_vector_va = bootsym_phys(wakeup_start); + *(uint32_t *)wakeup_vector_va = trampsym_phys(wakeup_start); else - *(uint64_t *)wakeup_vector_va = bootsym_phys(wakeup_start); + *(uint64_t *)wakeup_vector_va = trampsym_phys(wakeup_start); } static void acpi_sleep_post(u32 state) {} @@ -388,7 +388,7 @@ static void tboot_sleep(u8 sleep_state) g_tboot_shared->acpi_sinfo.wakeup_vector = acpi_sinfo.wakeup_vector; g_tboot_shared->acpi_sinfo.vector_width = acpi_sinfo.vector_width; g_tboot_shared->acpi_sinfo.kernel_s3_resume_vector = - bootsym_phys(wakeup_start); + trampsym_phys(wakeup_start); switch ( sleep_state ) { diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S index 104eb0eb3c..2268ec99ed 100644 --- a/xen/arch/x86/boot/head.S +++ b/xen/arch/x86/boot/head.S @@ -697,16 +697,23 @@ trampoline_setup: lea __PAGE_HYPERVISOR+sym_esi(l1_identmap),%edi mov %edi,sym_fs(l2_bootmap) - /* Apply relocations to bootstrap trampoline. */ - mov sym_fs(trampoline_phys),%edx - mov $sym_offs(__trampoline_rel_start),%edi -1: - mov %fs:(%edi),%eax - add %edx,%fs:(%edi,%eax) - add $4,%edi - cmp $sym_offs(__trampoline_rel_stop),%edi - jb 1b + /* Do not parse command line on EFI platform here. */ + cmpb $0,sym_fs(efi_platform) + jnz 1f + /* Bail if there is no command line to parse. */ + mov sym_fs(multiboot_ptr),%ebx + testl $MBI_CMDLINE,MB_flags(%ebx) + jz 1f + + lea sym_esi(early_boot_opts),%eax + push %eax + pushl MB_cmdline(%ebx) + call cmdline_parse_early + +1: + /* Apply relocations to 32-bit trampoline for execution in place. */ + lea sym_esi(perm_trampoline_start),%edx mov $sym_offs(__trampoline32_rel_start),%edi 1: mov %fs:(%edi),%eax @@ -715,6 +722,21 @@ trampoline_setup: cmp $sym_offs(__trampoline32_rel_stop),%edi jb 1b + cmp $0,sym_esi(skip_realmode) + jz .Ldo_realmode + + /* Go directly to trampoline_protmode_entry at its physical address */ + lea trampoline_protmode_entry-__XEN_VIRT_START(%esi),%eax + pushl $BOOT_CS32 + push %eax + + /* EBX == 0 indicates we are the BP (Boot Processor). */ + xor %ebx,%ebx + retl + +.Ldo_realmode: + /* Apply relocations to 16-bit boot code. */ + mov sym_fs(trampoline_phys),%edx mov $sym_offs(__bootsym_rel_start),%edi 1: mov %fs:(%edi),%eax @@ -744,35 +766,12 @@ trampoline_setup: cmp $sym_offs(__bootdatasym_rel_stop),%edi jb 1b - /* Do not parse command line on EFI platform here. */ - cmpb $0,sym_fs(efi_platform) - jnz 1f - - /* Bail if there is no command line to parse. */ - mov sym_fs(multiboot_ptr),%ebx - testl $MBI_CMDLINE,MB_flags(%ebx) - jz 1f - - lea sym_esi(early_boot_opts),%eax - push %eax - pushl MB_cmdline(%ebx) - call cmdline_parse_early - -1: /* Switch to low-memory stack which lives at the end of trampoline region. */ mov sym_fs(trampoline_phys),%edi lea TRAMPOLINE_SPACE+TRAMPOLINE_STACK_SPACE(%edi),%esp - cmpb $0, sym_fs(skip_realmode) - jz 1f - /* If no-real-mode, jump straight to trampoline_protmode_entry */ - lea trampoline_protmode_entry-boot_trampoline_start(%edi),%eax - /* EBX == 0 indicates we are the BP (Boot Processor). */ - xor %ebx,%ebx - jmp 2f -1: + /* Go via 16-bit code in trampoline_boot_cpu_entry */ lea trampoline_boot_cpu_entry-boot_trampoline_start(%edi),%eax -2: pushl $BOOT_CS32 push %eax @@ -795,8 +794,6 @@ cmdline_parse_early: reloc: #include "reloc.S" -ENTRY(boot_trampoline_start) #include "trampoline.S" -ENTRY(boot_trampoline_end) #include "x86_64.S" diff --git a/xen/arch/x86/boot/trampoline.S b/xen/arch/x86/boot/trampoline.S index b5517a44bb..7b9ebb6172 100644 --- a/xen/arch/x86/boot/trampoline.S +++ b/xen/arch/x86/boot/trampoline.S @@ -60,7 +60,7 @@ GLOBAL(bootdata_start) .popsection #undef trampsym -#define trampsym(s) ((s)-boot_trampoline_start) +#define trampsym(s) ((s)-perm_trampoline_start) #define trampsym_rel(sym, off, opnd...) \ trampsym(sym),##opnd; \ @@ -70,7 +70,7 @@ GLOBAL(bootdata_start) .popsection #undef tramp32sym -#define tramp32sym(s) ((s)-boot_trampoline_start) +#define tramp32sym(s) ((s)-perm_trampoline_start) #define tramp32sym_rel(sym, off, opnd...) \ tramp32sym(sym),##opnd; \ @@ -83,6 +83,8 @@ GLOBAL(bootdata_start) .code16 +ENTRY(perm_trampoline_start) + /* * do_boot_cpu() programs the Startup-IPI to point here. Due to the SIPI * format, the relocated entrypoint must be 4k aligned. @@ -90,6 +92,7 @@ GLOBAL(bootdata_start) * It is entered in Real Mode, with %cs = trampoline_realmode_entry >> 4 and * %ip = 0. */ + GLOBAL(trampoline_realmode_entry) mov %cs,%ax mov %ax,%ds @@ -97,7 +100,7 @@ GLOBAL(trampoline_realmode_entry) cld cli lidt trampsym(idt_48) - lgdt trampsym(gdt_48) + lgdtl trampsym(gdt_48) mov $1,%bl # EBX != 0 indicates we are an AP xor %ax, %ax inc %ax @@ -236,11 +239,23 @@ gdt_48: .word 7*8-1 /* The first page of trampoline is permanent, the rest boot-time only. */ /* Reuse the boot trampoline on the 1st trampoline page as stack for wakeup. */ - .equ wakeup_stack, boot_trampoline_start + PAGE_SIZE + .equ wakeup_stack, perm_trampoline_start + PAGE_SIZE .global wakeup_stack +ENTRY(perm_trampoline_end) + /* From here on early boot only. */ +ENTRY(boot_trampoline_start) + + .word 0 +boot16_idt: + .word 0, 0, 0 # base = limit = 0 + .word 0 +boot16_gdt: + .word 7*8-1 + .long tramp32sym_rel(trampoline_gdt,4) + .code32 trampoline_boot_cpu_entry: /* Load pseudo-real-mode segments. */ @@ -304,8 +319,8 @@ trampoline_boot_cpu_entry: cli /* Reset GDT and IDT. Some BIOSes clobber GDTR. */ - lidt bootsym(idt_48) - lgdt bootsym(gdt_48) + lidt bootsym(boot16_idt) + lgdtl bootsym(boot16_gdt) /* Enter protected mode, and flush insn queue. */ xor %ax,%ax @@ -343,7 +358,8 @@ trampoline_boot_cpu_entry: xor %ebx,%ebx /* Jump to the common bootstrap entry point. */ - jmp trampoline_protmode_entry + mov $tramp32sym_rel(trampoline_protmode_entry,4,%eax) + jmp *%eax #include "video.h" @@ -379,6 +395,8 @@ rm_idt: .word 256*4-1, 0, 0 #include "video.S" #endif +ENTRY(boot_trampoline_end) + .pushsection .data.boot16, "aw", @progbits GLOBAL(bootdata_end) .popsection diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c index 7478e21177..d202216739 100644 --- a/xen/arch/x86/cpu/common.c +++ b/xen/arch/x86/cpu/common.c @@ -389,7 +389,7 @@ static void generic_identify(struct cpuinfo_x86 *c) &c->x86_capability[cpufeat_word(X86_FEATURE_LAHF_LM)], &c->x86_capability[cpufeat_word(X86_FEATURE_SYSCALL)]); if (c == &boot_cpu_data) - bootsym(cpuid_ext_features) = + trampsym(cpuid_ext_features) = c->x86_capability[cpufeat_word(X86_FEATURE_NX)]; if (c->extended_cpuid_level >= 0x80000004) diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c index 5356a6ae10..1970eb1848 100644 --- a/xen/arch/x86/cpu/intel.c +++ b/xen/arch/x86/cpu/intel.c @@ -269,7 +269,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) MSR_IA32_MISC_ENABLE_XD_DISABLE); if (disable) { wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable & ~disable); - bootsym(trampoline_misc_enable_off) |= disable; + trampsym(trampoline_misc_enable_off) |= disable; } if (disable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h index fc2ea554b5..e7418894a4 100644 --- a/xen/arch/x86/efi/efi-boot.h +++ b/xen/arch/x86/efi/efi-boot.h @@ -98,29 +98,6 @@ static void __init efi_arch_relocate_image(unsigned long delta) } } -extern const s32 __trampoline_rel_start[], __trampoline_rel_stop[]; -extern const s32 __trampoline32_rel_start[], __trampoline32_rel_stop[]; - -static void __init relocate_trampoline(unsigned long phys) -{ - const s32 *trampoline_ptr; - - trampoline_phys = phys; - - if ( !efi_enabled(EFI_LOADER) ) - return; - - /* Apply relocations to trampoline. */ - for ( trampoline_ptr = __trampoline_rel_start; - trampoline_ptr < __trampoline_rel_stop; - ++trampoline_ptr ) - *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) += phys; - for ( trampoline_ptr = __trampoline32_rel_start; - trampoline_ptr < __trampoline32_rel_stop; - ++trampoline_ptr ) - *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) += phys; -} - static void __init place_string(u32 *addr, const char *s) { char *alloc = NULL; @@ -223,7 +200,7 @@ static void __init efi_arch_pre_exit_boot(void) { if ( !cfg.addr ) blexit(L"No memory for trampoline"); - relocate_trampoline(cfg.addr); + trampoline_phys = cfg.addr; } } @@ -232,7 +209,6 @@ static void __init noreturn efi_arch_post_exit_boot(void) u64 cr4 = XEN_MINIMAL_CR4 & ~X86_CR4_PGE, efer; efi_arch_relocate_image(__XEN_VIRT_START - xen_phys_start); - memcpy((void *)trampoline_phys, boot_trampoline_start, cfg.size); /* Set system registers and transfer control. */ asm volatile("pushq $0\n\tpopfq"); @@ -566,14 +542,14 @@ static void __init efi_arch_memory_setup(void) cfg.addr = 0x100000; if ( efi_enabled(EFI_LOADER) ) - cfg.size = boot_trampoline_end - boot_trampoline_start; + cfg.size = perm_trampoline_end - perm_trampoline_start; else cfg.size = TRAMPOLINE_SPACE + TRAMPOLINE_STACK_SPACE; status = efi_bs->AllocatePages(AllocateMaxAddress, EfiLoaderData, PFN_UP(cfg.size), &cfg.addr); if ( status == EFI_SUCCESS ) - relocate_trampoline(cfg.addr); + trampoline_phys = cfg.addr; else { cfg.addr = 0; @@ -665,7 +641,6 @@ static void __init efi_arch_load_addr_check(EFI_LOADED_IMAGE *loaded_image) blexit(L"Xen must be loaded below 4Gb."); if ( xen_phys_start & ((1 << L2_PAGETABLE_SHIFT) - 1) ) blexit(L"Xen must be loaded at a 2Mb boundary."); - trampoline_xen_phys_start = xen_phys_start; } static bool __init efi_arch_use_config_file(EFI_SYSTEM_TABLE *SystemTable) diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index 101c9dd272..eb8bc7e33b 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -682,6 +682,42 @@ static unsigned int __init copy_bios_e820(struct e820entry *map, unsigned int li return n; } +extern const s32 __trampoline_rel_start[], __trampoline_rel_stop[]; +extern const s32 __trampoline32_rel_start[], __trampoline32_rel_stop[]; + +static void __init relocate_trampoline(unsigned long phys) +{ + const s32 *trampoline_ptr; + uint32_t tramp32_delta = 0; + + /* Apply relocations to trampoline. */ + for ( trampoline_ptr = __trampoline_rel_start; + trampoline_ptr < __trampoline_rel_stop; + ++trampoline_ptr ) + *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) += phys; + + tramp32_delta = phys; + if (!efi_enabled(EFI_LOADER)) { + /* + * The non-EFI boot code uses the 32-bit trampoline in place + * so will have relocated it to the physical address of + * perm_trampoline_start already. Undo that as it needs to + * run from low memory for AP startup, because the Xen + * physical address range won't be mapped. + */ + tramp32_delta -= trampoline_xen_phys_start; + tramp32_delta -= (unsigned long)(perm_trampoline_start - __XEN_VIRT_START); + } + for ( trampoline_ptr = __trampoline32_rel_start; + trampoline_ptr < __trampoline32_rel_stop; + ++trampoline_ptr ) + *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) += tramp32_delta; + trampoline_xen_phys_start = xen_phys_start; + + memcpy(trampsym(perm_trampoline_start), perm_trampoline_start, + perm_trampoline_end - perm_trampoline_start); +} + void __init noreturn __start_xen(unsigned long mbi_p) { char *memmap_type = NULL; @@ -1076,7 +1112,6 @@ void __init noreturn __start_xen(unsigned long mbi_p) /* Select relocation address. */ e = end - reloc_size; xen_phys_start = e; - bootsym(trampoline_xen_phys_start) = e; /* * No PTEs pointing above this address are candidates for relocation. @@ -1509,6 +1544,8 @@ void __init noreturn __start_xen(unsigned long mbi_p) else end_boot_allocator(); + relocate_trampoline(trampoline_phys); + system_state = SYS_STATE_boot; /* * No calls involving ACPI code should go between the setting of @@ -1879,8 +1916,8 @@ int __hwdom_init xen_in_range(unsigned long mfn) if ( !xen_regions[0].s ) { /* S3 resume code (and other real mode trampoline code) */ - xen_regions[region_s3].s = bootsym_phys(boot_trampoline_start); - xen_regions[region_s3].e = bootsym_phys(boot_trampoline_end); + xen_regions[region_s3].s = trampsym_phys(perm_trampoline_start); + xen_regions[region_s3].e = trampsym_phys(perm_trampoline_end); /* * This needs to remain in sync with the uses of the same symbols in diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c index 65e9ceeece..0410331a9e 100644 --- a/xen/arch/x86/smpboot.c +++ b/xen/arch/x86/smpboot.c @@ -47,7 +47,7 @@ #include <asm/tboot.h> #include <mach_apic.h> -#define setup_trampoline() (bootsym_phys(trampoline_realmode_entry)) +#define setup_trampoline() (trampsym_phys(trampoline_realmode_entry)) unsigned long __read_mostly trampoline_phys; @@ -598,7 +598,7 @@ static int do_boot_cpu(int apicid, int cpu) { boot_error = 1; smp_mb(); - if ( bootsym(trampoline_cpu_started) == 0xA5 ) + if ( trampsym(trampoline_cpu_started) == 0xA5 ) /* trampoline started but...? */ printk("Stuck ??\n"); else @@ -614,7 +614,7 @@ static int do_boot_cpu(int apicid, int cpu) } /* mark "stuck" area as not stuck */ - bootsym(trampoline_cpu_started) = 0; + trampsym(trampoline_cpu_started) = 0; smp_mb(); return rc; diff --git a/xen/arch/x86/tboot.c b/xen/arch/x86/tboot.c index 325d94d23a..daf19d346a 100644 --- a/xen/arch/x86/tboot.c +++ b/xen/arch/x86/tboot.c @@ -369,9 +369,9 @@ void tboot_shutdown(uint32_t shutdown_type) */ g_tboot_shared->num_mac_regions = 3; /* S3 resume code (and other real mode trampoline code) */ - g_tboot_shared->mac_regions[0].start = bootsym_phys(boot_trampoline_start); - g_tboot_shared->mac_regions[0].size = bootsym_phys(boot_trampoline_end) - - bootsym_phys(boot_trampoline_start); + g_tboot_shared->mac_regions[0].start = trampsym_phys(perm_trampoline_start); + g_tboot_shared->mac_regions[0].size = trampsym_phys(perm_trampoline_end) - + trampsym_phys(perm_trampoline_start); /* hypervisor .text + .rodata */ g_tboot_shared->mac_regions[1].start = (uint64_t)__pa(&_stext); g_tboot_shared->mac_regions[1].size = __pa(&__2M_rodata_end) - diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c index 149cc4f7b5..05d476c423 100644 --- a/xen/arch/x86/x86_64/mm.c +++ b/xen/arch/x86/x86_64/mm.c @@ -697,7 +697,7 @@ void __init zap_low_mappings(void) /* Replace with mapping of the boot trampoline only. */ map_pages_to_xen(trampoline_phys, maddr_to_mfn(trampoline_phys), - PFN_UP(boot_trampoline_end - boot_trampoline_start), + PFN_UP(perm_trampoline_end - perm_trampoline_start), __PAGE_HYPERVISOR); } diff --git a/xen/include/asm-x86/acpi.h b/xen/include/asm-x86/acpi.h index 7032f3a001..8c5381aa47 100644 --- a/xen/include/asm-x86/acpi.h +++ b/xen/include/asm-x86/acpi.h @@ -106,7 +106,7 @@ extern int acpi_scan_nodes(u64 start, u64 end); #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) extern struct acpi_sleep_info acpi_sinfo; -#define acpi_video_flags bootsym(video_flags) +#define acpi_video_flags trampsym(video_flags) struct xenpf_enter_acpi_sleep; extern int acpi_enter_sleep(struct xenpf_enter_acpi_sleep *sleep); extern int acpi_enter_state(u32 state); diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h index ada8c7b4f6..78634296b5 100644 --- a/xen/include/asm-x86/config.h +++ b/xen/include/asm-x86/config.h @@ -89,12 +89,12 @@ #ifndef __ASSEMBLY__ extern unsigned long trampoline_phys; -#define bootsym_phys(sym) \ - (((unsigned long)&(sym)-(unsigned long)&boot_trampoline_start)+trampoline_phys) -#define bootsym(sym) \ +#define trampsym_phys(sym) \ + (((unsigned long)&(sym)-(unsigned long)&perm_trampoline_start)+trampoline_phys) +#define trampsym(sym) \ (*RELOC_HIDE((typeof(&(sym)))__va(__pa(&(sym))), \ - trampoline_phys-__pa(boot_trampoline_start))) -extern char boot_trampoline_start[], boot_trampoline_end[]; + trampoline_phys-__pa(perm_trampoline_start))) +extern char perm_trampoline_start[], perm_trampoline_end[]; extern char trampoline_realmode_entry[]; extern unsigned int trampoline_xen_phys_start; extern unsigned char trampoline_cpu_started;