diff mbox series

[v3,5/5] x86/boot: Do not use trampoline for no-real-mode boot paths

Message ID 20190821163542.172063-5-dwmw2@infradead.org (mailing list archive)
State New, archived
Headers show
Series Clean up x86_64 boot code | expand

Commit Message

David Woodhouse Aug. 21, 2019, 4:35 p.m. UTC
From: David Woodhouse <dwmw@amazon.co.uk>

Where booted from EFI or with no-real-mode, there is no need to stomp
on low memory with the 16-boot code. Instead, just go straight to
trampoline_protmode_entry() at its physical location within the Xen
image, having applied suitable relocations.

This means that the GDT has to be loaded with lgdtl because the 16-bit
lgdt instruction would drop the high 8 bits of the gdt_trampoline
address, causing failures if the Xen image was loaded above 16MiB.

For now, the boot code (including the EFI loader path) still determines
what the trampoline_phys address should be for the permanent 16-bit
trampoline (used for AP startup, and wakeup). But that trampoline is
now only relocated for that address and copied into low memory later,
from a relocate_trampoline() call made from __start_xen().

The permanent trampoline can't trivially use the 32-bit code in place in
its physical location in the Xen image, as idle_pg_table doesn't have a
full physical mapping. Fixing that is theoretically possible but it's
actually much simpler just to relocate the 32-bit code (again) into low
memory, alongside the 16-bit code for the permanent trampoline.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 xen/arch/x86/acpi/power.c      |  6 +--
 xen/arch/x86/boot/head.S       | 67 ++++++++++++++++------------------
 xen/arch/x86/boot/trampoline.S | 32 ++++++++++++----
 xen/arch/x86/cpu/common.c      |  2 +-
 xen/arch/x86/cpu/intel.c       |  2 +-
 xen/arch/x86/efi/efi-boot.h    | 31 ++--------------
 xen/arch/x86/setup.c           | 46 +++++++++++++++++++++--
 xen/arch/x86/smpboot.c         |  6 +--
 xen/arch/x86/tboot.c           |  6 +--
 xen/arch/x86/x86_64/mm.c       |  2 +-
 xen/include/asm-x86/acpi.h     |  2 +-
 xen/include/asm-x86/config.h   | 12 +++---
 12 files changed, 122 insertions(+), 92 deletions(-)

Comments

Jan Beulich Sept. 2, 2019, 8:54 a.m. UTC | #1
On 21.08.2019 18:35, David Woodhouse wrote:
> From: David Woodhouse <dwmw@amazon.co.uk>
> 
> Where booted from EFI or with no-real-mode, there is no need to stomp
> on low memory with the 16-boot code. Instead, just go straight to
> trampoline_protmode_entry() at its physical location within the Xen
> image, having applied suitable relocations.
> 
> This means that the GDT has to be loaded with lgdtl because the 16-bit
> lgdt instruction would drop the high 8 bits of the gdt_trampoline
> address, causing failures if the Xen image was loaded above 16MiB.

There's a 2nd case where we assume an address not exceeding 16MiB:
The BOOT_PSEUDORM_{C,D}S entries on trampoline_gdt. While they'll
be unused (afaict) when not going through real mode, them getting
corrupted by applying relocations still seems somewhat risky to
me. I therefore wonder whether we shouldn't re-arrange this GDT's
layout in a prereq patch, such that theses two entries would go
last, and the GDT limit be reduced by two entries when skipping
real mode.

> --- a/xen/arch/x86/boot/head.S
> +++ b/xen/arch/x86/boot/head.S
> @@ -689,16 +689,23 @@ trampoline_setup:
>          lea     __PAGE_HYPERVISOR+sym_esi(l1_identmap),%edi
>          mov     %edi,sym_fs(l2_bootmap)
>  
> -        /* Apply relocations to bootstrap trampoline. */
> -        mov     sym_fs(trampoline_phys),%edx
> -        mov     $sym_offs(__trampoline_rel_start),%edi
> -1:
> -        mov     %fs:(%edi),%eax
> -        add     %edx,%fs:(%edi,%eax)
> -        add     $4,%edi
> -        cmp     $sym_offs(__trampoline_rel_stop),%edi
> -        jb      1b
> +        /* Do not parse command line on EFI platform here. */
> +        cmpb    $0,sym_fs(efi_platform)
> +        jnz     1f
>  
> +        /* Bail if there is no command line to parse. */
> +        mov     sym_fs(multiboot_ptr),%ebx
> +        testl   $MBI_CMDLINE,MB_flags(%ebx)
> +        jz      1f

As a really minor nit - I think it is generally better to have CMP
followed by JE/JNE, while (as you already have it) TEST/AND/OR are
better followed by JZ/JNZ. (Obviously this is a remark applicable to
the series as a whole.)

> @@ -90,6 +92,7 @@ GLOBAL(bootdata_start)
>   * It is entered in Real Mode, with %cs = trampoline_realmode_entry >> 4 and
>   * %ip = 0.
>   */
> +
>  GLOBAL(trampoline_realmode_entry)
>          mov     %cs,%ax
>          mov     %ax,%ds

I'd prefer if you didn't insert a blank line here, as the comment is
specifically associated with this label.

> @@ -97,7 +100,7 @@ GLOBAL(trampoline_realmode_entry)
>          cld
>          cli
>          lidt    trampsym(idt_48)
> -        lgdt    trampsym(gdt_48)
> +        lgdtl   trampsym(gdt_48)
>          mov     $1,%bl                    # EBX != 0 indicates we are an AP
>          xor     %ax, %ax
>          inc     %ax

As per the remark further up, trampoline_gdt's two entries (below
here) having a relocation associate with them should imo at least get
a comment added to state why (other than for the LGDTL here) there's
no issue with a relocation to an address above 16MiB.

> @@ -236,11 +239,23 @@ gdt_48: .word   7*8-1
>  
>  /* The first page of trampoline is permanent, the rest boot-time only. */
>  /* Reuse the boot trampoline on the 1st trampoline page as stack for wakeup. */
> -        .equ    wakeup_stack, boot_trampoline_start + PAGE_SIZE
> +        .equ    wakeup_stack, perm_trampoline_start + PAGE_SIZE

Doesn't this, at the very least, render the comment stale?

>          .global wakeup_stack
>  
> +ENTRY(perm_trampoline_end)
> +
>  /* From here on early boot only. */
>  
> +ENTRY(boot_trampoline_start)
> +
> +        .word   0
> +boot16_idt:
> +        .word   0, 0, 0 # base = limit = 0
> +        .word   0
> +boot16_gdt:
> +        .word   7*8-1
> +        .long   tramp32sym_rel(trampoline_gdt,4)

As there's no change in this patch to how/where the boot trampoline
gets copied, am I understanding right that both end up at
trampoline_phys, just at different points in time? Otherwise I wonder
what the alignment of the relocated boot_trampoline_start end up
being, and hence whether the initial .word here is actually useful at
all.

> @@ -343,7 +358,8 @@ trampoline_boot_cpu_entry:
>          xor     %ebx,%ebx
>  
>          /* Jump to the common bootstrap entry point. */
> -        jmp     trampoline_protmode_entry
> +        mov     $tramp32sym_rel(trampoline_protmode_entry,4,%eax)
> +        jmp     *%eax

Do you really need to switch to an indirect branch here? I.e. can't
there be a relocation associated with the direct JMP's displacement?

> --- a/xen/arch/x86/setup.c
> +++ b/xen/arch/x86/setup.c
> @@ -679,6 +679,45 @@ static unsigned int __init copy_bios_e820(struct e820entry *map, unsigned int li
>      return n;
>  }
>  
> +extern const s32 __trampoline_rel_start[], __trampoline_rel_stop[];
> +extern const s32 __trampoline32_rel_start[], __trampoline32_rel_stop[];
> +
> +static void __init relocate_trampoline(unsigned long phys)
> +{
> +    const s32 *trampoline_ptr;
> +    uint32_t tramp32_delta;
> +
> +    /* Apply relocations to trampoline. */
> +    for ( trampoline_ptr = __trampoline_rel_start;
> +          trampoline_ptr < __trampoline_rel_stop;
> +          ++trampoline_ptr )
> +        *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) += phys;
> +
> +    tramp32_delta = phys;
> +    if ( !efi_enabled(EFI_LOADER) )
> +    {
> +        /*
> +         * The non-EFI boot code uses the 32-bit trampoline in place
> +         * so will have relocated it to the physical address of
> +         * perm_trampoline_start already. Undo that as it needs to
> +         * run from low memory for AP startup, because the Xen
> +         * physical address range won't be mapped.
> +         */
> +        tramp32_delta -= trampoline_xen_phys_start;
> +        tramp32_delta -= (unsigned long)(perm_trampoline_start - __XEN_VIRT_START);
> +    }
> +    for ( trampoline_ptr = __trampoline32_rel_start;
> +          trampoline_ptr < __trampoline32_rel_stop;
> +          ++trampoline_ptr )
> +    {
> +        *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) += tramp32_delta;
> +    }

Along the lines of the loop further up, lease omit the braces here.

> --- a/xen/arch/x86/smpboot.c
> +++ b/xen/arch/x86/smpboot.c
> @@ -47,7 +47,7 @@
>  #include <asm/tboot.h>
>  #include <mach_apic.h>
>  
> -#define setup_trampoline()    (bootsym_phys(trampoline_realmode_entry))
> +#define setup_trampoline()    (trampsym_phys(trampoline_realmode_entry))

Would you mind taking the opportunity and strip the unnecessary pair
of parentheses here?

> --- a/xen/arch/x86/x86_64/mm.c
> +++ b/xen/arch/x86/x86_64/mm.c
> @@ -697,7 +697,7 @@ void __init zap_low_mappings(void)
>  
>      /* Replace with mapping of the boot trampoline only. */
>      map_pages_to_xen(trampoline_phys, maddr_to_mfn(trampoline_phys),
> -                     PFN_UP(boot_trampoline_end - boot_trampoline_start),
> +                     PFN_UP(perm_trampoline_end - perm_trampoline_start),
>                       __PAGE_HYPERVISOR);

You also want to adjust the comment then.

Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c
index aecc754fdb..d2a355429e 100644
--- a/xen/arch/x86/acpi/power.c
+++ b/xen/arch/x86/acpi/power.c
@@ -152,9 +152,9 @@  static void acpi_sleep_prepare(u32 state)
         return;
 
     if ( acpi_sinfo.vector_width == 32 )
-        *(uint32_t *)wakeup_vector_va = bootsym_phys(wakeup_start);
+        *(uint32_t *)wakeup_vector_va = trampsym_phys(wakeup_start);
     else
-        *(uint64_t *)wakeup_vector_va = bootsym_phys(wakeup_start);
+        *(uint64_t *)wakeup_vector_va = trampsym_phys(wakeup_start);
 }
 
 static void acpi_sleep_post(u32 state) {}
@@ -388,7 +388,7 @@  static void tboot_sleep(u8 sleep_state)
     g_tboot_shared->acpi_sinfo.wakeup_vector = acpi_sinfo.wakeup_vector;
     g_tboot_shared->acpi_sinfo.vector_width = acpi_sinfo.vector_width;
     g_tboot_shared->acpi_sinfo.kernel_s3_resume_vector =
-                                              bootsym_phys(wakeup_start);
+                                              trampsym_phys(wakeup_start);
 
     switch ( sleep_state )
     {
diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
index 6d315020d2..cebad00c13 100644
--- a/xen/arch/x86/boot/head.S
+++ b/xen/arch/x86/boot/head.S
@@ -689,16 +689,23 @@  trampoline_setup:
         lea     __PAGE_HYPERVISOR+sym_esi(l1_identmap),%edi
         mov     %edi,sym_fs(l2_bootmap)
 
-        /* Apply relocations to bootstrap trampoline. */
-        mov     sym_fs(trampoline_phys),%edx
-        mov     $sym_offs(__trampoline_rel_start),%edi
-1:
-        mov     %fs:(%edi),%eax
-        add     %edx,%fs:(%edi,%eax)
-        add     $4,%edi
-        cmp     $sym_offs(__trampoline_rel_stop),%edi
-        jb      1b
+        /* Do not parse command line on EFI platform here. */
+        cmpb    $0,sym_fs(efi_platform)
+        jnz     1f
 
+        /* Bail if there is no command line to parse. */
+        mov     sym_fs(multiboot_ptr),%ebx
+        testl   $MBI_CMDLINE,MB_flags(%ebx)
+        jz      1f
+
+        lea     sym_esi(early_boot_opts),%eax
+        push    %eax
+        pushl   MB_cmdline(%ebx)
+        call    cmdline_parse_early
+
+1:
+        /* Apply relocations to 32-bit trampoline for execution in place. */
+        lea     sym_esi(perm_trampoline_start),%edx
         mov     $sym_offs(__trampoline32_rel_start),%edi
 1:
         mov     %fs:(%edi),%eax
@@ -707,6 +714,21 @@  trampoline_setup:
         cmp     $sym_offs(__trampoline32_rel_stop),%edi
         jb      1b
 
+        cmp     $0,sym_esi(skip_realmode)
+        jz      .Ldo_realmode
+
+        /* Go directly to trampoline_protmode_entry at its physical address */
+        lea     trampoline_protmode_entry-__XEN_VIRT_START(%esi),%eax
+        pushl   $BOOT_CS32
+        push    %eax
+
+        /* EBX == 0 indicates we are the BP (Boot Processor). */
+        xor     %ebx,%ebx
+        retl
+
+.Ldo_realmode:
+        /* Apply relocations to 16-bit boot code. */
+        mov     sym_fs(trampoline_phys),%edx
         mov     $sym_offs(__bootsym_rel_start),%edi
 1:
         mov     %fs:(%edi),%eax
@@ -736,35 +758,12 @@  trampoline_setup:
         cmp     $sym_offs(__bootdatasym_rel_stop),%edi
         jb      1b
 
-        /* Do not parse command line on EFI platform here. */
-        cmpb    $0,sym_fs(efi_platform)
-        jnz     1f
-
-        /* Bail if there is no command line to parse. */
-        mov     sym_fs(multiboot_ptr),%ebx
-        testl   $MBI_CMDLINE,MB_flags(%ebx)
-        jz      1f
-
-        lea     sym_esi(early_boot_opts),%eax
-        push    %eax
-        pushl   MB_cmdline(%ebx)
-        call    cmdline_parse_early
-
-1:
         /* Switch to low-memory stack which lives at the end of trampoline region. */
         mov     sym_fs(trampoline_phys),%edi
         lea     TRAMPOLINE_SPACE+TRAMPOLINE_STACK_SPACE(%edi),%esp
-        cmpb    $0, sym_fs(skip_realmode)
-        jz      1f
-        /* If no-real-mode, jump straight to trampoline_protmode_entry */
-        lea     trampoline_protmode_entry-boot_trampoline_start(%edi),%eax
-        /* EBX == 0 indicates we are the BP (Boot Processor). */
-        xor     %ebx,%ebx
-        jmp     2f
-1:
+
         /* Go via 16-bit code in trampoline_boot_cpu_entry */
         lea     trampoline_boot_cpu_entry-boot_trampoline_start(%edi),%eax
-2:
         pushl   $BOOT_CS32
         push    %eax
 
@@ -787,8 +786,6 @@  cmdline_parse_early:
 reloc:
 #include "reloc.S"
 
-ENTRY(boot_trampoline_start)
 #include "trampoline.S"
-ENTRY(boot_trampoline_end)
 
 #include "x86_64.S"
diff --git a/xen/arch/x86/boot/trampoline.S b/xen/arch/x86/boot/trampoline.S
index c747d70404..b846977ad0 100644
--- a/xen/arch/x86/boot/trampoline.S
+++ b/xen/arch/x86/boot/trampoline.S
@@ -60,7 +60,7 @@  GLOBAL(bootdata_start)
         .popsection
 
 #undef trampsym
-#define trampsym(s) ((s)-boot_trampoline_start)
+#define trampsym(s) ((s)-perm_trampoline_start)
 
 #define trampsym_rel(sym, off, opnd...)    \
         trampsym(sym),##opnd;              \
@@ -70,7 +70,7 @@  GLOBAL(bootdata_start)
         .popsection
 
 #undef tramp32sym
-#define tramp32sym(s) ((s)-boot_trampoline_start)
+#define tramp32sym(s) ((s)-perm_trampoline_start)
 
 #define tramp32sym_rel(sym, off, opnd...)  \
         tramp32sym(sym),##opnd;            \
@@ -83,6 +83,8 @@  GLOBAL(bootdata_start)
 
         .code16
 
+ENTRY(perm_trampoline_start)
+
 /*
  * do_boot_cpu() programs the Startup-IPI to point here.  Due to the SIPI
  * format, the relocated entrypoint must be 4k aligned.
@@ -90,6 +92,7 @@  GLOBAL(bootdata_start)
  * It is entered in Real Mode, with %cs = trampoline_realmode_entry >> 4 and
  * %ip = 0.
  */
+
 GLOBAL(trampoline_realmode_entry)
         mov     %cs,%ax
         mov     %ax,%ds
@@ -97,7 +100,7 @@  GLOBAL(trampoline_realmode_entry)
         cld
         cli
         lidt    trampsym(idt_48)
-        lgdt    trampsym(gdt_48)
+        lgdtl   trampsym(gdt_48)
         mov     $1,%bl                    # EBX != 0 indicates we are an AP
         xor     %ax, %ax
         inc     %ax
@@ -236,11 +239,23 @@  gdt_48: .word   7*8-1
 
 /* The first page of trampoline is permanent, the rest boot-time only. */
 /* Reuse the boot trampoline on the 1st trampoline page as stack for wakeup. */
-        .equ    wakeup_stack, boot_trampoline_start + PAGE_SIZE
+        .equ    wakeup_stack, perm_trampoline_start + PAGE_SIZE
         .global wakeup_stack
 
+ENTRY(perm_trampoline_end)
+
 /* From here on early boot only. */
 
+ENTRY(boot_trampoline_start)
+
+        .word   0
+boot16_idt:
+        .word   0, 0, 0 # base = limit = 0
+        .word   0
+boot16_gdt:
+        .word   7*8-1
+        .long   tramp32sym_rel(trampoline_gdt,4)
+
         .code32
 trampoline_boot_cpu_entry:
         /* Load pseudo-real-mode segments. */
@@ -304,8 +319,8 @@  trampoline_boot_cpu_entry:
         cli
 
         /* Reset GDT and IDT. Some BIOSes clobber GDTR. */
-        lidt    bootsym(idt_48)
-        lgdt    bootsym(gdt_48)
+        lidt    bootsym(boot16_idt)
+        lgdtl   bootsym(boot16_gdt)
 
         /* Enter protected mode, and flush insn queue. */
         xor     %ax,%ax
@@ -343,7 +358,8 @@  trampoline_boot_cpu_entry:
         xor     %ebx,%ebx
 
         /* Jump to the common bootstrap entry point. */
-        jmp     trampoline_protmode_entry
+        mov     $tramp32sym_rel(trampoline_protmode_entry,4,%eax)
+        jmp     *%eax
 
 #include "video.h"
 
@@ -379,6 +395,8 @@  rm_idt: .word   256*4-1, 0, 0
 #include "video.S"
 #endif
 
+ENTRY(boot_trampoline_end)
+
         .pushsection .data.boot16, "aw", @progbits
 GLOBAL(bootdata_end)
         .popsection
diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
index a074176c00..05ced26597 100644
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -389,7 +389,7 @@  static void generic_identify(struct cpuinfo_x86 *c)
 		      &c->x86_capability[cpufeat_word(X86_FEATURE_LAHF_LM)],
 		      &c->x86_capability[cpufeat_word(X86_FEATURE_SYSCALL)]);
 	if (c == &boot_cpu_data)
-		bootsym(cpuid_ext_features) =
+		trampsym(cpuid_ext_features) =
 			c->x86_capability[cpufeat_word(X86_FEATURE_NX)];
 
 	if (c->extended_cpuid_level >= 0x80000004)
diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c
index 5356a6ae10..1970eb1848 100644
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -269,7 +269,7 @@  static void early_init_intel(struct cpuinfo_x86 *c)
 				 MSR_IA32_MISC_ENABLE_XD_DISABLE);
 	if (disable) {
 		wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable & ~disable);
-		bootsym(trampoline_misc_enable_off) |= disable;
+		trampsym(trampoline_misc_enable_off) |= disable;
 	}
 
 	if (disable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID)
diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
index fac2b5e12a..f8c6ea100c 100644
--- a/xen/arch/x86/efi/efi-boot.h
+++ b/xen/arch/x86/efi/efi-boot.h
@@ -98,29 +98,6 @@  static void __init efi_arch_relocate_image(unsigned long delta)
     }
 }
 
-extern const s32 __trampoline_rel_start[], __trampoline_rel_stop[];
-extern const s32 __trampoline32_rel_start[], __trampoline32_rel_stop[];
-
-static void __init relocate_trampoline(unsigned long phys)
-{
-    const s32 *trampoline_ptr;
-
-    trampoline_phys = phys;
-
-    if ( !efi_enabled(EFI_LOADER) )
-        return;
-
-    /* Apply relocations to trampoline. */
-    for ( trampoline_ptr = __trampoline_rel_start;
-          trampoline_ptr < __trampoline_rel_stop;
-          ++trampoline_ptr )
-        *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) += phys;
-    for ( trampoline_ptr = __trampoline32_rel_start;
-          trampoline_ptr < __trampoline32_rel_stop;
-          ++trampoline_ptr )
-        *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) += phys;
-}
-
 static void __init place_string(u32 *addr, const char *s)
 {
     char *alloc = NULL;
@@ -223,7 +200,7 @@  static void __init efi_arch_pre_exit_boot(void)
     {
         if ( !cfg.addr )
             blexit(L"No memory for trampoline");
-        relocate_trampoline(cfg.addr);
+        trampoline_phys = cfg.addr;
     }
 }
 
@@ -232,7 +209,6 @@  static void __init noreturn efi_arch_post_exit_boot(void)
     u64 cr4 = XEN_MINIMAL_CR4 & ~X86_CR4_PGE, efer;
 
     efi_arch_relocate_image(__XEN_VIRT_START - xen_phys_start);
-    memcpy((void *)trampoline_phys, boot_trampoline_start, cfg.size);
 
     /* Set system registers and transfer control. */
     asm volatile("pushq $0\n\tpopfq");
@@ -566,14 +542,14 @@  static void __init efi_arch_memory_setup(void)
     cfg.addr = 0x100000;
 
     if ( efi_enabled(EFI_LOADER) )
-        cfg.size = boot_trampoline_end - boot_trampoline_start;
+        cfg.size = perm_trampoline_end - perm_trampoline_start;
     else
         cfg.size = TRAMPOLINE_SPACE + TRAMPOLINE_STACK_SPACE;
 
     status = efi_bs->AllocatePages(AllocateMaxAddress, EfiLoaderData,
                                    PFN_UP(cfg.size), &cfg.addr);
     if ( status == EFI_SUCCESS )
-        relocate_trampoline(cfg.addr);
+        trampoline_phys = cfg.addr;
     else
     {
         cfg.addr = 0;
@@ -665,7 +641,6 @@  static void __init efi_arch_load_addr_check(EFI_LOADED_IMAGE *loaded_image)
         blexit(L"Xen must be loaded below 4Gb.");
     if ( xen_phys_start & ((1 << L2_PAGETABLE_SHIFT) - 1) )
         blexit(L"Xen must be loaded at a 2Mb boundary.");
-    trampoline_xen_phys_start = xen_phys_start;
 }
 
 static bool __init efi_arch_use_config_file(EFI_SYSTEM_TABLE *SystemTable)
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 5c818caed2..4faafc180c 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -679,6 +679,45 @@  static unsigned int __init copy_bios_e820(struct e820entry *map, unsigned int li
     return n;
 }
 
+extern const s32 __trampoline_rel_start[], __trampoline_rel_stop[];
+extern const s32 __trampoline32_rel_start[], __trampoline32_rel_stop[];
+
+static void __init relocate_trampoline(unsigned long phys)
+{
+    const s32 *trampoline_ptr;
+    uint32_t tramp32_delta;
+
+    /* Apply relocations to trampoline. */
+    for ( trampoline_ptr = __trampoline_rel_start;
+          trampoline_ptr < __trampoline_rel_stop;
+          ++trampoline_ptr )
+        *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) += phys;
+
+    tramp32_delta = phys;
+    if ( !efi_enabled(EFI_LOADER) )
+    {
+        /*
+         * The non-EFI boot code uses the 32-bit trampoline in place
+         * so will have relocated it to the physical address of
+         * perm_trampoline_start already. Undo that as it needs to
+         * run from low memory for AP startup, because the Xen
+         * physical address range won't be mapped.
+         */
+        tramp32_delta -= trampoline_xen_phys_start;
+        tramp32_delta -= (unsigned long)(perm_trampoline_start - __XEN_VIRT_START);
+    }
+    for ( trampoline_ptr = __trampoline32_rel_start;
+          trampoline_ptr < __trampoline32_rel_stop;
+          ++trampoline_ptr )
+    {
+        *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) += tramp32_delta;
+    }
+    trampoline_xen_phys_start = xen_phys_start;
+
+    memcpy(trampsym(perm_trampoline_start), perm_trampoline_start,
+           perm_trampoline_end - perm_trampoline_start);
+}
+
 void __init noreturn __start_xen(unsigned long mbi_p)
 {
     char *memmap_type = NULL;
@@ -1073,7 +1112,6 @@  void __init noreturn __start_xen(unsigned long mbi_p)
             /* Select relocation address. */
             e = end - reloc_size;
             xen_phys_start = e;
-            bootsym(trampoline_xen_phys_start) = e;
 
             /*
              * No PTEs pointing above this address are candidates for relocation.
@@ -1506,6 +1544,8 @@  void __init noreturn __start_xen(unsigned long mbi_p)
     else
         end_boot_allocator();
 
+    relocate_trampoline(trampoline_phys);
+
     system_state = SYS_STATE_boot;
     /*
      * No calls involving ACPI code should go between the setting of
@@ -1876,8 +1916,8 @@  int __hwdom_init xen_in_range(unsigned long mfn)
     if ( !xen_regions[0].s )
     {
         /* S3 resume code (and other real mode trampoline code) */
-        xen_regions[region_s3].s = bootsym_phys(boot_trampoline_start);
-        xen_regions[region_s3].e = bootsym_phys(boot_trampoline_end);
+        xen_regions[region_s3].s = trampsym_phys(perm_trampoline_start);
+        xen_regions[region_s3].e = trampsym_phys(perm_trampoline_end);
 
         /*
          * This needs to remain in sync with the uses of the same symbols in
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 5c4254ac87..8e0c844e37 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -47,7 +47,7 @@ 
 #include <asm/tboot.h>
 #include <mach_apic.h>
 
-#define setup_trampoline()    (bootsym_phys(trampoline_realmode_entry))
+#define setup_trampoline()    (trampsym_phys(trampoline_realmode_entry))
 
 unsigned long __read_mostly trampoline_phys;
 
@@ -598,7 +598,7 @@  static int do_boot_cpu(int apicid, int cpu)
         {
             boot_error = 1;
             smp_mb();
-            if ( bootsym(trampoline_cpu_started) == 0xA5 )
+            if ( trampsym(trampoline_cpu_started) == 0xA5 )
                 /* trampoline started but...? */
                 printk("Stuck ??\n");
             else
@@ -614,7 +614,7 @@  static int do_boot_cpu(int apicid, int cpu)
     }
 
     /* mark "stuck" area as not stuck */
-    bootsym(trampoline_cpu_started) = 0;
+    trampsym(trampoline_cpu_started) = 0;
     smp_mb();
 
     return rc;
diff --git a/xen/arch/x86/tboot.c b/xen/arch/x86/tboot.c
index 2769e1ccd9..ad2591578d 100644
--- a/xen/arch/x86/tboot.c
+++ b/xen/arch/x86/tboot.c
@@ -369,9 +369,9 @@  void tboot_shutdown(uint32_t shutdown_type)
          */
         g_tboot_shared->num_mac_regions = 3;
         /* S3 resume code (and other real mode trampoline code) */
-        g_tboot_shared->mac_regions[0].start = bootsym_phys(boot_trampoline_start);
-        g_tboot_shared->mac_regions[0].size = bootsym_phys(boot_trampoline_end) -
-                                              bootsym_phys(boot_trampoline_start);
+        g_tboot_shared->mac_regions[0].start = trampsym_phys(perm_trampoline_start);
+        g_tboot_shared->mac_regions[0].size = trampsym_phys(perm_trampoline_end) -
+                                              trampsym_phys(perm_trampoline_start);
         /* hypervisor .text + .rodata */
         g_tboot_shared->mac_regions[1].start = (uint64_t)__pa(&_stext);
         g_tboot_shared->mac_regions[1].size = __pa(&__2M_rodata_end) -
diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
index 149cc4f7b5..05d476c423 100644
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -697,7 +697,7 @@  void __init zap_low_mappings(void)
 
     /* Replace with mapping of the boot trampoline only. */
     map_pages_to_xen(trampoline_phys, maddr_to_mfn(trampoline_phys),
-                     PFN_UP(boot_trampoline_end - boot_trampoline_start),
+                     PFN_UP(perm_trampoline_end - perm_trampoline_start),
                      __PAGE_HYPERVISOR);
 }
 
diff --git a/xen/include/asm-x86/acpi.h b/xen/include/asm-x86/acpi.h
index 7032f3a001..8c5381aa47 100644
--- a/xen/include/asm-x86/acpi.h
+++ b/xen/include/asm-x86/acpi.h
@@ -106,7 +106,7 @@  extern int acpi_scan_nodes(u64 start, u64 end);
 #define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
 
 extern struct acpi_sleep_info acpi_sinfo;
-#define acpi_video_flags bootsym(video_flags)
+#define acpi_video_flags trampsym(video_flags)
 struct xenpf_enter_acpi_sleep;
 extern int acpi_enter_sleep(struct xenpf_enter_acpi_sleep *sleep);
 extern int acpi_enter_state(u32 state);
diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h
index 50481748f6..e5043fafbd 100644
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -89,12 +89,12 @@ 
 
 #ifndef __ASSEMBLY__
 extern unsigned long trampoline_phys;
-#define bootsym_phys(sym)                                 \
-    (((unsigned long)&(sym)-(unsigned long)&boot_trampoline_start)+trampoline_phys)
-#define bootsym(sym)                                      \
-    (*RELOC_HIDE((typeof(&(sym)))__va(__pa(&(sym))),      \
-                 trampoline_phys-__pa(boot_trampoline_start)))
-extern char boot_trampoline_start[], boot_trampoline_end[];
+#define trampsym_phys(sym)                                 \
+    (((unsigned long)&(sym) - (unsigned long)&perm_trampoline_start) + trampoline_phys)
+#define trampsym(sym)                                      \
+    (*RELOC_HIDE((typeof(&(sym)))__va(__pa(&(sym))),       \
+                 trampoline_phys - __pa(perm_trampoline_start)))
+extern char perm_trampoline_start[], perm_trampoline_end[];
 extern char trampoline_realmode_entry[];
 extern unsigned int trampoline_xen_phys_start;
 extern unsigned char trampoline_cpu_started;