[v13,4/9] x86: add multiboot2 protocol support for EFI platforms
diff mbox

Message ID 20170125224921.GL16671@olila.local.net-space.pl
State New, archived
Headers show

Commit Message

Daniel Kiper Jan. 25, 2017, 10:49 p.m. UTC
On Wed, Jan 25, 2017 at 04:20:30PM -0600, Doug Goldstein wrote:
> On 1/25/17 4:11 PM, Daniel Kiper wrote:
> > This way Xen can be loaded on EFI platforms using GRUB2 and
> > other boot loaders which support multiboot2 protocol.
> > 
> > Signed-off-by: Daniel Kiper <daniel.kiper@oracle.com>
> > ---
> > v13 - suggestions/fixes:
> >     - move vga_text_buffer and efi_platform to .init.data section
> >       (suggested by Jan Beulich),
> >     - reduce number of error branches in EFI code in xen/arch/x86/boot/head.S
> >       (suggested by Jan Beulich),
> >     - rename run_bs label to .Lrun_bs
> >       (suggested by Jan Beulich),
> >     - align the stack as UEFI spec requires
> >       (suggested by Jan Beulich),
> >     - change trampoline region memory layout
> >       (suggested by Jan Beulich),
> >     - revert changes in efi_arch_pre_exit_boot()
> >       (suggested by Jan Beulich),
> >     - relocate_trampoline() must set trampoline_phys for all bootloaders;
> >       otherwise fallback allocator is always used if Xen was loaded with
> >       Multiboot2 protocol,
> >     - change err type in efi_multiboot2() to "static const CHAR16 __initconst"
> >       (suggested by Jan Beulich),
> >     - change asm "g" constraint to "rm" in efi_multiboot2()
> >       (suggested by Jan Beulich),
> >     - improve comments
> >       (suggested by Jan Beulich and Doug Goldstein).
> 
> This is a huge change and would really be helpful to have the diff of
> what's changed to work from.

Please look below...

Daniel

Comments

Jan Beulich Jan. 31, 2017, 12:33 p.m. UTC | #1
>>> On 25.01.17 at 23:49, <daniel.kiper@oracle.com> wrote:
> On Wed, Jan 25, 2017 at 04:20:30PM -0600, Doug Goldstein wrote:
>> This is a huge change and would really be helpful to have the diff of
>> what's changed to work from.
> 
> Please look below...

Thanks for providing this - I'll comment this rather than the full patch:

> @@ -123,6 +116,15 @@ efi_platform:
>  .Lbad_ldr_nih: .asciz "ERR: EFI ImageHandle is not provided by bootloader!"
>  .Lbad_efi_msg: .asciz "ERR: EFI IA-32 platforms are not supported!"
>  
> +        .section .init.data, "a", @progbits

This needs to be a writable section.

> @@ -170,6 +172,12 @@ not_multiboot:
>          .code64
>  
>  __efi64_mb2_start:
> +        /*
> +         * Multiboot2 spec says that here CPU is in 64-bit mode. However, there
> +         * is also guarantee that all code and data is always put by the bootloader
> +         * below 4 GiB. Hence, we can safely use in most cases 32-bit addressing.
> +         */

"use 32-bit addressing" is misleading: I don't think you use any such,
since - as pointed out during earlier review - this would needlessly
cause 0x67 prefixes to be emitted. Instead what you mean is that
we can safely truncate addresses.

> @@ -180,7 +188,7 @@ __efi64_mb2_start:
>          je      .Lefi_multiboot2_proto
>  
>          /* Jump to not_multiboot after switching CPU to x86_32 mode. */
> -        lea     not_multiboot(%rip),%edi
> +        lea     not_multiboot(%rip),%r15d

In cases like this, where a REX prefix is needed anyway, please
use the full register unless you strictly need it zero-extended
from 32 bits.

> +        /* Align the stack as UEFI spec requires. */
> +        add     $15,%rsp
> +        and     $~15,%rsp

How come you _add_ something here first? Simply do the AND and
be done. Also please extend the comment along the lines of what
I had asked for before: To warn of future changes to the number
of items pushed onto the stack below.

> @@ -280,13 +286,13 @@ run_bs:
>  
>          pop     %rdi
>  
> +        /* Align the stack as UEFI spec requires. */
> +        push    %rdi

Please combine the two into "mov (%rsp), %rdi" and make the
comment say "Keep the stack aligned; don't pop a single item
off it" or some such.

> @@ -424,26 +433,44 @@ trampoline_bios_setup:
>          cmp     %ecx,%edx           /* compare with BDA value */
>          cmovb   %edx,%ecx           /* and use the smaller */
>  
> -        /* Reserve memory for the trampoline. */
> -        sub     $((MB_TRAMPOLINE_SIZE+MB_TRAMPOLINE_STACK_SIZE)>>4),%ecx
> +        /* Reserve memory for the trampoline and the low-memory stack. */
> +        sub     $((TRAMPOLINE_SPACE+TRAMPOLINE_STACK_SPACE)>>4),%ecx
>  
>          /* From arch/x86/smpboot.c: start_eip had better be page-aligned! */
>          xor     %cl, %cl
>  
>  trampoline_setup:
> -        /* Save trampoline address for later use. */
>          shl     $4, %ecx
>          mov     %ecx,sym_phys(trampoline_phys)
>  
> +        /* Get topmost low-memory stack address. */
> +        add     $TRAMPOLINE_SPACE,%ecx

The top-most address of the stack is
%ecx + TRAMPOLINE_SPACE + TRAMPOLINE_STACK_SPACE - 1.
Please don't add misleading comments.

>          /* Save the Multiboot info struct (after relocation) for later use. */
>          mov     $sym_phys(cpu0_stack)+1024,%esp
> -        push    %ecx                /* Boot trampoline address. */
> +        push    %ecx                /* Topmost low-memory stack address. */
>          push    %ebx                /* Multiboot information address. */
>          push    %eax                /* Multiboot magic. */
>          call    reloc
>          mov     %eax,sym_phys(multiboot_ptr)
>  
>          /*
> +         * Now trampoline_phys points to the following structure (lowest
> +         * address is at the top):
> +         *
> +         * +------------------------+
> +         * |    TRAMPOLINE_SPACE    |
> +         * +- - - - - - - - - - - - +
> +         * |       mbi struct       |
> +         * +------------------------+
> +         * | TRAMPOLINE_STACK_SPACE |
> +         * +------------------------+
> +         *
> +         * mbi struct lives at the end of TRAMPOLINE_SPACE. The rest of
> +         * TRAMPOLINE_SPACE is reserved for trampoline code and data.
> +         */

Please can you clarify here that the MBI data grows downwards
from the beginning of the stack to the end of the trampoline?

> @@ -696,8 +699,8 @@ paddr_t __init efi_multiboot2(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTa
>  
>      efi_exit_boot(ImageHandle, SystemTable);
>  
> -    /* Return highest allocated memory address below 1 MiB. */
> -    return cfg.addr + cfg.size;
> +    /* Return trampoline address. */
> +    return trampoline_phys;
>  }

With this it would be less confusing if you move the trampoline_setup
label down a few more lines. Perhaps the function here could then
return void.

> --- a/xen/arch/x86/efi/stub.c
> +++ b/xen/arch/x86/efi/stub.c
> @@ -20,7 +20,8 @@
>  paddr_t __init noreturn efi_multiboot2(EFI_HANDLE ImageHandle,
>                                         EFI_SYSTEM_TABLE *SystemTable)
>  {
> -    CHAR16 *err = L"Xen does not have EFI code build in!!!\r\nSystem halted!!!\r\n";
> +    static const CHAR16 __initconst err[] =
> +        L"(XEN) Xen does not have EFI code build in!\r\n(XEN) System halted!\r\n";

Why did you add these (XEN) prefixes?

> --- a/xen/arch/x86/xen.lds.S
> +++ b/xen/arch/x86/xen.lds.S
> @@ -334,5 +334,5 @@ ASSERT(IS_ALIGNED(trampoline_end,   4), "trampoline_end 
> misaligned")
>  ASSERT(IS_ALIGNED(__bss_start,      8), "__bss_start misaligned")
>  ASSERT(IS_ALIGNED(__bss_end,        8), "__bss_end misaligned")
>  
> -ASSERT((trampoline_end - trampoline_start) < MB_TRAMPOLINE_SIZE,
> +ASSERT((trampoline_end - trampoline_start) < TRAMPOLINE_SPACE,
>      "not enough room for trampoline")

Didn't you mean to make sure there are at least two pages for
MBI data?

Jan
Daniel Kiper Jan. 31, 2017, 2:23 p.m. UTC | #2
On Tue, Jan 31, 2017 at 05:33:12AM -0700, Jan Beulich wrote:
> >>> On 25.01.17 at 23:49, <daniel.kiper@oracle.com> wrote:
> > On Wed, Jan 25, 2017 at 04:20:30PM -0600, Doug Goldstein wrote:
> >> This is a huge change and would really be helpful to have the diff of
> >> what's changed to work from.
> >
> > Please look below...
>
> Thanks for providing this - I'll comment this rather than the full patch:

If you wish I can do that next time too.

> > @@ -123,6 +116,15 @@ efi_platform:
> >  .Lbad_ldr_nih: .asciz "ERR: EFI ImageHandle is not provided by bootloader!"
> >  .Lbad_efi_msg: .asciz "ERR: EFI IA-32 platforms are not supported!"
> >
> > +        .section .init.data, "a", @progbits
>
> This needs to be a writable section.

AIUI, it is by default but if you wish I can replace "a" with "aw" here.

> > @@ -170,6 +172,12 @@ not_multiboot:
> >          .code64
> >
> >  __efi64_mb2_start:
> > +        /*
> > +         * Multiboot2 spec says that here CPU is in 64-bit mode. However, there
> > +         * is also guarantee that all code and data is always put by the bootloader
> > +         * below 4 GiB. Hence, we can safely use in most cases 32-bit addressing.
> > +         */
>
> "use 32-bit addressing" is misleading: I don't think you use any such,
> since - as pointed out during earlier review - this would needlessly
> cause 0x67 prefixes to be emitted. Instead what you mean is that
> we can safely truncate addresses.

OK.

> > @@ -180,7 +188,7 @@ __efi64_mb2_start:
> >          je      .Lefi_multiboot2_proto
> >
> >          /* Jump to not_multiboot after switching CPU to x86_32 mode. */
> > -        lea     not_multiboot(%rip),%edi
> > +        lea     not_multiboot(%rip),%r15d
>
> In cases like this, where a REX prefix is needed anyway, please
> use the full register unless you strictly need it zero-extended
> from 32 bits.

OK.

> > +        /* Align the stack as UEFI spec requires. */
> > +        add     $15,%rsp
> > +        and     $~15,%rsp
>
> How come you _add_ something here first? Simply do the AND and
> be done. Also please extend the comment along the lines of what

Facepalm! Err... Why I forgot here that stack grows downward...

> I had asked for before: To warn of future changes to the number
> of items pushed onto the stack below.

OK.

> > @@ -280,13 +286,13 @@ run_bs:
> >
> >          pop     %rdi
> >
> > +        /* Align the stack as UEFI spec requires. */
> > +        push    %rdi
>
> Please combine the two into "mov (%rsp), %rdi" and make the
> comment say "Keep the stack aligned; don't pop a single item
> off it" or some such.

OK.

> > @@ -424,26 +433,44 @@ trampoline_bios_setup:
> >          cmp     %ecx,%edx           /* compare with BDA value */
> >          cmovb   %edx,%ecx           /* and use the smaller */
> >
> > -        /* Reserve memory for the trampoline. */
> > -        sub     $((MB_TRAMPOLINE_SIZE+MB_TRAMPOLINE_STACK_SIZE)>>4),%ecx
> > +        /* Reserve memory for the trampoline and the low-memory stack. */
> > +        sub     $((TRAMPOLINE_SPACE+TRAMPOLINE_STACK_SPACE)>>4),%ecx
> >
> >          /* From arch/x86/smpboot.c: start_eip had better be page-aligned! */
> >          xor     %cl, %cl
> >
> >  trampoline_setup:
> > -        /* Save trampoline address for later use. */
> >          shl     $4, %ecx
> >          mov     %ecx,sym_phys(trampoline_phys)
> >
> > +        /* Get topmost low-memory stack address. */
> > +        add     $TRAMPOLINE_SPACE,%ecx
>
> The top-most address of the stack is
> %ecx + TRAMPOLINE_SPACE + TRAMPOLINE_STACK_SPACE - 1.
> Please don't add misleading comments.

Right, it is misleading. Do you think that:

Get the lowest low-memory stack address.

...is better?

> >          /* Save the Multiboot info struct (after relocation) for later use. */
> >          mov     $sym_phys(cpu0_stack)+1024,%esp
> > -        push    %ecx                /* Boot trampoline address. */
> > +        push    %ecx                /* Topmost low-memory stack address. */
> >          push    %ebx                /* Multiboot information address. */
> >          push    %eax                /* Multiboot magic. */
> >          call    reloc
> >          mov     %eax,sym_phys(multiboot_ptr)
> >
> >          /*
> > +         * Now trampoline_phys points to the following structure (lowest
> > +         * address is at the top):
> > +         *
> > +         * +------------------------+
> > +         * |    TRAMPOLINE_SPACE    |
> > +         * +- - - - - - - - - - - - +
> > +         * |       mbi struct       |
> > +         * +------------------------+
> > +         * | TRAMPOLINE_STACK_SPACE |
> > +         * +------------------------+
> > +         *
> > +         * mbi struct lives at the end of TRAMPOLINE_SPACE. The rest of
> > +         * TRAMPOLINE_SPACE is reserved for trampoline code and data.
> > +         */
>
> Please can you clarify here that the MBI data grows downwards
> from the beginning of the stack to the end of the trampoline?

OK, but I think that "beginning of the stack" should be replaced
with "the end of TRAMPOLINE_SPACE" here.

> > @@ -696,8 +699,8 @@ paddr_t __init efi_multiboot2(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTa
> >
> >      efi_exit_boot(ImageHandle, SystemTable);
> >
> > -    /* Return highest allocated memory address below 1 MiB. */
> > -    return cfg.addr + cfg.size;
> > +    /* Return trampoline address. */
> > +    return trampoline_phys;
> >  }
>
> With this it would be less confusing if you move the trampoline_setup
> label down a few more lines. Perhaps the function here could then
> return void.

If you wish.

> > --- a/xen/arch/x86/efi/stub.c
> > +++ b/xen/arch/x86/efi/stub.c
> > @@ -20,7 +20,8 @@
> >  paddr_t __init noreturn efi_multiboot2(EFI_HANDLE ImageHandle,
> >                                         EFI_SYSTEM_TABLE *SystemTable)
> >  {
> > -    CHAR16 *err = L"Xen does not have EFI code build in!!!\r\nSystem halted!!!\r\n";
> > +    static const CHAR16 __initconst err[] =
> > +        L"(XEN) Xen does not have EFI code build in!\r\n(XEN) System halted!\r\n";
>
> Why did you add these (XEN) prefixes?

To align message format with messages printed from most places. And I realized
that it will be nice to do the same thing in head.S and the rest of EFI code.
This way it is much easier to differentiate between a bootloader and Xen messages.
Though it begs another patch series.

> > --- a/xen/arch/x86/xen.lds.S
> > +++ b/xen/arch/x86/xen.lds.S
> > @@ -334,5 +334,5 @@ ASSERT(IS_ALIGNED(trampoline_end,   4), "trampoline_end
> > misaligned")
> >  ASSERT(IS_ALIGNED(__bss_start,      8), "__bss_start misaligned")
> >  ASSERT(IS_ALIGNED(__bss_end,        8), "__bss_end misaligned")
> >
> > -ASSERT((trampoline_end - trampoline_start) < MB_TRAMPOLINE_SIZE,
> > +ASSERT((trampoline_end - trampoline_start) < TRAMPOLINE_SPACE,
> >      "not enough room for trampoline")
>
> Didn't you mean to make sure there are at least two pages for
> MBI data?

Do you wish plain number here or constant like MBI_SIZE defined somewhere.
I think that constant is better thing.

Daniel
Jan Beulich Jan. 31, 2017, 3:14 p.m. UTC | #3
>>> On 31.01.17 at 15:23, <daniel.kiper@oracle.com> wrote:
> On Tue, Jan 31, 2017 at 05:33:12AM -0700, Jan Beulich wrote:
>> >>> On 25.01.17 at 23:49, <daniel.kiper@oracle.com> wrote:
>> > On Wed, Jan 25, 2017 at 04:20:30PM -0600, Doug Goldstein wrote:
>> > @@ -123,6 +116,15 @@ efi_platform:
>> >  .Lbad_ldr_nih: .asciz "ERR: EFI ImageHandle is not provided by bootloader!"
>> >  .Lbad_efi_msg: .asciz "ERR: EFI IA-32 platforms are not supported!"
>> >
>> > +        .section .init.data, "a", @progbits
>>
>> This needs to be a writable section.
> 
> AIUI, it is by default but if you wish I can replace "a" with "aw" here.

To me it would smell like a binutils bug if any flag was added despite
the explicit setting here. Things would be different if you omitted
those arguments.

>> > +        /* Get topmost low-memory stack address. */
>> > +        add     $TRAMPOLINE_SPACE,%ecx
>>
>> The top-most address of the stack is
>> %ecx + TRAMPOLINE_SPACE + TRAMPOLINE_STACK_SPACE - 1.
>> Please don't add misleading comments.
> 
> Right, it is misleading. Do you think that:
> 
> Get the lowest low-memory stack address.
> 
> ...is better?

That or "bottom-most" (to avoid the double "low").

>> >          /*
>> > +         * Now trampoline_phys points to the following structure (lowest
>> > +         * address is at the top):
>> > +         *
>> > +         * +------------------------+
>> > +         * |    TRAMPOLINE_SPACE    |
>> > +         * +- - - - - - - - - - - - +
>> > +         * |       mbi struct       |
>> > +         * +------------------------+
>> > +         * | TRAMPOLINE_STACK_SPACE |
>> > +         * +------------------------+
>> > +         *
>> > +         * mbi struct lives at the end of TRAMPOLINE_SPACE. The rest of
>> > +         * TRAMPOLINE_SPACE is reserved for trampoline code and data.
>> > +         */
>>
>> Please can you clarify here that the MBI data grows downwards
>> from the beginning of the stack to the end of the trampoline?
> 
> OK, but I think that "beginning of the stack" should be replaced
> with "the end of TRAMPOLINE_SPACE" here.

That would be in lines with the #define-s, but not with the drawing
above (which btw also applies to the comment that's there above).

>> > --- a/xen/arch/x86/efi/stub.c
>> > +++ b/xen/arch/x86/efi/stub.c
>> > @@ -20,7 +20,8 @@
>> >  paddr_t __init noreturn efi_multiboot2(EFI_HANDLE ImageHandle,
>> >                                         EFI_SYSTEM_TABLE *SystemTable)
>> >  {
>> > -    CHAR16 *err = L"Xen does not have EFI code build in!!!\r\nSystem halted!!!\r\n";
>> > +    static const CHAR16 __initconst err[] =
>> > +        L"(XEN) Xen does not have EFI code build in!\r\n(XEN) System halted!\r\n";
>>
>> Why did you add these (XEN) prefixes?
> 
> To align message format with messages printed from most places. And I realized
> that it will be nice to do the same thing in head.S and the rest of EFI code.
> This way it is much easier to differentiate between a bootloader and Xen messages.

I disagree. Those prefixes should be used only for messages ending
up in Xen's log.

>> > --- a/xen/arch/x86/xen.lds.S
>> > +++ b/xen/arch/x86/xen.lds.S
>> > @@ -334,5 +334,5 @@ ASSERT(IS_ALIGNED(trampoline_end,   4), "trampoline_end
>> > misaligned")
>> >  ASSERT(IS_ALIGNED(__bss_start,      8), "__bss_start misaligned")
>> >  ASSERT(IS_ALIGNED(__bss_end,        8), "__bss_end misaligned")
>> >
>> > -ASSERT((trampoline_end - trampoline_start) < MB_TRAMPOLINE_SIZE,
>> > +ASSERT((trampoline_end - trampoline_start) < TRAMPOLINE_SPACE,
>> >      "not enough room for trampoline")
>>
>> Didn't you mean to make sure there are at least two pages for
>> MBI data?
> 
> Do you wish plain number here or constant like MBI_SIZE defined somewhere.
> I think that constant is better thing.

I'd prefer MBI_SPACE_MIN (or whatever else making clear that this
is a bound, not an exact size).

Jan

Patch
diff mbox

diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
index b8f727a..2ecdcb3 100644
--- a/xen/arch/x86/boot/head.S
+++ b/xen/arch/x86/boot/head.S
@@ -109,13 +109,6 @@  gdt_boot_descr:
         .long   sym_phys(trampoline_gdt)
         .long   0 /* Needed for 64-bit lgdt */
 
-        .align 4
-vga_text_buffer:
-        .long   0xb8000
-
-efi_platform:
-        .byte   0
-
 .Lbad_cpu_msg: .asciz "ERR: Not a 64-bit CPU!"
 .Lbad_ldr_msg: .asciz "ERR: Not a Multiboot bootloader!"
 .Lbad_ldr_nbs: .asciz "ERR: Bootloader shutdown EFI x64 boot services!"
@@ -123,6 +116,15 @@  efi_platform:
 .Lbad_ldr_nih: .asciz "ERR: EFI ImageHandle is not provided by bootloader!"
 .Lbad_efi_msg: .asciz "ERR: EFI IA-32 platforms are not supported!"
 
+        .section .init.data, "a", @progbits
+        .align 4
+
+vga_text_buffer:
+        .long   0xb8000
+
+efi_platform:
+        .byte   0
+
         .section .init.text, "ax", @progbits
 
 bad_cpu:
@@ -170,6 +172,12 @@  not_multiboot:
         .code64
 
 __efi64_mb2_start:
+        /*
+         * Multiboot2 spec says that here CPU is in 64-bit mode. However, there
+         * is also guarantee that all code and data is always put by the bootloader
+         * below 4 GiB. Hence, we can safely use in most cases 32-bit addressing.
+         */
+
         cld
 
         /* VGA is not available on EFI platforms. */
@@ -180,7 +188,7 @@  __efi64_mb2_start:
         je      .Lefi_multiboot2_proto
 
         /* Jump to not_multiboot after switching CPU to x86_32 mode. */
-        lea     not_multiboot(%rip),%edi
+        lea     not_multiboot(%rip),%r15d
         jmp     x86_32_switch
 
 .Lefi_multiboot2_proto:
@@ -197,7 +205,7 @@  __efi64_mb2_start:
         mov     %ecx,%r8d
         sub     %ebx,%r8d
         cmp     %r8d,MB2_fixed_total_size(%rbx)
-        jbe     run_bs
+        jbe     .Lrun_bs
 
         /* Are EFI boot services available? */
         cmpl    $MULTIBOOT2_TAG_TYPE_EFI_BS,MB2_tag_type(%rcx)
@@ -226,7 +234,7 @@  __efi64_mb2_start:
 
         /* Is it the end of Multiboot2 information? */
         cmpl    $MULTIBOOT2_TAG_TYPE_END,MB2_tag_type(%rcx)
-        je      run_bs
+        je      .Lrun_bs
 
 .Lefi_mb2_next_tag:
         /* Go to next Multiboot2 information tag. */
@@ -235,34 +243,32 @@  __efi64_mb2_start:
         and     $~(MULTIBOOT2_TAG_ALIGN-1),%ecx
         jmp     .Lefi_mb2_tsize
 
-run_bs:
+.Lrun_bs:
         /* Are EFI boot services available? */
         cmpb    $0,efi_platform(%rip)
-        jnz     0f
 
         /* Jump to .Lmb2_no_bs after switching CPU to x86_32 mode. */
-        lea     .Lmb2_no_bs(%rip),%edi
-        jmp     x86_32_switch
+        lea     .Lmb2_no_bs(%rip),%r15d
+        jz      x86_32_switch
 
-0:
         /* Is EFI SystemTable address provided by boot loader? */
         test    %rsi,%rsi
-        jnz     1f
 
         /* Jump to .Lmb2_no_st after switching CPU to x86_32 mode. */
-        lea     .Lmb2_no_st(%rip),%edi
-        jmp     x86_32_switch
+        lea     .Lmb2_no_st(%rip),%r15d
+        jz      x86_32_switch
 
-1:
         /* Is EFI ImageHandle address provided by boot loader? */
         test    %rdi,%rdi
-        jnz     2f
 
         /* Jump to .Lmb2_no_ih after switching CPU to x86_32 mode. */
-        lea     .Lmb2_no_ih(%rip),%edi
-        jmp     x86_32_switch
+        lea     .Lmb2_no_ih(%rip),%r15d
+        jz      x86_32_switch
+
+        /* Align the stack as UEFI spec requires. */
+        add     $15,%rsp
+        and     $~15,%rsp
 
-2:
         push    %rax
         push    %rdi
 
@@ -280,13 +286,13 @@  run_bs:
 
         pop     %rdi
 
+        /* Align the stack as UEFI spec requires. */
+        push    %rdi
+
         /*
          * efi_multiboot2() is called according to System V AMD64 ABI:
-         *   - IN: %rdi - EFI ImageHandle, %rsi - EFI SystemTable,
-         *   - OUT: %rax - highest allocated memory address below 1 MiB;
-         *                 memory below this address is used for trampoline
-         *                 stack, trampoline itself and as a storage for
-         *                 mbi struct created in reloc().
+         *   - IN:  %rdi - EFI ImageHandle, %rsi - EFI SystemTable,
+         *   - OUT: %rax - trampoline address.
          *
          * MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO tag is not provided
          * on EFI platforms. Hence, it could not be used like
@@ -298,12 +304,15 @@  run_bs:
         shr     $4,%eax
         mov     %eax,%ecx
 
+        pop     %rdi
         pop     %rax
 
         /* Jump to trampoline_setup after switching CPU to x86_32 mode. */
-        lea     trampoline_setup(%rip),%edi
+        lea     trampoline_setup(%rip),%r15d
 
 x86_32_switch:
+        mov     %r15d,%edi
+
         cli
 
         /* Initialize GDTR. */
@@ -424,26 +433,44 @@  trampoline_bios_setup:
         cmp     %ecx,%edx           /* compare with BDA value */
         cmovb   %edx,%ecx           /* and use the smaller */
 
-        /* Reserve memory for the trampoline. */
-        sub     $((MB_TRAMPOLINE_SIZE+MB_TRAMPOLINE_STACK_SIZE)>>4),%ecx
+        /* Reserve memory for the trampoline and the low-memory stack. */
+        sub     $((TRAMPOLINE_SPACE+TRAMPOLINE_STACK_SPACE)>>4),%ecx
 
         /* From arch/x86/smpboot.c: start_eip had better be page-aligned! */
         xor     %cl, %cl
 
 trampoline_setup:
-        /* Save trampoline address for later use. */
         shl     $4, %ecx
         mov     %ecx,sym_phys(trampoline_phys)
 
+        /* Get topmost low-memory stack address. */
+        add     $TRAMPOLINE_SPACE,%ecx
+
         /* Save the Multiboot info struct (after relocation) for later use. */
         mov     $sym_phys(cpu0_stack)+1024,%esp
-        push    %ecx                /* Boot trampoline address. */
+        push    %ecx                /* Topmost low-memory stack address. */
         push    %ebx                /* Multiboot information address. */
         push    %eax                /* Multiboot magic. */
         call    reloc
         mov     %eax,sym_phys(multiboot_ptr)
 
         /*
+         * Now trampoline_phys points to the following structure (lowest
+         * address is at the top):
+         *
+         * +------------------------+
+         * |    TRAMPOLINE_SPACE    |
+         * +- - - - - - - - - - - - +
+         * |       mbi struct       |
+         * +------------------------+
+         * | TRAMPOLINE_STACK_SPACE |
+         * +------------------------+
+         *
+         * mbi struct lives at the end of TRAMPOLINE_SPACE. The rest of
+         * TRAMPOLINE_SPACE is reserved for trampoline code and data.
+         */
+
+        /*
          * Do not zero BSS on EFI platform here.
          * It was initialized earlier.
          */
@@ -526,7 +553,7 @@  trampoline_setup:
 1:
         /* Switch to low-memory stack which lives at the end of trampoline region. */
         mov     sym_phys(trampoline_phys),%edi
-        lea     MB_TRAMPOLINE_SIZE+MB_TRAMPOLINE_STACK_SIZE(%edi),%esp
+        lea     TRAMPOLINE_SPACE+TRAMPOLINE_STACK_SPACE(%edi),%esp
         lea     trampoline_boot_cpu_entry-trampoline_start(%edi),%eax
         pushl   $BOOT_CS32
         push    %eax
diff --git a/xen/arch/x86/boot/reloc.c b/xen/arch/x86/boot/reloc.c
index 0f2e372..b992678 100644
--- a/xen/arch/x86/boot/reloc.c
+++ b/xen/arch/x86/boot/reloc.c
@@ -16,7 +16,7 @@ 
  * This entry point is entered from xen/arch/x86/boot/head.S with:
  *   - 0x4(%esp) = MULTIBOOT_MAGIC,
  *   - 0x8(%esp) = MULTIBOOT_INFORMATION_ADDRESS,
- *   - 0xc(%esp) = BOOT_TRAMPOLINE_ADDRESS.
+ *   - 0xc(%esp) = TOPMOST_LOW_MEMORY_STACK_ADDRESS.
  */
 asm (
     "    .text                         \n"
@@ -235,3 +235,13 @@  multiboot_info_t __stdcall *reloc(u32 mb_magic, u32 mbi_in, u32 trampoline)
     else
         return mbi_reloc(mbi_in);
 }
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
index c1285ad..d193847 100644
--- a/xen/arch/x86/efi/efi-boot.h
+++ b/xen/arch/x86/efi/efi-boot.h
@@ -100,10 +100,11 @@  static void __init relocate_trampoline(unsigned long phys)
 {
     const s32 *trampoline_ptr;
 
-    if ( !efi_enabled(EFI_LOADER) || trampoline_phys )
+    trampoline_phys = phys;
+
+    if ( !efi_enabled(EFI_LOADER) )
         return;
 
-    trampoline_phys = phys;
     /* Apply relocations to trampoline. */
     for ( trampoline_ptr = __trampoline_rel_start;
           trampoline_ptr < __trampoline_rel_stop;
@@ -213,10 +214,12 @@  static void *__init efi_arch_allocate_mmap_buffer(UINTN map_size)
 
 static void __init efi_arch_pre_exit_boot(void)
 {
-    if ( !cfg.addr )
-        blexit(L"No memory for trampoline");
-
-    relocate_trampoline(cfg.addr);
+    if ( !trampoline_phys )
+    {
+        if ( !cfg.addr )
+            blexit(L"No memory for trampoline");
+        relocate_trampoline(cfg.addr);
+    }
 }
 
 static void __init noreturn efi_arch_post_exit_boot(void)
@@ -555,7 +558,7 @@  static void __init efi_arch_memory_setup(void)
     if ( efi_enabled(EFI_LOADER) )
         cfg.size = trampoline_end - trampoline_start;
     else
-        cfg.size = MB_TRAMPOLINE_SIZE + MB_TRAMPOLINE_STACK_SIZE + MBI_SIZE;
+        cfg.size = TRAMPOLINE_SPACE + TRAMPOLINE_STACK_SPACE;
 
     status = efi_bs->AllocatePages(AllocateMaxAddress, EfiLoaderData,
                                    PFN_UP(cfg.size), &cfg.addr);
@@ -696,8 +699,8 @@  paddr_t __init efi_multiboot2(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTa
 
     efi_exit_boot(ImageHandle, SystemTable);
 
-    /* Return highest allocated memory address below 1 MiB. */
-    return cfg.addr + cfg.size;
+    /* Return trampoline address. */
+    return trampoline_phys;
 }
 
 /*
diff --git a/xen/arch/x86/efi/stub.c b/xen/arch/x86/efi/stub.c
index b81adc0..8df9ba2 100644
--- a/xen/arch/x86/efi/stub.c
+++ b/xen/arch/x86/efi/stub.c
@@ -20,7 +20,8 @@ 
 paddr_t __init noreturn efi_multiboot2(EFI_HANDLE ImageHandle,
                                        EFI_SYSTEM_TABLE *SystemTable)
 {
-    CHAR16 *err = L"Xen does not have EFI code build in!!!\r\nSystem halted!!!\r\n";
+    static const CHAR16 __initconst err[] =
+        L"(XEN) Xen does not have EFI code build in!\r\n(XEN) System halted!\r\n";
     SIMPLE_TEXT_OUTPUT_INTERFACE *StdErr;
 
     StdErr = SystemTable->StdErr ? SystemTable->StdErr : SystemTable->ConOut;
@@ -36,7 +37,7 @@  paddr_t __init noreturn efi_multiboot2(EFI_HANDLE ImageHandle,
     "    call *%2                     \n"
     "0:  hlt                          \n"
     "    jmp  0b                      \n"
-       : "+c" (StdErr), "+d" (err) : "g" (StdErr->OutputString)
+       : "+c" (StdErr), "+d" (err) : "rm" (StdErr->OutputString)
        : "rax", "r8", "r9", "r10", "r11", "memory");
 
     unreachable();
diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
index 3a02b2b..addf2ef 100644
--- a/xen/arch/x86/xen.lds.S
+++ b/xen/arch/x86/xen.lds.S
@@ -334,5 +334,5 @@  ASSERT(IS_ALIGNED(trampoline_end,   4), "trampoline_end misaligned")
 ASSERT(IS_ALIGNED(__bss_start,      8), "__bss_start misaligned")
 ASSERT(IS_ALIGNED(__bss_end,        8), "__bss_end misaligned")
 
-ASSERT((trampoline_end - trampoline_start) < MB_TRAMPOLINE_SIZE,
+ASSERT((trampoline_end - trampoline_start) < TRAMPOLINE_SPACE,
     "not enough room for trampoline")
diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h
index a86ea12..9cd05a2 100644
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -73,10 +73,8 @@ 
 #define STACK_ORDER 3
 #define STACK_SIZE  (PAGE_SIZE << STACK_ORDER)
 
-#define MB_TRAMPOLINE_STACK_SIZE        PAGE_SIZE
-#define MB_TRAMPOLINE_SIZE              (KB(64) - MB_TRAMPOLINE_STACK_SIZE)
-
-#define MBI_SIZE                        (2 * PAGE_SIZE)
+#define TRAMPOLINE_STACK_SPACE  PAGE_SIZE
+#define TRAMPOLINE_SPACE        (KB(64) - TRAMPOLINE_STACK_SPACE)
 
 /* Primary stack is restricted to 8kB by guard pages. */
 #define PRIMARY_STACK_SIZE 8192