[v12,05/10] x86: add multiboot2 protocol support for EFI platforms
diff mbox

Message ID 1484876060-2236-6-git-send-email-daniel.kiper@oracle.com
State New, archived
Headers show

Commit Message

Daniel Kiper Jan. 20, 2017, 1:34 a.m. UTC
This way Xen can be loaded on EFI platforms using GRUB2 and
other boot loaders which support multiboot2 protocol.

Signed-off-by: Daniel Kiper <daniel.kiper@oracle.com>
---
v12 - suggestions/fixes:
    - rename __efi64_start to __efi64_mb2_start
      (suggested by Andrew Cooper),
    - use efi_arch_memory_setup() machinery as trampoline
      et consortes main memory allocator
      (suggested by Doug Goldstein),
    - allocate space for mbi struct in efi_arch_memory_setup() too;
      this thing was not taken into account in earlier releases,
    - revert trampoline et consortes fallback memory allocator code
      in efi_arch_process_memory_map() to current upstream state
      (suggested by Doug Goldstein),
    - further simplify efi_arch_pre_exit_boot() code,
    - call efi_arch_memory_setup() from efi_multiboot2()
      (suggested by Doug Goldstein),
    - fix asembly call argument in xen/arch/x86/efi/stub.c
      (suggested by Andrew Cooper),
    - add ASSERT() for trampoline size
      (suggested by Doug Goldstein),
    - add KB() macro
      (suggested by Doug Goldstein),
    - improve comments
      (suggested by Andrew Cooper and Doug Goldstein).

v10 - suggestions/fixes:
    - replace ljmpl with lretq
      (suggested by Andrew Cooper),
    - introduce efi_platform to increase code readability
      (suggested by Andrew Cooper).

v9 - suggestions/fixes:
   - use .L labels instead of numeric ones in multiboot2 data scanning loops
     (suggested by Jan Beulich).

v8 - suggestions/fixes:
   - use __bss_start(%rip)/__bss_end(%rip) instead of
     of .startof.(.bss)(%rip)/$.sizeof.(.bss) because
     latter is not tested extensively in different
     built environments yet
     (suggested by Andrew Cooper),
   - fix multiboot2 data scanning loop in x86_32 code
     (suggested by Jan Beulich),
   - add check for extra mem for mbi data if Xen is loaded
     via multiboot2 protocol on EFI platform
     (suggested by Jan Beulich),
   - improve comments
     (suggested by Jan Beulich).

v7 - suggestions/fixes:
   - do not allocate twice memory for trampoline if we were
     loaded via multiboot2 protocol on EFI platform,
   - wrap long line
     (suggested by Jan Beulich),
   - improve comments
     (suggested by Jan Beulich).

v6 - suggestions/fixes:
   - improve label names in assembly
     error printing code
     (suggested by Jan Beulich),
   - improve comments
     (suggested by Jan Beulich),
   - various minor cleanups and fixes
     (suggested by Jan Beulich).

v4 - suggestions/fixes:
   - remove redundant BSS alignment,
   - update BSS alignment check,
   - use __set_bit() instead of set_bit() if possible
     (suggested by Jan Beulich),
   - call efi_arch_cpu() from efi_multiboot2()
     even if the same work is done later in
     other place right now
     (suggested by Jan Beulich),
   - xen/arch/x86/efi/stub.c:efi_multiboot2()
     fail properly on EFI platforms,
   - do not read data beyond the end of multiboot2
     information in xen/arch/x86/boot/head.S
     (suggested by Jan Beulich),
   - use 32-bit registers in x86_64 code if possible
     (suggested by Jan Beulich),
   - multiboot2 information address is 64-bit
     in x86_64 code, so, treat it is as is
     (suggested by Jan Beulich),
   - use cmovcc if possible,
   - leave only one space between rep and stosq
     (suggested by Jan Beulich),
   - improve error handling,
   - improve early error messages,
     (suggested by Jan Beulich),
   - improve early error messages printing code,
   - improve label names
     (suggested by Jan Beulich),
   - improve comments
     (suggested by Jan Beulich),
   - various minor cleanups.

v3 - suggestions/fixes:
   - take into account alignment when skipping multiboot2 fixed part
     (suggested by Konrad Rzeszutek Wilk),
   - improve segment registers initialization
     (suggested by Jan Beulich),
   - improve comments
     (suggested by Jan Beulich and Konrad Rzeszutek Wilk),
   - improve commit message
     (suggested by Jan Beulich).

v2 - suggestions/fixes:
   - generate multiboot2 header using macros
     (suggested by Jan Beulich),
   - switch CPU to x86_32 mode before
     jumping to 32-bit code
     (suggested by Andrew Cooper),
   - reduce code changes to increase patch readability
     (suggested by Jan Beulich),
   - improve comments
     (suggested by Jan Beulich),
   - ignore MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO tag on EFI platform
     and find on my own multiboot2.mem_lower value,
   - stop execution if EFI platform is detected
     in legacy BIOS path.
---
 xen/arch/x86/boot/head.S          |  268 ++++++++++++++++++++++++++++++++++---
 xen/arch/x86/efi/efi-boot.h       |   61 ++++++++-
 xen/arch/x86/efi/stub.c           |   38 ++++++
 xen/arch/x86/x86_64/asm-offsets.c |    2 +
 xen/arch/x86/xen.lds.S            |    7 +-
 xen/common/efi/boot.c             |   11 ++
 xen/include/asm-x86/config.h      |    5 +
 xen/include/xen/config.h          |    1 +
 8 files changed, 366 insertions(+), 27 deletions(-)

Comments

Doug Goldstein Jan. 20, 2017, 4:37 a.m. UTC | #1
On 1/19/17 8:34 PM, Daniel Kiper wrote:

> diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
> index 84cf44d..b8f727a 100644
> --- a/xen/arch/x86/boot/head.S
> +++ b/xen/arch/x86/boot/head.S

> -2:      /* Reserve 64kb for the trampoline */
> -        sub     $0x1000,%ecx
> +        /* Reserve memory for the trampoline. */

/* Reserve memory for the trampoline and the low memory stack */

> +        sub     $((MB_TRAMPOLINE_SIZE+MB_TRAMPOLINE_STACK_SIZE)>>4),%ecx
>  
>          /* From arch/x86/smpboot.c: start_eip had better be page-aligned! */
>          xor     %cl, %cl

> diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
> index 62c010e..c1285ad 100644
> --- a/xen/arch/x86/efi/efi-boot.h
> +++ b/xen/arch/x86/efi/efi-boot.h

> @@ -550,7 +551,12 @@ static void __init efi_arch_memory_setup(void)
>  
>      /* Allocate space for trampoline (in first Mb). */
>      cfg.addr = 0x100000;
> -    cfg.size = trampoline_end - trampoline_start;
> +
> +    if ( efi_enabled(EFI_LOADER) )
> +        cfg.size = trampoline_end - trampoline_start;
> +    else
> +        cfg.size = MB_TRAMPOLINE_SIZE + MB_TRAMPOLINE_STACK_SIZE + MBI_SIZE;

Why MBI_SIZE? I don't see that being copied in that region or living there?
Jan Beulich Jan. 20, 2017, 9:46 a.m. UTC | #2
>>> On 20.01.17 at 02:34, <daniel.kiper@oracle.com> wrote:
> @@ -100,20 +107,48 @@ multiboot2_header_start:
>  gdt_boot_descr:
>          .word   6*8-1
>          .long   sym_phys(trampoline_gdt)
> +        .long   0 /* Needed for 64-bit lgdt */
> +
> +        .align 4
> +vga_text_buffer:
> +        .long   0xb8000
> +
> +efi_platform:
> +        .byte   0

You mean to modify these fields, but you add them to a r/o section.

> +.Lefi_multiboot2_proto:
> +        /* Zero EFI SystemTable and EFI ImageHandle addresses. */
> +        xor     %esi,%esi
> +        xor     %edi,%edi
> +
> +        /* Skip Multiboot2 information fixed part. */
> +        lea     (MB2_fixed_sizeof+MULTIBOOT2_TAG_ALIGN-1)(%rbx),%ecx

In an earlier reply to Andrew's inquiry regarding the possible
truncation here you said that grub can be made obey to such a
load restriction. None of the tags added here or in patch 2
appear to have such an effect, so would you please clarify how
the address restriction is being enforced?

> +        and     $~(MULTIBOOT2_TAG_ALIGN-1),%ecx
> +
> +.Lefi_mb2_tsize:
> +        /* Check Multiboot2 information total size. */
> +        mov     %ecx,%r8d
> +        sub     %ebx,%r8d
> +        cmp     %r8d,MB2_fixed_total_size(%rbx)
> +        jbe     run_bs
> +
> +        /* Are EFI boot services available? */
> +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI_BS,MB2_tag_type(%rcx)
> +        jne     .Lefi_mb2_st
> +
> +        /* We are on EFI platform and EFI boot services are available. */
> +        incb    efi_platform(%rip)
> +
> +        /*
> +         * Disable real mode and other legacy stuff which should not
> +         * be run on EFI platforms.
> +         */
> +        incb    skip_realmode(%rip)
> +        jmp     .Lefi_mb2_next_tag
> +
> +.Lefi_mb2_st:
> +        /* Get EFI SystemTable address from Multiboot2 information. */
> +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64,MB2_tag_type(%rcx)
> +        cmove   MB2_efi64_st(%rcx),%rsi
> +        je      .Lefi_mb2_next_tag
> +
> +        /* Get EFI ImageHandle address from Multiboot2 information. */
> +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64_IH,MB2_tag_type(%rcx)
> +        cmove   MB2_efi64_ih(%rcx),%rdi
> +        je      .Lefi_mb2_next_tag
> +
> +        /* Is it the end of Multiboot2 information? */
> +        cmpl    $MULTIBOOT2_TAG_TYPE_END,MB2_tag_type(%rcx)
> +        je      run_bs
> +
> +.Lefi_mb2_next_tag:
> +        /* Go to next Multiboot2 information tag. */
> +        add     MB2_tag_size(%rcx),%ecx
> +        add     $(MULTIBOOT2_TAG_ALIGN-1),%ecx
> +        and     $~(MULTIBOOT2_TAG_ALIGN-1),%ecx
> +        jmp     .Lefi_mb2_tsize
> +
> +run_bs:

.Lrun_bs:

> +        /* Are EFI boot services available? */
> +        cmpb    $0,efi_platform(%rip)
> +        jnz     0f
> +
> +        /* Jump to .Lmb2_no_bs after switching CPU to x86_32 mode. */
> +        lea     .Lmb2_no_bs(%rip),%edi
> +        jmp     x86_32_switch
> +0:

I realize you need to avoid clobbering %rdi in the non-error case,
but the register choice seems sub-optimal: If you used a register
which you can clobber here (e.g. %edx as it seems, using %edi in
its place at x86_32_switch and cs32_switch), you could simplify
this to

    cmpb ...
    lea ...
    je x86_32_switch

at once avoiding all the numeric labels here.

> +2:
> +        push    %rax
> +        push    %rdi
> +
> +        /*
> +         * Initialize BSS (no nasty surprises!).
> +         * It must be done earlier than in BIOS case
> +         * because efi_multiboot2() touches it.
> +         */
> +        lea     __bss_start(%rip),%edi
> +        lea     __bss_end(%rip),%ecx
> +        sub     %edi,%ecx
> +        shr     $3,%ecx
> +        xor     %eax,%eax
> +        rep stosq
> +
> +        pop     %rdi
> +
> +        /*
> +         * efi_multiboot2() is called according to System V AMD64 ABI:
> +         *   - IN: %rdi - EFI ImageHandle, %rsi - EFI SystemTable,
> +         *   - OUT: %rax - highest allocated memory address below 1 MiB;
> +         *                 memory below this address is used for trampoline
> +         *                 stack, trampoline itself and as a storage for
> +         *                 mbi struct created in reloc().
> +         *
> +         * MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO tag is not provided
> +         * on EFI platforms. Hence, it could not be used like
> +         * on legacy BIOS platforms.
> +         */
> +        call    efi_multiboot2

Now that we had a need for commit f6b7fedc89 ("x86/EFI: meet
further spec requirements for runtime calls") I'm worried about stack
alignment here. Does GrUB call or jmp to our entry point (and is that
firmly specified to be the case)? Perhaps it would be a good idea to
align the stack earlier on in any case. Or if not (and if alignment at
this call is ABI conforming), a comment should be left here to warn
people of future modifications to the amount of items pushed onto /
popped off the stack.

> +trampoline_setup:
> +        /* Save trampoline address for later use. */
>          shl     $4, %ecx
>          mov     %ecx,sym_phys(trampoline_phys)

I don't think this comment is very useful.

> --- a/xen/arch/x86/efi/efi-boot.h
> +++ b/xen/arch/x86/efi/efi-boot.h
> @@ -100,6 +100,9 @@ static void __init relocate_trampoline(unsigned long phys)
>  {
>      const s32 *trampoline_ptr;
>  
> +    if ( !efi_enabled(EFI_LOADER) || trampoline_phys )
> +        return;
> +
>      trampoline_phys = phys;
>      /* Apply relocations to trampoline. */
>      for ( trampoline_ptr = __trampoline_rel_start;
> @@ -210,12 +213,10 @@ static void *__init efi_arch_allocate_mmap_buffer(UINTN map_size)
>  
>  static void __init efi_arch_pre_exit_boot(void)
>  {
> -    if ( !trampoline_phys )
> -    {
> -        if ( !cfg.addr )
> -            blexit(L"No memory for trampoline");
> -        relocate_trampoline(cfg.addr);
> -    }
> +    if ( !cfg.addr )
> +        blexit(L"No memory for trampoline");
> +
> +    relocate_trampoline(cfg.addr);
>  }

Why can't this function be left untouched, and the change to
relocate_trampoline() above also be reduced or even be removed
altogether?

> --- a/xen/arch/x86/efi/stub.c
> +++ b/xen/arch/x86/efi/stub.c
> @@ -3,6 +3,44 @@
>  #include <xen/init.h>
>  #include <xen/lib.h>
>  #include <asm/page.h>
> +#include <asm/efibind.h>
> +#include <efi/efidef.h>
> +#include <efi/eficapsule.h>
> +#include <efi/eficon.h>
> +#include <efi/efidevp.h>
> +#include <efi/efiapi.h>
> +
> +/*
> + * Here we are in EFI stub. EFI calls are not supported due to lack
> + * of relevant functionality in compiler and/or linker.
> + *
> + * efi_multiboot2() is an exception. Please look below for more details.
> + */
> +
> +paddr_t __init noreturn efi_multiboot2(EFI_HANDLE ImageHandle,
> +                                       EFI_SYSTEM_TABLE *SystemTable)
> +{
> +    CHAR16 *err = L"Xen does not have EFI code build in!!!\r\nSystem halted!!!\r\n";

static const CHAR16 __initconst err[]

And please reduce the number of exclamation marks.

> +    SIMPLE_TEXT_OUTPUT_INTERFACE *StdErr;
> +
> +    StdErr = SystemTable->StdErr ? SystemTable->StdErr : SystemTable->ConOut;
> +
> +    /*
> +     * Print error message and halt the system.
> +     *
> +     * We have to open code MS x64 calling convention
> +     * in assembly because here this convention may
> +     * not be directly supported by C compiler.
> +     */
> +    asm volatile(
> +    "    call *%2                     \n"
> +    "0:  hlt                          \n"
> +    "    jmp  0b                      \n"
> +       : "+c" (StdErr), "+d" (err) : "g" (StdErr->OutputString)

The "g" really wants to be "rm": While the kind of expression doesn't
really allow for an immediate, you still shouldn't give wrong examples
of constraints (with the * now properly added to the call operand, it
doesn't allow for an immediate anymore).

> --- a/xen/include/asm-x86/config.h
> +++ b/xen/include/asm-x86/config.h
> @@ -73,6 +73,11 @@
>  #define STACK_ORDER 3
>  #define STACK_SIZE  (PAGE_SIZE << STACK_ORDER)
>  
> +#define MB_TRAMPOLINE_STACK_SIZE        PAGE_SIZE
> +#define MB_TRAMPOLINE_SIZE              (KB(64) - MB_TRAMPOLINE_STACK_SIZE)

What's the reason for the MB_ prefixes here? The trampoline is
always the same size, isn't it? Nor am I convinced we really need
two defines - except in the link time assertion you always use
the sum of the two.

> +#define MBI_SIZE                        (2 * PAGE_SIZE)

On top of Doug's question - if it is needed at all, what does this
match up with, i.e. how come this is 2 pages (and not 1 or 3)?

Jan
Daniel Kiper Jan. 20, 2017, 11:43 a.m. UTC | #3
On Fri, Jan 20, 2017 at 02:46:47AM -0700, Jan Beulich wrote:
> >>> On 20.01.17 at 02:34, <daniel.kiper@oracle.com> wrote:
> > @@ -100,20 +107,48 @@ multiboot2_header_start:
> >  gdt_boot_descr:
> >          .word   6*8-1
> >          .long   sym_phys(trampoline_gdt)
> > +        .long   0 /* Needed for 64-bit lgdt */
> > +
> > +        .align 4
> > +vga_text_buffer:
> > +        .long   0xb8000
> > +
> > +efi_platform:
> > +        .byte   0
>
> You mean to modify these fields, but you add them to a r/o section.

Yep. So, I think that we should move them to .init.data. Is it OK for you?

> > +.Lefi_multiboot2_proto:
> > +        /* Zero EFI SystemTable and EFI ImageHandle addresses. */
> > +        xor     %esi,%esi
> > +        xor     %edi,%edi
> > +
> > +        /* Skip Multiboot2 information fixed part. */
> > +        lea     (MB2_fixed_sizeof+MULTIBOOT2_TAG_ALIGN-1)(%rbx),%ecx
>
> In an earlier reply to Andrew's inquiry regarding the possible
> truncation here you said that grub can be made obey to such a
> load restriction. None of the tags added here or in patch 2
> appear to have such an effect, so would you please clarify how
> the address restriction is being enforced?

GRUB2 itself does allocations for all multiboot2 stuff always below 4 GiB.
So, there is no need for extra tags.

Additionally, multiboot2 spec says this:

The bootloader must not load any part of the kernel, the modules, the Multiboot2
information structure, etc. higher than 4 GiB - 1. This requirement is put in
force because most of currently specified tags supports 32-bit addresses only.
Additionally, some kernels, even if they run on EFI 64-bit platform, still
execute some parts of its initialization code in 32-bit mode.

[...]

> > +.Lefi_mb2_next_tag:
> > +        /* Go to next Multiboot2 information tag. */
> > +        add     MB2_tag_size(%rcx),%ecx
> > +        add     $(MULTIBOOT2_TAG_ALIGN-1),%ecx
> > +        and     $~(MULTIBOOT2_TAG_ALIGN-1),%ecx
> > +        jmp     .Lefi_mb2_tsize
> > +
> > +run_bs:
>
> .Lrun_bs:

OK.

> > +        /* Are EFI boot services available? */
> > +        cmpb    $0,efi_platform(%rip)
> > +        jnz     0f
> > +
> > +        /* Jump to .Lmb2_no_bs after switching CPU to x86_32 mode. */
> > +        lea     .Lmb2_no_bs(%rip),%edi
> > +        jmp     x86_32_switch
> > +0:
>
> I realize you need to avoid clobbering %rdi in the non-error case,
> but the register choice seems sub-optimal: If you used a register
> which you can clobber here (e.g. %edx as it seems, using %edi in
> its place at x86_32_switch and cs32_switch), you could simplify
> this to
>
>     cmpb ...
>     lea ...
>     je x86_32_switch
>
> at once avoiding all the numeric labels here.

If you do not care that it will be always loaded then OK. However, I think
that %r15d is a bit better here because if we need to add another argument
to efi_multiboot2() and we use %edx then we must change code in more places.
Of course we should do "mov %r15d,%edi" after x86_32_switch label then.

> > +2:
> > +        push    %rax
> > +        push    %rdi
> > +
> > +        /*
> > +         * Initialize BSS (no nasty surprises!).
> > +         * It must be done earlier than in BIOS case
> > +         * because efi_multiboot2() touches it.
> > +         */
> > +        lea     __bss_start(%rip),%edi
> > +        lea     __bss_end(%rip),%ecx
> > +        sub     %edi,%ecx
> > +        shr     $3,%ecx
> > +        xor     %eax,%eax
> > +        rep stosq
> > +
> > +        pop     %rdi
> > +
> > +        /*
> > +         * efi_multiboot2() is called according to System V AMD64 ABI:
> > +         *   - IN: %rdi - EFI ImageHandle, %rsi - EFI SystemTable,
> > +         *   - OUT: %rax - highest allocated memory address below 1 MiB;
> > +         *                 memory below this address is used for trampoline
> > +         *                 stack, trampoline itself and as a storage for
> > +         *                 mbi struct created in reloc().
> > +         *
> > +         * MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO tag is not provided
> > +         * on EFI platforms. Hence, it could not be used like
> > +         * on legacy BIOS platforms.
> > +         */
> > +        call    efi_multiboot2
>
> Now that we had a need for commit f6b7fedc89 ("x86/EFI: meet
> further spec requirements for runtime calls") I'm worried about stack
> alignment here. Does GrUB call or jmp to our entry point (and is that
> firmly specified to be the case)? Perhaps it would be a good idea to
> align the stack earlier on in any case. Or if not (and if alignment at
> this call is ABI conforming), a comment should be left here to warn
> people of future modifications to the amount of items pushed onto /
> popped off the stack.

Multiboot2 spec requires that stack, among others, is passed to loaded
image according to UEFI spec. Though, IIRC, there are no extra stack checks
there. Maybe we should add something to GRUB2 if it does not exists.

> > +trampoline_setup:
> > +        /* Save trampoline address for later use. */
> >          shl     $4, %ecx
> >          mov     %ecx,sym_phys(trampoline_phys)
>
> I don't think this comment is very useful.

I will drop it if you wish.

> > --- a/xen/arch/x86/efi/efi-boot.h
> > +++ b/xen/arch/x86/efi/efi-boot.h
> > @@ -100,6 +100,9 @@ static void __init relocate_trampoline(unsigned long phys)
> >  {
> >      const s32 *trampoline_ptr;
> >
> > +    if ( !efi_enabled(EFI_LOADER) || trampoline_phys )
> > +        return;
> > +
> >      trampoline_phys = phys;
> >      /* Apply relocations to trampoline. */
> >      for ( trampoline_ptr = __trampoline_rel_start;
> > @@ -210,12 +213,10 @@ static void *__init efi_arch_allocate_mmap_buffer(UINTN map_size)
> >
> >  static void __init efi_arch_pre_exit_boot(void)
> >  {
> > -    if ( !trampoline_phys )
> > -    {
> > -        if ( !cfg.addr )
> > -            blexit(L"No memory for trampoline");
> > -        relocate_trampoline(cfg.addr);
> > -    }
> > +    if ( !cfg.addr )
> > +        blexit(L"No memory for trampoline");
> > +
> > +    relocate_trampoline(cfg.addr);
> >  }
>
> Why can't this function be left untouched, and the change to
> relocate_trampoline() above also be reduced or even be removed
> altogether?

There is pretty good chance that efi_arch_pre_exit_boot() can be left
untouched though relocate_trampoline() needs at least

if ( !efi_enabled(EFI_LOADER) )
    return;

> > --- a/xen/arch/x86/efi/stub.c
> > +++ b/xen/arch/x86/efi/stub.c
> > @@ -3,6 +3,44 @@
> >  #include <xen/init.h>
> >  #include <xen/lib.h>
> >  #include <asm/page.h>
> > +#include <asm/efibind.h>
> > +#include <efi/efidef.h>
> > +#include <efi/eficapsule.h>
> > +#include <efi/eficon.h>
> > +#include <efi/efidevp.h>
> > +#include <efi/efiapi.h>
> > +
> > +/*
> > + * Here we are in EFI stub. EFI calls are not supported due to lack
> > + * of relevant functionality in compiler and/or linker.
> > + *
> > + * efi_multiboot2() is an exception. Please look below for more details.
> > + */
> > +
> > +paddr_t __init noreturn efi_multiboot2(EFI_HANDLE ImageHandle,
> > +                                       EFI_SYSTEM_TABLE *SystemTable)
> > +{
> > +    CHAR16 *err = L"Xen does not have EFI code build in!!!\r\nSystem halted!!!\r\n";
>
> static const CHAR16 __initconst err[]
>
> And please reduce the number of exclamation marks.

OK.

> > +    SIMPLE_TEXT_OUTPUT_INTERFACE *StdErr;
> > +
> > +    StdErr = SystemTable->StdErr ? SystemTable->StdErr : SystemTable->ConOut;
> > +
> > +    /*
> > +     * Print error message and halt the system.
> > +     *
> > +     * We have to open code MS x64 calling convention
> > +     * in assembly because here this convention may
> > +     * not be directly supported by C compiler.
> > +     */
> > +    asm volatile(
> > +    "    call *%2                     \n"
> > +    "0:  hlt                          \n"
> > +    "    jmp  0b                      \n"
> > +       : "+c" (StdErr), "+d" (err) : "g" (StdErr->OutputString)
>
> The "g" really wants to be "rm": While the kind of expression doesn't
> really allow for an immediate, you still shouldn't give wrong examples
> of constraints (with the * now properly added to the call operand, it
> doesn't allow for an immediate anymore).

I was considering that change once but I was not sure. Though if you
think that it make sense I will do that.

> > --- a/xen/include/asm-x86/config.h
> > +++ b/xen/include/asm-x86/config.h
> > @@ -73,6 +73,11 @@
> >  #define STACK_ORDER 3
> >  #define STACK_SIZE  (PAGE_SIZE << STACK_ORDER)
> >
> > +#define MB_TRAMPOLINE_STACK_SIZE        PAGE_SIZE
> > +#define MB_TRAMPOLINE_SIZE              (KB(64) - MB_TRAMPOLINE_STACK_SIZE)
>
> What's the reason for the MB_ prefixes here? The trampoline is
> always the same size, isn't it? Nor am I convinced we really need

Please take a look at efi_arch_memory_setup(). Amount of memory allocated
for trampoline and other stuff depends on boot loader type. So, when I use
"MB_" I would like underline that this is relevant for multiboot protocols.
Though I think that we can use the same size for all protocols. However,
I would not like to touch native EFI loader case.

> two defines - except in the link time assertion you always use
> the sum of the two.
>
> > +#define MBI_SIZE                        (2 * PAGE_SIZE)
>
> On top of Doug's question - if it is needed at all, what does this

Please look around reloc() call in xen/arch/x86/boot/head.S. You quickly
realize that there is following memory layout from top to bottom:

        +------------------+
        | TRAMPOLINE_STACK |
        +------------------+
        |    TRAMPOLINE    |
        +------------------+
        |    mbi struct    |
        +------------------+

> match up with, i.e. how come this is 2 pages (and not 1 or 3)?

Just thought that 8 KiB will be sufficient for Xen/modules arguments,
memory map and other minor things.

Daniel
Jan Beulich Jan. 20, 2017, 12:40 p.m. UTC | #4
>>> On 20.01.17 at 12:43, <daniel.kiper@oracle.com> wrote:
> On Fri, Jan 20, 2017 at 02:46:47AM -0700, Jan Beulich wrote:
>> >>> On 20.01.17 at 02:34, <daniel.kiper@oracle.com> wrote:
>> > @@ -100,20 +107,48 @@ multiboot2_header_start:
>> >  gdt_boot_descr:
>> >          .word   6*8-1
>> >          .long   sym_phys(trampoline_gdt)
>> > +        .long   0 /* Needed for 64-bit lgdt */
>> > +
>> > +        .align 4
>> > +vga_text_buffer:
>> > +        .long   0xb8000
>> > +
>> > +efi_platform:
>> > +        .byte   0
>>
>> You mean to modify these fields, but you add them to a r/o section.
> 
> Yep. So, I think that we should move them to .init.data. Is it OK for you?

That's what I'm asking for, yes.

>> > +.Lefi_multiboot2_proto:
>> > +        /* Zero EFI SystemTable and EFI ImageHandle addresses. */
>> > +        xor     %esi,%esi
>> > +        xor     %edi,%edi
>> > +
>> > +        /* Skip Multiboot2 information fixed part. */
>> > +        lea     (MB2_fixed_sizeof+MULTIBOOT2_TAG_ALIGN-1)(%rbx),%ecx
>>
>> In an earlier reply to Andrew's inquiry regarding the possible
>> truncation here you said that grub can be made obey to such a
>> load restriction. None of the tags added here or in patch 2
>> appear to have such an effect, so would you please clarify how
>> the address restriction is being enforced?
> 
> GRUB2 itself does allocations for all multiboot2 stuff always below 4 GiB.
> So, there is no need for extra tags.
> 
> Additionally, multiboot2 spec says this:
> 
> The bootloader must not load any part of the kernel, the modules, the Multiboot2
> information structure, etc. higher than 4 GiB - 1. This requirement is put in
> force because most of currently specified tags supports 32-bit addresses only.
> Additionally, some kernels, even if they run on EFI 64-bit platform, still
> execute some parts of its initialization code in 32-bit mode.

Okay, that's good enough for now, even if it escapes me how that's
supposed to work on systems without any memory below 4Gb.

>> > +        /* Are EFI boot services available? */
>> > +        cmpb    $0,efi_platform(%rip)
>> > +        jnz     0f
>> > +
>> > +        /* Jump to .Lmb2_no_bs after switching CPU to x86_32 mode. */
>> > +        lea     .Lmb2_no_bs(%rip),%edi
>> > +        jmp     x86_32_switch
>> > +0:
>>
>> I realize you need to avoid clobbering %rdi in the non-error case,
>> but the register choice seems sub-optimal: If you used a register
>> which you can clobber here (e.g. %edx as it seems, using %edi in
>> its place at x86_32_switch and cs32_switch), you could simplify
>> this to
>>
>>     cmpb ...
>>     lea ...
>>     je x86_32_switch
>>
>> at once avoiding all the numeric labels here.
> 
> If you do not care that it will be always loaded then OK.

That's okay, of course. The main thing is that we should prefer the
one branch variant over the two branches one.

> However, I think
> that %r15d is a bit better here because if we need to add another argument
> to efi_multiboot2() and we use %edx then we must change code in more places.
> Of course we should do "mov %r15d,%edi" after x86_32_switch label then.

As long as all affected code lives outside of the trampoline, using
any of the high 8 registers is certainly fine here.

>> > +2:
>> > +        push    %rax
>> > +        push    %rdi
>> > +
>> > +        /*
>> > +         * Initialize BSS (no nasty surprises!).
>> > +         * It must be done earlier than in BIOS case
>> > +         * because efi_multiboot2() touches it.
>> > +         */
>> > +        lea     __bss_start(%rip),%edi
>> > +        lea     __bss_end(%rip),%ecx
>> > +        sub     %edi,%ecx
>> > +        shr     $3,%ecx
>> > +        xor     %eax,%eax
>> > +        rep stosq
>> > +
>> > +        pop     %rdi
>> > +
>> > +        /*
>> > +         * efi_multiboot2() is called according to System V AMD64 ABI:
>> > +         *   - IN: %rdi - EFI ImageHandle, %rsi - EFI SystemTable,
>> > +         *   - OUT: %rax - highest allocated memory address below 1 MiB;
>> > +         *                 memory below this address is used for trampoline
>> > +         *                 stack, trampoline itself and as a storage for
>> > +         *                 mbi struct created in reloc().
>> > +         *
>> > +         * MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO tag is not provided
>> > +         * on EFI platforms. Hence, it could not be used like
>> > +         * on legacy BIOS platforms.
>> > +         */
>> > +        call    efi_multiboot2
>>
>> Now that we had a need for commit f6b7fedc89 ("x86/EFI: meet
>> further spec requirements for runtime calls") I'm worried about stack
>> alignment here. Does GrUB call or jmp to our entry point (and is that
>> firmly specified to be the case)? Perhaps it would be a good idea to
>> align the stack earlier on in any case. Or if not (and if alignment at
>> this call is ABI conforming), a comment should be left here to warn
>> people of future modifications to the amount of items pushed onto /
>> popped off the stack.
> 
> Multiboot2 spec requires that stack, among others, is passed to loaded
> image according to UEFI spec. Though, IIRC, there are no extra stack checks
> there. Maybe we should add something to GRUB2 if it does not exists.

That would imply they do a call (and not a jmp), which would make
the present code correct afaict. As said, imo there should still be a
warning added here, for anyone wanting to modify the stack layout.

>> > --- a/xen/arch/x86/efi/efi-boot.h
>> > +++ b/xen/arch/x86/efi/efi-boot.h
>> > @@ -100,6 +100,9 @@ static void __init relocate_trampoline(unsigned long phys)
>> >  {
>> >      const s32 *trampoline_ptr;
>> >
>> > +    if ( !efi_enabled(EFI_LOADER) || trampoline_phys )
>> > +        return;
>> > +
>> >      trampoline_phys = phys;
>> >      /* Apply relocations to trampoline. */
>> >      for ( trampoline_ptr = __trampoline_rel_start;
>> > @@ -210,12 +213,10 @@ static void *__init efi_arch_allocate_mmap_buffer(UINTN map_size)
>> >
>> >  static void __init efi_arch_pre_exit_boot(void)
>> >  {
>> > -    if ( !trampoline_phys )
>> > -    {
>> > -        if ( !cfg.addr )
>> > -            blexit(L"No memory for trampoline");
>> > -        relocate_trampoline(cfg.addr);
>> > -    }
>> > +    if ( !cfg.addr )
>> > +        blexit(L"No memory for trampoline");
>> > +
>> > +    relocate_trampoline(cfg.addr);
>> >  }
>>
>> Why can't this function be left untouched, and the change to
>> relocate_trampoline() above also be reduced or even be removed
>> altogether?
> 
> There is pretty good chance that efi_arch_pre_exit_boot() can be left
> untouched though relocate_trampoline() needs at least
> 
> if ( !efi_enabled(EFI_LOADER) )
>     return;

Right, hence the "reduced".

>> > +    SIMPLE_TEXT_OUTPUT_INTERFACE *StdErr;
>> > +
>> > +    StdErr = SystemTable->StdErr ? SystemTable->StdErr : SystemTable->ConOut;
>> > +
>> > +    /*
>> > +     * Print error message and halt the system.
>> > +     *
>> > +     * We have to open code MS x64 calling convention
>> > +     * in assembly because here this convention may
>> > +     * not be directly supported by C compiler.
>> > +     */
>> > +    asm volatile(
>> > +    "    call *%2                     \n"
>> > +    "0:  hlt                          \n"
>> > +    "    jmp  0b                      \n"
>> > +       : "+c" (StdErr), "+d" (err) : "g" (StdErr->OutputString)
>>
>> The "g" really wants to be "rm": While the kind of expression doesn't
>> really allow for an immediate, you still shouldn't give wrong examples
>> of constraints (with the * now properly added to the call operand, it
>> doesn't allow for an immediate anymore).
> 
> I was considering that change once but I was not sure. Though if you
> think that it make sense I will do that.

Yes please.

>> > --- a/xen/include/asm-x86/config.h
>> > +++ b/xen/include/asm-x86/config.h
>> > @@ -73,6 +73,11 @@
>> >  #define STACK_ORDER 3
>> >  #define STACK_SIZE  (PAGE_SIZE << STACK_ORDER)
>> >
>> > +#define MB_TRAMPOLINE_STACK_SIZE        PAGE_SIZE
>> > +#define MB_TRAMPOLINE_SIZE              (KB(64) - MB_TRAMPOLINE_STACK_SIZE)
>>
>> What's the reason for the MB_ prefixes here? The trampoline is
>> always the same size, isn't it? Nor am I convinced we really need
> 
> Please take a look at efi_arch_memory_setup(). Amount of memory allocated
> for trampoline and other stuff depends on boot loader type. So, when I use
> "MB_" I would like underline that this is relevant for multiboot protocols.
> Though I think that we can use the same size for all protocols. However,
> I would not like to touch native EFI loader case.

You already don't touch it, and I see no reason why this should
change. Yet this is orthogonal to the naming of the constants here.
As said, the trampoline itself is always the same, and the stack
portion of it is simply unused in the native EFI loader case. Plus
MB_TRAMPOLINE_SIZE is in no way the size of the trampoline in the
first place. Perhaps TRAMPOLINE_SPACE (and then covering both
parts)?

>> two defines - except in the link time assertion you always use
>> the sum of the two.
>>
>> > +#define MBI_SIZE                        (2 * PAGE_SIZE)
>>
>> On top of Doug's question - if it is needed at all, what does this
> 
> Please look around reloc() call in xen/arch/x86/boot/head.S. You quickly
> realize that there is following memory layout from top to bottom:
> 
>         +------------------+
>         | TRAMPOLINE_STACK |
>         +------------------+
>         |    TRAMPOLINE    |
>         +------------------+
>         |    mbi struct    |
>         +------------------+
> 
>> match up with, i.e. how come this is 2 pages (and not 1 or 3)?
> 
> Just thought that 8 KiB will be sufficient for Xen/modules arguments,
> memory map and other minor things.

Even with a couple of dozen modules passed? But the primary
question was left unanswered anyway: Does this need placing in
the low megabyte at all? And even if it does, especially if you
limit it to 8k, I don't see why it wouldn't fit inside the 64k
trampoline area.

Jan
Daniel Kiper Jan. 20, 2017, 1:46 p.m. UTC | #5
On Fri, Jan 20, 2017 at 05:40:55AM -0700, Jan Beulich wrote:
> >>> On 20.01.17 at 12:43, <daniel.kiper@oracle.com> wrote:
> > On Fri, Jan 20, 2017 at 02:46:47AM -0700, Jan Beulich wrote:
> >> >>> On 20.01.17 at 02:34, <daniel.kiper@oracle.com> wrote:

[...]

> >> > +.Lefi_multiboot2_proto:
> >> > +        /* Zero EFI SystemTable and EFI ImageHandle addresses. */
> >> > +        xor     %esi,%esi
> >> > +        xor     %edi,%edi
> >> > +
> >> > +        /* Skip Multiboot2 information fixed part. */
> >> > +        lea     (MB2_fixed_sizeof+MULTIBOOT2_TAG_ALIGN-1)(%rbx),%ecx
> >>
> >> In an earlier reply to Andrew's inquiry regarding the possible
> >> truncation here you said that grub can be made obey to such a
> >> load restriction. None of the tags added here or in patch 2
> >> appear to have such an effect, so would you please clarify how
> >> the address restriction is being enforced?
> >
> > GRUB2 itself does allocations for all multiboot2 stuff always below 4 GiB.
> > So, there is no need for extra tags.
> >
> > Additionally, multiboot2 spec says this:
> >
> > The bootloader must not load any part of the kernel, the modules, the Multiboot2
> > information structure, etc. higher than 4 GiB - 1. This requirement is put in
> > force because most of currently specified tags supports 32-bit addresses only.
> > Additionally, some kernels, even if they run on EFI 64-bit platform, still
> > execute some parts of its initialization code in 32-bit mode.
>
> Okay, that's good enough for now, even if it escapes me how that's
> supposed to work on systems without any memory below 4Gb.

If you wish I can add relevant comment somewhere. Anyway, if there is no
mem below 4 GiB at least, IIRC, GRUB2 will fail during image load. So,
no worry. At least right now.

[...]

> >> > +        /*
> >> > +         * efi_multiboot2() is called according to System V AMD64 ABI:
> >> > +         *   - IN: %rdi - EFI ImageHandle, %rsi - EFI SystemTable,
> >> > +         *   - OUT: %rax - highest allocated memory address below 1 MiB;
> >> > +         *                 memory below this address is used for trampoline
> >> > +         *                 stack, trampoline itself and as a storage for
> >> > +         *                 mbi struct created in reloc().
> >> > +         *
> >> > +         * MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO tag is not provided
> >> > +         * on EFI platforms. Hence, it could not be used like
> >> > +         * on legacy BIOS platforms.
> >> > +         */
> >> > +        call    efi_multiboot2
> >>
> >> Now that we had a need for commit f6b7fedc89 ("x86/EFI: meet
> >> further spec requirements for runtime calls") I'm worried about stack
> >> alignment here. Does GrUB call or jmp to our entry point (and is that
> >> firmly specified to be the case)? Perhaps it would be a good idea to
> >> align the stack earlier on in any case. Or if not (and if alignment at
> >> this call is ABI conforming), a comment should be left here to warn
> >> people of future modifications to the amount of items pushed onto /
> >> popped off the stack.
> >
> > Multiboot2 spec requires that stack, among others, is passed to loaded
> > image according to UEFI spec. Though, IIRC, there are no extra stack checks
> > there. Maybe we should add something to GRUB2 if it does not exists.
>
> That would imply they do a call (and not a jmp), which would make
> the present code correct afaict. As said, imo there should still be a
> warning added here, for anyone wanting to modify the stack layout.

Will do.

[...]

> >> > --- a/xen/include/asm-x86/config.h
> >> > +++ b/xen/include/asm-x86/config.h
> >> > @@ -73,6 +73,11 @@
> >> >  #define STACK_ORDER 3
> >> >  #define STACK_SIZE  (PAGE_SIZE << STACK_ORDER)
> >> >
> >> > +#define MB_TRAMPOLINE_STACK_SIZE        PAGE_SIZE
> >> > +#define MB_TRAMPOLINE_SIZE              (KB(64) - MB_TRAMPOLINE_STACK_SIZE)
> >>
> >> What's the reason for the MB_ prefixes here? The trampoline is
> >> always the same size, isn't it? Nor am I convinced we really need
> >
> > Please take a look at efi_arch_memory_setup(). Amount of memory allocated
> > for trampoline and other stuff depends on boot loader type. So, when I use
> > "MB_" I would like underline that this is relevant for multiboot protocols.
> > Though I think that we can use the same size for all protocols. However,
> > I would not like to touch native EFI loader case.
>
> You already don't touch it, and I see no reason why this should
> change. Yet this is orthogonal to the naming of the constants here.

OK.

> As said, the trampoline itself is always the same, and the stack

Yep.

> portion of it is simply unused in the native EFI loader case. Plus

Hmmm... That is interesting why? I must take a look at trampoline code.

> MB_TRAMPOLINE_SIZE is in no way the size of the trampoline in the
> first place. Perhaps TRAMPOLINE_SPACE (and then covering both
> parts)?

So, I suppose that TRAMPOLINE_SPACE and TRAMPOLINE_STACK_SPACE is
OK for you? And I can agree that this is better naming.

> >> two defines - except in the link time assertion you always use
> >> the sum of the two.
> >>
> >> > +#define MBI_SIZE                        (2 * PAGE_SIZE)
> >>
> >> On top of Doug's question - if it is needed at all, what does this
> >
> > Please look around reloc() call in xen/arch/x86/boot/head.S. You quickly
> > realize that there is following memory layout from top to bottom:
> >
> >         +------------------+
> >         | TRAMPOLINE_STACK |
> >         +------------------+
> >         |    TRAMPOLINE    |
> >         +------------------+
> >         |    mbi struct    |
> >         +------------------+
> >
> >> match up with, i.e. how come this is 2 pages (and not 1 or 3)?
> >
> > Just thought that 8 KiB will be sufficient for Xen/modules arguments,
> > memory map and other minor things.
>
> Even with a couple of dozen modules passed? But the primary

Why not? The biggest thing is memory map. The rest are mostly
command line strings and a few pointers. Modules itself live
in different memory regions.

> question was left unanswered anyway: Does this need placing in
> the low megabyte at all? And even if it does, especially if you

I do not think so. I do not expect that anything in trampoline code
touches it. So, we can put it anywhere below 4 GiB. However, then
we need an allocator to properly allocate mbi struct region. And
at this boot stage this is not an easy thing.

> limit it to 8k, I don't see why it wouldn't fit inside the 64k
> trampoline area.

If you wish why not. Though I think that we should use set of constants
here to avoid currently existing plain numbers mess in assembly. And
I have a feeling that this cleanup/fix should land into separate patch.

Daniel
Jan Beulich Jan. 20, 2017, 2:10 p.m. UTC | #6
>>> On 20.01.17 at 14:46, <daniel.kiper@oracle.com> wrote:
> On Fri, Jan 20, 2017 at 05:40:55AM -0700, Jan Beulich wrote:
>> >>> On 20.01.17 at 12:43, <daniel.kiper@oracle.com> wrote:
>> > On Fri, Jan 20, 2017 at 02:46:47AM -0700, Jan Beulich wrote:
>> >> >>> On 20.01.17 at 02:34, <daniel.kiper@oracle.com> wrote:
>> >> > --- a/xen/include/asm-x86/config.h
>> >> > +++ b/xen/include/asm-x86/config.h
>> >> > @@ -73,6 +73,11 @@
>> >> >  #define STACK_ORDER 3
>> >> >  #define STACK_SIZE  (PAGE_SIZE << STACK_ORDER)
>> >> >
>> >> > +#define MB_TRAMPOLINE_STACK_SIZE        PAGE_SIZE
>> >> > +#define MB_TRAMPOLINE_SIZE              (KB(64) - MB_TRAMPOLINE_STACK_SIZE)
>> >>
>> >> What's the reason for the MB_ prefixes here? The trampoline is
>> >> always the same size, isn't it? Nor am I convinced we really need
>> >
>> > Please take a look at efi_arch_memory_setup(). Amount of memory allocated
>> > for trampoline and other stuff depends on boot loader type. So, when I use
>> > "MB_" I would like underline that this is relevant for multiboot protocols.
>> > Though I think that we can use the same size for all protocols. However,
>> > I would not like to touch native EFI loader case.
>>
>> You already don't touch it, and I see no reason why this should
>> change. Yet this is orthogonal to the naming of the constants here.
> 
> OK.
> 
>> As said, the trampoline itself is always the same, and the stack
> 
> Yep.
> 
>> portion of it is simply unused in the native EFI loader case. Plus
> 
> Hmmm... That is interesting why? I must take a look at trampoline code.

It's a boot time only thing, and the boot path if different for
native EFI.

>> MB_TRAMPOLINE_SIZE is in no way the size of the trampoline in the
>> first place. Perhaps TRAMPOLINE_SPACE (and then covering both
>> parts)?
> 
> So, I suppose that TRAMPOLINE_SPACE and TRAMPOLINE_STACK_SPACE is
> OK for you? And I can agree that this is better naming.

Again, I don't see why you would need TRAMPOLINE_STACK_SPACE.

>> >> two defines - except in the link time assertion you always use
>> >> the sum of the two.
>> >>
>> >> > +#define MBI_SIZE                        (2 * PAGE_SIZE)
>> >>
>> >> On top of Doug's question - if it is needed at all, what does this
>> >
>> > Please look around reloc() call in xen/arch/x86/boot/head.S. You quickly
>> > realize that there is following memory layout from top to bottom:
>> >
>> >         +------------------+
>> >         | TRAMPOLINE_STACK |
>> >         +------------------+
>> >         |    TRAMPOLINE    |
>> >         +------------------+
>> >         |    mbi struct    |
>> >         +------------------+
>> >
>> >> match up with, i.e. how come this is 2 pages (and not 1 or 3)?
>> >
>> > Just thought that 8 KiB will be sufficient for Xen/modules arguments,
>> > memory map and other minor things.
>>
>> Even with a couple of dozen modules passed? But the primary
> 
> Why not? The biggest thing is memory map. The rest are mostly
> command line strings and a few pointers. Modules itself live
> in different memory regions.

Command line string may get lengthy.

>> question was left unanswered anyway: Does this need placing in
>> the low megabyte at all? And even if it does, especially if you
> 
> I do not think so. I do not expect that anything in trampoline code
> touches it. So, we can put it anywhere below 4 GiB. However, then
> we need an allocator to properly allocate mbi struct region. And
> at this boot stage this is not an easy thing.

Depending for how long you need that data to stay around, there
may be places to put it without much risk of overflowing anything.

>> limit it to 8k, I don't see why it wouldn't fit inside the 64k
>> trampoline area.
> 
> If you wish why not. Though I think that we should use set of constants
> here to avoid currently existing plain numbers mess in assembly. And
> I have a feeling that this cleanup/fix should land into separate patch.

Constants - sure. Separate patch - fine with me.

Jan
Daniel Kiper Jan. 20, 2017, 2:43 p.m. UTC | #7
On Fri, Jan 20, 2017 at 07:10:32AM -0700, Jan Beulich wrote:
> >>> On 20.01.17 at 14:46, <daniel.kiper@oracle.com> wrote:
> > On Fri, Jan 20, 2017 at 05:40:55AM -0700, Jan Beulich wrote:
> >> >>> On 20.01.17 at 12:43, <daniel.kiper@oracle.com> wrote:
> >> > On Fri, Jan 20, 2017 at 02:46:47AM -0700, Jan Beulich wrote:
> >> >> >>> On 20.01.17 at 02:34, <daniel.kiper@oracle.com> wrote:
> >> MB_TRAMPOLINE_SIZE is in no way the size of the trampoline in the
> >> first place. Perhaps TRAMPOLINE_SPACE (and then covering both
> >> parts)?
> >
> > So, I suppose that TRAMPOLINE_SPACE and TRAMPOLINE_STACK_SPACE is
> > OK for you? And I can agree that this is better naming.
>
> Again, I don't see why you would need TRAMPOLINE_STACK_SPACE.

If we use TRAMPOLINE_SPACE only for both trampoline and its stack
then ASSERT() is not very reliable. There is chance that trampoline
code will fill all space specified in TRAMPOLINE_SPACE and then there
will be no space for stack at all. However, I can agree that chance
is small if we assume 64 KiB space for trampoline and trampoline size
is about 10 KiB right now.

> >> >> two defines - except in the link time assertion you always use
> >> >> the sum of the two.
> >> >>
> >> >> > +#define MBI_SIZE                        (2 * PAGE_SIZE)
> >> >>
> >> >> On top of Doug's question - if it is needed at all, what does this
> >> >
> >> > Please look around reloc() call in xen/arch/x86/boot/head.S. You quickly
> >> > realize that there is following memory layout from top to bottom:
> >> >
> >> >         +------------------+
> >> >         | TRAMPOLINE_STACK |
> >> >         +------------------+
> >> >         |    TRAMPOLINE    |
> >> >         +------------------+
> >> >         |    mbi struct    |
> >> >         +------------------+
> >> >
> >> >> match up with, i.e. how come this is 2 pages (and not 1 or 3)?
> >> >
> >> > Just thought that 8 KiB will be sufficient for Xen/modules arguments,
> >> > memory map and other minor things.
> >>
> >> Even with a couple of dozen modules passed? But the primary
> >
> > Why not? The biggest thing is memory map. The rest are mostly
> > command line strings and a few pointers. Modules itself live
> > in different memory regions.
>
> Command line string may get lengthy.

Do you think that we should have more than 8 KiB there? Anyway,
I am more afraid about memory map size here.

> >> question was left unanswered anyway: Does this need placing in
> >> the low megabyte at all? And even if it does, especially if you
> >
> > I do not think so. I do not expect that anything in trampoline code
> > touches it. So, we can put it anywhere below 4 GiB. However, then
> > we need an allocator to properly allocate mbi struct region. And
> > at this boot stage this is not an easy thing.
>
> Depending for how long you need that data to stay around, there

IIRC, it is needed only during init phase. Probably later it can be dropped.

> may be places to put it without much risk of overflowing anything.

I am not sure about which places do you think.

Daniel
Jan Beulich Jan. 20, 2017, 3:23 p.m. UTC | #8
>>> On 20.01.17 at 15:43, <daniel.kiper@oracle.com> wrote:
> On Fri, Jan 20, 2017 at 07:10:32AM -0700, Jan Beulich wrote:
>> >>> On 20.01.17 at 14:46, <daniel.kiper@oracle.com> wrote:
>> > On Fri, Jan 20, 2017 at 05:40:55AM -0700, Jan Beulich wrote:
>> >> >>> On 20.01.17 at 12:43, <daniel.kiper@oracle.com> wrote:
>> >> > On Fri, Jan 20, 2017 at 02:46:47AM -0700, Jan Beulich wrote:
>> >> >> >>> On 20.01.17 at 02:34, <daniel.kiper@oracle.com> wrote:
>> >> MB_TRAMPOLINE_SIZE is in no way the size of the trampoline in the
>> >> first place. Perhaps TRAMPOLINE_SPACE (and then covering both
>> >> parts)?
>> >
>> > So, I suppose that TRAMPOLINE_SPACE and TRAMPOLINE_STACK_SPACE is
>> > OK for you? And I can agree that this is better naming.
>>
>> Again, I don't see why you would need TRAMPOLINE_STACK_SPACE.
> 
> If we use TRAMPOLINE_SPACE only for both trampoline and its stack
> then ASSERT() is not very reliable. There is chance that trampoline
> code will fill all space specified in TRAMPOLINE_SPACE and then there
> will be no space for stack at all. However, I can agree that chance
> is small if we assume 64 KiB space for trampoline and trampoline size
> is about 10 KiB right now.

The ASSERT() is the only place you need the stack size, so just
encode (subtract) it there (like iirc Doug had done).

>> >> >> two defines - except in the link time assertion you always use
>> >> >> the sum of the two.
>> >> >>
>> >> >> > +#define MBI_SIZE                        (2 * PAGE_SIZE)
>> >> >>
>> >> >> On top of Doug's question - if it is needed at all, what does this
>> >> >
>> >> > Please look around reloc() call in xen/arch/x86/boot/head.S. You quickly
>> >> > realize that there is following memory layout from top to bottom:
>> >> >
>> >> >         +------------------+
>> >> >         | TRAMPOLINE_STACK |
>> >> >         +------------------+
>> >> >         |    TRAMPOLINE    |
>> >> >         +------------------+
>> >> >         |    mbi struct    |
>> >> >         +------------------+
>> >> >
>> >> >> match up with, i.e. how come this is 2 pages (and not 1 or 3)?
>> >> >
>> >> > Just thought that 8 KiB will be sufficient for Xen/modules arguments,
>> >> > memory map and other minor things.
>> >>
>> >> Even with a couple of dozen modules passed? But the primary
>> >
>> > Why not? The biggest thing is memory map. The rest are mostly
>> > command line strings and a few pointers. Modules itself live
>> > in different memory regions.
>>
>> Command line string may get lengthy.
> 
> Do you think that we should have more than 8 KiB there? Anyway,
> I am more afraid about memory map size here.

The question is how flexible we want to be. If the size is fixed,
you need to make your code stop using space beyond that size.

>> >> question was left unanswered anyway: Does this need placing in
>> >> the low megabyte at all? And even if it does, especially if you
>> >
>> > I do not think so. I do not expect that anything in trampoline code
>> > touches it. So, we can put it anywhere below 4 GiB. However, then
>> > we need an allocator to properly allocate mbi struct region. And
>> > at this boot stage this is not an easy thing.
>>
>> Depending for how long you need that data to stay around, there
> 
> IIRC, it is needed only during init phase. Probably later it can be dropped.

That's way too imprecise.

>> may be places to put it without much risk of overflowing anything.
> 
> I am not sure about which places do you think.

There are a number of relatively large internal buffers which could
be used for this purpose if the uses of the MBI data all live before
the first use of whichever such buffer we would select as victim.
Otherwise, just go down from e.g. trampoline+60k until you reach
trampoline_end.

Jan
Doug Goldstein Jan. 20, 2017, 7:04 p.m. UTC | #9
On 1/19/17 8:34 PM, Daniel Kiper wrote:
> This way Xen can be loaded on EFI platforms using GRUB2 and
> other boot loaders which support multiboot2 protocol.
> 
> Signed-off-by: Daniel Kiper <daniel.kiper@oracle.com>
> ---
> v12 - suggestions/fixes:
>     - rename __efi64_start to __efi64_mb2_start
>       (suggested by Andrew Cooper),

Andrew actually asked for __mb2_efi64_start

> +
> +__efi64_mb2_start:

here.
Andrew Cooper Jan. 20, 2017, 7:05 p.m. UTC | #10
On 20/01/17 19:04, Doug Goldstein wrote:
> On 1/19/17 8:34 PM, Daniel Kiper wrote:
>> This way Xen can be loaded on EFI platforms using GRUB2 and
>> other boot loaders which support multiboot2 protocol.
>>
>> Signed-off-by: Daniel Kiper <daniel.kiper@oracle.com>
>> ---
>> v12 - suggestions/fixes:
>>     - rename __efi64_start to __efi64_mb2_start
>>       (suggested by Andrew Cooper),
> Andrew actually asked for __mb2_efi64_start

I can't say that I am overly fussed which way around this ends up.  I
certainly won't insist on changing.

~Andrew
Doug Goldstein Jan. 20, 2017, 7:34 p.m. UTC | #11
On 1/19/17 8:34 PM, Daniel Kiper wrote:

> diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
> index 62c010e..c1285ad 100644
> --- a/xen/arch/x86/efi/efi-boot.h
> +++ b/xen/arch/x86/efi/efi-boot.h


> +
> +    efi_exit_boot(ImageHandle, SystemTable);
> +
> +    /* Return highest allocated memory address below 1 MiB. */
> +    return cfg.addr + cfg.size;

So my comment about overwriting memory on 02/10 was spot on but made the
incorrect conclusion that it was before hand and not after. And here's
the issue. I believe what you meant to do was:

return cfg.addr + MBI_SIZE;

I can't see how this booted for you with OVMF because for all the
different versions I've tried with the original code its writing over
reserved memory that QEMU uses for the graphics buffers. Which
immediately results in the trampolines being overwritten with console data.
Daniel Kiper Jan. 20, 2017, 9:42 p.m. UTC | #12
On Fri, Jan 20, 2017 at 02:34:35PM -0500, Doug Goldstein wrote:
> On 1/19/17 8:34 PM, Daniel Kiper wrote:
> > diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
> > index 62c010e..c1285ad 100644
> > --- a/xen/arch/x86/efi/efi-boot.h
> > +++ b/xen/arch/x86/efi/efi-boot.h
>
> > +
> > +    efi_exit_boot(ImageHandle, SystemTable);
> > +
> > +    /* Return highest allocated memory address below 1 MiB. */
> > +    return cfg.addr + cfg.size;
>
> So my comment about overwriting memory on 02/10 was spot on but made the
> incorrect conclusion that it was before hand and not after. And here's
> the issue. I believe what you meant to do was:
>
> return cfg.addr + MBI_SIZE;

Errr... Sorry, this is the issue which I knew but I forgot to fix in v12.

> I can't see how this booted for you with OVMF because for all the
> different versions I've tried with the original code its writing over
> reserved memory that QEMU uses for the graphics buffers. Which
> immediately results in the trampolines being overwritten with console data.

It booted without any complain...

Daniel

Patch
diff mbox

diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
index 84cf44d..b8f727a 100644
--- a/xen/arch/x86/boot/head.S
+++ b/xen/arch/x86/boot/head.S
@@ -89,6 +89,13 @@  multiboot2_header_start:
                    0, /* Number of the lines - no preference. */ \
                    0  /* Number of bits per pixel - no preference. */
 
+        /* Request that ExitBootServices() not be called. */
+        mb2ht_init MB2_HT(EFI_BS), MB2_HT(OPTIONAL)
+
+        /* EFI64 Multiboot2 entry point. */
+        mb2ht_init MB2_HT(ENTRY_ADDRESS_EFI64), MB2_HT(OPTIONAL), \
+                   sym_phys(__efi64_mb2_start)
+
         /* Multiboot2 header end tag. */
         mb2ht_init MB2_HT(END), MB2_HT(REQUIRED)
 .Lmultiboot2_header_end:
@@ -100,20 +107,48 @@  multiboot2_header_start:
 gdt_boot_descr:
         .word   6*8-1
         .long   sym_phys(trampoline_gdt)
+        .long   0 /* Needed for 64-bit lgdt */
+
+        .align 4
+vga_text_buffer:
+        .long   0xb8000
+
+efi_platform:
+        .byte   0
 
 .Lbad_cpu_msg: .asciz "ERR: Not a 64-bit CPU!"
 .Lbad_ldr_msg: .asciz "ERR: Not a Multiboot bootloader!"
+.Lbad_ldr_nbs: .asciz "ERR: Bootloader shutdown EFI x64 boot services!"
+.Lbad_ldr_nst: .asciz "ERR: EFI SystemTable is not provided by bootloader!"
+.Lbad_ldr_nih: .asciz "ERR: EFI ImageHandle is not provided by bootloader!"
+.Lbad_efi_msg: .asciz "ERR: EFI IA-32 platforms are not supported!"
 
         .section .init.text, "ax", @progbits
 
 bad_cpu:
         mov     $(sym_phys(.Lbad_cpu_msg)),%esi # Error message
-        jmp     print_err
+        jmp     .Lget_vtb
 not_multiboot:
         mov     $(sym_phys(.Lbad_ldr_msg)),%esi # Error message
-print_err:
-        mov     $0xB8000,%edi  # VGA framebuffer
-1:      mov     (%esi),%bl
+        jmp     .Lget_vtb
+.Lmb2_no_st:
+        mov     $(sym_phys(.Lbad_ldr_nst)),%esi # Error message
+        jmp     .Lget_vtb
+.Lmb2_no_ih:
+        mov     $(sym_phys(.Lbad_ldr_nih)),%esi # Error message
+        jmp     .Lget_vtb
+.Lmb2_no_bs:
+        mov     $(sym_phys(.Lbad_ldr_nbs)),%esi # Error message
+        xor     %edi,%edi                       # No VGA text buffer
+        jmp     .Lsend_chr
+.Lmb2_efi_ia_32:
+        mov     $(sym_phys(.Lbad_efi_msg)),%esi # Error message
+        xor     %edi,%edi                       # No VGA text buffer
+        jmp     .Lsend_chr
+.Lget_vtb:
+        mov     sym_phys(vga_text_buffer),%edi
+.Lsend_chr:
+        mov     (%esi),%bl
         test    %bl,%bl        # Terminate on '\0' sentinel
         je      .Lhalt
         mov     $0x3f8+5,%dx   # UART Line Status Register
@@ -123,13 +158,186 @@  print_err:
         mov     $0x3f8+0,%dx   # UART Transmit Holding Register
         mov     %bl,%al
         out     %al,%dx        # Send a character over the serial line
-        movsb                  # Write a character to the VGA framebuffer
+        test    %edi,%edi      # Is the VGA text buffer available?
+        jz      .Lsend_chr
+        movsb                  # Write a character to the VGA text buffer
         mov     $7,%al
-        stosb                  # Write an attribute to the VGA framebuffer
-        jmp     1b
+        stosb                  # Write an attribute to the VGA text buffer
+        jmp     .Lsend_chr
 .Lhalt: hlt
         jmp     .Lhalt
 
+        .code64
+
+__efi64_mb2_start:
+        cld
+
+        /* VGA is not available on EFI platforms. */
+        movl   $0,vga_text_buffer(%rip)
+
+        /* Check for Multiboot2 bootloader. */
+        cmp     $MULTIBOOT2_BOOTLOADER_MAGIC,%eax
+        je      .Lefi_multiboot2_proto
+
+        /* Jump to not_multiboot after switching CPU to x86_32 mode. */
+        lea     not_multiboot(%rip),%edi
+        jmp     x86_32_switch
+
+.Lefi_multiboot2_proto:
+        /* Zero EFI SystemTable and EFI ImageHandle addresses. */
+        xor     %esi,%esi
+        xor     %edi,%edi
+
+        /* Skip Multiboot2 information fixed part. */
+        lea     (MB2_fixed_sizeof+MULTIBOOT2_TAG_ALIGN-1)(%rbx),%ecx
+        and     $~(MULTIBOOT2_TAG_ALIGN-1),%ecx
+
+.Lefi_mb2_tsize:
+        /* Check Multiboot2 information total size. */
+        mov     %ecx,%r8d
+        sub     %ebx,%r8d
+        cmp     %r8d,MB2_fixed_total_size(%rbx)
+        jbe     run_bs
+
+        /* Are EFI boot services available? */
+        cmpl    $MULTIBOOT2_TAG_TYPE_EFI_BS,MB2_tag_type(%rcx)
+        jne     .Lefi_mb2_st
+
+        /* We are on EFI platform and EFI boot services are available. */
+        incb    efi_platform(%rip)
+
+        /*
+         * Disable real mode and other legacy stuff which should not
+         * be run on EFI platforms.
+         */
+        incb    skip_realmode(%rip)
+        jmp     .Lefi_mb2_next_tag
+
+.Lefi_mb2_st:
+        /* Get EFI SystemTable address from Multiboot2 information. */
+        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64,MB2_tag_type(%rcx)
+        cmove   MB2_efi64_st(%rcx),%rsi
+        je      .Lefi_mb2_next_tag
+
+        /* Get EFI ImageHandle address from Multiboot2 information. */
+        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64_IH,MB2_tag_type(%rcx)
+        cmove   MB2_efi64_ih(%rcx),%rdi
+        je      .Lefi_mb2_next_tag
+
+        /* Is it the end of Multiboot2 information? */
+        cmpl    $MULTIBOOT2_TAG_TYPE_END,MB2_tag_type(%rcx)
+        je      run_bs
+
+.Lefi_mb2_next_tag:
+        /* Go to next Multiboot2 information tag. */
+        add     MB2_tag_size(%rcx),%ecx
+        add     $(MULTIBOOT2_TAG_ALIGN-1),%ecx
+        and     $~(MULTIBOOT2_TAG_ALIGN-1),%ecx
+        jmp     .Lefi_mb2_tsize
+
+run_bs:
+        /* Are EFI boot services available? */
+        cmpb    $0,efi_platform(%rip)
+        jnz     0f
+
+        /* Jump to .Lmb2_no_bs after switching CPU to x86_32 mode. */
+        lea     .Lmb2_no_bs(%rip),%edi
+        jmp     x86_32_switch
+
+0:
+        /* Is EFI SystemTable address provided by boot loader? */
+        test    %rsi,%rsi
+        jnz     1f
+
+        /* Jump to .Lmb2_no_st after switching CPU to x86_32 mode. */
+        lea     .Lmb2_no_st(%rip),%edi
+        jmp     x86_32_switch
+
+1:
+        /* Is EFI ImageHandle address provided by boot loader? */
+        test    %rdi,%rdi
+        jnz     2f
+
+        /* Jump to .Lmb2_no_ih after switching CPU to x86_32 mode. */
+        lea     .Lmb2_no_ih(%rip),%edi
+        jmp     x86_32_switch
+
+2:
+        push    %rax
+        push    %rdi
+
+        /*
+         * Initialize BSS (no nasty surprises!).
+         * It must be done earlier than in BIOS case
+         * because efi_multiboot2() touches it.
+         */
+        lea     __bss_start(%rip),%edi
+        lea     __bss_end(%rip),%ecx
+        sub     %edi,%ecx
+        shr     $3,%ecx
+        xor     %eax,%eax
+        rep stosq
+
+        pop     %rdi
+
+        /*
+         * efi_multiboot2() is called according to System V AMD64 ABI:
+         *   - IN: %rdi - EFI ImageHandle, %rsi - EFI SystemTable,
+         *   - OUT: %rax - highest allocated memory address below 1 MiB;
+         *                 memory below this address is used for trampoline
+         *                 stack, trampoline itself and as a storage for
+         *                 mbi struct created in reloc().
+         *
+         * MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO tag is not provided
+         * on EFI platforms. Hence, it could not be used like
+         * on legacy BIOS platforms.
+         */
+        call    efi_multiboot2
+
+        /* Convert memory address to bytes/16 and store it in safe place. */
+        shr     $4,%eax
+        mov     %eax,%ecx
+
+        pop     %rax
+
+        /* Jump to trampoline_setup after switching CPU to x86_32 mode. */
+        lea     trampoline_setup(%rip),%edi
+
+x86_32_switch:
+        cli
+
+        /* Initialize GDTR. */
+        lgdt    gdt_boot_descr(%rip)
+
+        /* Reload code selector. */
+        pushq   $BOOT_CS32
+        lea     cs32_switch(%rip),%edx
+        push    %rdx
+        lretq
+
+        .code32
+
+cs32_switch:
+        /* Initialize basic data segments. */
+        mov     $BOOT_DS,%edx
+        mov     %edx,%ds
+        mov     %edx,%es
+        mov     %edx,%ss
+        /* %esp is initialized later. */
+
+        /* Load null descriptor to unused segment registers. */
+        xor     %edx,%edx
+        mov     %edx,%fs
+        mov     %edx,%gs
+
+        /* Disable paging. */
+        mov     %cr0,%edx
+        and     $(~X86_CR0_PG),%edx
+        mov     %edx,%cr0
+
+        /* Jump to earlier loaded address. */
+        jmp     *%edi
+
 __start:
         cld
         cli
@@ -157,7 +365,7 @@  __start:
 
         /* Not available? BDA value will be fine. */
         cmovnz  MB_mem_lower(%ebx),%edx
-        jmp     trampoline_setup
+        jmp     trampoline_bios_setup
 
 .Lmultiboot2_proto:
         /* Skip Multiboot2 information fixed part. */
@@ -169,24 +377,33 @@  __start:
         mov     %ecx,%edi
         sub     %ebx,%edi
         cmp     %edi,MB2_fixed_total_size(%ebx)
-        jbe     trampoline_setup
+        jbe     trampoline_bios_setup
 
         /* Get mem_lower from Multiboot2 information. */
         cmpl    $MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO,MB2_tag_type(%ecx)
         cmove   MB2_mem_lower(%ecx),%edx
-        je      trampoline_setup
+        je      .Lmb2_next_tag
+
+        /* EFI IA-32 platforms are not supported. */
+        cmpl    $MULTIBOOT2_TAG_TYPE_EFI32,MB2_tag_type(%ecx)
+        je      .Lmb2_efi_ia_32
+
+        /* Bootloader shutdown EFI x64 boot services. */
+        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64,MB2_tag_type(%ecx)
+        je      .Lmb2_no_bs
 
         /* Is it the end of Multiboot2 information? */
         cmpl    $MULTIBOOT2_TAG_TYPE_END,MB2_tag_type(%ecx)
-        je      trampoline_setup
+        je      trampoline_bios_setup
 
+.Lmb2_next_tag:
         /* Go to next Multiboot2 information tag. */
         add     MB2_tag_size(%ecx),%ecx
         add     $(MULTIBOOT2_TAG_ALIGN-1),%ecx
         and     $~(MULTIBOOT2_TAG_ALIGN-1),%ecx
         jmp     .Lmb2_tsize
 
-trampoline_setup:
+trampoline_bios_setup:
         /* Set up trampoline segment 64k below EBDA */
         movzwl  0x40e,%ecx          /* EBDA segment */
         cmp     $0xa000,%ecx        /* sanity check (high) */
@@ -202,16 +419,19 @@  trampoline_setup:
          * multiboot structure (if available) and use the smallest.
          */
         cmp     $0x100,%edx         /* is the multiboot value too small? */
-        jb      2f                  /* if so, do not use it */
+        jb      trampoline_setup    /* if so, do not use it */
         shl     $10-4,%edx
         cmp     %ecx,%edx           /* compare with BDA value */
         cmovb   %edx,%ecx           /* and use the smaller */
 
-2:      /* Reserve 64kb for the trampoline */
-        sub     $0x1000,%ecx
+        /* Reserve memory for the trampoline. */
+        sub     $((MB_TRAMPOLINE_SIZE+MB_TRAMPOLINE_STACK_SIZE)>>4),%ecx
 
         /* From arch/x86/smpboot.c: start_eip had better be page-aligned! */
         xor     %cl, %cl
+
+trampoline_setup:
+        /* Save trampoline address for later use. */
         shl     $4, %ecx
         mov     %ecx,sym_phys(trampoline_phys)
 
@@ -223,7 +443,14 @@  trampoline_setup:
         call    reloc
         mov     %eax,sym_phys(multiboot_ptr)
 
-        /* Initialize BSS (no nasty surprises!) */
+        /*
+         * Do not zero BSS on EFI platform here.
+         * It was initialized earlier.
+         */
+        cmpb    $0,sym_phys(efi_platform)
+        jnz     1f
+
+        /* Initialize BSS (no nasty surprises!). */
         mov     $sym_phys(__bss_start),%edi
         mov     $sym_phys(__bss_end),%ecx
         sub     %edi,%ecx
@@ -231,6 +458,7 @@  trampoline_setup:
         shr     $2,%ecx
         rep stosl
 
+1:
         /* Interrogate CPU extended features via CPUID. */
         mov     $0x80000000,%eax
         cpuid
@@ -282,6 +510,10 @@  trampoline_setup:
         cmp     $sym_phys(__trampoline_seg_stop),%edi
         jb      1b
 
+        /* Do not parse command line on EFI platform here. */
+        cmpb    $0,sym_phys(efi_platform)
+        jnz     1f
+
         /* Bail if there is no command line to parse. */
         mov     sym_phys(multiboot_ptr),%ebx
         testl   $MBI_CMDLINE,MB_flags(%ebx)
@@ -292,9 +524,9 @@  trampoline_setup:
         call    cmdline_parse_early
 
 1:
-        /* Switch to low-memory stack.  */
+        /* Switch to low-memory stack which lives at the end of trampoline region. */
         mov     sym_phys(trampoline_phys),%edi
-        lea     0x10000(%edi),%esp
+        lea     MB_TRAMPOLINE_SIZE+MB_TRAMPOLINE_STACK_SIZE(%edi),%esp
         lea     trampoline_boot_cpu_entry-trampoline_start(%edi),%eax
         pushl   $BOOT_CS32
         push    %eax
diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
index 62c010e..c1285ad 100644
--- a/xen/arch/x86/efi/efi-boot.h
+++ b/xen/arch/x86/efi/efi-boot.h
@@ -100,6 +100,9 @@  static void __init relocate_trampoline(unsigned long phys)
 {
     const s32 *trampoline_ptr;
 
+    if ( !efi_enabled(EFI_LOADER) || trampoline_phys )
+        return;
+
     trampoline_phys = phys;
     /* Apply relocations to trampoline. */
     for ( trampoline_ptr = __trampoline_rel_start;
@@ -210,12 +213,10 @@  static void *__init efi_arch_allocate_mmap_buffer(UINTN map_size)
 
 static void __init efi_arch_pre_exit_boot(void)
 {
-    if ( !trampoline_phys )
-    {
-        if ( !cfg.addr )
-            blexit(L"No memory for trampoline");
-        relocate_trampoline(cfg.addr);
-    }
+    if ( !cfg.addr )
+        blexit(L"No memory for trampoline");
+
+    relocate_trampoline(cfg.addr);
 }
 
 static void __init noreturn efi_arch_post_exit_boot(void)
@@ -550,7 +551,12 @@  static void __init efi_arch_memory_setup(void)
 
     /* Allocate space for trampoline (in first Mb). */
     cfg.addr = 0x100000;
-    cfg.size = trampoline_end - trampoline_start;
+
+    if ( efi_enabled(EFI_LOADER) )
+        cfg.size = trampoline_end - trampoline_start;
+    else
+        cfg.size = MB_TRAMPOLINE_SIZE + MB_TRAMPOLINE_STACK_SIZE + MBI_SIZE;
+
     status = efi_bs->AllocatePages(AllocateMaxAddress, EfiLoaderData,
                                    PFN_UP(cfg.size), &cfg.addr);
     if ( status == EFI_SUCCESS )
@@ -561,6 +567,9 @@  static void __init efi_arch_memory_setup(void)
         PrintStr(L"Trampoline space cannot be allocated; will try fallback.\r\n");
     }
 
+    if ( !efi_enabled(EFI_LOADER) )
+        return;
+
     /* Initialise L2 identity-map and boot-map page table entries (16MB). */
     for ( i = 0; i < 8; ++i )
     {
@@ -653,6 +662,44 @@  static bool_t __init efi_arch_use_config_file(EFI_SYSTEM_TABLE *SystemTable)
 
 static void efi_arch_flush_dcache_area(const void *vaddr, UINTN size) { }
 
+paddr_t __init efi_multiboot2(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable)
+{
+    EFI_GRAPHICS_OUTPUT_PROTOCOL *gop;
+    UINTN cols, gop_mode = ~0, rows;
+
+    __set_bit(EFI_BOOT, &efi_flags);
+    __set_bit(EFI_RS, &efi_flags);
+
+    efi_init(ImageHandle, SystemTable);
+
+    efi_console_set_mode();
+
+    if ( StdOut->QueryMode(StdOut, StdOut->Mode->Mode,
+                           &cols, &rows) == EFI_SUCCESS )
+        efi_arch_console_init(cols, rows);
+
+    gop = efi_get_gop();
+
+    if ( gop )
+        gop_mode = efi_find_gop_mode(gop, 0, 0, 0);
+
+    efi_arch_edd();
+    efi_arch_cpu();
+
+    efi_tables();
+    setup_efi_pci();
+    efi_variables();
+    efi_arch_memory_setup();
+
+    if ( gop )
+        efi_set_gop_mode(gop, gop_mode);
+
+    efi_exit_boot(ImageHandle, SystemTable);
+
+    /* Return highest allocated memory address below 1 MiB. */
+    return cfg.addr + cfg.size;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/efi/stub.c b/xen/arch/x86/efi/stub.c
index 4158124..b81adc0 100644
--- a/xen/arch/x86/efi/stub.c
+++ b/xen/arch/x86/efi/stub.c
@@ -3,6 +3,44 @@ 
 #include <xen/init.h>
 #include <xen/lib.h>
 #include <asm/page.h>
+#include <asm/efibind.h>
+#include <efi/efidef.h>
+#include <efi/eficapsule.h>
+#include <efi/eficon.h>
+#include <efi/efidevp.h>
+#include <efi/efiapi.h>
+
+/*
+ * Here we are in EFI stub. EFI calls are not supported due to lack
+ * of relevant functionality in compiler and/or linker.
+ *
+ * efi_multiboot2() is an exception. Please look below for more details.
+ */
+
+paddr_t __init noreturn efi_multiboot2(EFI_HANDLE ImageHandle,
+                                       EFI_SYSTEM_TABLE *SystemTable)
+{
+    CHAR16 *err = L"Xen does not have EFI code build in!!!\r\nSystem halted!!!\r\n";
+    SIMPLE_TEXT_OUTPUT_INTERFACE *StdErr;
+
+    StdErr = SystemTable->StdErr ? SystemTable->StdErr : SystemTable->ConOut;
+
+    /*
+     * Print error message and halt the system.
+     *
+     * We have to open code MS x64 calling convention
+     * in assembly because here this convention may
+     * not be directly supported by C compiler.
+     */
+    asm volatile(
+    "    call *%2                     \n"
+    "0:  hlt                          \n"
+    "    jmp  0b                      \n"
+       : "+c" (StdErr), "+d" (err) : "g" (StdErr->OutputString)
+       : "rax", "r8", "r9", "r10", "r11", "memory");
+
+    unreachable();
+}
 
 bool efi_enabled(unsigned int feature)
 {
diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
index 92f5d81..f135654 100644
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -175,6 +175,8 @@  void __dummy__(void)
     OFFSET(MB2_tag_type, multiboot2_tag_t, type);
     OFFSET(MB2_tag_size, multiboot2_tag_t, size);
     OFFSET(MB2_mem_lower, multiboot2_tag_basic_meminfo_t, mem_lower);
+    OFFSET(MB2_efi64_st, multiboot2_tag_efi64_t, pointer);
+    OFFSET(MB2_efi64_ih, multiboot2_tag_efi64_ih_t, pointer);
     BLANK();
 
     OFFSET(DOMAIN_vm_assist, struct domain, vm_assist);
diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
index b0b1c9b..3a02b2b 100644
--- a/xen/arch/x86/xen.lds.S
+++ b/xen/arch/x86/xen.lds.S
@@ -331,5 +331,8 @@  ASSERT(IS_ALIGNED(__init_end,   PAGE_SIZE), "__init_end misaligned")
 
 ASSERT(IS_ALIGNED(trampoline_start, 4), "trampoline_start misaligned")
 ASSERT(IS_ALIGNED(trampoline_end,   4), "trampoline_end misaligned")
-ASSERT(IS_ALIGNED(__bss_start,      4), "__bss_start misaligned")
-ASSERT(IS_ALIGNED(__bss_end,        4), "__bss_end misaligned")
+ASSERT(IS_ALIGNED(__bss_start,      8), "__bss_start misaligned")
+ASSERT(IS_ALIGNED(__bss_end,        8), "__bss_end misaligned")
+
+ASSERT((trampoline_end - trampoline_start) < MB_TRAMPOLINE_SIZE,
+    "not enough room for trampoline")
diff --git a/xen/common/efi/boot.c b/xen/common/efi/boot.c
index 36dbb71..b6cbdad 100644
--- a/xen/common/efi/boot.c
+++ b/xen/common/efi/boot.c
@@ -79,6 +79,17 @@  static size_t wstrlen(const CHAR16 * s);
 static int set_color(u32 mask, int bpp, u8 *pos, u8 *sz);
 static bool_t match_guid(const EFI_GUID *guid1, const EFI_GUID *guid2);
 
+static void efi_init(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable);
+static void efi_console_set_mode(void);
+static EFI_GRAPHICS_OUTPUT_PROTOCOL *efi_get_gop(void);
+static UINTN efi_find_gop_mode(EFI_GRAPHICS_OUTPUT_PROTOCOL *gop,
+                               UINTN cols, UINTN rows, UINTN depth);
+static void efi_tables(void);
+static void setup_efi_pci(void);
+static void efi_variables(void);
+static void efi_set_gop_mode(EFI_GRAPHICS_OUTPUT_PROTOCOL *gop, UINTN gop_mode);
+static void efi_exit_boot(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable);
+
 static const EFI_BOOT_SERVICES *__initdata efi_bs;
 static UINT32 __initdata efi_bs_revision;
 static EFI_HANDLE __initdata efi_ih;
diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h
index 6fd84e7..a86ea12 100644
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -73,6 +73,11 @@ 
 #define STACK_ORDER 3
 #define STACK_SIZE  (PAGE_SIZE << STACK_ORDER)
 
+#define MB_TRAMPOLINE_STACK_SIZE        PAGE_SIZE
+#define MB_TRAMPOLINE_SIZE              (KB(64) - MB_TRAMPOLINE_STACK_SIZE)
+
+#define MBI_SIZE                        (2 * PAGE_SIZE)
+
 /* Primary stack is restricted to 8kB by guard pages. */
 #define PRIMARY_STACK_SIZE 8192
 
diff --git a/xen/include/xen/config.h b/xen/include/xen/config.h
index 473c5e8..04e4da5 100644
--- a/xen/include/xen/config.h
+++ b/xen/include/xen/config.h
@@ -70,6 +70,7 @@ 
 #define __force
 #define __bitwise
 
+#define KB(_kb)     (_AC(_kb, ULL) << 10)
 #define MB(_mb)     (_AC(_mb, ULL) << 20)
 #define GB(_gb)     (_AC(_gb, ULL) << 30)