diff mbox

[v3,13/16,-,RFC] x86: add multiboot2 protocol support for EFI platforms

Message ID 1460723596-13261-14-git-send-email-daniel.kiper@oracle.com (mailing list archive)
State New, archived
Headers show

Commit Message

Daniel Kiper April 15, 2016, 12:33 p.m. UTC
This way Xen can be loaded on EFI platforms using GRUB2 and
other boot loaders which support multiboot2 protocol.

Signed-off-by: Daniel Kiper <daniel.kiper@oracle.com>
---
v3 - suggestions/fixes:
   - take into account alignment when skipping multiboot2 fixed part
     (suggested by Konrad Rzeszutek Wilk),
   - improve segment registers initialization
     (suggested by Jan Beulich),
   - improve comments
     (suggested by Jan Beulich and Konrad Rzeszutek Wilk),
   - improve commit message
     (suggested by Jan Beulich).

v3 - not fixed yet:
   - xen/arch/x86/efi/stub.c:efi_multiboot2()
     should print error message and halt system.

v2 - suggestions/fixes:
   - generate multiboot2 header using macros
     (suggested by Jan Beulich),
   - switch CPU to x86_32 mode before
     jumping to 32-bit code
     (suggested by Andrew Cooper),
   - reduce code changes to increase patch readability
     (suggested by Jan Beulich),
   - improve comments
     (suggested by Jan Beulich),
   - ignore MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO tag on EFI platform
     and find on my own multiboot2.mem_lower value,
   - stop execution if EFI platform is detected
     in legacy BIOS path.
---
 xen/arch/x86/boot/head.S          |  177 +++++++++++++++++++++++++++++++++++--
 xen/arch/x86/efi/efi-boot.h       |   43 +++++++++
 xen/arch/x86/efi/stub.c           |    5 ++
 xen/arch/x86/setup.c              |   10 ++-
 xen/arch/x86/x86_64/asm-offsets.c |    2 +
 xen/arch/x86/xen.lds.S            |    4 +-
 xen/common/efi/boot.c             |   12 +++
 xen/include/xen/efi.h             |    1 +
 8 files changed, 240 insertions(+), 14 deletions(-)

Comments

Jan Beulich May 25, 2016, 9:32 a.m. UTC | #1
>>> On 15.04.16 at 14:33, <daniel.kiper@oracle.com> wrote:
> @@ -100,19 +107,29 @@ multiboot2_header_end:
>  gdt_boot_descr:
>          .word   6*8-1
>          .long   sym_phys(trampoline_gdt)
> +        .long   0 /* Needed for 64-bit lgdt */
> +
> +cs32_switch_addr:
> +        .long   sym_phys(cs32_switch)
> +        .word   BOOT_CS32
>  
>  .Lbad_cpu_msg: .asciz "ERR: Not a 64-bit CPU!"
>  .Lbad_ldr_msg: .asciz "ERR: Not a Multiboot bootloader!"
> +.Lbad_ldr_mb2: .asciz "ERR: On EFI platform use latest Multiboot2 compatible bootloader!"

What is "latest" going to mean 5 years from now?

>          .section .init.text, "ax", @progbits
>  
>  bad_cpu:
>          mov     $(sym_phys(.Lbad_cpu_msg)),%esi # Error message
> -        jmp     print_err
> +        mov     $0xB8000,%edi                   # VGA framebuffer
> +        jmp     1f
>  not_multiboot:
>          mov     $(sym_phys(.Lbad_ldr_msg)),%esi # Error message
> -print_err:
> -        mov     $0xB8000,%edi  # VGA framebuffer
> +        mov     $0xB8000,%edi                   # VGA framebuffer
> +        jmp     1f
> +mb2_too_old:
> +        mov     $(sym_phys(.Lbad_ldr_mb2)),%esi # Error message
> +        xor     %edi,%edi                       # No VGA framebuffer

Leaving aside that "framebuffer" really is a bad term here (we're
talking of text mode output after all), limiting the output to serial
isn't going to be very helpful in the field, I'm afraid. Even more so
that there's no guarantee for a UART to be at port 0x3f8. That's
not much of a problem for the other two messages as people are
unlikely to try to boot Xen on an unsuitable system, but I view it
as quite possible for Xen to be tried to get booted with an
unsuitable grub2.

IOW - this needs a better solution, presumably using EFI boot
service output functions.

> @@ -130,6 +149,130 @@ print_err:
>  .Lhalt: hlt
>          jmp     .Lhalt
>  
> +        .code64
> +
> +__efi64_start:

As long as we have split files under boot/, I think I'd prefer 64-bit
code to only go into x86_64.S.

> +        cld
> +
> +        /* Check for Multiboot2 bootloader. */
> +        cmp     $MULTIBOOT2_BOOTLOADER_MAGIC,%eax
> +        je      efi_multiboot2_proto
> +
> +        /* Jump to not_multiboot after switching CPU to x86_32 mode. */
> +        lea     not_multiboot(%rip),%rdi
> +        jmp     x86_32_switch

What I've said above would also eliminate the need to switch to
32-bit mode just for emitting an error message and halting the
system.

> +efi_multiboot2_proto:

.Lefi_multiboot2_proto

> +        /*
> +         * Multiboot2 information address is 32-bit,
> +         * so, zero higher half of %rbx.
> +         */
> +        mov     %ebx,%ebx

Wait, no - that's a protocol bug then. We're being entered in 64-bit
mode here, so registers should be in 64-bit clean state.

> +        /* Skip Multiboot2 information fixed part. */
> +        lea     (MB2_fixed_sizeof+MULTIBOOT2_TAG_ALIGN-1)(%rbx),%rcx
> +        and     $~(MULTIBOOT2_TAG_ALIGN-1),%rcx

Or if there really was a reason to do the above, and if there is a
reason not to assume this data is located below 4Gb, then
calculations like this could avoid the REX64 prefix by using %ecx.

> +0:
> +        /* Get EFI SystemTable address from Multiboot2 information. */
> +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64,MB2_tag_type(%rcx)
> +        jne     1f
> +
> +        mov     MB2_efi64_st(%rcx),%rsi
> +
> +        /* Do not clear BSS twice and do not go into real mode. */
> +        movb    $1,skip_realmode(%rip)

How is the setting of skip_realmode related to the clearing of BSS?
Oh, I've found the connection below (albeit see there for its
validity), but I think mentioning this here is more confusing than
clarifying.

> +        jmp     3f
> +
> +1:
> +        /* Get EFI ImageHandle address from Multiboot2 information. */
> +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64_IH,MB2_tag_type(%rcx)
> +        jne     2f
> +
> +        mov     MB2_efi64_ih(%rcx),%rdi
> +        jmp     3f
> +
> +2:
> +        /* Is it the end of Multiboot2 information? */
> +        cmpl    $MULTIBOOT2_TAG_TYPE_END,MB2_tag_type(%rcx)
> +        je      run_bs
> +
> +3:
> +        /* Go to next Multiboot2 information tag. */
> +        add     MB2_tag_size(%rcx),%ecx
> +        add     $(MULTIBOOT2_TAG_ALIGN-1),%rcx
> +        and     $~(MULTIBOOT2_TAG_ALIGN-1),%rcx
> +        jmp     0b

At least down to here it looks like this would better live in a C helper
function. Did you consider this?

> +run_bs:
> +        push    %rax
> +        push    %rdi
> +
> +        /*
> +         * Initialize BSS (no nasty surprises!).
> +         * It must be done earlier than in BIOS case
> +         * because efi_multiboot2() touches it.
> +         */
> +        lea     __bss_start(%rip),%rdi
> +        lea     __bss_end(%rip),%rcx
> +        sub     %rdi,%rcx
> +        shr     $3,%rcx
> +        xor     %eax,%eax
> +        rep     stosq

Please let's not repeat pre-existing mistakes: REP is not an
instruction, and STOSB is not an operand. IOW there should be
just a single space between the two.

> +        pop     %rdi
> +
> +        /*
> +         * IN: %rdi - EFI ImageHandle, %rsi - EFI SystemTable.
> +         * OUT: %rax - Highest available memory address below 1 MiB.
> +         *
> +         * MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO tag is not provided
> +         * on EFI platforms. Hence, it could not be used like
> +         * on legacy BIOS platforms.
> +         */
> +        call    efi_multiboot2
> +
> +        /* Convert memory address to bytes/16 and store it in safe place. */
> +        shr     $4,%rax
> +        mov     %rax,%rcx

Again, considering that the starting value in %rax here is an
address below 1Mb, no need to do the calculations on 64-bit
registers.

> +        pop     %rax
> +
> +        /* Jump to trampoline_setup after switching CPU to x86_32 mode. */
> +        lea     trampoline_setup(%rip),%rdi

Same here - the branch below uses %edi only anyway.

> @@ -170,12 +313,19 @@ multiboot2_proto:
>          jne     1f
>  
>          mov     MB2_mem_lower(%ecx),%edx
> -        jmp     trampoline_setup
> +        jmp     trampoline_bios_setup
>  
>  1:
> +        /* EFI mode is not supported via legacy BIOS path. */
> +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI32,MB2_tag_type(%ecx)
> +        je      mb2_too_old
> +
> +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64,MB2_tag_type(%ecx)
> +        je      mb2_too_old

According to the comment we're on the legacy BIOS boot path
here, yet at mb2_too_old you assume no text mode VGA. I.e. I'm
now even confused about the output handling there.

> @@ -221,6 +372,13 @@ trampoline_setup:
>          add     $12,%esp            /* Remove reloc() args from stack. */
>          mov     %eax,sym_phys(multiboot_ptr)
>  
> +        /*
> +         * Do not zero BSS on EFI platform here.
> +         * It was initialized earlier.
> +         */
> +        cmpb    $1,sym_phys(skip_realmode)
> +        je      1f

So what if skip_realmode is set on a legacy BIOS system because
of the command line option or the use of TBOOT?

> --- a/xen/arch/x86/efi/efi-boot.h
> +++ b/xen/arch/x86/efi/efi-boot.h
> @@ -228,6 +228,9 @@ static void *__init efi_arch_allocate_mmap_buffer(UINTN map_size)
>  
>  static void __init efi_arch_pre_exit_boot(void)
>  {
> +    if ( !efi_enabled(EFI_LOADER) )
> +        return;
> +
>      if ( !trampoline_phys )

Please connect the two if()-s - afaiu you really only care about the
trampoline handling to not be done. Otherwise the new conditional
would probably rather belong on the (arch-independent) call site.

> +paddr_t __init efi_multiboot2(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable)
> +{
> +    EFI_GRAPHICS_OUTPUT_PROTOCOL *gop;
> +    UINTN cols, gop_mode = ~0, rows;
> +
> +    set_bit(EFI_PLATFORM, &efi.flags);

__set_bit()

> +    efi_init(ImageHandle, SystemTable);
> +
> +    efi_console_set_mode();
> +
> +    if ( StdOut->QueryMode(StdOut, StdOut->Mode->Mode,
> +                           &cols, &rows) == EFI_SUCCESS )
> +        efi_arch_console_init(cols, rows);
> +
> +    gop = efi_get_gop();
> +
> +    if ( gop )
> +        gop_mode = efi_find_gop_mode(gop, 0, 0, 0);
> +
> +    efi_arch_edd();
> +
> +    /*
> +     * efi_arch_cpu() is not needed here. boot_cpu_data
> +     * is set later in xen/arch/x86/boot/head.S.
> +     */

That's not a good explanation. Is there any harm in calling it? If not,
I'd suggest calling it here just to avoid missing a dependency on what
it does in any of the functions called subsequently. If there is, the
precise details of that is what you should say here.

> +    efi_tables();
> +    setup_efi_pci();
> +    efi_variables();
> +
> +    if ( gop )
> +        efi_set_gop_mode(gop, gop_mode);
> +
> +    efi_exit_boot(ImageHandle, SystemTable);
> +
> +    /* Return highest available memory address below 1 MiB. */
> +    return cfg.addr;

Didn't you say on some systems all of the memory below 640k is in
use by boot/loader code/data? If so, what meaning has the value
you return here (I don't recall the consumer side special casing any
error value)?

> --- a/xen/arch/x86/efi/stub.c
> +++ b/xen/arch/x86/efi/stub.c
> @@ -13,6 +13,11 @@ struct efi __read_mostly efi = {
>  	.smbios3 = EFI_INVALID_TABLE_ADDR
>  };
>  
> +void __init efi_multiboot2(void)
> +{
> +    /* TODO: Fail if entered! */
> +}

Why not just BUG()? What exactly you do here doesn't seem to
matter, as the symbol is unreachable in this case anyway (you
only need it to please the linker).

> @@ -728,7 +728,11 @@ void __init noreturn __start_xen(unsigned long mbi_p)
>          l3_bootmap[l3_table_offset(BOOTSTRAP_MAP_BASE)] =
>              l3e_from_paddr(__pa(l2_bootmap), __PAGE_HYPERVISOR);
>  
> -        memmap_type = loader;
> +        memmap_type = "EFI";
> +    }
> +    else if ( efi_enabled(EFI_PLATFORM) )
> +    {
> +        memmap_type = "EFI";
>      }

Stray braces.

> --- a/xen/arch/x86/xen.lds.S
> +++ b/xen/arch/x86/xen.lds.S
> @@ -192,7 +192,7 @@ SECTIONS
>    } :text
>  
>    /* Align BSS to speedup its initialization. */
> -  . = ALIGN(4);
> +  . = ALIGN(8);
>    .bss : {                     /* BSS */
>         . = ALIGN(STACK_SIZE);
>         __bss_start = .;
> @@ -207,7 +207,7 @@ SECTIONS
>         *(.bss.percpu.read_mostly)
>         . = ALIGN(SMP_CACHE_BYTES);
>         __per_cpu_data_end = .;
> -       . = ALIGN(4);
> +       . = ALIGN(8);
>         __bss_end = .;

Is that really worth it? I.e. is going from STOSD to STOSQ really a
meaningful win?

> @@ -936,6 +947,7 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable)
>  
>  #ifndef CONFIG_ARM /* Disabled until runtime services implemented. */
>      set_bit(EFI_PLATFORM, &efi.flags);
> +    set_bit(EFI_LOADER, &efi.flags);
>  #endif

So _neither_ of the two bits get set for ARM? I'm even more puzzled
now, and hence think even more that this can't be fine grained enough.

Jan
Jan Beulich May 25, 2016, 10:29 a.m. UTC | #2
>>> On 25.05.16 at 11:32, <JBeulich@suse.com> wrote:
>>>> On 15.04.16 at 14:33, <daniel.kiper@oracle.com> wrote:
>> @@ -221,6 +372,13 @@ trampoline_setup:
>>          add     $12,%esp            /* Remove reloc() args from stack. */
>>          mov     %eax,sym_phys(multiboot_ptr)
>>  
>> +        /*
>> +         * Do not zero BSS on EFI platform here.
>> +         * It was initialized earlier.
>> +         */
>> +        cmpb    $1,sym_phys(skip_realmode)
>> +        je      1f
> 
> So what if skip_realmode is set on a legacy BIOS system because
> of the command line option or the use of TBOOT?

Oh, I see - this is still before command line parsing ran. The
commentary on this double purpose should be improved, I
think.

Jan
Daniel Kiper May 25, 2016, 9:02 p.m. UTC | #3
On Wed, May 25, 2016 at 03:32:37AM -0600, Jan Beulich wrote:
> >>> On 15.04.16 at 14:33, <daniel.kiper@oracle.com> wrote:
> > @@ -100,19 +107,29 @@ multiboot2_header_end:
> >  gdt_boot_descr:
> >          .word   6*8-1
> >          .long   sym_phys(trampoline_gdt)
> > +        .long   0 /* Needed for 64-bit lgdt */
> > +
> > +cs32_switch_addr:
> > +        .long   sym_phys(cs32_switch)
> > +        .word   BOOT_CS32
> >
> >  .Lbad_cpu_msg: .asciz "ERR: Not a 64-bit CPU!"
> >  .Lbad_ldr_msg: .asciz "ERR: Not a Multiboot bootloader!"
> > +.Lbad_ldr_mb2: .asciz "ERR: On EFI platform use latest Multiboot2 compatible bootloader!"
>
> What is "latest" going to mean 5 years from now?

:-))) I will try to fix it.

> >          .section .init.text, "ax", @progbits
> >
> >  bad_cpu:
> >          mov     $(sym_phys(.Lbad_cpu_msg)),%esi # Error message
> > -        jmp     print_err
> > +        mov     $0xB8000,%edi                   # VGA framebuffer
> > +        jmp     1f
> >  not_multiboot:
> >          mov     $(sym_phys(.Lbad_ldr_msg)),%esi # Error message
> > -print_err:
> > -        mov     $0xB8000,%edi  # VGA framebuffer
> > +        mov     $0xB8000,%edi                   # VGA framebuffer
> > +        jmp     1f
> > +mb2_too_old:
> > +        mov     $(sym_phys(.Lbad_ldr_mb2)),%esi # Error message
> > +        xor     %edi,%edi                       # No VGA framebuffer
>
> Leaving aside that "framebuffer" really is a bad term here (we're
> talking of text mode output after all), limiting the output to serial

Yep, but then we should change this in other places too. Maybe in separate patch.

> isn't going to be very helpful in the field, I'm afraid. Even more so
> that there's no guarantee for a UART to be at port 0x3f8. That's

Right but we do not have big choice here at very early boot stage... :-(((

> not much of a problem for the other two messages as people are
> unlikely to try to boot Xen on an unsuitable system, but I view it
> as quite possible for Xen to be tried to get booted with an
> unsuitable grub2.
>
> IOW - this needs a better solution, presumably using EFI boot
> service output functions.

No way, here boot services are dead. GRUB2 (or other loader)
shutdown them... :-(((

> > @@ -130,6 +149,130 @@ print_err:
> >  .Lhalt: hlt
> >          jmp     .Lhalt
> >
> > +        .code64
> > +
> > +__efi64_start:
>
> As long as we have split files under boot/, I think I'd prefer 64-bit
> code to only go into x86_64.S.
>
> > +        cld
> > +
> > +        /* Check for Multiboot2 bootloader. */
> > +        cmp     $MULTIBOOT2_BOOTLOADER_MAGIC,%eax
> > +        je      efi_multiboot2_proto
> > +
> > +        /* Jump to not_multiboot after switching CPU to x86_32 mode. */
> > +        lea     not_multiboot(%rip),%rdi
> > +        jmp     x86_32_switch
>
> What I've said above would also eliminate the need to switch to
> 32-bit mode just for emitting an error message and halting the
> system.

It is not possible. We just know that we run on EFI platform here.
However, we are not able to get EFI SystemTable pointer.

> > +efi_multiboot2_proto:
>
> .Lefi_multiboot2_proto

OK if you insist. However, I think that we are loosing helpful
debug information this way.

> > +        /*
> > +         * Multiboot2 information address is 32-bit,
> > +         * so, zero higher half of %rbx.
> > +         */
> > +        mov     %ebx,%ebx
>
> Wait, no - that's a protocol bug then. We're being entered in 64-bit
> mode here, so registers should be in 64-bit clean state.

You mean higher half cleared. Right? This is not guaranteed.
Please check this: http://lists.gnu.org/archive/html/grub-devel/2016-03/msg00304.html

> > +        /* Skip Multiboot2 information fixed part. */
> > +        lea     (MB2_fixed_sizeof+MULTIBOOT2_TAG_ALIGN-1)(%rbx),%rcx
> > +        and     $~(MULTIBOOT2_TAG_ALIGN-1),%rcx
>
> Or if there really was a reason to do the above, and if there is a
> reason not to assume this data is located below 4Gb, then
> calculations like this could avoid the REX64 prefix by using %ecx.

Here you said something different:
  http://lists.xen.org/archives/html/xen-devel/2015-03/msg03557.html

So?

> > +0:
> > +        /* Get EFI SystemTable address from Multiboot2 information. */
> > +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64,MB2_tag_type(%rcx)
> > +        jne     1f
> > +
> > +        mov     MB2_efi64_st(%rcx),%rsi
> > +
> > +        /* Do not clear BSS twice and do not go into real mode. */
> > +        movb    $1,skip_realmode(%rip)
>
> How is the setting of skip_realmode related to the clearing of BSS?
> Oh, I've found the connection below (albeit see there for its
> validity), but I think mentioning this here is more confusing than
> clarifying.

In general I have a problem skip_realmode. The name itself is confusing
here and there. Probably it should be changed. However, I do not have
idea for good compact name describing all skip_realmode usages. And I do
not think that we should add another variable which will be used in similar
way but with different name to clarify situation.

> > +        jmp     3f
> > +
> > +1:
> > +        /* Get EFI ImageHandle address from Multiboot2 information. */
> > +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64_IH,MB2_tag_type(%rcx)
> > +        jne     2f
> > +
> > +        mov     MB2_efi64_ih(%rcx),%rdi
> > +        jmp     3f
> > +
> > +2:
> > +        /* Is it the end of Multiboot2 information? */
> > +        cmpl    $MULTIBOOT2_TAG_TYPE_END,MB2_tag_type(%rcx)
> > +        je      run_bs
> > +
> > +3:
> > +        /* Go to next Multiboot2 information tag. */
> > +        add     MB2_tag_size(%rcx),%ecx
> > +        add     $(MULTIBOOT2_TAG_ALIGN-1),%rcx
> > +        and     $~(MULTIBOOT2_TAG_ALIGN-1),%rcx
> > +        jmp     0b
>
> At least down to here it looks like this would better live in a C helper
> function. Did you consider this?

I will think about it.

> > +run_bs:
> > +        push    %rax
> > +        push    %rdi
> > +
> > +        /*
> > +         * Initialize BSS (no nasty surprises!).
> > +         * It must be done earlier than in BIOS case
> > +         * because efi_multiboot2() touches it.
> > +         */
> > +        lea     __bss_start(%rip),%rdi
> > +        lea     __bss_end(%rip),%rcx
> > +        sub     %rdi,%rcx
> > +        shr     $3,%rcx
> > +        xor     %eax,%eax
> > +        rep     stosq
>
> Please let's not repeat pre-existing mistakes: REP is not an
> instruction, and STOSB is not an operand. IOW there should be
> just a single space between the two.

OK.

[...]

> > @@ -170,12 +313,19 @@ multiboot2_proto:
> >          jne     1f
> >
> >          mov     MB2_mem_lower(%ecx),%edx
> > -        jmp     trampoline_setup
> > +        jmp     trampoline_bios_setup
> >
> >  1:
> > +        /* EFI mode is not supported via legacy BIOS path. */
> > +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI32,MB2_tag_type(%ecx)
> > +        je      mb2_too_old
> > +
> > +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64,MB2_tag_type(%ecx)
> > +        je      mb2_too_old
>
> According to the comment we're on the legacy BIOS boot path
> here, yet at mb2_too_old you assume no text mode VGA. I.e. I'm
> now even confused about the output handling there.

It is correct. Old GRUB2 (or other legacy multiboot2 compatible boot
loader) which runs on EFI platform and does not have my features will
jump into that path. And we do not support that, so, we should fail
in the best possible way here.

Your comment suggest that code comment should be improved and
phrased precisely. I will do that.

> > @@ -221,6 +372,13 @@ trampoline_setup:
> >          add     $12,%esp            /* Remove reloc() args from stack. */
> >          mov     %eax,sym_phys(multiboot_ptr)
> >
> > +        /*
> > +         * Do not zero BSS on EFI platform here.
> > +         * It was initialized earlier.
> > +         */
> > +        cmpb    $1,sym_phys(skip_realmode)
> > +        je      1f
>
> So what if skip_realmode is set on a legacy BIOS system because
> of the command line option or the use of TBOOT?
>
> > --- a/xen/arch/x86/efi/efi-boot.h
> > +++ b/xen/arch/x86/efi/efi-boot.h
> > @@ -228,6 +228,9 @@ static void *__init efi_arch_allocate_mmap_buffer(UINTN map_size)
> >
> >  static void __init efi_arch_pre_exit_boot(void)
> >  {
> > +    if ( !efi_enabled(EFI_LOADER) )
> > +        return;
> > +
> >      if ( !trampoline_phys )
>
> Please connect the two if()-s - afaiu you really only care about the
> trampoline handling to not be done. Otherwise the new conditional
> would probably rather belong on the (arch-independent) call site.
>
> > +paddr_t __init efi_multiboot2(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable)
> > +{
> > +    EFI_GRAPHICS_OUTPUT_PROTOCOL *gop;
> > +    UINTN cols, gop_mode = ~0, rows;
> > +
> > +    set_bit(EFI_PLATFORM, &efi.flags);
>
> __set_bit()
>
> > +    efi_init(ImageHandle, SystemTable);
> > +
> > +    efi_console_set_mode();
> > +
> > +    if ( StdOut->QueryMode(StdOut, StdOut->Mode->Mode,
> > +                           &cols, &rows) == EFI_SUCCESS )
> > +        efi_arch_console_init(cols, rows);
> > +
> > +    gop = efi_get_gop();
> > +
> > +    if ( gop )
> > +        gop_mode = efi_find_gop_mode(gop, 0, 0, 0);
> > +
> > +    efi_arch_edd();
> > +
> > +    /*
> > +     * efi_arch_cpu() is not needed here. boot_cpu_data
> > +     * is set later in xen/arch/x86/boot/head.S.
> > +     */
>
> That's not a good explanation. Is there any harm in calling it? If not,
> I'd suggest calling it here just to avoid missing a dependency on what
> it does in any of the functions called subsequently. If there is, the
> precise details of that is what you should say here.

It looks that it should work. However, probably there was an reason for
which this call was disabled. So, I will reinvestigate the issue.

> > +    efi_tables();
> > +    setup_efi_pci();
> > +    efi_variables();
> > +
> > +    if ( gop )
> > +        efi_set_gop_mode(gop, gop_mode);
> > +
> > +    efi_exit_boot(ImageHandle, SystemTable);
> > +
> > +    /* Return highest available memory address below 1 MiB. */
> > +    return cfg.addr;
>
> Didn't you say on some systems all of the memory below 640k is in
> use by boot/loader code/data? If so, what meaning has the value
> you return here (I don't recall the consumer side special casing any
> error value)?

It is usually correct because boot services are dead here. However, if it
does not (e.g. somebody called Xen with mapbs argument) then we should fail
because there is no memory for trampoline.

> > --- a/xen/arch/x86/efi/stub.c
> > +++ b/xen/arch/x86/efi/stub.c
> > @@ -13,6 +13,11 @@ struct efi __read_mostly efi = {
> >  	.smbios3 = EFI_INVALID_TABLE_ADDR
> >  };
> >
> > +void __init efi_multiboot2(void)
> > +{
> > +    /* TODO: Fail if entered! */
> > +}
>
> Why not just BUG()? What exactly you do here doesn't seem to
> matter, as the symbol is unreachable in this case anyway (you
> only need it to please the linker).

We should print meaningful message here using boot services.
To do that we need a few line of assembly probably. And BUG()
is not solution here.

> > @@ -728,7 +728,11 @@ void __init noreturn __start_xen(unsigned long mbi_p)
> >          l3_bootmap[l3_table_offset(BOOTSTRAP_MAP_BASE)] =
> >              l3e_from_paddr(__pa(l2_bootmap), __PAGE_HYPERVISOR);
> >
> > -        memmap_type = loader;
> > +        memmap_type = "EFI";
> > +    }
> > +    else if ( efi_enabled(EFI_PLATFORM) )
> > +    {
> > +        memmap_type = "EFI";
> >      }
>
> Stray braces.
>
> > --- a/xen/arch/x86/xen.lds.S
> > +++ b/xen/arch/x86/xen.lds.S
> > @@ -192,7 +192,7 @@ SECTIONS
> >    } :text
> >
> >    /* Align BSS to speedup its initialization. */
> > -  . = ALIGN(4);
> > +  . = ALIGN(8);
> >    .bss : {                     /* BSS */
> >         . = ALIGN(STACK_SIZE);
> >         __bss_start = .;
> > @@ -207,7 +207,7 @@ SECTIONS
> >         *(.bss.percpu.read_mostly)
> >         . = ALIGN(SMP_CACHE_BYTES);
> >         __per_cpu_data_end = .;
> > -       . = ALIGN(4);
> > +       . = ALIGN(8);
> >         __bss_end = .;
>
> Is that really worth it? I.e. is going from STOSD to STOSQ really a
> meaningful win?

Probably yes but I do not think that anybody will be see boot time
difference. On the other hand why not do that? It does not cost a lot.

> > @@ -936,6 +947,7 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable)
> >
> >  #ifndef CONFIG_ARM /* Disabled until runtime services implemented. */
> >      set_bit(EFI_PLATFORM, &efi.flags);
> > +    set_bit(EFI_LOADER, &efi.flags);
> >  #endif
>
> So _neither_ of the two bits get set for ARM? I'm even more puzzled
> now, and hence think even more that this can't be fine grained enough.

As I saw code around old efi_enabled, which was replaced by EFI_PLATFORM,
was changed recently. So, I must rethink this stuff too.

Daniel
Jan Beulich May 27, 2016, 9:02 a.m. UTC | #4
>>> On 25.05.16 at 23:02, <daniel.kiper@oracle.com> wrote:
> On Wed, May 25, 2016 at 03:32:37AM -0600, Jan Beulich wrote:
>> >>> On 15.04.16 at 14:33, <daniel.kiper@oracle.com> wrote:
>> >  bad_cpu:
>> >          mov     $(sym_phys(.Lbad_cpu_msg)),%esi # Error message
>> > -        jmp     print_err
>> > +        mov     $0xB8000,%edi                   # VGA framebuffer
>> > +        jmp     1f
>> >  not_multiboot:
>> >          mov     $(sym_phys(.Lbad_ldr_msg)),%esi # Error message
>> > -print_err:
>> > -        mov     $0xB8000,%edi  # VGA framebuffer
>> > +        mov     $0xB8000,%edi                   # VGA framebuffer
>> > +        jmp     1f
>> > +mb2_too_old:
>> > +        mov     $(sym_phys(.Lbad_ldr_mb2)),%esi # Error message
>> > +        xor     %edi,%edi                       # No VGA framebuffer
>>
>> Leaving aside that "framebuffer" really is a bad term here (we're
>> talking of text mode output after all), limiting the output to serial
> 
> Yep, but then we should change this in other places too. Maybe in separate 
> patch.

Since you touch (move) it, replacing the word "framebuffer" wouldn't
seem like something making the patch any more difficult to understand.

>> isn't going to be very helpful in the field, I'm afraid. Even more so
>> that there's no guarantee for a UART to be at port 0x3f8. That's
> 
> Right but we do not have big choice here at very early boot stage... :-(((

Excuse me? You're running on EFI, so you have full infrastructure
available to you. As much as in a normal BIOS scenario (and on a
half way normal system) we can assume a text mode screen with
video memory mapped at B8000, under EFI we can assume output
capabilities (whichever the system owner set up in the firmware
setup).

>> not much of a problem for the other two messages as people are
>> unlikely to try to boot Xen on an unsuitable system, but I view it
>> as quite possible for Xen to be tried to get booted with an
>> unsuitable grub2.
>>
>> IOW - this needs a better solution, presumably using EFI boot
>> service output functions.
> 
> No way, here boot services are dead. GRUB2 (or other loader)
> shutdown them... :-(((

Wasn't one of your grub2 changes being precisely the deferral of
that to Xen?

>> > +        cld
>> > +
>> > +        /* Check for Multiboot2 bootloader. */
>> > +        cmp     $MULTIBOOT2_BOOTLOADER_MAGIC,%eax
>> > +        je      efi_multiboot2_proto
>> > +
>> > +        /* Jump to not_multiboot after switching CPU to x86_32 mode. */
>> > +        lea     not_multiboot(%rip),%rdi
>> > +        jmp     x86_32_switch
>>
>> What I've said above would also eliminate the need to switch to
>> 32-bit mode just for emitting an error message and halting the
>> system.
> 
> It is not possible. We just know that we run on EFI platform here.
> However, we are not able to get EFI SystemTable pointer.

How are we not able? Later C code does it afair, so why would it not
be possible here?

>> > +efi_multiboot2_proto:
>>
>> .Lefi_multiboot2_proto
> 
> OK if you insist. However, I think that we are loosing helpful
> debug information this way.

I don't see why or how. Labels persisting in the final symbol table
are useful only for generating stack traces, yet if you crash this
early there won't be any stack trace anyway - you don't even
have an IDT set up yet.

>> > +        /*
>> > +         * Multiboot2 information address is 32-bit,
>> > +         * so, zero higher half of %rbx.
>> > +         */
>> > +        mov     %ebx,%ebx
>>
>> Wait, no - that's a protocol bug then. We're being entered in 64-bit
>> mode here, so registers should be in 64-bit clean state.
> 
> You mean higher half cleared. Right? This is not guaranteed.

Hence me saying "that's a protocol bug then".

> Please check this: 
> http://lists.gnu.org/archive/html/grub-devel/2016-03/msg00304.html 

Other than the description of the patch I can't see anything like that,
in particular
- no occurrence of "ebx" in any of the added or changed code
- MULTIBOOT_EFI_MBI_REGISTER getting #define-d as rbx

>> > +        /* Skip Multiboot2 information fixed part. */
>> > +        lea     (MB2_fixed_sizeof+MULTIBOOT2_TAG_ALIGN-1)(%rbx),%rcx
>> > +        and     $~(MULTIBOOT2_TAG_ALIGN-1),%rcx
>>
>> Or if there really was a reason to do the above, and if there is a
>> reason not to assume this data is located below 4Gb, then
>> calculations like this could avoid the REX64 prefix by using %ecx.
> 
> Here you said something different:
>   http://lists.xen.org/archives/html/xen-devel/2015-03/msg03557.html 
> 
> So?

So? You simply went too far: There talk was of memory accesses,
i.e. the (%rbx) part of the instruction above. Using (%ebx) there
would cause a pointless address size override prefix emitted. Now
in the new form above you force a pointless REX prefix to be
emitted. The whole point of both of my requests is - avoid prefixes
if you don't really need them.

>> > +0:
>> > +        /* Get EFI SystemTable address from Multiboot2 information. */
>> > +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64,MB2_tag_type(%rcx)
>> > +        jne     1f
>> > +
>> > +        mov     MB2_efi64_st(%rcx),%rsi
>> > +
>> > +        /* Do not clear BSS twice and do not go into real mode. */
>> > +        movb    $1,skip_realmode(%rip)
>>
>> How is the setting of skip_realmode related to the clearing of BSS?
>> Oh, I've found the connection below (albeit see there for its
>> validity), but I think mentioning this here is more confusing than
>> clarifying.
> 
> In general I have a problem skip_realmode. The name itself is confusing
> here and there. Probably it should be changed. However, I do not have
> idea for good compact name describing all skip_realmode usages. And I do
> not think that we should add another variable which will be used in similar
> way but with different name to clarify situation.

Just clarify things by suitable (brief) comments?

>> > @@ -170,12 +313,19 @@ multiboot2_proto:
>> >          jne     1f
>> >
>> >          mov     MB2_mem_lower(%ecx),%edx
>> > -        jmp     trampoline_setup
>> > +        jmp     trampoline_bios_setup
>> >
>> >  1:
>> > +        /* EFI mode is not supported via legacy BIOS path. */
>> > +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI32,MB2_tag_type(%ecx)
>> > +        je      mb2_too_old
>> > +
>> > +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64,MB2_tag_type(%ecx)
>> > +        je      mb2_too_old
>>
>> According to the comment we're on the legacy BIOS boot path
>> here, yet at mb2_too_old you assume no text mode VGA. I.e. I'm
>> now even confused about the output handling there.
> 
> It is correct. Old GRUB2 (or other legacy multiboot2 compatible boot
> loader) which runs on EFI platform and does not have my features will
> jump into that path. And we do not support that, so, we should fail
> in the best possible way here.
> 
> Your comment suggest that code comment should be improved and
> phrased precisely. I will do that.

Not necessarily: First of all you didn't clarify what video mode we're
in in that old-grub2 case. Do we have text mode? If so, output should
not be avoided. And if we're in a graphical mode without any vga=
option that grub2 may have chosen to interpret, that would smell like
a bug in grub2.

>> > +    efi_tables();
>> > +    setup_efi_pci();
>> > +    efi_variables();
>> > +
>> > +    if ( gop )
>> > +        efi_set_gop_mode(gop, gop_mode);
>> > +
>> > +    efi_exit_boot(ImageHandle, SystemTable);

(Btw, regarding your earlier statement of boot services being
unavailable in the early assembly entry code: Here is the
exit-boot-services place, i.e. up to here boot services can be used,
which therefore includes the assembly part of the boot path.)

>> > --- a/xen/arch/x86/efi/stub.c
>> > +++ b/xen/arch/x86/efi/stub.c
>> > @@ -13,6 +13,11 @@ struct efi __read_mostly efi = {
>> >  	.smbios3 = EFI_INVALID_TABLE_ADDR
>> >  };
>> >
>> > +void __init efi_multiboot2(void)
>> > +{
>> > +    /* TODO: Fail if entered! */
>> > +}
>>
>> Why not just BUG()? What exactly you do here doesn't seem to
>> matter, as the symbol is unreachable in this case anyway (you
>> only need it to please the linker).
> 
> We should print meaningful message here using boot services.

Which boot services? We're not running on EFI if we get here. And
as said, this function is unreachable on non-EFI afaict, so ...

> To do that we need a few line of assembly probably. And BUG()
> is not solution here.

... I don't follow you here.

>> > --- a/xen/arch/x86/xen.lds.S
>> > +++ b/xen/arch/x86/xen.lds.S
>> > @@ -192,7 +192,7 @@ SECTIONS
>> >    } :text
>> >
>> >    /* Align BSS to speedup its initialization. */
>> > -  . = ALIGN(4);
>> > +  . = ALIGN(8);
>> >    .bss : {                     /* BSS */
>> >         . = ALIGN(STACK_SIZE);
>> >         __bss_start = .;
>> > @@ -207,7 +207,7 @@ SECTIONS
>> >         *(.bss.percpu.read_mostly)
>> >         . = ALIGN(SMP_CACHE_BYTES);
>> >         __per_cpu_data_end = .;
>> > -       . = ALIGN(4);
>> > +       . = ALIGN(8);
>> >         __bss_end = .;
>>
>> Is that really worth it? I.e. is going from STOSD to STOSQ really a
>> meaningful win?
> 
> Probably yes but I do not think that anybody will be see boot time
> difference. On the other hand why not do that? It does not cost a lot.

But it grows an already largish patch.

Jan
Daniel Kiper June 1, 2016, 7:03 p.m. UTC | #5
On Fri, May 27, 2016 at 03:02:25AM -0600, Jan Beulich wrote:
> >>> On 25.05.16 at 23:02, <daniel.kiper@oracle.com> wrote:
> > On Wed, May 25, 2016 at 03:32:37AM -0600, Jan Beulich wrote:
> >> >>> On 15.04.16 at 14:33, <daniel.kiper@oracle.com> wrote:
> >> >  bad_cpu:
> >> >          mov     $(sym_phys(.Lbad_cpu_msg)),%esi # Error message
> >> > -        jmp     print_err
> >> > +        mov     $0xB8000,%edi                   # VGA framebuffer
> >> > +        jmp     1f
> >> >  not_multiboot:
> >> >          mov     $(sym_phys(.Lbad_ldr_msg)),%esi # Error message
> >> > -print_err:
> >> > -        mov     $0xB8000,%edi  # VGA framebuffer
> >> > +        mov     $0xB8000,%edi                   # VGA framebuffer
> >> > +        jmp     1f
> >> > +mb2_too_old:
> >> > +        mov     $(sym_phys(.Lbad_ldr_mb2)),%esi # Error message
> >> > +        xor     %edi,%edi                       # No VGA framebuffer
> >>
> >> Leaving aside that "framebuffer" really is a bad term here (we're
> >> talking of text mode output after all), limiting the output to serial
> >
> > Yep, but then we should change this in other places too. Maybe in separate
> > patch.
>
> Since you touch (move) it, replacing the word "framebuffer" wouldn't
> seem like something making the patch any more difficult to understand.
>
> >> isn't going to be very helpful in the field, I'm afraid. Even more so
> >> that there's no guarantee for a UART to be at port 0x3f8. That's
> >
> > Right but we do not have big choice here at very early boot stage... :-(((
>
> Excuse me? You're running on EFI, so you have full infrastructure
> available to you. As much as in a normal BIOS scenario (and on a
> half way normal system) we can assume a text mode screen with
> video memory mapped at B8000, under EFI we can assume output
> capabilities (whichever the system owner set up in the firmware
> setup).

Potentially we can do that for bad_cpu only. However, does it pays?
I suppose that most, if not all, platforms with UEFI have CPUs with
X86_FEATURE_LM.

Sadly, It it not feasible for not_multiboot and mb2_too_old. Former
means that we were loaded by unknown boot loader and interface is
not defined. Hence, we are not able to get anything from EFI. Latter
means that we were booted via older multiboot2 version which shutdown
boot services.

> >> not much of a problem for the other two messages as people are
> >> unlikely to try to boot Xen on an unsuitable system, but I view it
> >> as quite possible for Xen to be tried to get booted with an
> >> unsuitable grub2.
> >>
> >> IOW - this needs a better solution, presumably using EFI boot
> >> service output functions.
> >
> > No way, here boot services are dead. GRUB2 (or other loader)
> > shutdown them... :-(((
>
> Wasn't one of your grub2 changes being precisely the deferral of
> that to Xen?

Sure, but if we are here then it means that we were loaded by earlier multiboot2
protocol version which does not understand features added by me.

> >> > +        cld
> >> > +
> >> > +        /* Check for Multiboot2 bootloader. */
> >> > +        cmp     $MULTIBOOT2_BOOTLOADER_MAGIC,%eax
> >> > +        je      efi_multiboot2_proto
> >> > +
> >> > +        /* Jump to not_multiboot after switching CPU to x86_32 mode. */
> >> > +        lea     not_multiboot(%rip),%rdi
> >> > +        jmp     x86_32_switch
> >>
> >> What I've said above would also eliminate the need to switch to
> >> 32-bit mode just for emitting an error message and halting the
> >> system.
> >
> > It is not possible. We just know that we run on EFI platform here.
> > However, we are not able to get EFI SystemTable pointer.
>
> How are we not able? Later C code does it afair, so why would it not
> be possible here?

Ditto.

> >> > +efi_multiboot2_proto:
> >>
> >> .Lefi_multiboot2_proto
> >
> > OK if you insist. However, I think that we are loosing helpful
> > debug information this way.
>
> I don't see why or how. Labels persisting in the final symbol table
> are useful only for generating stack traces, yet if you crash this
> early there won't be any stack trace anyway - you don't even
> have an IDT set up yet.

OK, but it is much easier to identify addresses for breakpoints if you
have proper labels. And this one looks quite useful.

> >> > +        /*
> >> > +         * Multiboot2 information address is 32-bit,
> >> > +         * so, zero higher half of %rbx.
> >> > +         */
> >> > +        mov     %ebx,%ebx
> >>
> >> Wait, no - that's a protocol bug then. We're being entered in 64-bit
> >> mode here, so registers should be in 64-bit clean state.
> >
> > You mean higher half cleared. Right? This is not guaranteed.
>
> Hence me saying "that's a protocol bug then".

Why? Protocol strictly says that "this is not guaranteed".
What is the problem with that? Potentially loader can set
%rbx higher half to e.g. 0 but I do not think it is needed.

> > Please check this:
> > http://lists.gnu.org/archive/html/grub-devel/2016-03/msg00304.html
>
> Other than the description of the patch I can't see anything like that,
> in particular
> - no occurrence of "ebx" in any of the added or changed code
> - MULTIBOOT_EFI_MBI_REGISTER getting #define-d as rbx

Please check multiboot2 spec, section 3.2, Machine state. It is not
freshest one (I am working on EFI updates right now) but I hope that it,
together with patch comment, will shed some light.

> >> > +        /* Skip Multiboot2 information fixed part. */
> >> > +        lea     (MB2_fixed_sizeof+MULTIBOOT2_TAG_ALIGN-1)(%rbx),%rcx
> >> > +        and     $~(MULTIBOOT2_TAG_ALIGN-1),%rcx
> >>
> >> Or if there really was a reason to do the above, and if there is a
> >> reason not to assume this data is located below 4Gb, then
> >> calculations like this could avoid the REX64 prefix by using %ecx.
> >
> > Here you said something different:
> >   http://lists.xen.org/archives/html/xen-devel/2015-03/msg03557.html
> >
> > So?
>
> So? You simply went too far: There talk was of memory accesses,
> i.e. the (%rbx) part of the instruction above. Using (%ebx) there
> would cause a pointless address size override prefix emitted. Now
> in the new form above you force a pointless REX prefix to be
> emitted. The whole point of both of my requests is - avoid prefixes
> if you don't really need them.

Wilco! Thanks for explanation.

> >> > +0:
> >> > +        /* Get EFI SystemTable address from Multiboot2 information. */
> >> > +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64,MB2_tag_type(%rcx)
> >> > +        jne     1f
> >> > +
> >> > +        mov     MB2_efi64_st(%rcx),%rsi
> >> > +
> >> > +        /* Do not clear BSS twice and do not go into real mode. */
> >> > +        movb    $1,skip_realmode(%rip)
> >>
> >> How is the setting of skip_realmode related to the clearing of BSS?
> >> Oh, I've found the connection below (albeit see there for its
> >> validity), but I think mentioning this here is more confusing than
> >> clarifying.
> >
> > In general I have a problem skip_realmode. The name itself is confusing
> > here and there. Probably it should be changed. However, I do not have
> > idea for good compact name describing all skip_realmode usages. And I do
> > not think that we should add another variable which will be used in similar
> > way but with different name to clarify situation.
>
> Just clarify things by suitable (brief) comments?

If it is acceptable by you guys then OK.

> >> > @@ -170,12 +313,19 @@ multiboot2_proto:
> >> >          jne     1f
> >> >
> >> >          mov     MB2_mem_lower(%ecx),%edx
> >> > -        jmp     trampoline_setup
> >> > +        jmp     trampoline_bios_setup
> >> >
> >> >  1:
> >> > +        /* EFI mode is not supported via legacy BIOS path. */
> >> > +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI32,MB2_tag_type(%ecx)
> >> > +        je      mb2_too_old
> >> > +
> >> > +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64,MB2_tag_type(%ecx)
> >> > +        je      mb2_too_old
> >>
> >> According to the comment we're on the legacy BIOS boot path
> >> here, yet at mb2_too_old you assume no text mode VGA. I.e. I'm
> >> now even confused about the output handling there.
> >
> > It is correct. Old GRUB2 (or other legacy multiboot2 compatible boot
> > loader) which runs on EFI platform and does not have my features will
> > jump into that path. And we do not support that, so, we should fail
> > in the best possible way here.
> >
> > Your comment suggest that code comment should be improved and
> > phrased precisely. I will do that.
>
> Not necessarily: First of all you didn't clarify what video mode we're
> in in that old-grub2 case. Do we have text mode? If so, output should
> not be avoided. And if we're in a graphical mode without any vga=
> option that grub2 may have chosen to interpret, that would smell like
> a bug in grub2.

Here boot services are dead. They were shutdown by GRUB2 (or other
legacy boot loader). So, we do not have simple access to GOP or
anything like that. Am I missing something?

> >> > +    efi_tables();
> >> > +    setup_efi_pci();
> >> > +    efi_variables();
> >> > +
> >> > +    if ( gop )
> >> > +        efi_set_gop_mode(gop, gop_mode);
> >> > +
> >> > +    efi_exit_boot(ImageHandle, SystemTable);
>
> (Btw, regarding your earlier statement of boot services being
> unavailable in the early assembly entry code: Here is the
> exit-boot-services place, i.e. up to here boot services can be used,
> which therefore includes the assembly part of the boot path.)

This is reachable if everything went OK. If it does not very early
then UEFI stuff is unavailable. Please look above for more details.

> >> > --- a/xen/arch/x86/efi/stub.c
> >> > +++ b/xen/arch/x86/efi/stub.c
> >> > @@ -13,6 +13,11 @@ struct efi __read_mostly efi = {
> >> >  	.smbios3 = EFI_INVALID_TABLE_ADDR
> >> >  };
> >> >
> >> > +void __init efi_multiboot2(void)
> >> > +{
> >> > +    /* TODO: Fail if entered! */
> >> > +}
> >>
> >> Why not just BUG()? What exactly you do here doesn't seem to
> >> matter, as the symbol is unreachable in this case anyway (you
> >> only need it to please the linker).
> >
> > We should print meaningful message here using boot services.
>
> Which boot services? We're not running on EFI if we get here. And
> as said, this function is unreachable on non-EFI afaict, so ...

It is. Assembly code in head.S is build unconditionally.

> > To do that we need a few line of assembly probably. And BUG()
> > is not solution here.
>
> ... I don't follow you here.
>
> >> > --- a/xen/arch/x86/xen.lds.S
> >> > +++ b/xen/arch/x86/xen.lds.S
> >> > @@ -192,7 +192,7 @@ SECTIONS
> >> >    } :text
> >> >
> >> >    /* Align BSS to speedup its initialization. */
> >> > -  . = ALIGN(4);
> >> > +  . = ALIGN(8);
> >> >    .bss : {                     /* BSS */
> >> >         . = ALIGN(STACK_SIZE);
> >> >         __bss_start = .;
> >> > @@ -207,7 +207,7 @@ SECTIONS
> >> >         *(.bss.percpu.read_mostly)
> >> >         . = ALIGN(SMP_CACHE_BYTES);
> >> >         __per_cpu_data_end = .;
> >> > -       . = ALIGN(4);
> >> > +       . = ALIGN(8);
> >> >         __bss_end = .;
> >>
> >> Is that really worth it? I.e. is going from STOSD to STOSQ really a
> >> meaningful win?
> >
> > Probably yes but I do not think that anybody will be see boot time
> > difference. On the other hand why not do that? It does not cost a lot.
>
> But it grows an already largish patch.

If Andrew do not object I can leave/use stosd/stosl as is.

Daniel
Jan Beulich June 2, 2016, 8:34 a.m. UTC | #6
>>> On 01.06.16 at 21:03, <daniel.kiper@oracle.com> wrote:
> On Fri, May 27, 2016 at 03:02:25AM -0600, Jan Beulich wrote:
>> >>> On 25.05.16 at 23:02, <daniel.kiper@oracle.com> wrote:
>> > On Wed, May 25, 2016 at 03:32:37AM -0600, Jan Beulich wrote:
>> >> isn't going to be very helpful in the field, I'm afraid. Even more so
>> >> that there's no guarantee for a UART to be at port 0x3f8. That's
>> >
>> > Right but we do not have big choice here at very early boot stage... :-(((
>>
>> Excuse me? You're running on EFI, so you have full infrastructure
>> available to you. As much as in a normal BIOS scenario (and on a
>> half way normal system) we can assume a text mode screen with
>> video memory mapped at B8000, under EFI we can assume output
>> capabilities (whichever the system owner set up in the firmware
>> setup).
> 
> Potentially we can do that for bad_cpu only. However, does it pays?
> I suppose that most, if not all, platforms with UEFI have CPUs with
> X86_FEATURE_LM.

I'm not sure about that, keeping various Atoms in mind.

> Sadly, It it not feasible for not_multiboot and mb2_too_old. Former
> means that we were loaded by unknown boot loader and interface is
> not defined.

Hence we may at least assume accessing VGA memory won't do
much bad.

> Hence, we are not able to get anything from EFI. Latter
> means that we were booted via older multiboot2 version which shutdown
> boot services.

Hmm, that's certainly one of the possibilities, yes. I wonder then
whether we wouldn't better do away with all of those output
attempts then.

>> >> > +efi_multiboot2_proto:
>> >>
>> >> .Lefi_multiboot2_proto
>> >
>> > OK if you insist. However, I think that we are loosing helpful
>> > debug information this way.
>>
>> I don't see why or how. Labels persisting in the final symbol table
>> are useful only for generating stack traces, yet if you crash this
>> early there won't be any stack trace anyway - you don't even
>> have an IDT set up yet.
> 
> OK, but it is much easier to identify addresses for breakpoints if you
> have proper labels. And this one looks quite useful.

I disagree, especially to the "much".

>> >> > +        /*
>> >> > +         * Multiboot2 information address is 32-bit,
>> >> > +         * so, zero higher half of %rbx.
>> >> > +         */
>> >> > +        mov     %ebx,%ebx
>> >>
>> >> Wait, no - that's a protocol bug then. We're being entered in 64-bit
>> >> mode here, so registers should be in 64-bit clean state.
>> >
>> > You mean higher half cleared. Right? This is not guaranteed.
>>
>> Hence me saying "that's a protocol bug then".
> 
> Why? Protocol strictly says that "this is not guaranteed".
> What is the problem with that? Potentially loader can set
> %rbx higher half to e.g. 0 but I do not think it is needed.

A 64-bit interface shouldn't specify values to live only in halves
of registers, in my opinion. Remember that the architecture
guarantees high halves to get zeroed for 32-bit writes to
registers, so I don't even see any complication for the provider
side of the interface (which necessarily runs in 64-bit mode).

>> > Please check this:
>> > http://lists.gnu.org/archive/html/grub-devel/2016-03/msg00304.html 
>>
>> Other than the description of the patch I can't see anything like that,
>> in particular
>> - no occurrence of "ebx" in any of the added or changed code
>> - MULTIBOOT_EFI_MBI_REGISTER getting #define-d as rbx
> 
> Please check multiboot2 spec, section 3.2, Machine state. It is not
> freshest one (I am working on EFI updates right now) but I hope that it,
> together with patch comment, will shed some light.

May I refer you back to you, in another patch, adding just two
architecture defines for MB2: i386 and MIPS? That's a pretty
clear indication that there can't be much consistency to be
expected when talk comes to x86-64 (which most definitely is
not i386).

>> >> > @@ -170,12 +313,19 @@ multiboot2_proto:
>> >> >          jne     1f
>> >> >
>> >> >          mov     MB2_mem_lower(%ecx),%edx
>> >> > -        jmp     trampoline_setup
>> >> > +        jmp     trampoline_bios_setup
>> >> >
>> >> >  1:
>> >> > +        /* EFI mode is not supported via legacy BIOS path. */
>> >> > +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI32,MB2_tag_type(%ecx)
>> >> > +        je      mb2_too_old
>> >> > +
>> >> > +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64,MB2_tag_type(%ecx)
>> >> > +        je      mb2_too_old
>> >>
>> >> According to the comment we're on the legacy BIOS boot path
>> >> here, yet at mb2_too_old you assume no text mode VGA. I.e. I'm
>> >> now even confused about the output handling there.
>> >
>> > It is correct. Old GRUB2 (or other legacy multiboot2 compatible boot
>> > loader) which runs on EFI platform and does not have my features will
>> > jump into that path. And we do not support that, so, we should fail
>> > in the best possible way here.
>> >
>> > Your comment suggest that code comment should be improved and
>> > phrased precisely. I will do that.
>>
>> Not necessarily: First of all you didn't clarify what video mode we're
>> in in that old-grub2 case. Do we have text mode? If so, output should
>> not be avoided. And if we're in a graphical mode without any vga=
>> option that grub2 may have chosen to interpret, that would smell like
>> a bug in grub2.
> 
> Here boot services are dead. They were shutdown by GRUB2 (or other
> legacy boot loader). So, we do not have simple access to GOP or
> anything like that. Am I missing something?

How are boot services coming into the picture here? As the code
comment still visible above says, we're on the legacy boot path
here. And legacy boot, from all I know, implies a text mode on
the primary VGA (if any).

>> >> > --- a/xen/arch/x86/efi/stub.c
>> >> > +++ b/xen/arch/x86/efi/stub.c
>> >> > @@ -13,6 +13,11 @@ struct efi __read_mostly efi = {
>> >> >  	.smbios3 = EFI_INVALID_TABLE_ADDR
>> >> >  };
>> >> >
>> >> > +void __init efi_multiboot2(void)
>> >> > +{
>> >> > +    /* TODO: Fail if entered! */
>> >> > +}
>> >>
>> >> Why not just BUG()? What exactly you do here doesn't seem to
>> >> matter, as the symbol is unreachable in this case anyway (you
>> >> only need it to please the linker).
>> >
>> > We should print meaningful message here using boot services.
>>
>> Which boot services? We're not running on EFI if we get here. And
>> as said, this function is unreachable on non-EFI afaict, so ...
> 
> It is. Assembly code in head.S is build unconditionally.

Oh, I see - a new-style xen.gz which doesn't have EFI support
enabled due to tool chain issues but gets booted from EFI/grub2.

Jan
Daniel Kiper June 2, 2016, 4:12 p.m. UTC | #7
On Thu, Jun 02, 2016 at 02:34:28AM -0600, Jan Beulich wrote:
> >>> On 01.06.16 at 21:03, <daniel.kiper@oracle.com> wrote:
> > On Fri, May 27, 2016 at 03:02:25AM -0600, Jan Beulich wrote:
> >> >>> On 25.05.16 at 23:02, <daniel.kiper@oracle.com> wrote:
> >> > On Wed, May 25, 2016 at 03:32:37AM -0600, Jan Beulich wrote:
> >> >> isn't going to be very helpful in the field, I'm afraid. Even more so
> >> >> that there's no guarantee for a UART to be at port 0x3f8. That's
> >> >
> >> > Right but we do not have big choice here at very early boot stage... :-(((
> >>
> >> Excuse me? You're running on EFI, so you have full infrastructure
> >> available to you. As much as in a normal BIOS scenario (and on a
> >> half way normal system) we can assume a text mode screen with
> >> video memory mapped at B8000, under EFI we can assume output
> >> capabilities (whichever the system owner set up in the firmware
> >> setup).
> >
> > Potentially we can do that for bad_cpu only. However, does it pays?
> > I suppose that most, if not all, platforms with UEFI have CPUs with
> > X86_FEATURE_LM.
>
> I'm not sure about that, keeping various Atoms in mind.

OK.

> > Sadly, It it not feasible for not_multiboot and mb2_too_old. Former
> > means that we were loaded by unknown boot loader and interface is
> > not defined.
>
> Hence we may at least assume accessing VGA memory won't do
> much bad.

Well, we are in black hole here but I am not sure that even in that
situation we should blindly poke VGA memory region. Especially on
EFI platforms behavior can be at least undefined.

> > Hence, we are not able to get anything from EFI. Latter
> > means that we were booted via older multiboot2 version which shutdown
> > boot services.
>
> Hmm, that's certainly one of the possibilities, yes. I wonder then
> whether we wouldn't better do away with all of those output
> attempts then.

This is one option. However, IMO, not nice. Maybe we should stay with
serial plus VGA on legacy BIOS platforms. Serials looks quite well
defined even on EFI platforms. However, if it is not available x86
out instruction should not do any harm. Though if we wish to increase
chance of reaching interested parties I would send error messages to
0x3f8 and 0x2f8 at once. AFAICT, on many machines usually at least one
if not both standard/legacy serial ports are available.

[...]

> >> >> > +        /*
> >> >> > +         * Multiboot2 information address is 32-bit,
> >> >> > +         * so, zero higher half of %rbx.
> >> >> > +         */
> >> >> > +        mov     %ebx,%ebx
> >> >>
> >> >> Wait, no - that's a protocol bug then. We're being entered in 64-bit
> >> >> mode here, so registers should be in 64-bit clean state.
> >> >
> >> > You mean higher half cleared. Right? This is not guaranteed.
> >>
> >> Hence me saying "that's a protocol bug then".
> >
> > Why? Protocol strictly says that "this is not guaranteed".
> > What is the problem with that? Potentially loader can set
> > %rbx higher half to e.g. 0 but I do not think it is needed.
>
> A 64-bit interface shouldn't specify values to live only in halves
> of registers, in my opinion. Remember that the architecture
> guarantees high halves to get zeroed for 32-bit writes to
> registers, so I don't even see any complication for the provider
> side of the interface (which necessarily runs in 64-bit mode).

OK, you convinced me. I will update GRUB2 patches.

> >> > Please check this:
> >> > http://lists.gnu.org/archive/html/grub-devel/2016-03/msg00304.html
> >>
> >> Other than the description of the patch I can't see anything like that,
> >> in particular
> >> - no occurrence of "ebx" in any of the added or changed code
> >> - MULTIBOOT_EFI_MBI_REGISTER getting #define-d as rbx
> >
> > Please check multiboot2 spec, section 3.2, Machine state. It is not
> > freshest one (I am working on EFI updates right now) but I hope that it,
> > together with patch comment, will shed some light.
>
> May I refer you back to you, in another patch, adding just two
> architecture defines for MB2: i386 and MIPS? That's a pretty
> clear indication that there can't be much consistency to be
> expected when talk comes to x86-64 (which most definitely is
> not i386).
>
> >> >> > @@ -170,12 +313,19 @@ multiboot2_proto:
> >> >> >          jne     1f
> >> >> >
> >> >> >          mov     MB2_mem_lower(%ecx),%edx
> >> >> > -        jmp     trampoline_setup
> >> >> > +        jmp     trampoline_bios_setup
> >> >> >
> >> >> >  1:
> >> >> > +        /* EFI mode is not supported via legacy BIOS path. */
> >> >> > +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI32,MB2_tag_type(%ecx)
> >> >> > +        je      mb2_too_old
> >> >> > +
> >> >> > +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64,MB2_tag_type(%ecx)
> >> >> > +        je      mb2_too_old
> >> >>
> >> >> According to the comment we're on the legacy BIOS boot path
> >> >> here, yet at mb2_too_old you assume no text mode VGA. I.e. I'm
> >> >> now even confused about the output handling there.
> >> >
> >> > It is correct. Old GRUB2 (or other legacy multiboot2 compatible boot
> >> > loader) which runs on EFI platform and does not have my features will
> >> > jump into that path. And we do not support that, so, we should fail
> >> > in the best possible way here.
> >> >
> >> > Your comment suggest that code comment should be improved and
> >> > phrased precisely. I will do that.
> >>
> >> Not necessarily: First of all you didn't clarify what video mode we're
> >> in in that old-grub2 case. Do we have text mode? If so, output should
> >> not be avoided. And if we're in a graphical mode without any vga=
> >> option that grub2 may have chosen to interpret, that would smell like
> >> a bug in grub2.
> >
> > Here boot services are dead. They were shutdown by GRUB2 (or other
> > legacy boot loader). So, we do not have simple access to GOP or
> > anything like that. Am I missing something?
>
> How are boot services coming into the picture here? As the code
> comment still visible above says, we're on the legacy boot path
> here. And legacy boot, from all I know, implies a text mode on
> the primary VGA (if any).

Old multiboot2 protocol (without my features) used one path for legacy
BIOS and EFI platforms. If we are here then we are sure that:
  - we are running on EFI platform,
  - we were loaded by old multiboot2 protocol,
  - EFI boot services are shutdown,
  - there is no VGA here.

Daniel
Jan Beulich June 3, 2016, 9:26 a.m. UTC | #8
>>> On 02.06.16 at 18:12, <daniel.kiper@oracle.com> wrote:
> On Thu, Jun 02, 2016 at 02:34:28AM -0600, Jan Beulich wrote:
>> >>> On 01.06.16 at 21:03, <daniel.kiper@oracle.com> wrote:
>> > Hence, we are not able to get anything from EFI. Latter
>> > means that we were booted via older multiboot2 version which shutdown
>> > boot services.
>>
>> Hmm, that's certainly one of the possibilities, yes. I wonder then
>> whether we wouldn't better do away with all of those output
>> attempts then.
> 
> This is one option. However, IMO, not nice. Maybe we should stay with
> serial plus VGA on legacy BIOS platforms. Serials looks quite well
> defined even on EFI platforms. However, if it is not available x86
> out instruction should not do any harm. Though if we wish to increase
> chance of reaching interested parties I would send error messages to
> 0x3f8 and 0x2f8 at once. AFAICT, on many machines usually at least one
> if not both standard/legacy serial ports are available.

On many _older_ machines you mean. Any new machines I got
hold of in the last couple of years didn't have any serial ports
anymore (some still have them implemented in the LPC, but not
wired through, but legacy free systems wouldn't have them at all
anymore).

Jan
Konrad Rzeszutek Wilk June 3, 2016, 5:06 p.m. UTC | #9
On Fri, Jun 03, 2016 at 03:26:59AM -0600, Jan Beulich wrote:
> >>> On 02.06.16 at 18:12, <daniel.kiper@oracle.com> wrote:
> > On Thu, Jun 02, 2016 at 02:34:28AM -0600, Jan Beulich wrote:
> >> >>> On 01.06.16 at 21:03, <daniel.kiper@oracle.com> wrote:
> >> > Hence, we are not able to get anything from EFI. Latter
> >> > means that we were booted via older multiboot2 version which shutdown
> >> > boot services.
> >>
> >> Hmm, that's certainly one of the possibilities, yes. I wonder then
> >> whether we wouldn't better do away with all of those output
> >> attempts then.
> > 
> > This is one option. However, IMO, not nice. Maybe we should stay with
> > serial plus VGA on legacy BIOS platforms. Serials looks quite well
> > defined even on EFI platforms. However, if it is not available x86
> > out instruction should not do any harm. Though if we wish to increase
> > chance of reaching interested parties I would send error messages to
> > 0x3f8 and 0x2f8 at once. AFAICT, on many machines usually at least one
> > if not both standard/legacy serial ports are available.
> 
> On many _older_ machines you mean. Any new machines I got
> hold of in the last couple of years didn't have any serial ports
> anymore (some still have them implemented in the LPC, but not
> wired through, but legacy free systems wouldn't have them at all
> anymore).

And there are also new EFI machines that have no VGA whatsoever but
only serial.

> 
> Jan
>
diff mbox

Patch

diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
index e46d691..efb0614 100644
--- a/xen/arch/x86/boot/head.S
+++ b/xen/arch/x86/boot/head.S
@@ -89,6 +89,13 @@  multiboot2_header_start:
                    0, /* Number of the lines - no preference. */ \
                    0  /* Number of bits per pixel - no preference. */
 
+        /* Inhibit bootloader from calling ExitBootServices(). */
+        mb2ht_init MB2_HT(EFI_BS), MB2_HT(OPTIONAL)
+
+        /* EFI64 entry point. */
+        mb2ht_init MB2_HT(ENTRY_ADDRESS_EFI64), MB2_HT(OPTIONAL), \
+                   sym_phys(__efi64_start)
+
         /* Multiboot2 header end tag. */
         mb2ht_init MB2_HT(END), MB2_HT(REQUIRED)
 multiboot2_header_end:
@@ -100,19 +107,29 @@  multiboot2_header_end:
 gdt_boot_descr:
         .word   6*8-1
         .long   sym_phys(trampoline_gdt)
+        .long   0 /* Needed for 64-bit lgdt */
+
+cs32_switch_addr:
+        .long   sym_phys(cs32_switch)
+        .word   BOOT_CS32
 
 .Lbad_cpu_msg: .asciz "ERR: Not a 64-bit CPU!"
 .Lbad_ldr_msg: .asciz "ERR: Not a Multiboot bootloader!"
+.Lbad_ldr_mb2: .asciz "ERR: On EFI platform use latest Multiboot2 compatible bootloader!"
 
         .section .init.text, "ax", @progbits
 
 bad_cpu:
         mov     $(sym_phys(.Lbad_cpu_msg)),%esi # Error message
-        jmp     print_err
+        mov     $0xB8000,%edi                   # VGA framebuffer
+        jmp     1f
 not_multiboot:
         mov     $(sym_phys(.Lbad_ldr_msg)),%esi # Error message
-print_err:
-        mov     $0xB8000,%edi  # VGA framebuffer
+        mov     $0xB8000,%edi                   # VGA framebuffer
+        jmp     1f
+mb2_too_old:
+        mov     $(sym_phys(.Lbad_ldr_mb2)),%esi # Error message
+        xor     %edi,%edi                       # No VGA framebuffer
 1:      mov     (%esi),%bl
         test    %bl,%bl        # Terminate on '\0' sentinel
         je      .Lhalt
@@ -123,6 +140,8 @@  print_err:
         mov     $0x3f8+0,%dx   # UART Transmit Holding Register
         mov     %bl,%al
         out     %al,%dx        # Send a character over the serial line
+        test    %edi,%edi      # Is VGA framebuffer available?
+        jz      1b
         movsb                  # Write a character to the VGA framebuffer
         mov     $7,%al
         stosb                  # Write an attribute to the VGA framebuffer
@@ -130,6 +149,130 @@  print_err:
 .Lhalt: hlt
         jmp     .Lhalt
 
+        .code64
+
+__efi64_start:
+        cld
+
+        /* Check for Multiboot2 bootloader. */
+        cmp     $MULTIBOOT2_BOOTLOADER_MAGIC,%eax
+        je      efi_multiboot2_proto
+
+        /* Jump to not_multiboot after switching CPU to x86_32 mode. */
+        lea     not_multiboot(%rip),%rdi
+        jmp     x86_32_switch
+
+efi_multiboot2_proto:
+        /*
+         * Multiboot2 information address is 32-bit,
+         * so, zero higher half of %rbx.
+         */
+        mov     %ebx,%ebx
+
+        /* Skip Multiboot2 information fixed part. */
+        lea     (MB2_fixed_sizeof+MULTIBOOT2_TAG_ALIGN-1)(%rbx),%rcx
+        and     $~(MULTIBOOT2_TAG_ALIGN-1),%rcx
+
+0:
+        /* Get EFI SystemTable address from Multiboot2 information. */
+        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64,MB2_tag_type(%rcx)
+        jne     1f
+
+        mov     MB2_efi64_st(%rcx),%rsi
+
+        /* Do not clear BSS twice and do not go into real mode. */
+        movb    $1,skip_realmode(%rip)
+        jmp     3f
+
+1:
+        /* Get EFI ImageHandle address from Multiboot2 information. */
+        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64_IH,MB2_tag_type(%rcx)
+        jne     2f
+
+        mov     MB2_efi64_ih(%rcx),%rdi
+        jmp     3f
+
+2:
+        /* Is it the end of Multiboot2 information? */
+        cmpl    $MULTIBOOT2_TAG_TYPE_END,MB2_tag_type(%rcx)
+        je      run_bs
+
+3:
+        /* Go to next Multiboot2 information tag. */
+        add     MB2_tag_size(%rcx),%ecx
+        add     $(MULTIBOOT2_TAG_ALIGN-1),%rcx
+        and     $~(MULTIBOOT2_TAG_ALIGN-1),%rcx
+        jmp     0b
+
+run_bs:
+        push    %rax
+        push    %rdi
+
+        /*
+         * Initialize BSS (no nasty surprises!).
+         * It must be done earlier than in BIOS case
+         * because efi_multiboot2() touches it.
+         */
+        lea     __bss_start(%rip),%rdi
+        lea     __bss_end(%rip),%rcx
+        sub     %rdi,%rcx
+        shr     $3,%rcx
+        xor     %eax,%eax
+        rep     stosq
+
+        pop     %rdi
+
+        /*
+         * IN: %rdi - EFI ImageHandle, %rsi - EFI SystemTable.
+         * OUT: %rax - Highest available memory address below 1 MiB.
+         *
+         * MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO tag is not provided
+         * on EFI platforms. Hence, it could not be used like
+         * on legacy BIOS platforms.
+         */
+        call    efi_multiboot2
+
+        /* Convert memory address to bytes/16 and store it in safe place. */
+        shr     $4,%rax
+        mov     %rax,%rcx
+
+        pop     %rax
+
+        /* Jump to trampoline_setup after switching CPU to x86_32 mode. */
+        lea     trampoline_setup(%rip),%rdi
+
+x86_32_switch:
+        cli
+
+        /* Initialise GDT. */
+        lgdt    gdt_boot_descr(%rip)
+
+        /* Reload code selector. */
+        ljmpl   *cs32_switch_addr(%rip)
+
+        .code32
+
+cs32_switch:
+        /* Initialise basic data segments. */
+        mov     $BOOT_DS,%edx
+        mov     %edx,%ds
+        mov     %edx,%es
+        mov     %edx,%ss
+        /* %esp is initialised later. */
+
+        /* Load null descriptor to unused segment registers. */
+        xor     %edx,%edx
+        mov     %edx,%fs
+        mov     %edx,%gs
+
+        /* Disable paging. */
+        mov     %cr0,%edx
+        and     $(~X86_CR0_PG),%edx
+        mov     %edx,%cr0
+
+        /* Jump to earlier loaded address. */
+        jmp     *%edi
+
 __start:
         cld
         cli
@@ -157,7 +300,7 @@  __start:
 
         /* Not available? BDA value will be fine. */
         cmovnz  MB_mem_lower(%ebx),%edx
-        jmp     trampoline_setup
+        jmp     trampoline_bios_setup
 
 multiboot2_proto:
         /* Skip Multiboot2 information fixed part. */
@@ -170,12 +313,19 @@  multiboot2_proto:
         jne     1f
 
         mov     MB2_mem_lower(%ecx),%edx
-        jmp     trampoline_setup
+        jmp     trampoline_bios_setup
 
 1:
+        /* EFI mode is not supported via legacy BIOS path. */
+        cmpl    $MULTIBOOT2_TAG_TYPE_EFI32,MB2_tag_type(%ecx)
+        je      mb2_too_old
+
+        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64,MB2_tag_type(%ecx)
+        je      mb2_too_old
+
         /* Is it the end of Multiboot2 information? */
         cmpl    $MULTIBOOT2_TAG_TYPE_END,MB2_tag_type(%ecx)
-        je      trampoline_setup
+        je      trampoline_bios_setup
 
         /* Go to next Multiboot2 information tag. */
         add     MB2_tag_size(%ecx),%ecx
@@ -183,7 +333,7 @@  multiboot2_proto:
         and     $~(MULTIBOOT2_TAG_ALIGN-1),%ecx
         jmp     0b
 
-trampoline_setup:
+trampoline_bios_setup:
         /* Set up trampoline segment 64k below EBDA */
         movzwl  0x40e,%ecx          /* EBDA segment */
         cmp     $0xa000,%ecx        /* sanity check (high) */
@@ -199,12 +349,13 @@  trampoline_setup:
          * multiboot structure (if available) and use the smallest.
          */
         cmp     $0x100,%edx         /* is the multiboot value too small? */
-        jb      2f                  /* if so, do not use it */
+        jb      trampoline_setup    /* if so, do not use it */
         shl     $10-4,%edx
         cmp     %ecx,%edx           /* compare with BDA value */
         cmovb   %edx,%ecx           /* and use the smaller */
 
-2:      /* Reserve 64kb for the trampoline */
+trampoline_setup:
+        /* Reserve 64kb for the trampoline. */
         sub     $0x1000,%ecx
 
         /* From arch/x86/smpboot.c: start_eip had better be page-aligned! */
@@ -221,6 +372,13 @@  trampoline_setup:
         add     $12,%esp            /* Remove reloc() args from stack. */
         mov     %eax,sym_phys(multiboot_ptr)
 
+        /*
+         * Do not zero BSS on EFI platform here.
+         * It was initialized earlier.
+         */
+        cmpb    $1,sym_phys(skip_realmode)
+        je      1f
+
         /* Initialize BSS (no nasty surprises!). */
         mov     $sym_phys(__bss_start),%edi
         mov     $sym_phys(__bss_end),%ecx
@@ -229,6 +387,7 @@  trampoline_setup:
         xor     %eax,%eax
         rep     stosl
 
+1:
         /* Interrogate CPU extended features via CPUID. */
         mov     $0x80000000,%eax
         cpuid
diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
index 84afffa..b311b7c 100644
--- a/xen/arch/x86/efi/efi-boot.h
+++ b/xen/arch/x86/efi/efi-boot.h
@@ -228,6 +228,9 @@  static void *__init efi_arch_allocate_mmap_buffer(UINTN map_size)
 
 static void __init efi_arch_pre_exit_boot(void)
 {
+    if ( !efi_enabled(EFI_LOADER) )
+        return;
+
     if ( !trampoline_phys )
     {
         if ( !cfg.addr )
@@ -665,6 +668,46 @@  static bool_t __init efi_arch_use_config_file(EFI_SYSTEM_TABLE *SystemTable)
 
 static void efi_arch_flush_dcache_area(const void *vaddr, UINTN size) { }
 
+paddr_t __init efi_multiboot2(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable)
+{
+    EFI_GRAPHICS_OUTPUT_PROTOCOL *gop;
+    UINTN cols, gop_mode = ~0, rows;
+
+    set_bit(EFI_PLATFORM, &efi.flags);
+
+    efi_init(ImageHandle, SystemTable);
+
+    efi_console_set_mode();
+
+    if ( StdOut->QueryMode(StdOut, StdOut->Mode->Mode,
+                           &cols, &rows) == EFI_SUCCESS )
+        efi_arch_console_init(cols, rows);
+
+    gop = efi_get_gop();
+
+    if ( gop )
+        gop_mode = efi_find_gop_mode(gop, 0, 0, 0);
+
+    efi_arch_edd();
+
+    /*
+     * efi_arch_cpu() is not needed here. boot_cpu_data
+     * is set later in xen/arch/x86/boot/head.S.
+     */
+
+    efi_tables();
+    setup_efi_pci();
+    efi_variables();
+
+    if ( gop )
+        efi_set_gop_mode(gop, gop_mode);
+
+    efi_exit_boot(ImageHandle, SystemTable);
+
+    /* Return highest available memory address below 1 MiB. */
+    return cfg.addr;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/efi/stub.c b/xen/arch/x86/efi/stub.c
index c5ae369..d30fe89 100644
--- a/xen/arch/x86/efi/stub.c
+++ b/xen/arch/x86/efi/stub.c
@@ -13,6 +13,11 @@  struct efi __read_mostly efi = {
 	.smbios3 = EFI_INVALID_TABLE_ADDR
 };
 
+void __init efi_multiboot2(void)
+{
+    /* TODO: Fail if entered! */
+}
+
 void __init efi_init_memory(void) { }
 
 void efi_update_l4_pgtable(unsigned int l4idx, l4_pgentry_t l4e) { }
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 4eb8572..21bbe6a 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -715,7 +715,7 @@  void __init noreturn __start_xen(unsigned long mbi_p)
     if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) )
         panic("dom0 kernel not specified. Check bootloader configuration.");
 
-    if ( efi_enabled(EFI_PLATFORM) )
+    if ( efi_enabled(EFI_LOADER) )
     {
         set_pdx_range(xen_phys_start >> PAGE_SHIFT,
                       (xen_phys_start + BOOTSTRAP_MAP_BASE) >> PAGE_SHIFT);
@@ -728,7 +728,11 @@  void __init noreturn __start_xen(unsigned long mbi_p)
         l3_bootmap[l3_table_offset(BOOTSTRAP_MAP_BASE)] =
             l3e_from_paddr(__pa(l2_bootmap), __PAGE_HYPERVISOR);
 
-        memmap_type = loader;
+        memmap_type = "EFI";
+    }
+    else if ( efi_enabled(EFI_PLATFORM) )
+    {
+        memmap_type = "EFI";
     }
     else if ( e820_raw_nr != 0 )
     {
@@ -826,7 +830,7 @@  void __init noreturn __start_xen(unsigned long mbi_p)
      * we can relocate the dom0 kernel and other multiboot modules. Also, on
      * x86/64, we relocate Xen to higher memory.
      */
-    for ( i = 0; !efi_enabled(EFI_PLATFORM) && i < mbi->mods_count; i++ )
+    for ( i = 0; !efi_enabled(EFI_LOADER) && i < mbi->mods_count; i++ )
     {
         if ( mod[i].mod_start & (PAGE_SIZE - 1) )
             panic("Bootloader didn't honor module alignment request.");
diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
index b926082..b7aed49 100644
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -173,4 +173,6 @@  void __dummy__(void)
     OFFSET(MB2_tag_type, multiboot2_tag_t, type);
     OFFSET(MB2_tag_size, multiboot2_tag_t, size);
     OFFSET(MB2_mem_lower, multiboot2_tag_basic_meminfo_t, mem_lower);
+    OFFSET(MB2_efi64_st, multiboot2_tag_efi64_t, pointer);
+    OFFSET(MB2_efi64_ih, multiboot2_tag_efi64_ih_t, pointer);
 }
diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
index 6376bfa..fa1da37 100644
--- a/xen/arch/x86/xen.lds.S
+++ b/xen/arch/x86/xen.lds.S
@@ -192,7 +192,7 @@  SECTIONS
   } :text
 
   /* Align BSS to speedup its initialization. */
-  . = ALIGN(4);
+  . = ALIGN(8);
   .bss : {                     /* BSS */
        . = ALIGN(STACK_SIZE);
        __bss_start = .;
@@ -207,7 +207,7 @@  SECTIONS
        *(.bss.percpu.read_mostly)
        . = ALIGN(SMP_CACHE_BYTES);
        __per_cpu_data_end = .;
-       . = ALIGN(4);
+       . = ALIGN(8);
        __bss_end = .;
   } :text
   _end = . ;
diff --git a/xen/common/efi/boot.c b/xen/common/efi/boot.c
index d10c0ab..129512f 100644
--- a/xen/common/efi/boot.c
+++ b/xen/common/efi/boot.c
@@ -79,6 +79,17 @@  static size_t wstrlen(const CHAR16 * s);
 static int set_color(u32 mask, int bpp, u8 *pos, u8 *sz);
 static bool_t match_guid(const EFI_GUID *guid1, const EFI_GUID *guid2);
 
+static void efi_init(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable);
+static void efi_console_set_mode(void);
+static EFI_GRAPHICS_OUTPUT_PROTOCOL *efi_get_gop(void);
+static UINTN efi_find_gop_mode(EFI_GRAPHICS_OUTPUT_PROTOCOL *gop,
+                               UINTN cols, UINTN rows, UINTN depth);
+static void efi_tables(void);
+static void setup_efi_pci(void);
+static void efi_variables(void);
+static void efi_set_gop_mode(EFI_GRAPHICS_OUTPUT_PROTOCOL *gop, UINTN gop_mode);
+static void efi_exit_boot(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable);
+
 static const EFI_BOOT_SERVICES *__initdata efi_bs;
 static UINT32 __initdata efi_bs_revision;
 static EFI_HANDLE __initdata efi_ih;
@@ -936,6 +947,7 @@  efi_start(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable)
 
 #ifndef CONFIG_ARM /* Disabled until runtime services implemented. */
     set_bit(EFI_PLATFORM, &efi.flags);
+    set_bit(EFI_LOADER, &efi.flags);
 #endif
 
     efi_init(ImageHandle, SystemTable);
diff --git a/xen/include/xen/efi.h b/xen/include/xen/efi.h
index 659c7c4..7b0a7ca 100644
--- a/xen/include/xen/efi.h
+++ b/xen/include/xen/efi.h
@@ -9,6 +9,7 @@ 
 #define EFI_INVALID_TABLE_ADDR (~0UL)
 
 #define EFI_PLATFORM	0
+#define EFI_LOADER	1
 
 /* Add fields here only if they need to be referenced from non-EFI code. */
 struct efi {