[v11,07/13] x86: add multiboot2 protocol support for EFI platforms
diff mbox

Message ID 1480976718-12198-8-git-send-email-daniel.kiper@oracle.com
State New, archived
Headers show

Commit Message

Daniel Kiper Dec. 5, 2016, 10:25 p.m. UTC
This way Xen can be loaded on EFI platforms using GRUB2 and
other boot loaders which support multiboot2 protocol.

Signed-off-by: Daniel Kiper <daniel.kiper@oracle.com>
---
v10 - suggestions/fixes:
    - replace ljmpl with lretq
      (suggested by Andrew Cooper),
    - introduce efi_platform to increase code readability
      (suggested by Andrew Cooper).

v9 - suggestions/fixes:
   - use .L labels instead of numeric ones in multiboot2 data scanning loops
     (suggested by Jan Beulich).

v8 - suggestions/fixes:
   - use __bss_start(%rip)/__bss_end(%rip) instead of
     of .startof.(.bss)(%rip)/$.sizeof.(.bss) because
     latter is not tested extensively in different
     built environments yet
     (suggested by Andrew Cooper),
   - fix multiboot2 data scanning loop in x86_32 code
     (suggested by Jan Beulich),
   - add check for extra mem for mbi data if Xen is loaded
     via multiboot2 protocol on EFI platform
     (suggested by Jan Beulich),
   - improve comments
     (suggested by Jan Beulich).

v7 - suggestions/fixes:
   - do not allocate twice memory for trampoline if we were
     loaded via multiboot2 protocol on EFI platform,
   - wrap long line
     (suggested by Jan Beulich),
   - improve comments
     (suggested by Jan Beulich).

v6 - suggestions/fixes:
   - improve label names in assembly
     error printing code
     (suggested by Jan Beulich),
   - improve comments
     (suggested by Jan Beulich),
   - various minor cleanups and fixes
     (suggested by Jan Beulich).

v4 - suggestions/fixes:
   - remove redundant BSS alignment,
   - update BSS alignment check,
   - use __set_bit() instead of set_bit() if possible
     (suggested by Jan Beulich),
   - call efi_arch_cpu() from efi_multiboot2()
     even if the same work is done later in
     other place right now
     (suggested by Jan Beulich),
   - xen/arch/x86/efi/stub.c:efi_multiboot2()
     fail properly on EFI platforms,
   - do not read data beyond the end of multiboot2
     information in xen/arch/x86/boot/head.S
     (suggested by Jan Beulich),
   - use 32-bit registers in x86_64 code if possible
     (suggested by Jan Beulich),
   - multiboot2 information address is 64-bit
     in x86_64 code, so, treat it is as is
     (suggested by Jan Beulich),
   - use cmovcc if possible,
   - leave only one space between rep and stosq
     (suggested by Jan Beulich),
   - improve error handling,
   - improve early error messages,
     (suggested by Jan Beulich),
   - improve early error messages printing code,
   - improve label names
     (suggested by Jan Beulich),
   - improve comments
     (suggested by Jan Beulich),
   - various minor cleanups.

v3 - suggestions/fixes:
   - take into account alignment when skipping multiboot2 fixed part
     (suggested by Konrad Rzeszutek Wilk),
   - improve segment registers initialization
     (suggested by Jan Beulich),
   - improve comments
     (suggested by Jan Beulich and Konrad Rzeszutek Wilk),
   - improve commit message
     (suggested by Jan Beulich).

v2 - suggestions/fixes:
   - generate multiboot2 header using macros
     (suggested by Jan Beulich),
   - switch CPU to x86_32 mode before
     jumping to 32-bit code
     (suggested by Andrew Cooper),
   - reduce code changes to increase patch readability
     (suggested by Jan Beulich),
   - improve comments
     (suggested by Jan Beulich),
   - ignore MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO tag on EFI platform
     and find on my own multiboot2.mem_lower value,
   - stop execution if EFI platform is detected
     in legacy BIOS path.
---
 xen/arch/x86/boot/head.S          |  263 ++++++++++++++++++++++++++++++++++---
 xen/arch/x86/efi/efi-boot.h       |   54 +++++++-
 xen/arch/x86/efi/stub.c           |   38 ++++++
 xen/arch/x86/x86_64/asm-offsets.c |    2 +
 xen/arch/x86/xen.lds.S            |    4 +-
 xen/common/efi/boot.c             |   11 ++
 6 files changed, 349 insertions(+), 23 deletions(-)

Comments

Andrew Cooper Dec. 16, 2016, 1:38 p.m. UTC | #1
On 05/12/16 22:25, Daniel Kiper wrote:
> diff --git a/xen/arch/x86/efi/stub.c b/xen/arch/x86/efi/stub.c
> index 4158124..6ea6aa1 100644
> --- a/xen/arch/x86/efi/stub.c
> +++ b/xen/arch/x86/efi/stub.c
> @@ -3,6 +3,44 @@
>  #include <xen/init.h>
>  #include <xen/lib.h>
>  #include <asm/page.h>
> +#include <asm/efibind.h>
> +#include <efi/efidef.h>
> +#include <efi/eficapsule.h>
> +#include <efi/eficon.h>
> +#include <efi/efidevp.h>
> +#include <efi/efiapi.h>
> +
> +/*
> + * Here we are in EFI stub. EFI calls are not supported due to lack
> + * of relevant functionality in compiler and/or linker.
> + *
> + * efi_multiboot2() is an exception. Please look below for more details.
> + */
> +
> +paddr_t __init noreturn efi_multiboot2(EFI_HANDLE ImageHandle,
> +                                       EFI_SYSTEM_TABLE *SystemTable)
> +{
> +    CHAR16 *err = L"Xen does not have EFI code build in!!!\r\nSystem halted!!!\r\n";
> +    SIMPLE_TEXT_OUTPUT_INTERFACE *StdErr;
> +
> +    StdErr = SystemTable->StdErr ? SystemTable->StdErr : SystemTable->ConOut;
> +
> +    /*
> +     * Print error message and halt the system.
> +     *
> +     * We have to open code MS x64 calling convention
> +     * in assembly because here this convention may
> +     * not be directly supported by C compiler.
> +     */
> +    asm volatile(
> +    "    call %2                      \n"

This should be `call *%2`, because otherwise gcc complains with:

stub.c: Assembler messages:
stub.c:35: Warning: indirect call without `*'

Interestingly, it isn't fatal to the build which is why I didn't spot it
earlier.  I also can't work out why it isn't fatal to the build, because
I'd have thought the -Werror should have been enough.

~Andrew

> +    "0:  hlt                          \n"
> +    "    jmp  0b                      \n"
> +       : "+c" (StdErr), "+d" (err) : "g" (StdErr->OutputString)
> +       : "rax", "r8", "r9", "r10", "r11", "memory");
> +
> +    unreachable();
> +}
>  
>  bool efi_enabled(unsigned int feature)
>  {
Jan Beulich Dec. 16, 2016, 1:50 p.m. UTC | #2
>>> On 16.12.16 at 14:38, <andrew.cooper3@citrix.com> wrote:
> On 05/12/16 22:25, Daniel Kiper wrote:
>> diff --git a/xen/arch/x86/efi/stub.c b/xen/arch/x86/efi/stub.c
>> index 4158124..6ea6aa1 100644
>> --- a/xen/arch/x86/efi/stub.c
>> +++ b/xen/arch/x86/efi/stub.c
>> @@ -3,6 +3,44 @@
>>  #include <xen/init.h>
>>  #include <xen/lib.h>
>>  #include <asm/page.h>
>> +#include <asm/efibind.h>
>> +#include <efi/efidef.h>
>> +#include <efi/eficapsule.h>
>> +#include <efi/eficon.h>
>> +#include <efi/efidevp.h>
>> +#include <efi/efiapi.h>
>> +
>> +/*
>> + * Here we are in EFI stub. EFI calls are not supported due to lack
>> + * of relevant functionality in compiler and/or linker.
>> + *
>> + * efi_multiboot2() is an exception. Please look below for more details.
>> + */
>> +
>> +paddr_t __init noreturn efi_multiboot2(EFI_HANDLE ImageHandle,
>> +                                       EFI_SYSTEM_TABLE *SystemTable)
>> +{
>> +    CHAR16 *err = L"Xen does not have EFI code build in!!!\r\nSystem halted!!!\r\n";
>> +    SIMPLE_TEXT_OUTPUT_INTERFACE *StdErr;
>> +
>> +    StdErr = SystemTable->StdErr ? SystemTable->StdErr : SystemTable->ConOut;
>> +
>> +    /*
>> +     * Print error message and halt the system.
>> +     *
>> +     * We have to open code MS x64 calling convention
>> +     * in assembly because here this convention may
>> +     * not be directly supported by C compiler.
>> +     */
>> +    asm volatile(
>> +    "    call %2                      \n"
> 
> This should be `call *%2`, because otherwise gcc complains with:
> 
> stub.c: Assembler messages:
> stub.c:35: Warning: indirect call without `*'
> 
> Interestingly, it isn't fatal to the build which is why I didn't spot it
> earlier.  I also can't work out why it isn't fatal to the build, because
> I'd have thought the -Werror should have been enough.

-Werror only affects compiler warnings. The compiler doesn't try
to interpret assembler diagnostics.

Jan
Doug Goldstein Jan. 10, 2017, 1:37 a.m. UTC | #3
On 12/5/16 4:25 PM, Daniel Kiper wrote:
> This way Xen can be loaded on EFI platforms using GRUB2 and
> other boot loaders which support multiboot2 protocol.
> 
> Signed-off-by: Daniel Kiper <daniel.kiper@oracle.com>
> ---
> v10 - suggestions/fixes:
>     - replace ljmpl with lretq
>       (suggested by Andrew Cooper),
>     - introduce efi_platform to increase code readability
>       (suggested by Andrew Cooper).
> 
> v9 - suggestions/fixes:
>    - use .L labels instead of numeric ones in multiboot2 data scanning loops
>      (suggested by Jan Beulich).
> 
> v8 - suggestions/fixes:
>    - use __bss_start(%rip)/__bss_end(%rip) instead of
>      of .startof.(.bss)(%rip)/$.sizeof.(.bss) because
>      latter is not tested extensively in different
>      built environments yet
>      (suggested by Andrew Cooper),
>    - fix multiboot2 data scanning loop in x86_32 code
>      (suggested by Jan Beulich),
>    - add check for extra mem for mbi data if Xen is loaded
>      via multiboot2 protocol on EFI platform
>      (suggested by Jan Beulich),
>    - improve comments
>      (suggested by Jan Beulich).
> 
> v7 - suggestions/fixes:
>    - do not allocate twice memory for trampoline if we were
>      loaded via multiboot2 protocol on EFI platform,
>    - wrap long line
>      (suggested by Jan Beulich),
>    - improve comments
>      (suggested by Jan Beulich).
> 
> v6 - suggestions/fixes:
>    - improve label names in assembly
>      error printing code
>      (suggested by Jan Beulich),
>    - improve comments
>      (suggested by Jan Beulich),
>    - various minor cleanups and fixes
>      (suggested by Jan Beulich).
> 
> v4 - suggestions/fixes:
>    - remove redundant BSS alignment,
>    - update BSS alignment check,
>    - use __set_bit() instead of set_bit() if possible
>      (suggested by Jan Beulich),
>    - call efi_arch_cpu() from efi_multiboot2()
>      even if the same work is done later in
>      other place right now
>      (suggested by Jan Beulich),
>    - xen/arch/x86/efi/stub.c:efi_multiboot2()
>      fail properly on EFI platforms,
>    - do not read data beyond the end of multiboot2
>      information in xen/arch/x86/boot/head.S
>      (suggested by Jan Beulich),
>    - use 32-bit registers in x86_64 code if possible
>      (suggested by Jan Beulich),
>    - multiboot2 information address is 64-bit
>      in x86_64 code, so, treat it is as is
>      (suggested by Jan Beulich),
>    - use cmovcc if possible,
>    - leave only one space between rep and stosq
>      (suggested by Jan Beulich),
>    - improve error handling,
>    - improve early error messages,
>      (suggested by Jan Beulich),
>    - improve early error messages printing code,
>    - improve label names
>      (suggested by Jan Beulich),
>    - improve comments
>      (suggested by Jan Beulich),
>    - various minor cleanups.
> 
> v3 - suggestions/fixes:
>    - take into account alignment when skipping multiboot2 fixed part
>      (suggested by Konrad Rzeszutek Wilk),
>    - improve segment registers initialization
>      (suggested by Jan Beulich),
>    - improve comments
>      (suggested by Jan Beulich and Konrad Rzeszutek Wilk),
>    - improve commit message
>      (suggested by Jan Beulich).
> 
> v2 - suggestions/fixes:
>    - generate multiboot2 header using macros
>      (suggested by Jan Beulich),
>    - switch CPU to x86_32 mode before
>      jumping to 32-bit code
>      (suggested by Andrew Cooper),
>    - reduce code changes to increase patch readability
>      (suggested by Jan Beulich),
>    - improve comments
>      (suggested by Jan Beulich),
>    - ignore MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO tag on EFI platform
>      and find on my own multiboot2.mem_lower value,
>    - stop execution if EFI platform is detected
>      in legacy BIOS path.
> ---
>  xen/arch/x86/boot/head.S          |  263 ++++++++++++++++++++++++++++++++++---
>  xen/arch/x86/efi/efi-boot.h       |   54 +++++++-
>  xen/arch/x86/efi/stub.c           |   38 ++++++
>  xen/arch/x86/x86_64/asm-offsets.c |    2 +
>  xen/arch/x86/xen.lds.S            |    4 +-
>  xen/common/efi/boot.c             |   11 ++
>  6 files changed, 349 insertions(+), 23 deletions(-)
> 
> diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
> index d423fd8..ac93df0 100644
> --- a/xen/arch/x86/boot/head.S
> +++ b/xen/arch/x86/boot/head.S
> @@ -89,6 +89,13 @@ multiboot2_header_start:
>                     0, /* Number of the lines - no preference. */ \
>                     0  /* Number of bits per pixel - no preference. */
>  
> +        /* Inhibit bootloader from calling ExitBootServices(). */
> +        mb2ht_init MB2_HT(EFI_BS), MB2_HT(OPTIONAL)
> +
> +        /* EFI64 entry point. */
> +        mb2ht_init MB2_HT(ENTRY_ADDRESS_EFI64), MB2_HT(OPTIONAL), \
> +                   sym_phys(__efi64_start)
> +
>          /* Multiboot2 header end tag. */
>          mb2ht_init MB2_HT(END), MB2_HT(REQUIRED)
>  .Lmultiboot2_header_end:
> @@ -100,20 +107,48 @@ multiboot2_header_start:
>  gdt_boot_descr:
>          .word   6*8-1
>          .long   sym_phys(trampoline_gdt)
> +        .long   0 /* Needed for 64-bit lgdt */
> +
> +        .align 4
> +vga_text_buffer:
> +        .long   0xb8000
> +
> +efi_platform:
> +        .byte   0
>  
>  .Lbad_cpu_msg: .asciz "ERR: Not a 64-bit CPU!"
>  .Lbad_ldr_msg: .asciz "ERR: Not a Multiboot bootloader!"
> +.Lbad_ldr_nbs: .asciz "ERR: Bootloader shutdown EFI x64 boot services!"
> +.Lbad_ldr_nst: .asciz "ERR: EFI SystemTable is not provided by bootloader!"
> +.Lbad_ldr_nih: .asciz "ERR: EFI ImageHandle is not provided by bootloader!"
> +.Lbad_efi_msg: .asciz "ERR: EFI IA-32 platforms are not supported!"
>  
>          .section .init.text, "ax", @progbits
>  
>  bad_cpu:
>          mov     $(sym_phys(.Lbad_cpu_msg)),%esi # Error message
> -        jmp     print_err
> +        jmp     .Lget_vtb
>  not_multiboot:
>          mov     $(sym_phys(.Lbad_ldr_msg)),%esi # Error message
> -print_err:
> -        mov     $0xB8000,%edi  # VGA framebuffer
> -1:      mov     (%esi),%bl
> +        jmp     .Lget_vtb
> +.Lmb2_no_st:
> +        mov     $(sym_phys(.Lbad_ldr_nst)),%esi # Error message
> +        jmp     .Lget_vtb
> +.Lmb2_no_ih:
> +        mov     $(sym_phys(.Lbad_ldr_nih)),%esi # Error message
> +        jmp     .Lget_vtb
> +.Lmb2_no_bs:
> +        mov     $(sym_phys(.Lbad_ldr_nbs)),%esi # Error message
> +        xor     %edi,%edi                       # No VGA text buffer
> +        jmp     .Lsend_chr
> +.Lmb2_efi_ia_32:
> +        mov     $(sym_phys(.Lbad_efi_msg)),%esi # Error message
> +        xor     %edi,%edi                       # No VGA text buffer
> +        jmp     .Lsend_chr
> +.Lget_vtb:
> +        mov     sym_phys(vga_text_buffer),%edi
> +.Lsend_chr:
> +        mov     (%esi),%bl
>          test    %bl,%bl        # Terminate on '\0' sentinel
>          je      .Lhalt
>          mov     $0x3f8+5,%dx   # UART Line Status Register
> @@ -123,13 +158,186 @@ print_err:
>          mov     $0x3f8+0,%dx   # UART Transmit Holding Register
>          mov     %bl,%al
>          out     %al,%dx        # Send a character over the serial line
> -        movsb                  # Write a character to the VGA framebuffer
> +        test    %edi,%edi      # Is the VGA text buffer available?
> +        jz      .Lsend_chr
> +        movsb                  # Write a character to the VGA text buffer
>          mov     $7,%al
> -        stosb                  # Write an attribute to the VGA framebuffer
> -        jmp     1b
> +        stosb                  # Write an attribute to the VGA text buffer
> +        jmp     .Lsend_chr
>  .Lhalt: hlt
>          jmp     .Lhalt
>  
> +        .code64
> +
> +__efi64_start:
> +        cld
> +
> +        /* VGA is not available on EFI platforms. */
> +        movl   $0,vga_text_buffer(%rip)
> +
> +        /* Check for Multiboot2 bootloader. */
> +        cmp     $MULTIBOOT2_BOOTLOADER_MAGIC,%eax
> +        je      .Lefi_multiboot2_proto
> +
> +        /* Jump to not_multiboot after switching CPU to x86_32 mode. */
> +        lea     not_multiboot(%rip),%edi
> +        jmp     x86_32_switch
> +
> +.Lefi_multiboot2_proto:
> +        /* Zero EFI SystemTable and EFI ImageHandle addresses. */
> +        xor     %esi,%esi
> +        xor     %edi,%edi
> +
> +        /* Skip Multiboot2 information fixed part. */
> +        lea     (MB2_fixed_sizeof+MULTIBOOT2_TAG_ALIGN-1)(%rbx),%ecx
> +        and     $~(MULTIBOOT2_TAG_ALIGN-1),%ecx
> +
> +.Lefi_mb2_tsize:
> +        /* Check Multiboot2 information total size. */
> +        mov     %ecx,%r8d
> +        sub     %ebx,%r8d
> +        cmp     %r8d,MB2_fixed_total_size(%rbx)
> +        jbe     run_bs
> +
> +        /* Are EFI boot services available? */
> +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI_BS,MB2_tag_type(%rcx)
> +        jne     .Lefi_mb2_st
> +
> +        /* We are on EFI platform and EFI boot services are available. */
> +        incb    efi_platform(%rip)
> +
> +        /*
> +         * Disable real mode and other legacy stuff which should not
> +         * be run on EFI platforms.
> +         */
> +        incb    skip_realmode(%rip)
> +        jmp     .Lefi_mb2_next_tag
> +
> +.Lefi_mb2_st:
> +        /* Get EFI SystemTable address from Multiboot2 information. */
> +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64,MB2_tag_type(%rcx)
> +        cmove   MB2_efi64_st(%rcx),%rsi
> +        je      .Lefi_mb2_next_tag
> +
> +        /* Get EFI ImageHandle address from Multiboot2 information. */
> +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64_IH,MB2_tag_type(%rcx)
> +        cmove   MB2_efi64_ih(%rcx),%rdi
> +        je      .Lefi_mb2_next_tag
> +
> +        /* Is it the end of Multiboot2 information? */
> +        cmpl    $MULTIBOOT2_TAG_TYPE_END,MB2_tag_type(%rcx)
> +        je      run_bs
> +
> +.Lefi_mb2_next_tag:
> +        /* Go to next Multiboot2 information tag. */
> +        add     MB2_tag_size(%rcx),%ecx
> +        add     $(MULTIBOOT2_TAG_ALIGN-1),%ecx
> +        and     $~(MULTIBOOT2_TAG_ALIGN-1),%ecx
> +        jmp     .Lefi_mb2_tsize
> +
> +run_bs:
> +        /* Are EFI boot services available? */
> +        cmpb    $0,efi_platform(%rip)
> +        jnz     0f
> +
> +        /* Jump to .Lmb2_no_bs after switching CPU to x86_32 mode. */
> +        lea     .Lmb2_no_bs(%rip),%edi
> +        jmp     x86_32_switch
> +
> +0:
> +        /* Is EFI SystemTable address provided by boot loader? */
> +        test    %rsi,%rsi
> +        jnz     1f
> +
> +        /* Jump to .Lmb2_no_st after switching CPU to x86_32 mode. */
> +        lea     .Lmb2_no_st(%rip),%edi
> +        jmp     x86_32_switch
> +
> +1:
> +        /* Is EFI ImageHandle address provided by boot loader? */
> +        test    %rdi,%rdi
> +        jnz     2f
> +
> +        /* Jump to .Lmb2_no_ih after switching CPU to x86_32 mode. */
> +        lea     .Lmb2_no_ih(%rip),%edi
> +        jmp     x86_32_switch
> +
> +2:
> +        push    %rax
> +        push    %rdi
> +
> +        /*
> +         * Initialize BSS (no nasty surprises!).
> +         * It must be done earlier than in BIOS case
> +         * because efi_multiboot2() touches it.
> +         */
> +        lea     __bss_start(%rip),%edi
> +        lea     __bss_end(%rip),%ecx
> +        sub     %edi,%ecx
> +        shr     $3,%ecx
> +        xor     %eax,%eax
> +        rep stosq
> +
> +        pop     %rdi
> +
> +        /*
> +         * efi_multiboot2() is called according to System V AMD64 ABI:
> +         *   - IN: %rdi - EFI ImageHandle, %rsi - EFI SystemTable,
> +         *   - OUT: %rax - highest usable memory address below 1 MiB;
> +         *                 memory above this address is reserved for trampoline;
> +         *                 memory below this address is used as a storage for
> +         *                 mbi struct created in reloc().
> +         *
> +         * MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO tag is not provided
> +         * on EFI platforms. Hence, it could not be used like
> +         * on legacy BIOS platforms.
> +         */
> +        call    efi_multiboot2
> +
> +        /* Convert memory address to bytes/16 and store it in safe place. */
> +        shr     $4,%eax
> +        mov     %eax,%ecx
> +
> +        pop     %rax
> +
> +        /* Jump to trampoline_setup after switching CPU to x86_32 mode. */
> +        lea     trampoline_setup(%rip),%edi
> +
> +x86_32_switch:
> +        cli
> +
> +        /* Initialize GDTR. */
> +        lgdt    gdt_boot_descr(%rip)
> +
> +        /* Reload code selector. */
> +        pushq   $BOOT_CS32
> +        lea     cs32_switch(%rip),%edx
> +        push    %rdx
> +        lretq
> +
> +        .code32
> +
> +cs32_switch:
> +        /* Initialize basic data segments. */
> +        mov     $BOOT_DS,%edx
> +        mov     %edx,%ds
> +        mov     %edx,%es
> +        mov     %edx,%ss
> +        /* %esp is initialized later. */
> +
> +        /* Load null descriptor to unused segment registers. */
> +        xor     %edx,%edx
> +        mov     %edx,%fs
> +        mov     %edx,%gs
> +
> +        /* Disable paging. */
> +        mov     %cr0,%edx
> +        and     $(~X86_CR0_PG),%edx
> +        mov     %edx,%cr0
> +
> +        /* Jump to earlier loaded address. */
> +        jmp     *%edi
> +
>  __start:
>          cld
>          cli
> @@ -157,7 +365,7 @@ __start:
>  
>          /* Not available? BDA value will be fine. */
>          cmovnz  MB_mem_lower(%ebx),%edx
> -        jmp     trampoline_setup
> +        jmp     trampoline_bios_setup
>  
>  .Lmultiboot2_proto:
>          /* Skip Multiboot2 information fixed part. */
> @@ -169,24 +377,33 @@ __start:
>          mov     %ecx,%edi
>          sub     %ebx,%edi
>          cmp     %edi,MB2_fixed_total_size(%ebx)
> -        jbe     trampoline_setup
> +        jbe     trampoline_bios_setup
>  
>          /* Get mem_lower from Multiboot2 information. */
>          cmpl    $MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO,MB2_tag_type(%ecx)
>          cmove   MB2_mem_lower(%ecx),%edx
> -        je      trampoline_setup
> +        je      .Lmb2_next_tag
> +
> +        /* EFI IA-32 platforms are not supported. */
> +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI32,MB2_tag_type(%ecx)
> +        je      .Lmb2_efi_ia_32
> +
> +        /* Bootloader shutdown EFI x64 boot services. */
> +        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64,MB2_tag_type(%ecx)
> +        je      .Lmb2_no_bs
>  
>          /* Is it the end of Multiboot2 information? */
>          cmpl    $MULTIBOOT2_TAG_TYPE_END,MB2_tag_type(%ecx)
> -        je      trampoline_setup
> +        je      trampoline_bios_setup
>  
> +.Lmb2_next_tag:
>          /* Go to next Multiboot2 information tag. */
>          add     MB2_tag_size(%ecx),%ecx
>          add     $(MULTIBOOT2_TAG_ALIGN-1),%ecx
>          and     $~(MULTIBOOT2_TAG_ALIGN-1),%ecx
>          jmp     .Lmb2_tsize
>  
> -trampoline_setup:
> +trampoline_bios_setup:
>          /* Set up trampoline segment 64k below EBDA */
>          movzwl  0x40e,%ecx          /* EBDA segment */
>          cmp     $0xa000,%ecx        /* sanity check (high) */
> @@ -202,16 +419,19 @@ trampoline_setup:
>           * multiboot structure (if available) and use the smallest.
>           */
>          cmp     $0x100,%edx         /* is the multiboot value too small? */
> -        jb      2f                  /* if so, do not use it */
> +        jb      trampoline_setup    /* if so, do not use it */
>          shl     $10-4,%edx
>          cmp     %ecx,%edx           /* compare with BDA value */
>          cmovb   %edx,%ecx           /* and use the smaller */
>  
> -2:      /* Reserve 64kb for the trampoline */
> +        /* Reserve 64kb for the trampoline. */
>          sub     $0x1000,%ecx
>  
>          /* From arch/x86/smpboot.c: start_eip had better be page-aligned! */
>          xor     %cl, %cl
> +
> +trampoline_setup:
> +        /* Save trampoline address for later use. */
>          shl     $4, %ecx
>          mov     %ecx,sym_phys(trampoline_phys)
>  
> @@ -223,7 +443,14 @@ trampoline_setup:
>          call    reloc
>          mov     %eax,sym_phys(multiboot_ptr)
>  
> -        /* Initialize BSS (no nasty surprises!) */
> +        /*
> +         * Do not zero BSS on EFI platform here.
> +         * It was initialized earlier.
> +         */
> +        cmpb    $0,sym_phys(efi_platform)
> +        jnz     1f
> +
> +        /* Initialize BSS (no nasty surprises!). */
>          mov     $sym_phys(__bss_start),%edi
>          mov     $sym_phys(__bss_end),%ecx
>          sub     %edi,%ecx
> @@ -231,6 +458,7 @@ trampoline_setup:
>          shr     $2,%ecx
>          rep stosl
>  
> +1:
>          /* Interrogate CPU extended features via CPUID. */
>          mov     $0x80000000,%eax
>          cpuid
> @@ -282,8 +510,13 @@ trampoline_setup:
>          cmp     $sym_phys(__trampoline_seg_stop),%edi
>          jb      1b
>  
> +        /* Do not parse command line on EFI platform here. */
> +        cmpb    $0,sym_phys(efi_platform)
> +        jnz     1f
> +
>          call    cmdline_parse_early
>  
> +1:
>          /* Switch to low-memory stack.  */
>          mov     sym_phys(trampoline_phys),%edi
>          lea     0x10000(%edi),%esp
> diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
> index 62c010e..dc857d8 100644
> --- a/xen/arch/x86/efi/efi-boot.h
> +++ b/xen/arch/x86/efi/efi-boot.h
> @@ -146,6 +146,8 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable,
>  {
>      struct e820entry *e;
>      unsigned int i;
> +    /* Check for extra mem for mbi data if Xen is loaded via multiboot2 protocol. */
> +    UINTN extra_mem = efi_enabled(EFI_LOADER) ? 0 : (64 << 10);

Just wondering where the constant came from? And if there should be a
little bit of information about it. To me its just weird to shift 64.

>  
>      /* Populate E820 table and check trampoline area availability. */
>      e = e820map - 1;
> @@ -168,7 +170,8 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable,
>              /* fall through */
>          case EfiConventionalMemory:
>              if ( !trampoline_phys && desc->PhysicalStart + len <= 0x100000 &&
> -                 len >= cfg.size && desc->PhysicalStart + len > cfg.addr )
> +                 len >= cfg.size + extra_mem &&
> +                 desc->PhysicalStart + len > cfg.addr )
>                  cfg.addr = (desc->PhysicalStart + len - cfg.size) & PAGE_MASK;

So this is where the current series blows up and fails on real hardware.
No where in the EFI + MB2 code path is cfg.size ever initialized. Its
only initialized in the straight EFI case. The result is that cfg.addr
is set to the section immediately following this. Took a bit to
trackdown because I checked for memory overlaps with where Xen was
loaded and where it was relocated to but forgot to check for overlaps
with the trampoline code. This is the address where the trampoline jumps
are copied.

Personally I'd like to see an ASSERT added or the code swizzled around
in such a way that its not possible to get into a bad state. But this is
probably another patch series.

>              /* fall through */
>          case EfiLoaderCode:
> @@ -210,12 +213,14 @@ static void *__init efi_arch_allocate_mmap_buffer(UINTN map_size)
>  
>  static void __init efi_arch_pre_exit_boot(void)
>  {
> -    if ( !trampoline_phys )
> -    {
> -        if ( !cfg.addr )
> -            blexit(L"No memory for trampoline");
> +    if ( trampoline_phys )
> +        return;
> +
> +    if ( !cfg.addr )
> +        blexit(L"No memory for trampoline");
> +
> +    if ( efi_enabled(EFI_LOADER) )
>          relocate_trampoline(cfg.addr);
> -    }
>  }
>  
>  static void __init noreturn efi_arch_post_exit_boot(void)
> @@ -653,6 +658,43 @@ static bool_t __init efi_arch_use_config_file(EFI_SYSTEM_TABLE *SystemTable)
>  
>  static void efi_arch_flush_dcache_area(const void *vaddr, UINTN size) { }
>  
> +paddr_t __init efi_multiboot2(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable)
> +{
> +    EFI_GRAPHICS_OUTPUT_PROTOCOL *gop;
> +    UINTN cols, gop_mode = ~0, rows;
> +
> +    __set_bit(EFI_BOOT, &efi_flags);
> +    __set_bit(EFI_RS, &efi_flags);
> +
> +    efi_init(ImageHandle, SystemTable);
> +
> +    efi_console_set_mode();
> +
> +    if ( StdOut->QueryMode(StdOut, StdOut->Mode->Mode,
> +                           &cols, &rows) == EFI_SUCCESS )
> +        efi_arch_console_init(cols, rows);
> +
> +    gop = efi_get_gop();
> +
> +    if ( gop )
> +        gop_mode = efi_find_gop_mode(gop, 0, 0, 0);
> +
> +    efi_arch_edd();
> +    efi_arch_cpu();
> +
> +    efi_tables();
> +    setup_efi_pci();
> +    efi_variables();

This is probably where you missed the call to "efi_arch_memory_setup();"
that gave me hell above.


> +
> +    if ( gop )
> +        efi_set_gop_mode(gop, gop_mode);
> +
> +    efi_exit_boot(ImageHandle, SystemTable);
> +
> +    /* Return highest usable memory address below 1 MiB. */
> +    return cfg.addr;
> +}
> +
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/arch/x86/efi/stub.c b/xen/arch/x86/efi/stub.c
> index 4158124..6ea6aa1 100644
> --- a/xen/arch/x86/efi/stub.c
> +++ b/xen/arch/x86/efi/stub.c
> @@ -3,6 +3,44 @@
>  #include <xen/init.h>
>  #include <xen/lib.h>
>  #include <asm/page.h>
> +#include <asm/efibind.h>
> +#include <efi/efidef.h>
> +#include <efi/eficapsule.h>
> +#include <efi/eficon.h>
> +#include <efi/efidevp.h>
> +#include <efi/efiapi.h>
> +
> +/*
> + * Here we are in EFI stub. EFI calls are not supported due to lack
> + * of relevant functionality in compiler and/or linker.
> + *
> + * efi_multiboot2() is an exception. Please look below for more details.
> + */
> +
> +paddr_t __init noreturn efi_multiboot2(EFI_HANDLE ImageHandle,
> +                                       EFI_SYSTEM_TABLE *SystemTable)
> +{
> +    CHAR16 *err = L"Xen does not have EFI code build in!!!\r\nSystem halted!!!\r\n";
> +    SIMPLE_TEXT_OUTPUT_INTERFACE *StdErr;
> +
> +    StdErr = SystemTable->StdErr ? SystemTable->StdErr : SystemTable->ConOut;
> +
> +    /*
> +     * Print error message and halt the system.
> +     *
> +     * We have to open code MS x64 calling convention
> +     * in assembly because here this convention may
> +     * not be directly supported by C compiler.
> +     */
> +    asm volatile(
> +    "    call %2                      \n"
> +    "0:  hlt                          \n"
> +    "    jmp  0b                      \n"
> +       : "+c" (StdErr), "+d" (err) : "g" (StdErr->OutputString)
> +       : "rax", "r8", "r9", "r10", "r11", "memory");
> +
> +    unreachable();
> +}
>  
>  bool efi_enabled(unsigned int feature)
>  {
> diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
> index b437a8f..2a22659 100644
> --- a/xen/arch/x86/x86_64/asm-offsets.c
> +++ b/xen/arch/x86/x86_64/asm-offsets.c
> @@ -175,6 +175,8 @@ void __dummy__(void)
>      OFFSET(MB2_tag_type, multiboot2_tag_t, type);
>      OFFSET(MB2_tag_size, multiboot2_tag_t, size);
>      OFFSET(MB2_mem_lower, multiboot2_tag_basic_meminfo_t, mem_lower);
> +    OFFSET(MB2_efi64_st, multiboot2_tag_efi64_t, pointer);
> +    OFFSET(MB2_efi64_ih, multiboot2_tag_efi64_ih_t, pointer);
>      BLANK();
>  
>      OFFSET(DOMAIN_vm_assist, struct domain, vm_assist);
> diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
> index b0b1c9b..e0e2529 100644
> --- a/xen/arch/x86/xen.lds.S
> +++ b/xen/arch/x86/xen.lds.S
> @@ -331,5 +331,5 @@ ASSERT(IS_ALIGNED(__init_end,   PAGE_SIZE), "__init_end misaligned")
>  
>  ASSERT(IS_ALIGNED(trampoline_start, 4), "trampoline_start misaligned")
>  ASSERT(IS_ALIGNED(trampoline_end,   4), "trampoline_end misaligned")
> -ASSERT(IS_ALIGNED(__bss_start,      4), "__bss_start misaligned")
> -ASSERT(IS_ALIGNED(__bss_end,        4), "__bss_end misaligned")
> +ASSERT(IS_ALIGNED(__bss_start,      8), "__bss_start misaligned")
> +ASSERT(IS_ALIGNED(__bss_end,        8), "__bss_end misaligned")
> diff --git a/xen/common/efi/boot.c b/xen/common/efi/boot.c
> index 0a93e61..70ed836 100644
> --- a/xen/common/efi/boot.c
> +++ b/xen/common/efi/boot.c
> @@ -79,6 +79,17 @@ static size_t wstrlen(const CHAR16 * s);
>  static int set_color(u32 mask, int bpp, u8 *pos, u8 *sz);
>  static bool_t match_guid(const EFI_GUID *guid1, const EFI_GUID *guid2);
>  
> +static void efi_init(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable);
> +static void efi_console_set_mode(void);
> +static EFI_GRAPHICS_OUTPUT_PROTOCOL *efi_get_gop(void);
> +static UINTN efi_find_gop_mode(EFI_GRAPHICS_OUTPUT_PROTOCOL *gop,
> +                               UINTN cols, UINTN rows, UINTN depth);
> +static void efi_tables(void);
> +static void setup_efi_pci(void);
> +static void efi_variables(void);
> +static void efi_set_gop_mode(EFI_GRAPHICS_OUTPUT_PROTOCOL *gop, UINTN gop_mode);
> +static void efi_exit_boot(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable);
> +
>  static const EFI_BOOT_SERVICES *__initdata efi_bs;
>  static UINT32 __initdata efi_bs_revision;
>  static EFI_HANDLE __initdata efi_ih;
> 

So as an aside, IMHO this is where the series should end and the next
set of patches should be a follow on.
Doug Goldstein Jan. 10, 2017, 8:51 p.m. UTC | #4
On 1/9/17 7:37 PM, Doug Goldstein wrote:
> On 12/5/16 4:25 PM, Daniel Kiper wrote:

>> diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
>> index 62c010e..dc857d8 100644
>> --- a/xen/arch/x86/efi/efi-boot.h
>> +++ b/xen/arch/x86/efi/efi-boot.h
>> @@ -146,6 +146,8 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable,
>>  {
>>      struct e820entry *e;
>>      unsigned int i;
>> +    /* Check for extra mem for mbi data if Xen is loaded via multiboot2 protocol. */
>> +    UINTN extra_mem = efi_enabled(EFI_LOADER) ? 0 : (64 << 10);
> 
> Just wondering where the constant came from? And if there should be a
> little bit of information about it. To me its just weird to shift 64.

Its the size of the stack used in the assembly code.

> 
>>  
>>      /* Populate E820 table and check trampoline area availability. */
>>      e = e820map - 1;
>> @@ -168,7 +170,8 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable,
>>              /* fall through */
>>          case EfiConventionalMemory:
>>              if ( !trampoline_phys && desc->PhysicalStart + len <= 0x100000 &&
>> -                 len >= cfg.size && desc->PhysicalStart + len > cfg.addr )
>> +                 len >= cfg.size + extra_mem &&
>> +                 desc->PhysicalStart + len > cfg.addr )
>>                  cfg.addr = (desc->PhysicalStart + len - cfg.size) & PAGE_MASK;
> 
> So this is where the current series blows up and fails on real hardware.

Honestly this was my misunderstanding and this shouldn't ever be used to
get memory for the trampoline. This also has the bug in it that it needs
to be:

ASSERT(cfg.size > 0);
cfg.addr = (desc->PhysicalStart + len - (cfg.size + extra_mem) & PAGE_MASK;

> No where in the EFI + MB2 code path is cfg.size ever initialized. Its
> only initialized in the straight EFI case. The result is that cfg.addr
> is set to the section immediately following this. Took a bit to
> trackdown because I checked for memory overlaps with where Xen was
> loaded and where it was relocated to but forgot to check for overlaps
> with the trampoline code. This is the address where the trampoline jumps
> are copied.
> 
> Personally I'd like to see an ASSERT added or the code swizzled around
> in such a way that its not possible to get into a bad state. But this is
> probably another patch series.
> 
>>              /* fall through */
>>          case EfiLoaderCode:
>> @@ -210,12 +213,14 @@ static void *__init efi_arch_allocate_mmap_buffer(UINTN map_size)
>>  
>>  static void __init efi_arch_pre_exit_boot(void)
>>  {
>> -    if ( !trampoline_phys )
>> -    {
>> -        if ( !cfg.addr )
>> -            blexit(L"No memory for trampoline");
>> +    if ( trampoline_phys )
>> +        return;
>> +
>> +    if ( !cfg.addr )
>> +        blexit(L"No memory for trampoline");
>> +
>> +    if ( efi_enabled(EFI_LOADER) )
>>          relocate_trampoline(cfg.addr);

Why is this call even here anymore? Its called in
efi_arch_memory_setup() already. If it was unable to allocate memory
under the 1mb region its just going to trample over ANY conventional
memory region that might be in use.


>> -    }
>>  }
>>  
>>  static void __init noreturn efi_arch_post_exit_boot(void)
>> @@ -653,6 +658,43 @@ static bool_t __init efi_arch_use_config_file(EFI_SYSTEM_TABLE *SystemTable)
>>  
>>  static void efi_arch_flush_dcache_area(const void *vaddr, UINTN size) { }
>>  
>> +paddr_t __init efi_multiboot2(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable)
>> +{
>> +    EFI_GRAPHICS_OUTPUT_PROTOCOL *gop;
>> +    UINTN cols, gop_mode = ~0, rows;
>> +
>> +    __set_bit(EFI_BOOT, &efi_flags);
>> +    __set_bit(EFI_RS, &efi_flags);
>> +
>> +    efi_init(ImageHandle, SystemTable);
>> +
>> +    efi_console_set_mode();
>> +
>> +    if ( StdOut->QueryMode(StdOut, StdOut->Mode->Mode,
>> +                           &cols, &rows) == EFI_SUCCESS )
>> +        efi_arch_console_init(cols, rows);
>> +
>> +    gop = efi_get_gop();
>> +
>> +    if ( gop )
>> +        gop_mode = efi_find_gop_mode(gop, 0, 0, 0);
>> +
>> +    efi_arch_edd();
>> +    efi_arch_cpu();
>> +
>> +    efi_tables();
>> +    setup_efi_pci();
>> +    efi_variables();
> 
> This is probably where you missed the call to "efi_arch_memory_setup();"
> that gave me hell above.

Well it turns out that calling "efi_arch_memory_setup()" isn't correct
because it also messes with the page tables AND also does the trampoline
relocation. Which after this call finishes then it does the trampoline
relocations in assembly. The code currently makes the assumption it can
use any conventional memory range below 1mb (and unfortunately does the
math incorrectly and instead uses the region following the conventional
memory region). You need to use AllocatePages() otherwise you are
trampling memory that might have been allocated by the bootloader or any
multiboot modules (e.g. tboot) prior to Xen.

I'm just going to write a patch to fix the issues that I've found thus
far. I believe at this point there is something wrong with the page
tables because setting cr0 for the non-0 CPU in
trampoline_protmode_entry causes the machine to reboot.

I've also found where we have the possibility to call
relocate_trampoline() twice in the EFI case. Its protected by a check to
trampoline_phys but I'm not sure why we even have the code there to be
able to do so.
Daniel Kiper Jan. 11, 2017, 7:08 p.m. UTC | #5
On Mon, Jan 09, 2017 at 07:37:59PM -0600, Doug Goldstein wrote:
> On 12/5/16 4:25 PM, Daniel Kiper wrote:

[...]

> > diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
> > index 62c010e..dc857d8 100644
> > --- a/xen/arch/x86/efi/efi-boot.h
> > +++ b/xen/arch/x86/efi/efi-boot.h
> > @@ -146,6 +146,8 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable,
> >  {
> >      struct e820entry *e;
> >      unsigned int i;
> > +    /* Check for extra mem for mbi data if Xen is loaded via multiboot2 protocol. */
> > +    UINTN extra_mem = efi_enabled(EFI_LOADER) ? 0 : (64 << 10);
>
> Just wondering where the constant came from? And if there should be a
> little bit of information about it. To me its just weird to shift 64.

64 << 10 == 64 KiB which is the size of trampoline region reserved in
xen/arch/x86/boot/head.S. Though I think that we should reserve real
size needed for trampoline code. IIRC, it was discussed here earlier
but there is no go for it in this patch series.

> >      /* Populate E820 table and check trampoline area availability. */
> >      e = e820map - 1;
> > @@ -168,7 +170,8 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable,
> >              /* fall through */
> >          case EfiConventionalMemory:
> >              if ( !trampoline_phys && desc->PhysicalStart + len <= 0x100000 &&
> > -                 len >= cfg.size && desc->PhysicalStart + len > cfg.addr )
> > +                 len >= cfg.size + extra_mem &&
> > +                 desc->PhysicalStart + len > cfg.addr )
> >                  cfg.addr = (desc->PhysicalStart + len - cfg.size) & PAGE_MASK;
>
> So this is where the current series blows up and fails on real hardware.
> No where in the EFI + MB2 code path is cfg.size ever initialized. Its
> only initialized in the straight EFI case. The result is that cfg.addr
> is set to the section immediately following this. Took a bit to
> trackdown because I checked for memory overlaps with where Xen was
> loaded and where it was relocated to but forgot to check for overlaps
> with the trampoline code. This is the address where the trampoline jumps
> are copied.
>
> Personally I'd like to see an ASSERT added or the code swizzled around
> in such a way that its not possible to get into a bad state. But this is
> probably another patch series.

Nice catch! Thanks a lot. I think that we should initialize cfg.size = 64 << 10
in efi_multiboot2(). It looks like real fix. extra_mem stuff is bogus.

[...]

> > +paddr_t __init efi_multiboot2(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable)
> > +{
> > +    EFI_GRAPHICS_OUTPUT_PROTOCOL *gop;
> > +    UINTN cols, gop_mode = ~0, rows;
> > +
> > +    __set_bit(EFI_BOOT, &efi_flags);
> > +    __set_bit(EFI_RS, &efi_flags);
> > +
> > +    efi_init(ImageHandle, SystemTable);
> > +
> > +    efi_console_set_mode();
> > +
> > +    if ( StdOut->QueryMode(StdOut, StdOut->Mode->Mode,
> > +                           &cols, &rows) == EFI_SUCCESS )
> > +        efi_arch_console_init(cols, rows);
> > +
> > +    gop = efi_get_gop();
> > +
> > +    if ( gop )
> > +        gop_mode = efi_find_gop_mode(gop, 0, 0, 0);
> > +
> > +    efi_arch_edd();
> > +    efi_arch_cpu();
> > +
> > +    efi_tables();
> > +    setup_efi_pci();
> > +    efi_variables();
>
> This is probably where you missed the call to "efi_arch_memory_setup();"
> that gave me hell above.

This does not work in MB2 case.

[...]

> > diff --git a/xen/common/efi/boot.c b/xen/common/efi/boot.c
> > index 0a93e61..70ed836 100644
> > --- a/xen/common/efi/boot.c
> > +++ b/xen/common/efi/boot.c
> > @@ -79,6 +79,17 @@ static size_t wstrlen(const CHAR16 * s);
> >  static int set_color(u32 mask, int bpp, u8 *pos, u8 *sz);
> >  static bool_t match_guid(const EFI_GUID *guid1, const EFI_GUID *guid2);
> >
> > +static void efi_init(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable);
> > +static void efi_console_set_mode(void);
> > +static EFI_GRAPHICS_OUTPUT_PROTOCOL *efi_get_gop(void);
> > +static UINTN efi_find_gop_mode(EFI_GRAPHICS_OUTPUT_PROTOCOL *gop,
> > +                               UINTN cols, UINTN rows, UINTN depth);
> > +static void efi_tables(void);
> > +static void setup_efi_pci(void);
> > +static void efi_variables(void);
> > +static void efi_set_gop_mode(EFI_GRAPHICS_OUTPUT_PROTOCOL *gop, UINTN gop_mode);
> > +static void efi_exit_boot(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable);
> > +
> >  static const EFI_BOOT_SERVICES *__initdata efi_bs;
> >  static UINT32 __initdata efi_bs_revision;
> >  static EFI_HANDLE __initdata efi_ih;
> >
>
> So as an aside, IMHO this is where the series should end and the next
> set of patches should be a follow on.

Hmmm... Why? If you do not apply rest of patches then MB2 does not
work on all EFI platforms.

Daniel
Daniel Kiper Jan. 11, 2017, 7:47 p.m. UTC | #6
On Tue, Jan 10, 2017 at 02:51:27PM -0600, Doug Goldstein wrote:
> On 1/9/17 7:37 PM, Doug Goldstein wrote:
> > On 12/5/16 4:25 PM, Daniel Kiper wrote:
>
> >> diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
> >> index 62c010e..dc857d8 100644
> >> --- a/xen/arch/x86/efi/efi-boot.h
> >> +++ b/xen/arch/x86/efi/efi-boot.h
> >> @@ -146,6 +146,8 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable,
> >>  {
> >>      struct e820entry *e;
> >>      unsigned int i;
> >> +    /* Check for extra mem for mbi data if Xen is loaded via multiboot2 protocol. */
> >> +    UINTN extra_mem = efi_enabled(EFI_LOADER) ? 0 : (64 << 10);
> >
> > Just wondering where the constant came from? And if there should be a
> > little bit of information about it. To me its just weird to shift 64.
>
> Its the size of the stack used in the assembly code.

No, it is trampoline region size.

> >>      /* Populate E820 table and check trampoline area availability. */
> >>      e = e820map - 1;
> >> @@ -168,7 +170,8 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable,
> >>              /* fall through */
> >>          case EfiConventionalMemory:
> >>              if ( !trampoline_phys && desc->PhysicalStart + len <= 0x100000 &&
> >> -                 len >= cfg.size && desc->PhysicalStart + len > cfg.addr )
> >> +                 len >= cfg.size + extra_mem &&
> >> +                 desc->PhysicalStart + len > cfg.addr )
> >>                  cfg.addr = (desc->PhysicalStart + len - cfg.size) & PAGE_MASK;
> >
> > So this is where the current series blows up and fails on real hardware.
>
> Honestly this was my misunderstanding and this shouldn't ever be used to
> get memory for the trampoline. This also has the bug in it that it needs
> to be:
>
> ASSERT(cfg.size > 0);
> cfg.addr = (desc->PhysicalStart + len - (cfg.size + extra_mem) & PAGE_MASK;

As I said earlier. This extra_mem stuff is (maybe) wrong and should be fixed
in one way or another. Hmmm... It looks OK. I will double check it because
I do not looked at this code long time and maybe I am missing something.

> > No where in the EFI + MB2 code path is cfg.size ever initialized. Its
> > only initialized in the straight EFI case. The result is that cfg.addr
> > is set to the section immediately following this. Took a bit to
> > trackdown because I checked for memory overlaps with where Xen was
> > loaded and where it was relocated to but forgot to check for overlaps
> > with the trampoline code. This is the address where the trampoline jumps
> > are copied.
> >
> > Personally I'd like to see an ASSERT added or the code swizzled around
> > in such a way that its not possible to get into a bad state. But this is
> > probably another patch series.
> >
> >>              /* fall through */
> >>          case EfiLoaderCode:
> >> @@ -210,12 +213,14 @@ static void *__init efi_arch_allocate_mmap_buffer(UINTN map_size)
> >>
> >>  static void __init efi_arch_pre_exit_boot(void)
> >>  {
> >> -    if ( !trampoline_phys )
> >> -    {
> >> -        if ( !cfg.addr )
> >> -            blexit(L"No memory for trampoline");
> >> +    if ( trampoline_phys )
> >> +        return;
> >> +
> >> +    if ( !cfg.addr )
> >> +        blexit(L"No memory for trampoline");
> >> +
> >> +    if ( efi_enabled(EFI_LOADER) )
> >>          relocate_trampoline(cfg.addr);
>
> Why is this call even here anymore? Its called in
> efi_arch_memory_setup() already. If it was unable to allocate memory
> under the 1mb region its just going to trample over ANY conventional
> memory region that might be in use.

Trampoline is relocated in xen/arch/x86/boot/head.S.

> >> -    }
> >>  }
> >>
> >>  static void __init noreturn efi_arch_post_exit_boot(void)
> >> @@ -653,6 +658,43 @@ static bool_t __init efi_arch_use_config_file(EFI_SYSTEM_TABLE *SystemTable)
> >>
> >>  static void efi_arch_flush_dcache_area(const void *vaddr, UINTN size) { }
> >>
> >> +paddr_t __init efi_multiboot2(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable)
> >> +{
> >> +    EFI_GRAPHICS_OUTPUT_PROTOCOL *gop;
> >> +    UINTN cols, gop_mode = ~0, rows;
> >> +
> >> +    __set_bit(EFI_BOOT, &efi_flags);
> >> +    __set_bit(EFI_RS, &efi_flags);
> >> +
> >> +    efi_init(ImageHandle, SystemTable);
> >> +
> >> +    efi_console_set_mode();
> >> +
> >> +    if ( StdOut->QueryMode(StdOut, StdOut->Mode->Mode,
> >> +                           &cols, &rows) == EFI_SUCCESS )
> >> +        efi_arch_console_init(cols, rows);
> >> +
> >> +    gop = efi_get_gop();
> >> +
> >> +    if ( gop )
> >> +        gop_mode = efi_find_gop_mode(gop, 0, 0, 0);
> >> +
> >> +    efi_arch_edd();
> >> +    efi_arch_cpu();
> >> +
> >> +    efi_tables();
> >> +    setup_efi_pci();
> >> +    efi_variables();
> >
> > This is probably where you missed the call to "efi_arch_memory_setup();"
> > that gave me hell above.
>
> Well it turns out that calling "efi_arch_memory_setup()" isn't correct
> because it also messes with the page tables AND also does the trampoline

Yep.

> relocation. Which after this call finishes then it does the trampoline
> relocations in assembly. The code currently makes the assumption it can

I am not sure what do you mean here.

> use any conventional memory range below 1mb (and unfortunately does the
> math incorrectly and instead uses the region following the conventional

Which code? Which math?

> memory region). You need to use AllocatePages() otherwise you are
> trampling memory that might have been allocated by the bootloader or any

Bootloader code/data should be dead here.

> multiboot modules (e.g. tboot) prior to Xen.

How come?

> I'm just going to write a patch to fix the issues that I've found thus
> far. I believe at this point there is something wrong with the page
> tables because setting cr0 for the non-0 CPU in
> trampoline_protmode_entry causes the machine to reboot.

I have a feeling that problem lays somewhere else. Probably far before
trampoline_protmode_entry.

> I've also found where we have the possibility to call
> relocate_trampoline() twice in the EFI case. Its protected by a check to
> trampoline_phys but I'm not sure why we even have the code there to be
> able to do so.

Consider case when boot services use memory below 1 MiB.

Thank you for testing my patch series.

Daniel
Doug Goldstein Jan. 11, 2017, 7:50 p.m. UTC | #7
On 1/11/17 1:08 PM, Daniel Kiper wrote:
> On Mon, Jan 09, 2017 at 07:37:59PM -0600, Doug Goldstein wrote:
>> On 12/5/16 4:25 PM, Daniel Kiper wrote:
> 
> [...]
> 
>>> diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
>>> index 62c010e..dc857d8 100644
>>> --- a/xen/arch/x86/efi/efi-boot.h
>>> +++ b/xen/arch/x86/efi/efi-boot.h
>>> @@ -146,6 +146,8 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable,
>>>  {
>>>      struct e820entry *e;
>>>      unsigned int i;
>>> +    /* Check for extra mem for mbi data if Xen is loaded via multiboot2 protocol. */
>>> +    UINTN extra_mem = efi_enabled(EFI_LOADER) ? 0 : (64 << 10);
>>
>> Just wondering where the constant came from? And if there should be a
>> little bit of information about it. To me its just weird to shift 64.
> 
> 64 << 10 == 64 KiB which is the size of trampoline region reserved in
> xen/arch/x86/boot/head.S. Though I think that we should reserve real
> size needed for trampoline code. IIRC, it was discussed here earlier
> but there is no go for it in this patch series.

See my comments below but that's not entirely correct. But yes I would
avoid doing those changes in this series.

> 
>>>      /* Populate E820 table and check trampoline area availability. */
>>>      e = e820map - 1;
>>> @@ -168,7 +170,8 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable,
>>>              /* fall through */
>>>          case EfiConventionalMemory:
>>>              if ( !trampoline_phys && desc->PhysicalStart + len <= 0x100000 &&
>>> -                 len >= cfg.size && desc->PhysicalStart + len > cfg.addr )
>>> +                 len >= cfg.size + extra_mem &&
>>> +                 desc->PhysicalStart + len > cfg.addr )
>>>                  cfg.addr = (desc->PhysicalStart + len - cfg.size) & PAGE_MASK;
>>
>> So this is where the current series blows up and fails on real hardware.
>> No where in the EFI + MB2 code path is cfg.size ever initialized. Its
>> only initialized in the straight EFI case. The result is that cfg.addr
>> is set to the section immediately following this. Took a bit to
>> trackdown because I checked for memory overlaps with where Xen was
>> loaded and where it was relocated to but forgot to check for overlaps
>> with the trampoline code. This is the address where the trampoline jumps
>> are copied.
>>
>> Personally I'd like to see an ASSERT added or the code swizzled around
>> in such a way that its not possible to get into a bad state. But this is
>> probably another patch series.
> 
> Nice catch! Thanks a lot. I think that we should initialize cfg.size = 64 << 10
> in efi_multiboot2(). It looks like real fix. extra_mem stuff is bogus.
> 

Except in head.S you start the stack 64kb after the end of the
trampoline size. So cfg.size = (64 << 10); won't work. It needs to be
the size of the trampoline + 64k.


>>> +    efi_tables();
>>> +    setup_efi_pci();
>>> +    efi_variables();
>>
>> This is probably where you missed the call to "efi_arch_memory_setup();"
>> that gave me hell above.
> 
> This does not work in MB2 case.

You're looking at the oldest email. I've have further follow ups that
point that out and I've included a patch to fix the issues.

>>
>> So as an aside, IMHO this is where the series should end and the next
>> set of patches should be a follow on.
> 
> Hmmm... Why? If you do not apply rest of patches then MB2 does not
> work on all EFI platforms.
> 
> Daniel
> 

Q: How do you eat an elephant?
A: One bite at a time.

The point is we have 0 MB2 support presently. We can add it in
incremental hunks. Otherwise we get a patch series that's been floating
around for around 3 years and missed at least 2 releases where it should
have gotten in. We've only got several weeks before the 4.9 window
closes as well.
Doug Goldstein Jan. 11, 2017, 8:20 p.m. UTC | #8
On 1/11/17 1:47 PM, Daniel Kiper wrote:
> On Tue, Jan 10, 2017 at 02:51:27PM -0600, Doug Goldstein wrote:
>> On 1/9/17 7:37 PM, Doug Goldstein wrote:
>>> On 12/5/16 4:25 PM, Daniel Kiper wrote:
>>
>>>> diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
>>>> index 62c010e..dc857d8 100644
>>>> --- a/xen/arch/x86/efi/efi-boot.h
>>>> +++ b/xen/arch/x86/efi/efi-boot.h
>>>> @@ -146,6 +146,8 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable,
>>>>  {
>>>>      struct e820entry *e;
>>>>      unsigned int i;
>>>> +    /* Check for extra mem for mbi data if Xen is loaded via multiboot2 protocol. */
>>>> +    UINTN extra_mem = efi_enabled(EFI_LOADER) ? 0 : (64 << 10);
>>>
>>> Just wondering where the constant came from? And if there should be a
>>> little bit of information about it. To me its just weird to shift 64.
>>
>> Its the size of the stack used in the assembly code.
> 
> No, it is trampoline region size.

trampoline + stack in head.S We take the address where we're going to
copy the trampoline and set the stack to 0x10000 past it.


> 
>>>>      /* Populate E820 table and check trampoline area availability. */
>>>>      e = e820map - 1;
>>>> @@ -168,7 +170,8 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable,
>>>>              /* fall through */
>>>>          case EfiConventionalMemory:
>>>>              if ( !trampoline_phys && desc->PhysicalStart + len <= 0x100000 &&
>>>> -                 len >= cfg.size && desc->PhysicalStart + len > cfg.addr )
>>>> +                 len >= cfg.size + extra_mem &&
>>>> +                 desc->PhysicalStart + len > cfg.addr )
>>>>                  cfg.addr = (desc->PhysicalStart + len - cfg.size) & PAGE_MASK;
>>>
>>> So this is where the current series blows up and fails on real hardware.
>>
>> Honestly this was my misunderstanding and this shouldn't ever be used to
>> get memory for the trampoline. This also has the bug in it that it needs
>> to be:
>>
>> ASSERT(cfg.size > 0);
>> cfg.addr = (desc->PhysicalStart + len - (cfg.size + extra_mem) & PAGE_MASK;
> 
> As I said earlier. This extra_mem stuff is (maybe) wrong and should be fixed
> in one way or another. Hmmm... It looks OK. I will double check it because
> I do not looked at this code long time and maybe I am missing something.

cfg.size needs to be the size of the trampolines + stack.


>>> No where in the EFI + MB2 code path is cfg.size ever initialized. Its
>>> only initialized in the straight EFI case. The result is that cfg.addr
>>> is set to the section immediately following this. Took a bit to
>>> trackdown because I checked for memory overlaps with where Xen was
>>> loaded and where it was relocated to but forgot to check for overlaps
>>> with the trampoline code. This is the address where the trampoline jumps
>>> are copied.
>>>
>>> Personally I'd like to see an ASSERT added or the code swizzled around
>>> in such a way that its not possible to get into a bad state. But this is
>>> probably another patch series.
>>>
>>>>              /* fall through */
>>>>          case EfiLoaderCode:
>>>> @@ -210,12 +213,14 @@ static void *__init efi_arch_allocate_mmap_buffer(UINTN map_size)
>>>>
>>>>  static void __init efi_arch_pre_exit_boot(void)
>>>>  {
>>>> -    if ( !trampoline_phys )
>>>> -    {
>>>> -        if ( !cfg.addr )
>>>> -            blexit(L"No memory for trampoline");
>>>> +    if ( trampoline_phys )
>>>> +        return;
>>>> +
>>>> +    if ( !cfg.addr )
>>>> +        blexit(L"No memory for trampoline");
>>>> +
>>>> +    if ( efi_enabled(EFI_LOADER) )
>>>>          relocate_trampoline(cfg.addr);
>>
>> Why is this call even here anymore? Its called in
>> efi_arch_memory_setup() already. If it was unable to allocate memory
>> under the 1mb region its just going to trample over ANY conventional
>> memory region that might be in use.
> 
> Trampoline is relocated in xen/arch/x86/boot/head.S.

For the MB2/MB case. But for the straight EFI case its called in
efi_arch_memory_setup() and then you've added the wrapper of
efi_enabled(EFI_LOADER) which in theory would have it called again in
the straight EFI case if trampoline_phys isn't set and cfg.addr is set.


> 
>>>> -    }
>>>>  }
>>>>
>>>>  static void __init noreturn efi_arch_post_exit_boot(void)
>>>> @@ -653,6 +658,43 @@ static bool_t __init efi_arch_use_config_file(EFI_SYSTEM_TABLE *SystemTable)
>>>>
>>>>  static void efi_arch_flush_dcache_area(const void *vaddr, UINTN size) { }
>>>>
>>>> +paddr_t __init efi_multiboot2(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable)
>>>> +{
>>>> +    EFI_GRAPHICS_OUTPUT_PROTOCOL *gop;
>>>> +    UINTN cols, gop_mode = ~0, rows;
>>>> +
>>>> +    __set_bit(EFI_BOOT, &efi_flags);
>>>> +    __set_bit(EFI_RS, &efi_flags);
>>>> +
>>>> +    efi_init(ImageHandle, SystemTable);
>>>> +
>>>> +    efi_console_set_mode();
>>>> +
>>>> +    if ( StdOut->QueryMode(StdOut, StdOut->Mode->Mode,
>>>> +                           &cols, &rows) == EFI_SUCCESS )
>>>> +        efi_arch_console_init(cols, rows);
>>>> +
>>>> +    gop = efi_get_gop();
>>>> +
>>>> +    if ( gop )
>>>> +        gop_mode = efi_find_gop_mode(gop, 0, 0, 0);
>>>> +
>>>> +    efi_arch_edd();
>>>> +    efi_arch_cpu();
>>>> +
>>>> +    efi_tables();
>>>> +    setup_efi_pci();
>>>> +    efi_variables();
>>>
>>> This is probably where you missed the call to "efi_arch_memory_setup();"
>>> that gave me hell above.
>>
>> Well it turns out that calling "efi_arch_memory_setup()" isn't correct
>> because it also messes with the page tables AND also does the trampoline
> 
> Yep.
> 
>> relocation. Which after this call finishes then it does the trampoline
>> relocations in assembly. The code currently makes the assumption it can
> 
> I am not sure what do you mean here.
> 
>> use any conventional memory range below 1mb (and unfortunately does the
>> math incorrectly and instead uses the region following the conventional
> 
> Which code? Which math?

The code where cfg.size = 0 and the extra_mem was missing.

> 
>> memory region). You need to use AllocatePages() otherwise you are
>> trampling memory that might have been allocated by the bootloader or any
> 
> Bootloader code/data should be dead here.

Correct. Unfortunately on my Lenovo laptop and my Intel NUCs I can't
currently call ExitBootServices and a timer that iPXE has wired up has
some memory reserved down there and it was getting trampled. The real
answer is that we need to fix up stock Xen to be able to always call EBS.

> 
>> multiboot modules (e.g. tboot) prior to Xen.
> 
> How come?
> 
>> I'm just going to write a patch to fix the issues that I've found thus
>> far. I believe at this point there is something wrong with the page
>> tables because setting cr0 for the non-0 CPU in
>> trampoline_protmode_entry causes the machine to reboot.
> 
> I have a feeling that problem lays somewhere else. Probably far before
> trampoline_protmode_entry.
> 
>> I've also found where we have the possibility to call
>> relocate_trampoline() twice in the EFI case. Its protected by a check to
>> trampoline_phys but I'm not sure why we even have the code there to be
>> able to do so.
> 
> Consider case when boot services use memory below 1 MiB.
> 
> Thank you for testing my patch series.
> 
> Daniel
> 

So do we really need to go down below 1MiB? We're never going into
16-bit mode. Unless the other CPUs are starting up in 16-bit mode.

I'll keep whacking away at this until it lands on any piece of hardware
I've got.
Doug Goldstein Jan. 11, 2017, 8:31 p.m. UTC | #9
On 1/11/17 1:08 PM, Daniel Kiper wrote:
> On Mon, Jan 09, 2017 at 07:37:59PM -0600, Doug Goldstein wrote:
>> On 12/5/16 4:25 PM, Daniel Kiper wrote:
> 
> 
>>> diff --git a/xen/common/efi/boot.c b/xen/common/efi/boot.c
>>> index 0a93e61..70ed836 100644
>>> --- a/xen/common/efi/boot.c
>>> +++ b/xen/common/efi/boot.c
>>> @@ -79,6 +79,17 @@ static size_t wstrlen(const CHAR16 * s);
>>>  static int set_color(u32 mask, int bpp, u8 *pos, u8 *sz);
>>>  static bool_t match_guid(const EFI_GUID *guid1, const EFI_GUID *guid2);
>>>
>>> +static void efi_init(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable);
>>> +static void efi_console_set_mode(void);
>>> +static EFI_GRAPHICS_OUTPUT_PROTOCOL *efi_get_gop(void);
>>> +static UINTN efi_find_gop_mode(EFI_GRAPHICS_OUTPUT_PROTOCOL *gop,
>>> +                               UINTN cols, UINTN rows, UINTN depth);
>>> +static void efi_tables(void);
>>> +static void setup_efi_pci(void);
>>> +static void efi_variables(void);
>>> +static void efi_set_gop_mode(EFI_GRAPHICS_OUTPUT_PROTOCOL *gop, UINTN gop_mode);
>>> +static void efi_exit_boot(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable);
>>> +
>>>  static const EFI_BOOT_SERVICES *__initdata efi_bs;
>>>  static UINT32 __initdata efi_bs_revision;
>>>  static EFI_HANDLE __initdata efi_ih;
>>>
>>
>> So as an aside, IMHO this is where the series should end and the next
>> set of patches should be a follow on.
> 
> Hmmm... Why? If you do not apply rest of patches then MB2 does not
> work on all EFI platforms.
> 
> Daniel
> 

So I should have expanded more in my other email. I've got this series
pulled in on top of 4.8 along with different fixes as discussed on this
thread:

https://github.com/cardoe/xen/tree/48-and-daniel

This boots up on my NUC but reports the other CPUs as stuck and the
error is -5. This starts to come up on the Lenovo and it gets to near
where it starts the dom0 kernel and then blanks the screen and hard
hangs. This causes cr0 crashes on the other boards I've got access to.


I've also got the series only to this point with the fixes.

https://github.com/cardoe/xen/tree/48-and-daniel-sans-relocate

The later version boots up on my NUC with all CPUs. It still hangs on
the Lenovo. It works on the other boards. It also appears work under QEMU.
Daniel Kiper Jan. 11, 2017, 8:36 p.m. UTC | #10
On Wed, Jan 11, 2017 at 01:50:56PM -0600, Doug Goldstein wrote:
> On 1/11/17 1:08 PM, Daniel Kiper wrote:
> > On Mon, Jan 09, 2017 at 07:37:59PM -0600, Doug Goldstein wrote:
> >> On 12/5/16 4:25 PM, Daniel Kiper wrote:

[...]

> >>>      /* Populate E820 table and check trampoline area availability. */
> >>>      e = e820map - 1;
> >>> @@ -168,7 +170,8 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable,
> >>>              /* fall through */
> >>>          case EfiConventionalMemory:
> >>>              if ( !trampoline_phys && desc->PhysicalStart + len <= 0x100000 &&
> >>> -                 len >= cfg.size && desc->PhysicalStart + len > cfg.addr )
> >>> +                 len >= cfg.size + extra_mem &&
> >>> +                 desc->PhysicalStart + len > cfg.addr )
> >>>                  cfg.addr = (desc->PhysicalStart + len - cfg.size) & PAGE_MASK;
> >>
> >> So this is where the current series blows up and fails on real hardware.
> >> No where in the EFI + MB2 code path is cfg.size ever initialized. Its
> >> only initialized in the straight EFI case. The result is that cfg.addr
> >> is set to the section immediately following this. Took a bit to
> >> trackdown because I checked for memory overlaps with where Xen was
> >> loaded and where it was relocated to but forgot to check for overlaps
> >> with the trampoline code. This is the address where the trampoline jumps
> >> are copied.
> >>
> >> Personally I'd like to see an ASSERT added or the code swizzled around
> >> in such a way that its not possible to get into a bad state. But this is
> >> probably another patch series.
> >
> > Nice catch! Thanks a lot. I think that we should initialize cfg.size = 64 << 10
> > in efi_multiboot2(). It looks like real fix. extra_mem stuff is bogus.
>
> Except in head.S you start the stack 64kb after the end of the
> trampoline size. So cfg.size = (64 << 10); won't work. It needs to be
> the size of the trampoline + 64k.

I will double check it and drop you a line. Probably in 1-2 weeks.
Now I am tidying up my backlog after vacation.

> >>> +    efi_tables();
> >>> +    setup_efi_pci();
> >>> +    efi_variables();
> >>
> >> This is probably where you missed the call to "efi_arch_memory_setup();"
> >> that gave me hell above.
> >
> > This does not work in MB2 case.
>
> You're looking at the oldest email. I've have further follow ups that
> point that out and I've included a patch to fix the issues.

I have tried to reply to all your emails.

> >> So as an aside, IMHO this is where the series should end and the next
> >> set of patches should be a follow on.
> >
> > Hmmm... Why? If you do not apply rest of patches then MB2 does not
> > work on all EFI platforms.
> >
> > Daniel
> >
>
> Q: How do you eat an elephant?
> A: One bite at a time.
>
> The point is we have 0 MB2 support presently. We can add it in
> incremental hunks. Otherwise we get a patch series that's been floating

I think that nothing prevents maintainers to apply my patch series partially.
And many prerequisite patches went that way. However, I think that they should
not apply this patch without the rest (and more importantly patch series should
not end at this patch). If we do that we will provide MB2 functionality which
is broken and confuse users. However, if you think that it make sense please
convince maintainers to do that. Though I will not support such request.

> around for around 3 years and missed at least 2 releases where it should

Do not forget that it required a lot of changes in MB2 protocol, GRUB2 (changes
are in git repository and will come into next release), etc. And I have carried
almost all of that stuff myself. Please, believe me this is not easy task.

> have gotten in. We've only got several weeks before the 4.9 window
> closes as well.

Window closes at the end of the march. I am going to release new version
in 1-2 weeks after clearing my backlog. I hope that this patch series
will go into 4.9. At least I will do all my best to achieve that goal.

Daniel
Jan Beulich Jan. 12, 2017, 10:22 a.m. UTC | #11
>>> On 11.01.17 at 21:20, <cardoe@cardoe.com> wrote:
> So do we really need to go down below 1MiB? We're never going into
> 16-bit mode. Unless the other CPUs are starting up in 16-bit mode.

Well, AP startup works by providing an 8-bit page number value
for where the processor should start fetching instructions.

Jan
Daniel Kiper Jan. 12, 2017, 12:18 p.m. UTC | #12
On Wed, Jan 11, 2017 at 02:31:35PM -0600, Doug Goldstein wrote:
> On 1/11/17 1:08 PM, Daniel Kiper wrote:
> > On Mon, Jan 09, 2017 at 07:37:59PM -0600, Doug Goldstein wrote:
> >> On 12/5/16 4:25 PM, Daniel Kiper wrote:
> >>> diff --git a/xen/common/efi/boot.c b/xen/common/efi/boot.c
> >>> index 0a93e61..70ed836 100644
> >>> --- a/xen/common/efi/boot.c
> >>> +++ b/xen/common/efi/boot.c
> >>> @@ -79,6 +79,17 @@ static size_t wstrlen(const CHAR16 * s);
> >>>  static int set_color(u32 mask, int bpp, u8 *pos, u8 *sz);
> >>>  static bool_t match_guid(const EFI_GUID *guid1, const EFI_GUID *guid2);
> >>>
> >>> +static void efi_init(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable);
> >>> +static void efi_console_set_mode(void);
> >>> +static EFI_GRAPHICS_OUTPUT_PROTOCOL *efi_get_gop(void);
> >>> +static UINTN efi_find_gop_mode(EFI_GRAPHICS_OUTPUT_PROTOCOL *gop,
> >>> +                               UINTN cols, UINTN rows, UINTN depth);
> >>> +static void efi_tables(void);
> >>> +static void setup_efi_pci(void);
> >>> +static void efi_variables(void);
> >>> +static void efi_set_gop_mode(EFI_GRAPHICS_OUTPUT_PROTOCOL *gop, UINTN gop_mode);
> >>> +static void efi_exit_boot(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable);
> >>> +
> >>>  static const EFI_BOOT_SERVICES *__initdata efi_bs;
> >>>  static UINT32 __initdata efi_bs_revision;
> >>>  static EFI_HANDLE __initdata efi_ih;
> >>>
> >>
> >> So as an aside, IMHO this is where the series should end and the next
> >> set of patches should be a follow on.
> >
> > Hmmm... Why? If you do not apply rest of patches then MB2 does not
> > work on all EFI platforms.
> >
> > Daniel
>
> So I should have expanded more in my other email. I've got this series
> pulled in on top of 4.8 along with different fixes as discussed on this
> thread:
>
> https://github.com/cardoe/xen/tree/48-and-daniel
>
> This boots up on my NUC but reports the other CPUs as stuck and the
> error is -5. This starts to come up on the Lenovo and it gets to near
> where it starts the dom0 kernel and then blanks the screen and hard
> hangs. This causes cr0 crashes on the other boards I've got access to.
>
> I've also got the series only to this point with the fixes.
>
> https://github.com/cardoe/xen/tree/48-and-daniel-sans-relocate
>
> The later version boots up on my NUC with all CPUs. It still hangs on
> the Lenovo. It works on the other boards. It also appears work under QEMU.

AIUI, you are trying to add full (legacy BIOS and EFI) MB2 support to iPXE. Great!.
Though I think that you should do this in steps. First of all you should have MB2
fully running on legacy BIOS platforms. It is much simpler. If it works move to EFI
platforms. OVMF is good choice for start but of course finally tests should be done
on real hardware. You can do tests on legacy BIOS with just patch #01. If everything
works then apply whole patch series to Xen and add MB2 reloc functionality. If it
works move to EFI platform tests. It is important that you do EFI platform tests with
whole patch series. This way you avoid issues related to overwriting BS/RS code/data.

Daniel
Daniel Kiper Jan. 12, 2017, 12:50 p.m. UTC | #13
On Wed, Jan 11, 2017 at 02:20:15PM -0600, Doug Goldstein wrote:
> On 1/11/17 1:47 PM, Daniel Kiper wrote:
> > On Tue, Jan 10, 2017 at 02:51:27PM -0600, Doug Goldstein wrote:
> >> On 1/9/17 7:37 PM, Doug Goldstein wrote:
> >>> On 12/5/16 4:25 PM, Daniel Kiper wrote:
> >>
> >>>> diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
> >>>> index 62c010e..dc857d8 100644
> >>>> --- a/xen/arch/x86/efi/efi-boot.h
> >>>> +++ b/xen/arch/x86/efi/efi-boot.h
> >>>> @@ -146,6 +146,8 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable,
> >>>>  {
> >>>>      struct e820entry *e;
> >>>>      unsigned int i;
> >>>> +    /* Check for extra mem for mbi data if Xen is loaded via multiboot2 protocol. */
> >>>> +    UINTN extra_mem = efi_enabled(EFI_LOADER) ? 0 : (64 << 10);
> >>>
> >>> Just wondering where the constant came from? And if there should be a
> >>> little bit of information about it. To me its just weird to shift 64.
> >>
> >> Its the size of the stack used in the assembly code.
> >
> > No, it is trampoline region size.
>
> trampoline + stack in head.S We take the address where we're going to
> copy the trampoline and set the stack to 0x10000 past it.

I suppose that you think about this:

        /* Switch to low-memory stack.  */
        mov     sym_fs(trampoline_phys),%edi
        lea     0x10000(%edi),%esp

However, trampoline region size is (should be) 64 KiB. No way. Please
look below for more details.

> >>>>      /* Populate E820 table and check trampoline area availability. */
> >>>>      e = e820map - 1;
> >>>> @@ -168,7 +170,8 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable,
> >>>>              /* fall through */
> >>>>          case EfiConventionalMemory:
> >>>>              if ( !trampoline_phys && desc->PhysicalStart + len <= 0x100000 &&
> >>>> -                 len >= cfg.size && desc->PhysicalStart + len > cfg.addr )
> >>>> +                 len >= cfg.size + extra_mem &&
> >>>> +                 desc->PhysicalStart + len > cfg.addr )
> >>>>                  cfg.addr = (desc->PhysicalStart + len - cfg.size) & PAGE_MASK;
> >>>
> >>> So this is where the current series blows up and fails on real hardware.
> >>
> >> Honestly this was my misunderstanding and this shouldn't ever be used to
> >> get memory for the trampoline. This also has the bug in it that it needs
> >> to be:
> >>
> >> ASSERT(cfg.size > 0);
> >> cfg.addr = (desc->PhysicalStart + len - (cfg.size + extra_mem) & PAGE_MASK;
> >
> > As I said earlier. This extra_mem stuff is (maybe) wrong and should be fixed
> > in one way or another. Hmmm... It looks OK. I will double check it because
> > I do not looked at this code long time and maybe I am missing something.
>
> cfg.size needs to be the size of the trampolines + stack.

It looks that during some code rearrangement I moved one instruction too
much to trampoline_bios_setup. So, I can agree that right now cfg.size
should be properly initialized. Though it should be cfg.size = 64 << 10.
Then extra_mem should be dropped.

> >>> No where in the EFI + MB2 code path is cfg.size ever initialized. Its
> >>> only initialized in the straight EFI case. The result is that cfg.addr
> >>> is set to the section immediately following this. Took a bit to
> >>> trackdown because I checked for memory overlaps with where Xen was
> >>> loaded and where it was relocated to but forgot to check for overlaps
> >>> with the trampoline code. This is the address where the trampoline jumps
> >>> are copied.
> >>>
> >>> Personally I'd like to see an ASSERT added or the code swizzled around
> >>> in such a way that its not possible to get into a bad state. But this is
> >>> probably another patch series.
> >>>
> >>>>              /* fall through */
> >>>>          case EfiLoaderCode:
> >>>> @@ -210,12 +213,14 @@ static void *__init efi_arch_allocate_mmap_buffer(UINTN map_size)
> >>>>
> >>>>  static void __init efi_arch_pre_exit_boot(void)
> >>>>  {
> >>>> -    if ( !trampoline_phys )
> >>>> -    {
> >>>> -        if ( !cfg.addr )
> >>>> -            blexit(L"No memory for trampoline");
> >>>> +    if ( trampoline_phys )
> >>>> +        return;
> >>>> +
> >>>> +    if ( !cfg.addr )
> >>>> +        blexit(L"No memory for trampoline");
> >>>> +
> >>>> +    if ( efi_enabled(EFI_LOADER) )
> >>>>          relocate_trampoline(cfg.addr);
> >>
> >> Why is this call even here anymore? Its called in
> >> efi_arch_memory_setup() already. If it was unable to allocate memory
> >> under the 1mb region its just going to trample over ANY conventional
> >> memory region that might be in use.
> >
> > Trampoline is relocated in xen/arch/x86/boot/head.S.
>
> For the MB2/MB case. But for the straight EFI case its called in
> efi_arch_memory_setup() and then you've added the wrapper of

That is true.

> efi_enabled(EFI_LOADER) which in theory would have it called again in
> the straight EFI case if trampoline_phys isn't set and cfg.addr is set.

Why "in theory"?

> >>>> -    }
> >>>>  }
> >>>>
> >>>>  static void __init noreturn efi_arch_post_exit_boot(void)
> >>>> @@ -653,6 +658,43 @@ static bool_t __init efi_arch_use_config_file(EFI_SYSTEM_TABLE *SystemTable)
> >>>>
> >>>>  static void efi_arch_flush_dcache_area(const void *vaddr, UINTN size) { }
> >>>>
> >>>> +paddr_t __init efi_multiboot2(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable)
> >>>> +{
> >>>> +    EFI_GRAPHICS_OUTPUT_PROTOCOL *gop;
> >>>> +    UINTN cols, gop_mode = ~0, rows;
> >>>> +
> >>>> +    __set_bit(EFI_BOOT, &efi_flags);
> >>>> +    __set_bit(EFI_RS, &efi_flags);
> >>>> +
> >>>> +    efi_init(ImageHandle, SystemTable);
> >>>> +
> >>>> +    efi_console_set_mode();
> >>>> +
> >>>> +    if ( StdOut->QueryMode(StdOut, StdOut->Mode->Mode,
> >>>> +                           &cols, &rows) == EFI_SUCCESS )
> >>>> +        efi_arch_console_init(cols, rows);
> >>>> +
> >>>> +    gop = efi_get_gop();
> >>>> +
> >>>> +    if ( gop )
> >>>> +        gop_mode = efi_find_gop_mode(gop, 0, 0, 0);
> >>>> +
> >>>> +    efi_arch_edd();
> >>>> +    efi_arch_cpu();
> >>>> +
> >>>> +    efi_tables();
> >>>> +    setup_efi_pci();
> >>>> +    efi_variables();
> >>>
> >>> This is probably where you missed the call to "efi_arch_memory_setup();"
> >>> that gave me hell above.
> >>
> >> Well it turns out that calling "efi_arch_memory_setup()" isn't correct
> >> because it also messes with the page tables AND also does the trampoline
> >
> > Yep.
> >
> >> relocation. Which after this call finishes then it does the trampoline
> >> relocations in assembly. The code currently makes the assumption it can
> >
> > I am not sure what do you mean here.
> >
> >> use any conventional memory range below 1mb (and unfortunately does the
> >> math incorrectly and instead uses the region following the conventional
> >
> > Which code? Which math?
>
> The code where cfg.size = 0 and the extra_mem was missing.

This should be fixed as I stated above.

> >> memory region). You need to use AllocatePages() otherwise you are
> >> trampling memory that might have been allocated by the bootloader or any
> >
> > Bootloader code/data should be dead here.
>
> Correct. Unfortunately on my Lenovo laptop and my Intel NUCs I can't
> currently call ExitBootServices and a timer that iPXE has wired up has

If you disable an important wheel in a machine you should not expect
that the machine will work. Sorry! No way!

> some memory reserved down there and it was getting trampled. The real

I still do not know why remnants of iPXE should run at this Xen boot stage.
It looks like an iPXE bug and IMO it should be fixed first.

> answer is that we need to fix up stock Xen to be able to always call EBS.

It looks that ExitBootServices() is always called. So, I do not think that
anything have to be fixed.

Daniel
Doug Goldstein Jan. 12, 2017, 3:44 p.m. UTC | #14
On 1/12/17 6:18 AM, Daniel Kiper wrote:

>>>>
>>>> So as an aside, IMHO this is where the series should end and the next
>>>> set of patches should be a follow on.
>>>
>>> Hmmm... Why? If you do not apply rest of patches then MB2 does not
>>> work on all EFI platforms.
>>>
>>> Daniel
>>
>> So I should have expanded more in my other email. I've got this series
>> pulled in on top of 4.8 along with different fixes as discussed on this
>> thread:
>>
>> https://github.com/cardoe/xen/tree/48-and-daniel
>>
>> This boots up on my NUC but reports the other CPUs as stuck and the
>> error is -5. This starts to come up on the Lenovo and it gets to near
>> where it starts the dom0 kernel and then blanks the screen and hard
>> hangs. This causes cr0 crashes on the other boards I've got access to.
>>
>> I've also got the series only to this point with the fixes.
>>
>> https://github.com/cardoe/xen/tree/48-and-daniel-sans-relocate
>>
>> The later version boots up on my NUC with all CPUs. It still hangs on
>> the Lenovo. It works on the other boards. It also appears work under QEMU.
> 
> AIUI, you are trying to add full (legacy BIOS and EFI) MB2 support to iPXE. Great!.
> Though I think that you should do this in steps. First of all you should have MB2
> fully running on legacy BIOS platforms. It is much simpler. If it works move to EFI
> platforms. OVMF is good choice for start but of course finally tests should be done
> on real hardware. You can do tests on legacy BIOS with just patch #01. If everything
> works then apply whole patch series to Xen and add MB2 reloc functionality. If it
> works move to EFI platform tests. It is important that you do EFI platform tests with
> whole patch series. This way you avoid issues related to overwriting BS/RS code/data.
> 
> Daniel
> 

Daniel,

I appreciate your input. I do like the approach of splitting things up
into small incremental pieces, that's the way all this work should be
happening. You should also be aware that iPXE takes the approach of
least amount of functionality/code to make things work. So from their
view there's no reason for adding MB2 support for BIOS since it provides
no advantage over MB1 when booting from the BIOS. Now MB2 solves a
problem with booting over EFI vs MB1 so they'll be willing to take a
change there. I'll also disagree that BIOS is easier than EFI since with
EFI its just load the ELF into memory and set a few pointers in tags.
With BIOS it requires me to build up the memory map into a MB2 structure.

As far as it goes I've got iPXE booting MB2 EFI payloads just fine. The
issues I've explained here happen when I use Grub or iPXE to boot Xen so
its not implementation specific to my iPXE code.
Doug Goldstein Jan. 12, 2017, 3:52 p.m. UTC | #15
On 1/12/17 6:50 AM, Daniel Kiper wrote:
> On Wed, Jan 11, 2017 at 02:20:15PM -0600, Doug Goldstein wrote:
>> On 1/11/17 1:47 PM, Daniel Kiper wrote:
>>> On Tue, Jan 10, 2017 at 02:51:27PM -0600, Doug Goldstein wrote:
>>>> On 1/9/17 7:37 PM, Doug Goldstein wrote:
>>>>> On 12/5/16 4:25 PM, Daniel Kiper wrote:
>>>>
>>>>>> diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
>>>>>> index 62c010e..dc857d8 100644
>>>>>> --- a/xen/arch/x86/efi/efi-boot.h
>>>>>> +++ b/xen/arch/x86/efi/efi-boot.h
>>>>>> @@ -146,6 +146,8 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable,
>>>>>>  {
>>>>>>      struct e820entry *e;
>>>>>>      unsigned int i;
>>>>>> +    /* Check for extra mem for mbi data if Xen is loaded via multiboot2 protocol. */
>>>>>> +    UINTN extra_mem = efi_enabled(EFI_LOADER) ? 0 : (64 << 10);
>>>>>
>>>>> Just wondering where the constant came from? And if there should be a
>>>>> little bit of information about it. To me its just weird to shift 64.
>>>>
>>>> Its the size of the stack used in the assembly code.
>>>
>>> No, it is trampoline region size.
>>
>> trampoline + stack in head.S We take the address where we're going to
>> copy the trampoline and set the stack to 0x10000 past it.
> 
> I suppose that you think about this:
> 
>         /* Switch to low-memory stack.  */
>         mov     sym_fs(trampoline_phys),%edi
>         lea     0x10000(%edi),%esp
> 
> However, trampoline region size is (should be) 64 KiB. No way. Please
> look below for more details.

The trampoline + stack are 64kb together. The stack grows down and the
trampoline grows up. The stack starts at 64kb past the start of the
trampoline. %edi is the start of the trampoline.

> 
>>>>>>      /* Populate E820 table and check trampoline area availability. */
>>>>>>      e = e820map - 1;
>>>>>> @@ -168,7 +170,8 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable,
>>>>>>              /* fall through */
>>>>>>          case EfiConventionalMemory:
>>>>>>              if ( !trampoline_phys && desc->PhysicalStart + len <= 0x100000 &&
>>>>>> -                 len >= cfg.size && desc->PhysicalStart + len > cfg.addr )
>>>>>> +                 len >= cfg.size + extra_mem &&
>>>>>> +                 desc->PhysicalStart + len > cfg.addr )
>>>>>>                  cfg.addr = (desc->PhysicalStart + len - cfg.size) & PAGE_MASK;
>>>>>
>>>>> So this is where the current series blows up and fails on real hardware.
>>>>
>>>> Honestly this was my misunderstanding and this shouldn't ever be used to
>>>> get memory for the trampoline. This also has the bug in it that it needs
>>>> to be:
>>>>
>>>> ASSERT(cfg.size > 0);
>>>> cfg.addr = (desc->PhysicalStart + len - (cfg.size + extra_mem) & PAGE_MASK;
>>>
>>> As I said earlier. This extra_mem stuff is (maybe) wrong and should be fixed
>>> in one way or another. Hmmm... It looks OK. I will double check it because
>>> I do not looked at this code long time and maybe I am missing something.
>>
>> cfg.size needs to be the size of the trampolines + stack.
> 
> It looks that during some code rearrangement I moved one instruction too
> much to trampoline_bios_setup. So, I can agree that right now cfg.size
> should be properly initialized. Though it should be cfg.size = 64 << 10.
> Then extra_mem should be dropped.

That's fine as long as its clear that 64kb is for the trampoline + the
stack.

> 
>>>>> No where in the EFI + MB2 code path is cfg.size ever initialized. Its
>>>>> only initialized in the straight EFI case. The result is that cfg.addr
>>>>> is set to the section immediately following this. Took a bit to
>>>>> trackdown because I checked for memory overlaps with where Xen was
>>>>> loaded and where it was relocated to but forgot to check for overlaps
>>>>> with the trampoline code. This is the address where the trampoline jumps
>>>>> are copied.
>>>>>
>>>>> Personally I'd like to see an ASSERT added or the code swizzled around
>>>>> in such a way that its not possible to get into a bad state. But this is
>>>>> probably another patch series.
>>>>>
>>>>>>              /* fall through */
>>>>>>          case EfiLoaderCode:
>>>>>> @@ -210,12 +213,14 @@ static void *__init efi_arch_allocate_mmap_buffer(UINTN map_size)
>>>>>>
>>>>>>  static void __init efi_arch_pre_exit_boot(void)
>>>>>>  {
>>>>>> -    if ( !trampoline_phys )
>>>>>> -    {
>>>>>> -        if ( !cfg.addr )
>>>>>> -            blexit(L"No memory for trampoline");
>>>>>> +    if ( trampoline_phys )
>>>>>> +        return;
>>>>>> +
>>>>>> +    if ( !cfg.addr )
>>>>>> +        blexit(L"No memory for trampoline");
>>>>>> +
>>>>>> +    if ( efi_enabled(EFI_LOADER) )
>>>>>>          relocate_trampoline(cfg.addr);
>>>>
>>>> Why is this call even here anymore? Its called in
>>>> efi_arch_memory_setup() already. If it was unable to allocate memory
>>>> under the 1mb region its just going to trample over ANY conventional
>>>> memory region that might be in use.
>>>
>>> Trampoline is relocated in xen/arch/x86/boot/head.S.
>>
>> For the MB2/MB case. But for the straight EFI case its called in
>> efi_arch_memory_setup() and then you've added the wrapper of
> 
> That is true.
> 
>> efi_enabled(EFI_LOADER) which in theory would have it called again in
>> the straight EFI case if trampoline_phys isn't set and cfg.addr is set.
> 
> Why "in theory"?

ok drop the words "in theory" and the statement is fine.

> 
>>>>>> -    }
>>>>>>  }
>>>>>>
>>>>>>  static void __init noreturn efi_arch_post_exit_boot(void)
>>>>>> @@ -653,6 +658,43 @@ static bool_t __init efi_arch_use_config_file(EFI_SYSTEM_TABLE *SystemTable)
>>>>>>
>>>>>>  static void efi_arch_flush_dcache_area(const void *vaddr, UINTN size) { }
>>>>>>
>>>>>> +paddr_t __init efi_multiboot2(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable)
>>>>>> +{
>>>>>> +    EFI_GRAPHICS_OUTPUT_PROTOCOL *gop;
>>>>>> +    UINTN cols, gop_mode = ~0, rows;
>>>>>> +
>>>>>> +    __set_bit(EFI_BOOT, &efi_flags);
>>>>>> +    __set_bit(EFI_RS, &efi_flags);
>>>>>> +
>>>>>> +    efi_init(ImageHandle, SystemTable);
>>>>>> +
>>>>>> +    efi_console_set_mode();
>>>>>> +
>>>>>> +    if ( StdOut->QueryMode(StdOut, StdOut->Mode->Mode,
>>>>>> +                           &cols, &rows) == EFI_SUCCESS )
>>>>>> +        efi_arch_console_init(cols, rows);
>>>>>> +
>>>>>> +    gop = efi_get_gop();
>>>>>> +
>>>>>> +    if ( gop )
>>>>>> +        gop_mode = efi_find_gop_mode(gop, 0, 0, 0);
>>>>>> +
>>>>>> +    efi_arch_edd();
>>>>>> +    efi_arch_cpu();
>>>>>> +
>>>>>> +    efi_tables();
>>>>>> +    setup_efi_pci();
>>>>>> +    efi_variables();
>>>>>
>>>>> This is probably where you missed the call to "efi_arch_memory_setup();"
>>>>> that gave me hell above.
>>>>
>>>> Well it turns out that calling "efi_arch_memory_setup()" isn't correct
>>>> because it also messes with the page tables AND also does the trampoline
>>>
>>> Yep.
>>>
>>>> relocation. Which after this call finishes then it does the trampoline
>>>> relocations in assembly. The code currently makes the assumption it can
>>>
>>> I am not sure what do you mean here.
>>>
>>>> use any conventional memory range below 1mb (and unfortunately does the
>>>> math incorrectly and instead uses the region following the conventional
>>>
>>> Which code? Which math?
>>
>> The code where cfg.size = 0 and the extra_mem was missing.
> 
> This should be fixed as I stated above.
> 
>>>> memory region). You need to use AllocatePages() otherwise you are
>>>> trampling memory that might have been allocated by the bootloader or any
>>>
>>> Bootloader code/data should be dead here.
>>
>> Correct. Unfortunately on my Lenovo laptop and my Intel NUCs I can't
>> currently call ExitBootServices and a timer that iPXE has wired up has
> 
> If you disable an important wheel in a machine you should not expect
> that the machine will work. Sorry! No way!

Speak to your co-workers Konrad and Boris. We've had long email threads
about how certain hardware does not work with the way Xen calls
ExitBootServices.

> 
>> some memory reserved down there and it was getting trampled. The real
> 
> I still do not know why remnants of iPXE should run at this Xen boot stage.
> It looks like an iPXE bug and IMO it should be fixed first.

Like I said above, its because on this machine I am unable to call Xen's
EBS.

> 
>> answer is that we need to fix up stock Xen to be able to always call EBS.
> 
> It looks that ExitBootServices() is always called. So, I do not think that
> anything have to be fixed.

It is commented out of this board using the patchset that Konrad
submitted to the ML years ago.
Daniel Kiper Jan. 12, 2017, 7:30 p.m. UTC | #16
On Thu, Jan 12, 2017 at 09:44:59AM -0600, Doug Goldstein wrote:
> On 1/12/17 6:18 AM, Daniel Kiper wrote:
> >>>> So as an aside, IMHO this is where the series should end and the next
> >>>> set of patches should be a follow on.
> >>>
> >>> Hmmm... Why? If you do not apply rest of patches then MB2 does not
> >>> work on all EFI platforms.
> >>>
> >>> Daniel
> >>
> >> So I should have expanded more in my other email. I've got this series
> >> pulled in on top of 4.8 along with different fixes as discussed on this
> >> thread:
> >>
> >> https://github.com/cardoe/xen/tree/48-and-daniel
> >>
> >> This boots up on my NUC but reports the other CPUs as stuck and the
> >> error is -5. This starts to come up on the Lenovo and it gets to near
> >> where it starts the dom0 kernel and then blanks the screen and hard
> >> hangs. This causes cr0 crashes on the other boards I've got access to.
> >>
> >> I've also got the series only to this point with the fixes.
> >>
> >> https://github.com/cardoe/xen/tree/48-and-daniel-sans-relocate
> >>
> >> The later version boots up on my NUC with all CPUs. It still hangs on
> >> the Lenovo. It works on the other boards. It also appears work under QEMU.
> >
> > AIUI, you are trying to add full (legacy BIOS and EFI) MB2 support to iPXE. Great!.
> > Though I think that you should do this in steps. First of all you should have MB2
> > fully running on legacy BIOS platforms. It is much simpler. If it works move to EFI
> > platforms. OVMF is good choice for start but of course finally tests should be done
> > on real hardware. You can do tests on legacy BIOS with just patch #01. If everything
> > works then apply whole patch series to Xen and add MB2 reloc functionality. If it
> > works move to EFI platform tests. It is important that you do EFI platform tests with
> > whole patch series. This way you avoid issues related to overwriting BS/RS code/data.
> >
> > Daniel
>
> Daniel,
>
> I appreciate your input. I do like the approach of splitting things up
> into small incremental pieces, that's the way all this work should be
> happening. You should also be aware that iPXE takes the approach of
> least amount of functionality/code to make things work. So from their

Nice and appreciated but look below...

> view there's no reason for adding MB2 support for BIOS since it provides
> no advantage over MB1 when booting from the BIOS. Now MB2 solves a

From your point of view maybe it does not. However, from user point of view it may.
If you have support for MB2 on legacy BIOS and EFI platforms then you can boot Xen
on both platforms without changing anything in boot config files. Otherwise you have
to prepare separate configuration for different platforms.

> problem with booting over EFI vs MB1 so they'll be willing to take a
> change there. I'll also disagree that BIOS is easier than EFI since with
> EFI its just load the ELF into memory and set a few pointers in tags.
> With BIOS it requires me to build up the memory map into a MB2 structure.

Xen uses only these tags on legacy BIOS platforms: MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO
(well, nice to have but it can be also not provided), MULTIBOOT2_TAG_TYPE_MMAP (same
as MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO), MULTIBOOT2_TAG_TYPE_BOOT_LOADER_NAME
(same as MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO) ,MULTIBOOT2_TAG_TYPE_CMDLINE,
MULTIBOOT2_TAG_TYPE_MODULE. I do not mention MULTIBOOT2_TAG_TYPE_END which
is obvious. So, if you are real hardcore minimalist then you have to provide
MULTIBOOT2_TAG_TYPE_CMDLINE and MULTIBOOT2_TAG_TYPE_MODULE. All of them
are provided also on EFI. So, I do not see any reason to not provide MB2
for legacy BIOS. And I do not think that it is very difficult to provide
all optional tags mentioned above.

> As far as it goes I've got iPXE booting MB2 EFI payloads just fine. The
> issues I've explained here happen when I use Grub or iPXE to boot Xen so
> its not implementation specific to my iPXE code.

It looks that we have found the reason and solution for this problem.
I will fix it.

Daniel
Doug Goldstein Jan. 12, 2017, 7:46 p.m. UTC | #17
On 1/12/17 1:30 PM, Daniel Kiper wrote:
> On Thu, Jan 12, 2017 at 09:44:59AM -0600, Doug Goldstein wrote:

> 
>> view there's no reason for adding MB2 support for BIOS since it provides
>> no advantage over MB1 when booting from the BIOS. Now MB2 solves a
> 
> From your point of view maybe it does not. However, from user point of view it may.
> If you have support for MB2 on legacy BIOS and EFI platforms then you can boot Xen
> on both platforms without changing anything in boot config files. Otherwise you have
> to prepare separate configuration for different platforms.

Neither Grub nor iPXE require different configs for MB1 vs MB2 so I'm
not seeing the validity of this logic.

> 
>> problem with booting over EFI vs MB1 so they'll be willing to take a
>> change there. I'll also disagree that BIOS is easier than EFI since with
>> EFI its just load the ELF into memory and set a few pointers in tags.
>> With BIOS it requires me to build up the memory map into a MB2 structure.
> 
> Xen uses only these tags on legacy BIOS platforms: MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO
> (well, nice to have but it can be also not provided), MULTIBOOT2_TAG_TYPE_MMAP (same
> as MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO), MULTIBOOT2_TAG_TYPE_BOOT_LOADER_NAME
> (same as MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO) ,MULTIBOOT2_TAG_TYPE_CMDLINE,
> MULTIBOOT2_TAG_TYPE_MODULE. I do not mention MULTIBOOT2_TAG_TYPE_END which
> is obvious. So, if you are real hardcore minimalist then you have to provide
> MULTIBOOT2_TAG_TYPE_CMDLINE and MULTIBOOT2_TAG_TYPE_MODULE. All of them
> are provided also on EFI. So, I do not see any reason to not provide MB2
> for legacy BIOS. And I do not think that it is very difficult to provide
> all optional tags mentioned above.

I don't understand what you're attempting to convey here. You've listed
out a number of tags that I mentioned in my message that I don't have to
implement for EFI. You've basically reinforced my point that its easier
to implement this for EFI than BIOS. MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO
and MULTIBOOT2_TAG_TYPE_MMAP are unused by Xen on EFI. It gets this info
from a call to GetMemoryMap(). You actually reminded me of another bug.
Calling ExitBootServices() on Grub and letting it pass the memory info
causes Xen to fail to load.

Andrew helped me troubleshoot this and he discovered the fix. You've got
code:

/* Store Xen image load base address in place accessible for 32-bit code. */
mov %r15d,%esi

But if any of the checks under the run_bs: label specifically:
- /* Are EFI boot services available? */
- /* Is EFI SystemTable address provided by boot loader? */
- /* Is EFI ImageHandle address provided by boot loader? */

Will not run the mov instruction and then fail to boot. Its only if any
of these are false will it attempt to use the tags mentioned above as well.
Daniel Kiper Jan. 12, 2017, 8:28 p.m. UTC | #18
On Thu, Jan 12, 2017 at 09:52:15AM -0600, Doug Goldstein wrote:
> On 1/12/17 6:50 AM, Daniel Kiper wrote:
> > On Wed, Jan 11, 2017 at 02:20:15PM -0600, Doug Goldstein wrote:
> >> On 1/11/17 1:47 PM, Daniel Kiper wrote:
> >>> On Tue, Jan 10, 2017 at 02:51:27PM -0600, Doug Goldstein wrote:
> >>>> On 1/9/17 7:37 PM, Doug Goldstein wrote:
> >>>>> On 12/5/16 4:25 PM, Daniel Kiper wrote:
> >>>>
> >>>>>> diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
> >>>>>> index 62c010e..dc857d8 100644
> >>>>>> --- a/xen/arch/x86/efi/efi-boot.h
> >>>>>> +++ b/xen/arch/x86/efi/efi-boot.h
> >>>>>> @@ -146,6 +146,8 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable,
> >>>>>>  {
> >>>>>>      struct e820entry *e;
> >>>>>>      unsigned int i;
> >>>>>> +    /* Check for extra mem for mbi data if Xen is loaded via multiboot2 protocol. */
> >>>>>> +    UINTN extra_mem = efi_enabled(EFI_LOADER) ? 0 : (64 << 10);
> >>>>>
> >>>>> Just wondering where the constant came from? And if there should be a
> >>>>> little bit of information about it. To me its just weird to shift 64.
> >>>>
> >>>> Its the size of the stack used in the assembly code.
> >>>
> >>> No, it is trampoline region size.
> >>
> >> trampoline + stack in head.S We take the address where we're going to
> >> copy the trampoline and set the stack to 0x10000 past it.
> >
> > I suppose that you think about this:
> >
> >         /* Switch to low-memory stack.  */
> >         mov     sym_fs(trampoline_phys),%edi
> >         lea     0x10000(%edi),%esp
> >
> > However, trampoline region size is (should be) 64 KiB. No way. Please
> > look below for more details.
>
> The trampoline + stack are 64kb together. The stack grows down and the
> trampoline grows up. The stack starts at 64kb past the start of the
> trampoline. %edi is the start of the trampoline.

Yep. I think that right now we are on the same boat.

> >>>>>>      /* Populate E820 table and check trampoline area availability. */
> >>>>>>      e = e820map - 1;
> >>>>>> @@ -168,7 +170,8 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable,
> >>>>>>              /* fall through */
> >>>>>>          case EfiConventionalMemory:
> >>>>>>              if ( !trampoline_phys && desc->PhysicalStart + len <= 0x100000 &&
> >>>>>> -                 len >= cfg.size && desc->PhysicalStart + len > cfg.addr )
> >>>>>> +                 len >= cfg.size + extra_mem &&
> >>>>>> +                 desc->PhysicalStart + len > cfg.addr )
> >>>>>>                  cfg.addr = (desc->PhysicalStart + len - cfg.size) & PAGE_MASK;
> >>>>>
> >>>>> So this is where the current series blows up and fails on real hardware.
> >>>>
> >>>> Honestly this was my misunderstanding and this shouldn't ever be used to
> >>>> get memory for the trampoline. This also has the bug in it that it needs
> >>>> to be:
> >>>>
> >>>> ASSERT(cfg.size > 0);
> >>>> cfg.addr = (desc->PhysicalStart + len - (cfg.size + extra_mem) & PAGE_MASK;
> >>>
> >>> As I said earlier. This extra_mem stuff is (maybe) wrong and should be fixed
> >>> in one way or another. Hmmm... It looks OK. I will double check it because
> >>> I do not looked at this code long time and maybe I am missing something.
> >>
> >> cfg.size needs to be the size of the trampolines + stack.
> >
> > It looks that during some code rearrangement I moved one instruction too
> > much to trampoline_bios_setup. So, I can agree that right now cfg.size
> > should be properly initialized. Though it should be cfg.size = 64 << 10.
> > Then extra_mem should be dropped.
>
> That's fine as long as its clear that 64kb is for the trampoline + the
> stack.

OK, but there are two stacks. We talk about "low-memory stack". I will improve
the comment.

[...]

> >>>> memory region). You need to use AllocatePages() otherwise you are
> >>>> trampling memory that might have been allocated by the bootloader or any
> >>>
> >>> Bootloader code/data should be dead here.
> >>
> >> Correct. Unfortunately on my Lenovo laptop and my Intel NUCs I can't
> >> currently call ExitBootServices and a timer that iPXE has wired up has
> >
> > If you disable an important wheel in a machine you should not expect
> > that the machine will work. Sorry! No way!
>
> Speak to your co-workers Konrad and Boris. We've had long email threads
> about how certain hardware does not work with the way Xen calls
> ExitBootServices.

Could you be more precise what is wrong? Or at least send links to
relevant threads.

> >> some memory reserved down there and it was getting trampled. The real
> >
> > I still do not know why remnants of iPXE should run at this Xen boot stage.
> > It looks like an iPXE bug and IMO it should be fixed first.
>
> Like I said above, its because on this machine I am unable to call Xen's
> EBS.

I do not understand how ExitBootServices() call is related to iPXE timer remnants
or so. Though if it is related somehow then I think that you should blame machine
and/or iPXE designer/developer not Xen developer.

> >> answer is that we need to fix up stock Xen to be able to always call EBS.
> >
> > It looks that ExitBootServices() is always called. So, I do not think that
> > anything have to be fixed.
>
> It is commented out of this board using the patchset that Konrad
> submitted to the ML years ago.

I do not know what patchset do you mean. Could you send it?

Daniel
Daniel Kiper Jan. 12, 2017, 9:45 p.m. UTC | #19
On Thu, Jan 12, 2017 at 01:46:41PM -0600, Doug Goldstein wrote:
> On 1/12/17 1:30 PM, Daniel Kiper wrote:
> > On Thu, Jan 12, 2017 at 09:44:59AM -0600, Doug Goldstein wrote:
>
> >
> >> view there's no reason for adding MB2 support for BIOS since it provides
> >> no advantage over MB1 when booting from the BIOS. Now MB2 solves a
> >
> > From your point of view maybe it does not. However, from user point of view it may.
> > If you have support for MB2 on legacy BIOS and EFI platforms then you can boot Xen
> > on both platforms without changing anything in boot config files. Otherwise you have
> > to prepare separate configuration for different platforms.
>
> Neither Grub nor iPXE require different configs for MB1 vs MB2 so I'm
> not seeing the validity of this logic.

Hmmm... This is interesting. I do not know iPXE, however, in GRUB you must
use multiboot/module for MB1 and multiboot2/module2 for MB2. I suppose that
you have to differentiate between both of them in iPXE somehow too. Hence,
there is pretty good chance that configs for MB1 and MB2 are different.

> >> problem with booting over EFI vs MB1 so they'll be willing to take a
> >> change there. I'll also disagree that BIOS is easier than EFI since with
> >> EFI its just load the ELF into memory and set a few pointers in tags.
> >> With BIOS it requires me to build up the memory map into a MB2 structure.
> >
> > Xen uses only these tags on legacy BIOS platforms: MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO
> > (well, nice to have but it can be also not provided), MULTIBOOT2_TAG_TYPE_MMAP (same
> > as MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO), MULTIBOOT2_TAG_TYPE_BOOT_LOADER_NAME
> > (same as MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO) ,MULTIBOOT2_TAG_TYPE_CMDLINE,
> > MULTIBOOT2_TAG_TYPE_MODULE. I do not mention MULTIBOOT2_TAG_TYPE_END which
> > is obvious. So, if you are real hardcore minimalist then you have to provide
> > MULTIBOOT2_TAG_TYPE_CMDLINE and MULTIBOOT2_TAG_TYPE_MODULE. All of them
> > are provided also on EFI. So, I do not see any reason to not provide MB2
> > for legacy BIOS. And I do not think that it is very difficult to provide
> > all optional tags mentioned above.
>
> I don't understand what you're attempting to convey here. You've listed
> out a number of tags that I mentioned in my message that I don't have to
> implement for EFI. You've basically reinforced my point that its easier
> to implement this for EFI than BIOS. MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO
> and MULTIBOOT2_TAG_TYPE_MMAP are unused by Xen on EFI. It gets this info

I showed you that if you are real minimalist you can enable the same MB2 code
on legacy BIOS and EFI. I do not understand your objection against providing
MB2 in iPXE on legacy BIOS if you do not need extra code (maybe a few #ifdefs).
Though I am not going to convince you. It is your choice but I am still thinking
that it is wrong choice.

By the way, does iPXE check MULTIBOOT2_HEADER_TAG_INFORMATION_REQUEST in Xen header.
If it does (it should) and do not understand MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO and
MULTIBOOT2_TAG_TYPE_MMAP then it should fail.

> from a call to GetMemoryMap(). You actually reminded me of another bug.
> Calling ExitBootServices() on Grub and letting it pass the memory info
> causes Xen to fail to load.

How come... Which GRUB version do you use? Xen clearly says that it needs
boot services (look into MB2 header). So, GRUB is not allowed to call
ExitBootServices(). If it does then it is GRUB bug.

> Andrew helped me troubleshoot this and he discovered the fix. You've got
> code:
>
> /* Store Xen image load base address in place accessible for 32-bit code. */
> mov %r15d,%esi
>
> But if any of the checks under the run_bs: label specifically:
> - /* Are EFI boot services available? */
> - /* Is EFI SystemTable address provided by boot loader? */
> - /* Is EFI ImageHandle address provided by boot loader? */
>
> Will not run the mov instruction and then fail to boot. Its only if any
> of these are false will it attempt to use the tags mentioned above as well.

OK, this is a bug and I will fix it. However, this is not related to
ExitBootServices() call in GRUB2.

Daniel
Doug Goldstein Jan. 12, 2017, 10:20 p.m. UTC | #20
On 1/12/17 3:45 PM, Daniel Kiper wrote:
> On Thu, Jan 12, 2017 at 01:46:41PM -0600, Doug Goldstein wrote:
>> On 1/12/17 1:30 PM, Daniel Kiper wrote:
>>> On Thu, Jan 12, 2017 at 09:44:59AM -0600, Doug Goldstein wrote:
>>
>>>
>>>> view there's no reason for adding MB2 support for BIOS since it provides
>>>> no advantage over MB1 when booting from the BIOS. Now MB2 solves a
>>>
>>> From your point of view maybe it does not. However, from user point of view it may.
>>> If you have support for MB2 on legacy BIOS and EFI platforms then you can boot Xen
>>> on both platforms without changing anything in boot config files. Otherwise you have
>>> to prepare separate configuration for different platforms.
>>
>> Neither Grub nor iPXE require different configs for MB1 vs MB2 so I'm
>> not seeing the validity of this logic.
> 
> Hmmm... This is interesting. I do not know iPXE, however, in GRUB you must
> use multiboot/module for MB1 and multiboot2/module2 for MB2. I suppose that
> you have to differentiate between both of them in iPXE somehow too. Hence,
> there is pretty good chance that configs for MB1 and MB2 are different.

multiboot/multiboot2 and module/module2 are aliases of each other. They
work interchangeably. Its the same way in iPXE.


> 
>>>> problem with booting over EFI vs MB1 so they'll be willing to take a
>>>> change there. I'll also disagree that BIOS is easier than EFI since with
>>>> EFI its just load the ELF into memory and set a few pointers in tags.
>>>> With BIOS it requires me to build up the memory map into a MB2 structure.
>>>
>>> Xen uses only these tags on legacy BIOS platforms: MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO
>>> (well, nice to have but it can be also not provided), MULTIBOOT2_TAG_TYPE_MMAP (same
>>> as MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO), MULTIBOOT2_TAG_TYPE_BOOT_LOADER_NAME
>>> (same as MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO) ,MULTIBOOT2_TAG_TYPE_CMDLINE,
>>> MULTIBOOT2_TAG_TYPE_MODULE. I do not mention MULTIBOOT2_TAG_TYPE_END which
>>> is obvious. So, if you are real hardcore minimalist then you have to provide
>>> MULTIBOOT2_TAG_TYPE_CMDLINE and MULTIBOOT2_TAG_TYPE_MODULE. All of them
>>> are provided also on EFI. So, I do not see any reason to not provide MB2
>>> for legacy BIOS. And I do not think that it is very difficult to provide
>>> all optional tags mentioned above.
>>
>> I don't understand what you're attempting to convey here. You've listed
>> out a number of tags that I mentioned in my message that I don't have to
>> implement for EFI. You've basically reinforced my point that its easier
>> to implement this for EFI than BIOS. MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO
>> and MULTIBOOT2_TAG_TYPE_MMAP are unused by Xen on EFI. It gets this info
> 
> I showed you that if you are real minimalist you can enable the same MB2 code
> on legacy BIOS and EFI. I do not understand your objection against providing
> MB2 in iPXE on legacy BIOS if you do not need extra code (maybe a few #ifdefs).
> Though I am not going to convince you. It is your choice but I am still thinking
> that it is wrong choice.

Its not my choice. Its the feedback I've received from upstream.

> 
> By the way, does iPXE check MULTIBOOT2_HEADER_TAG_INFORMATION_REQUEST in Xen header.
> If it does (it should) and do not understand MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO and
> MULTIBOOT2_TAG_TYPE_MMAP then it should fail.

It does but I know that Xen doesn't use that information if Boot
Services are available by code inspection. Which is what my comments are
related to.

> 
>> from a call to GetMemoryMap(). You actually reminded me of another bug.
>> Calling ExitBootServices() on Grub and letting it pass the memory info
>> causes Xen to fail to load.
> 
> How come... Which GRUB version do you use? Xen clearly says that it needs
> boot services (look into MB2 header). So, GRUB is not allowed to call
> ExitBootServices(). If it does then it is GRUB bug.

No. That's not how it works at all. To quote 3.1.12 of the Multiboot2
spec...

"This tag indicates that payload supports starting without terminating
boot services."

This tag is not required to be respected but instead means that the
payload supports using boot services. Additionally section 3.6.3 which
talks about passing MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO states...

"This tag may not be provided by some boot loaders on EFI platforms if
EFI boot services are enabled and available for the loaded image (EFI
boot services not terminated tag exists in Multiboot2 information
structure)."

And section 3.6.8 talks about passing MULTIBOOT2_TAG_TYPE_MMAP states...

"This tag may not be provided by some boot loaders on EFI platforms if
EFI boot services are enabled and available for the loaded image (EFI
boot services not terminated tag exists in Multiboot2 information
structure)."


So for my iPXE support if the payload (in this case Xen) reports that it
supports not having boot services exited then I don't exit it and I
don't provide MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO or
MULTIBOOT2_TAG_TYPE_MMAP. Considering the fact that if Xen has boot
services available it ignores these two tags it seems what I've done
complies with the spec and complies with the only known full
implementation of MB2. (fwiw, the only implementations I know about are
tboot and your Xen patches. I've written a small application which dumps
out the data that was received but that was purely for debugging).


> 
>> Andrew helped me troubleshoot this and he discovered the fix. You've got
>> code:
>>
>> /* Store Xen image load base address in place accessible for 32-bit code. */
>> mov %r15d,%esi
>>
>> But if any of the checks under the run_bs: label specifically:
>> - /* Are EFI boot services available? */
>> - /* Is EFI SystemTable address provided by boot loader? */
>> - /* Is EFI ImageHandle address provided by boot loader? */
>>
>> Will not run the mov instruction and then fail to boot. Its only if any
>> of these are false will it attempt to use the tags mentioned above as well.
> 
> OK, this is a bug and I will fix it. However, this is not related to
> ExitBootServices() call in GRUB2.

Well it is. Because if boot services are not available then to goes the
path of the bug.
Doug Goldstein Jan. 12, 2017, 10:23 p.m. UTC | #21
On 1/12/17 2:28 PM, Daniel Kiper wrote:
> On Thu, Jan 12, 2017 at 09:52:15AM -0600, Doug Goldstein wrote:
>> On 1/12/17 6:50 AM, Daniel Kiper wrote:
>>> On Wed, Jan 11, 2017 at 02:20:15PM -0600, Doug Goldstein wrote:
>>>> On 1/11/17 1:47 PM, Daniel Kiper wrote:
>>>>> On Tue, Jan 10, 2017 at 02:51:27PM -0600, Doug Goldstein wrote:
>>>>>> On 1/9/17 7:37 PM, Doug Goldstein wrote:
>>>>>>> On 12/5/16 4:25 PM, Daniel Kiper wrote:
>>>>>>
>>>>>>>> diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
>>>>>>>> index 62c010e..dc857d8 100644
>>>>>>>> --- a/xen/arch/x86/efi/efi-boot.h
>>>>>>>> +++ b/xen/arch/x86/efi/efi-boot.h
>>>>>>>> @@ -146,6 +146,8 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable,
>>>>>>>>  {
>>>>>>>>      struct e820entry *e;
>>>>>>>>      unsigned int i;
>>>>>>>> +    /* Check for extra mem for mbi data if Xen is loaded via multiboot2 protocol. */
>>>>>>>> +    UINTN extra_mem = efi_enabled(EFI_LOADER) ? 0 : (64 << 10);
>>>>>>>
>>>>>>> Just wondering where the constant came from? And if there should be a
>>>>>>> little bit of information about it. To me its just weird to shift 64.
>>>>>>
>>>>>> Its the size of the stack used in the assembly code.
>>>>>
>>>>> No, it is trampoline region size.
>>>>
>>>> trampoline + stack in head.S We take the address where we're going to
>>>> copy the trampoline and set the stack to 0x10000 past it.
>>>
>>> I suppose that you think about this:
>>>
>>>         /* Switch to low-memory stack.  */
>>>         mov     sym_fs(trampoline_phys),%edi
>>>         lea     0x10000(%edi),%esp
>>>
>>> However, trampoline region size is (should be) 64 KiB. No way. Please
>>> look below for more details.
>>
>> The trampoline + stack are 64kb together. The stack grows down and the
>> trampoline grows up. The stack starts at 64kb past the start of the
>> trampoline. %edi is the start of the trampoline.
> 
> Yep. I think that right now we are on the same boat.
> 
>>>>>>>>      /* Populate E820 table and check trampoline area availability. */
>>>>>>>>      e = e820map - 1;
>>>>>>>> @@ -168,7 +170,8 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable,
>>>>>>>>              /* fall through */
>>>>>>>>          case EfiConventionalMemory:
>>>>>>>>              if ( !trampoline_phys && desc->PhysicalStart + len <= 0x100000 &&
>>>>>>>> -                 len >= cfg.size && desc->PhysicalStart + len > cfg.addr )
>>>>>>>> +                 len >= cfg.size + extra_mem &&
>>>>>>>> +                 desc->PhysicalStart + len > cfg.addr )
>>>>>>>>                  cfg.addr = (desc->PhysicalStart + len - cfg.size) & PAGE_MASK;
>>>>>>>
>>>>>>> So this is where the current series blows up and fails on real hardware.
>>>>>>
>>>>>> Honestly this was my misunderstanding and this shouldn't ever be used to
>>>>>> get memory for the trampoline. This also has the bug in it that it needs
>>>>>> to be:
>>>>>>
>>>>>> ASSERT(cfg.size > 0);
>>>>>> cfg.addr = (desc->PhysicalStart + len - (cfg.size + extra_mem) & PAGE_MASK;
>>>>>
>>>>> As I said earlier. This extra_mem stuff is (maybe) wrong and should be fixed
>>>>> in one way or another. Hmmm... It looks OK. I will double check it because
>>>>> I do not looked at this code long time and maybe I am missing something.
>>>>
>>>> cfg.size needs to be the size of the trampolines + stack.
>>>
>>> It looks that during some code rearrangement I moved one instruction too
>>> much to trampoline_bios_setup. So, I can agree that right now cfg.size
>>> should be properly initialized. Though it should be cfg.size = 64 << 10.
>>> Then extra_mem should be dropped.
>>
>> That's fine as long as its clear that 64kb is for the trampoline + the
>> stack.
> 
> OK, but there are two stacks. We talk about "low-memory stack". I will improve
> the comment.
> 
> [...]
> 
>>>>>> memory region). You need to use AllocatePages() otherwise you are
>>>>>> trampling memory that might have been allocated by the bootloader or any
>>>>>
>>>>> Bootloader code/data should be dead here.
>>>>
>>>> Correct. Unfortunately on my Lenovo laptop and my Intel NUCs I can't
>>>> currently call ExitBootServices and a timer that iPXE has wired up has
>>>
>>> If you disable an important wheel in a machine you should not expect
>>> that the machine will work. Sorry! No way!
>>
>> Speak to your co-workers Konrad and Boris. We've had long email threads
>> about how certain hardware does not work with the way Xen calls
>> ExitBootServices.
> 
> Could you be more precise what is wrong? Or at least send links to
> relevant threads.

There have been several on the ML over the past 2 years. A quick Google
search turns these up.

http://xen.markmail.org/message/f6lx2ab4o2fch35r
https://lists.xenproject.org/archives/html/xen-devel/2015-01/msg03164.html


> 
>>>> some memory reserved down there and it was getting trampled. The real
>>>
>>> I still do not know why remnants of iPXE should run at this Xen boot stage.
>>> It looks like an iPXE bug and IMO it should be fixed first.
>>
>> Like I said above, its because on this machine I am unable to call Xen's
>> EBS.
> 
> I do not understand how ExitBootServices() call is related to iPXE timer remnants
> or so. Though if it is related somehow then I think that you should blame machine
> and/or iPXE designer/developer not Xen developer.

iPXE registers a callback for when EBS is called to clean up a timer.

> 
>>>> answer is that we need to fix up stock Xen to be able to always call EBS.
>>>
>>> It looks that ExitBootServices() is always called. So, I do not think that
>>> anything have to be fixed.
>>
>> It is commented out of this board using the patchset that Konrad
>> submitted to the ML years ago.
> 
> I do not know what patchset do you mean. Could you send it?
> 

See the above link.
Daniel Kiper Jan. 12, 2017, 11:44 p.m. UTC | #22
On Thu, Jan 12, 2017 at 04:20:00PM -0600, Doug Goldstein wrote:
> On 1/12/17 3:45 PM, Daniel Kiper wrote:
> > On Thu, Jan 12, 2017 at 01:46:41PM -0600, Doug Goldstein wrote:
> >> On 1/12/17 1:30 PM, Daniel Kiper wrote:
> >>> On Thu, Jan 12, 2017 at 09:44:59AM -0600, Doug Goldstein wrote:
> >>>> view there's no reason for adding MB2 support for BIOS since it provides
> >>>> no advantage over MB1 when booting from the BIOS. Now MB2 solves a
> >>>
> >>> From your point of view maybe it does not. However, from user point of view it may.
> >>> If you have support for MB2 on legacy BIOS and EFI platforms then you can boot Xen
> >>> on both platforms without changing anything in boot config files. Otherwise you have
> >>> to prepare separate configuration for different platforms.
> >>
> >> Neither Grub nor iPXE require different configs for MB1 vs MB2 so I'm
> >> not seeing the validity of this logic.
> >
> > Hmmm... This is interesting. I do not know iPXE, however, in GRUB you must
> > use multiboot/module for MB1 and multiboot2/module2 for MB2. I suppose that
> > you have to differentiate between both of them in iPXE somehow too. Hence,
> > there is pretty good chance that configs for MB1 and MB2 are different.
>
> multiboot/multiboot2 and module/module2 are aliases of each other. They
> work interchangeably. Its the same way in iPXE.

If you carefully look at GRUB2 code and how multiboot and multiboot2
modules are build you quickly realize that they are not aliases.
Though I do not how it works in iPXE.

> >>>> problem with booting over EFI vs MB1 so they'll be willing to take a
> >>>> change there. I'll also disagree that BIOS is easier than EFI since with
> >>>> EFI its just load the ELF into memory and set a few pointers in tags.
> >>>> With BIOS it requires me to build up the memory map into a MB2 structure.
> >>>
> >>> Xen uses only these tags on legacy BIOS platforms: MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO
> >>> (well, nice to have but it can be also not provided), MULTIBOOT2_TAG_TYPE_MMAP (same
> >>> as MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO), MULTIBOOT2_TAG_TYPE_BOOT_LOADER_NAME
> >>> (same as MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO) ,MULTIBOOT2_TAG_TYPE_CMDLINE,
> >>> MULTIBOOT2_TAG_TYPE_MODULE. I do not mention MULTIBOOT2_TAG_TYPE_END which
> >>> is obvious. So, if you are real hardcore minimalist then you have to provide
> >>> MULTIBOOT2_TAG_TYPE_CMDLINE and MULTIBOOT2_TAG_TYPE_MODULE. All of them
> >>> are provided also on EFI. So, I do not see any reason to not provide MB2
> >>> for legacy BIOS. And I do not think that it is very difficult to provide
> >>> all optional tags mentioned above.
> >>
> >> I don't understand what you're attempting to convey here. You've listed
> >> out a number of tags that I mentioned in my message that I don't have to
> >> implement for EFI. You've basically reinforced my point that its easier
> >> to implement this for EFI than BIOS. MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO
> >> and MULTIBOOT2_TAG_TYPE_MMAP are unused by Xen on EFI. It gets this info
> >
> > I showed you that if you are real minimalist you can enable the same MB2 code
> > on legacy BIOS and EFI. I do not understand your objection against providing
> > MB2 in iPXE on legacy BIOS if you do not need extra code (maybe a few #ifdefs).
> > Though I am not going to convince you. It is your choice but I am still thinking
> > that it is wrong choice.
>
> Its not my choice. Its the feedback I've received from upstream.

OK, they are iPXE maintainers. Though it still does not change my
opinion about their decision.

> > By the way, does iPXE check MULTIBOOT2_HEADER_TAG_INFORMATION_REQUEST in Xen header.
> > If it does (it should) and do not understand MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO and
> > MULTIBOOT2_TAG_TYPE_MMAP then it should fail.
>
> It does but I know that Xen doesn't use that information if Boot
> Services are available by code inspection. Which is what my comments are
> related to.

I am not sure that you correctly understood what I mean. Please read
multiboot2 "Information request header tag" section for more details.
iPXE should obey this rule even if you do not provide MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO
and MULTIBOOT2_TAG_TYPE_MMAP. The same is relevant for other tags in
image header. To be precise: I mean that iPXE should complain if it
sees __REQUESTS__ for unknown tags.

> >> from a call to GetMemoryMap(). You actually reminded me of another bug.
> >> Calling ExitBootServices() on Grub and letting it pass the memory info
> >> causes Xen to fail to load.
> >
> > How come... Which GRUB version do you use? Xen clearly says that it needs
> > boot services (look into MB2 header). So, GRUB is not allowed to call
> > ExitBootServices(). If it does then it is GRUB bug.
>
> No. That's not how it works at all. To quote 3.1.12 of the Multiboot2
> spec...
>
> "This tag indicates that payload supports starting without terminating
> boot services."
>
> This tag is not required to be respected but instead means that the
> payload supports using boot services. Additionally section 3.6.3 which

Ahhh... Right, I forgot about that.

> talks about passing MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO states...
>
> "This tag may not be provided by some boot loaders on EFI platforms if
> EFI boot services are enabled and available for the loaded image (EFI
> boot services not terminated tag exists in Multiboot2 information
> structure)."
>
> And section 3.6.8 talks about passing MULTIBOOT2_TAG_TYPE_MMAP states...
>
> "This tag may not be provided by some boot loaders on EFI platforms if
> EFI boot services are enabled and available for the loaded image (EFI
> boot services not terminated tag exists in Multiboot2 information
> structure)."
>
> So for my iPXE support if the payload (in this case Xen) reports that it
> supports not having boot services exited then I don't exit it and I
> don't provide MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO or
> MULTIBOOT2_TAG_TYPE_MMAP. Considering the fact that if Xen has boot
> services available it ignores these two tags it seems what I've done
> complies with the spec and complies with the only known full
> implementation of MB2. (fwiw, the only implementations I know about are
> tboot and your Xen patches. I've written a small application which dumps
> out the data that was received but that was purely for debugging).

Yep, you can do that and I have never ever said that you have to provide both
above mentioned tags. Even on legacy BIOS platforms.

> >> Andrew helped me troubleshoot this and he discovered the fix. You've got
> >> code:
> >>
> >> /* Store Xen image load base address in place accessible for 32-bit code. */
> >> mov %r15d,%esi
> >>
> >> But if any of the checks under the run_bs: label specifically:
> >> - /* Are EFI boot services available? */
> >> - /* Is EFI SystemTable address provided by boot loader? */
> >> - /* Is EFI ImageHandle address provided by boot loader? */
> >>
> >> Will not run the mov instruction and then fail to boot. Its only if any
> >> of these are false will it attempt to use the tags mentioned above as well.
> >
> > OK, this is a bug and I will fix it. However, this is not related to
> > ExitBootServices() call in GRUB2.
>
> Well it is. Because if boot services are not available then to goes the
> path of the bug.

By chance you have triggered this bug by shutting down boot services in the
boot loader but it is also possible to do that in a bit different way
unrelated to the EFI stuff at all.

Daniel
Daniel Kiper Jan. 13, 2017, 12:04 a.m. UTC | #23
On Thu, Jan 12, 2017 at 04:23:59PM -0600, Doug Goldstein wrote:
> On 1/12/17 2:28 PM, Daniel Kiper wrote:
> > On Thu, Jan 12, 2017 at 09:52:15AM -0600, Doug Goldstein wrote:
> >> On 1/12/17 6:50 AM, Daniel Kiper wrote:
> >>> On Wed, Jan 11, 2017 at 02:20:15PM -0600, Doug Goldstein wrote:
> >>>> On 1/11/17 1:47 PM, Daniel Kiper wrote:
> >>>>> On Tue, Jan 10, 2017 at 02:51:27PM -0600, Doug Goldstein wrote:
> >>>>>> On 1/9/17 7:37 PM, Doug Goldstein wrote:

[...]

> >>>>>> memory region). You need to use AllocatePages() otherwise you are
> >>>>>> trampling memory that might have been allocated by the bootloader or any
> >>>>>
> >>>>> Bootloader code/data should be dead here.
> >>>>
> >>>> Correct. Unfortunately on my Lenovo laptop and my Intel NUCs I can't
> >>>> currently call ExitBootServices and a timer that iPXE has wired up has
> >>>
> >>> If you disable an important wheel in a machine you should not expect
> >>> that the machine will work. Sorry! No way!
> >>
> >> Speak to your co-workers Konrad and Boris. We've had long email threads
> >> about how certain hardware does not work with the way Xen calls
> >> ExitBootServices.
> >
> > Could you be more precise what is wrong? Or at least send links to
> > relevant threads.
>
> There have been several on the ML over the past 2 years. A quick Google
> search turns these up.
>
> http://xen.markmail.org/message/f6lx2ab4o2fch35r
> https://lists.xenproject.org/archives/html/xen-devel/2015-01/msg03164.html

This is more or less what I expected. However, IIRC, it was not related
to ExitBootServices() itself. The problem was that some runtime services
code lived in boot services code and data regions. So, I suppose that if
you map boot services code and data regions with runtime services code
and data everything should work. However, I have just realized that we
need an option to enable this functionality from GRUB2 command line.
Though you can do a test by setting map_bs to 1 at the beginning of
efi_multiboot2(). Do not forget to enable ExitBootServices() call in Xen.

> >>>> some memory reserved down there and it was getting trampled. The real
> >>>
> >>> I still do not know why remnants of iPXE should run at this Xen boot stage.
> >>> It looks like an iPXE bug and IMO it should be fixed first.
> >>
> >> Like I said above, its because on this machine I am unable to call Xen's
> >> EBS.
> >
> > I do not understand how ExitBootServices() call is related to iPXE timer remnants
> > or so. Though if it is related somehow then I think that you should blame machine
> > and/or iPXE designer/developer not Xen developer.
>
> iPXE registers a callback for when EBS is called to clean up a timer.

Could not you unregister this callback just before jump to the Xen image?
I do not think it is needed for Xen boot.

Daniel
Doug Goldstein Jan. 13, 2017, 12:35 a.m. UTC | #24
On 1/12/17 6:04 PM, Daniel Kiper wrote:
> On Thu, Jan 12, 2017 at 04:23:59PM -0600, Doug Goldstein wrote:
>> On 1/12/17 2:28 PM, Daniel Kiper wrote:
>>> On Thu, Jan 12, 2017 at 09:52:15AM -0600, Doug Goldstein wrote:
>>>> On 1/12/17 6:50 AM, Daniel Kiper wrote:
>>>>> On Wed, Jan 11, 2017 at 02:20:15PM -0600, Doug Goldstein wrote:
>>>>>> On 1/11/17 1:47 PM, Daniel Kiper wrote:
>>>>>>> On Tue, Jan 10, 2017 at 02:51:27PM -0600, Doug Goldstein wrote:
>>>>>>>> On 1/9/17 7:37 PM, Doug Goldstein wrote:
> 
> [...]
> 
>>>>>>>> memory region). You need to use AllocatePages() otherwise you are
>>>>>>>> trampling memory that might have been allocated by the bootloader or any
>>>>>>>
>>>>>>> Bootloader code/data should be dead here.
>>>>>>
>>>>>> Correct. Unfortunately on my Lenovo laptop and my Intel NUCs I can't
>>>>>> currently call ExitBootServices and a timer that iPXE has wired up has
>>>>>
>>>>> If you disable an important wheel in a machine you should not expect
>>>>> that the machine will work. Sorry! No way!
>>>>
>>>> Speak to your co-workers Konrad and Boris. We've had long email threads
>>>> about how certain hardware does not work with the way Xen calls
>>>> ExitBootServices.
>>>
>>> Could you be more precise what is wrong? Or at least send links to
>>> relevant threads.
>>
>> There have been several on the ML over the past 2 years. A quick Google
>> search turns these up.
>>
>> http://xen.markmail.org/message/f6lx2ab4o2fch35r
>> https://lists.xenproject.org/archives/html/xen-devel/2015-01/msg03164.html
> 
> This is more or less what I expected. However, IIRC, it was not related
> to ExitBootServices() itself. The problem was that some runtime services
> code lived in boot services code and data regions. So, I suppose that if
> you map boot services code and data regions with runtime services code
> and data everything should work. However, I have just realized that we
> need an option to enable this functionality from GRUB2 command line.
> Though you can do a test by setting map_bs to 1 at the beginning of
> efi_multiboot2(). Do not forget to enable ExitBootServices() call in Xen.
> 
>>>>>> some memory reserved down there and it was getting trampled. The real
>>>>>
>>>>> I still do not know why remnants of iPXE should run at this Xen boot stage.
>>>>> It looks like an iPXE bug and IMO it should be fixed first.
>>>>
>>>> Like I said above, its because on this machine I am unable to call Xen's
>>>> EBS.
>>>
>>> I do not understand how ExitBootServices() call is related to iPXE timer remnants
>>> or so. Though if it is related somehow then I think that you should blame machine
>>> and/or iPXE designer/developer not Xen developer.
>>
>> iPXE registers a callback for when EBS is called to clean up a timer.
> 
> Could not you unregister this callback just before jump to the Xen image?
> I do not think it is needed for Xen boot.

Yep. Already done and merged. But my point is we should prefer to use
AllocatePages() and only fall back to trampling any conventional memory
region if the call didn't work.
Doug Goldstein Jan. 13, 2017, 12:37 a.m. UTC | #25
On 1/12/17 6:04 PM, Daniel Kiper wrote:
> On Thu, Jan 12, 2017 at 04:23:59PM -0600, Doug Goldstein wrote:
>> On 1/12/17 2:28 PM, Daniel Kiper wrote:
>>> On Thu, Jan 12, 2017 at 09:52:15AM -0600, Doug Goldstein wrote:
>>>> On 1/12/17 6:50 AM, Daniel Kiper wrote:
>>>>> On Wed, Jan 11, 2017 at 02:20:15PM -0600, Doug Goldstein wrote:
>>>>>> On 1/11/17 1:47 PM, Daniel Kiper wrote:
>>>>>>> On Tue, Jan 10, 2017 at 02:51:27PM -0600, Doug Goldstein wrote:
>>>>>>>> On 1/9/17 7:37 PM, Doug Goldstein wrote:
> 
> [...]
> 
>>>>>>>> memory region). You need to use AllocatePages() otherwise you are
>>>>>>>> trampling memory that might have been allocated by the bootloader or any
>>>>>>>
>>>>>>> Bootloader code/data should be dead here.
>>>>>>
>>>>>> Correct. Unfortunately on my Lenovo laptop and my Intel NUCs I can't
>>>>>> currently call ExitBootServices and a timer that iPXE has wired up has
>>>>>
>>>>> If you disable an important wheel in a machine you should not expect
>>>>> that the machine will work. Sorry! No way!
>>>>
>>>> Speak to your co-workers Konrad and Boris. We've had long email threads
>>>> about how certain hardware does not work with the way Xen calls
>>>> ExitBootServices.
>>>
>>> Could you be more precise what is wrong? Or at least send links to
>>> relevant threads.
>>
>> There have been several on the ML over the past 2 years. A quick Google
>> search turns these up.
>>
>> http://xen.markmail.org/message/f6lx2ab4o2fch35r
>> https://lists.xenproject.org/archives/html/xen-devel/2015-01/msg03164.html
> 
> This is more or less what I expected. However, IIRC, it was not related
> to ExitBootServices() itself. The problem was that some runtime services
> code lived in boot services code and data regions. So, I suppose that if
> you map boot services code and data regions with runtime services code
> and data everything should work. However, I have just realized that we
> need an option to enable this functionality from GRUB2 command line.
> Though you can do a test by setting map_bs to 1 at the beginning of
> efi_multiboot2(). Do not forget to enable ExitBootServices() call in Xen.

On Intel NUCs, Super Micro boards and others, you are unable to call
ExitBootServices() without having called SetVirtualAddressMap(). If you
follow through both threads you'll see there's more than just map_bs.
Konrad also proposed adding /noexitbs

Patch
diff mbox

diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
index d423fd8..ac93df0 100644
--- a/xen/arch/x86/boot/head.S
+++ b/xen/arch/x86/boot/head.S
@@ -89,6 +89,13 @@  multiboot2_header_start:
                    0, /* Number of the lines - no preference. */ \
                    0  /* Number of bits per pixel - no preference. */
 
+        /* Inhibit bootloader from calling ExitBootServices(). */
+        mb2ht_init MB2_HT(EFI_BS), MB2_HT(OPTIONAL)
+
+        /* EFI64 entry point. */
+        mb2ht_init MB2_HT(ENTRY_ADDRESS_EFI64), MB2_HT(OPTIONAL), \
+                   sym_phys(__efi64_start)
+
         /* Multiboot2 header end tag. */
         mb2ht_init MB2_HT(END), MB2_HT(REQUIRED)
 .Lmultiboot2_header_end:
@@ -100,20 +107,48 @@  multiboot2_header_start:
 gdt_boot_descr:
         .word   6*8-1
         .long   sym_phys(trampoline_gdt)
+        .long   0 /* Needed for 64-bit lgdt */
+
+        .align 4
+vga_text_buffer:
+        .long   0xb8000
+
+efi_platform:
+        .byte   0
 
 .Lbad_cpu_msg: .asciz "ERR: Not a 64-bit CPU!"
 .Lbad_ldr_msg: .asciz "ERR: Not a Multiboot bootloader!"
+.Lbad_ldr_nbs: .asciz "ERR: Bootloader shutdown EFI x64 boot services!"
+.Lbad_ldr_nst: .asciz "ERR: EFI SystemTable is not provided by bootloader!"
+.Lbad_ldr_nih: .asciz "ERR: EFI ImageHandle is not provided by bootloader!"
+.Lbad_efi_msg: .asciz "ERR: EFI IA-32 platforms are not supported!"
 
         .section .init.text, "ax", @progbits
 
 bad_cpu:
         mov     $(sym_phys(.Lbad_cpu_msg)),%esi # Error message
-        jmp     print_err
+        jmp     .Lget_vtb
 not_multiboot:
         mov     $(sym_phys(.Lbad_ldr_msg)),%esi # Error message
-print_err:
-        mov     $0xB8000,%edi  # VGA framebuffer
-1:      mov     (%esi),%bl
+        jmp     .Lget_vtb
+.Lmb2_no_st:
+        mov     $(sym_phys(.Lbad_ldr_nst)),%esi # Error message
+        jmp     .Lget_vtb
+.Lmb2_no_ih:
+        mov     $(sym_phys(.Lbad_ldr_nih)),%esi # Error message
+        jmp     .Lget_vtb
+.Lmb2_no_bs:
+        mov     $(sym_phys(.Lbad_ldr_nbs)),%esi # Error message
+        xor     %edi,%edi                       # No VGA text buffer
+        jmp     .Lsend_chr
+.Lmb2_efi_ia_32:
+        mov     $(sym_phys(.Lbad_efi_msg)),%esi # Error message
+        xor     %edi,%edi                       # No VGA text buffer
+        jmp     .Lsend_chr
+.Lget_vtb:
+        mov     sym_phys(vga_text_buffer),%edi
+.Lsend_chr:
+        mov     (%esi),%bl
         test    %bl,%bl        # Terminate on '\0' sentinel
         je      .Lhalt
         mov     $0x3f8+5,%dx   # UART Line Status Register
@@ -123,13 +158,186 @@  print_err:
         mov     $0x3f8+0,%dx   # UART Transmit Holding Register
         mov     %bl,%al
         out     %al,%dx        # Send a character over the serial line
-        movsb                  # Write a character to the VGA framebuffer
+        test    %edi,%edi      # Is the VGA text buffer available?
+        jz      .Lsend_chr
+        movsb                  # Write a character to the VGA text buffer
         mov     $7,%al
-        stosb                  # Write an attribute to the VGA framebuffer
-        jmp     1b
+        stosb                  # Write an attribute to the VGA text buffer
+        jmp     .Lsend_chr
 .Lhalt: hlt
         jmp     .Lhalt
 
+        .code64
+
+__efi64_start:
+        cld
+
+        /* VGA is not available on EFI platforms. */
+        movl   $0,vga_text_buffer(%rip)
+
+        /* Check for Multiboot2 bootloader. */
+        cmp     $MULTIBOOT2_BOOTLOADER_MAGIC,%eax
+        je      .Lefi_multiboot2_proto
+
+        /* Jump to not_multiboot after switching CPU to x86_32 mode. */
+        lea     not_multiboot(%rip),%edi
+        jmp     x86_32_switch
+
+.Lefi_multiboot2_proto:
+        /* Zero EFI SystemTable and EFI ImageHandle addresses. */
+        xor     %esi,%esi
+        xor     %edi,%edi
+
+        /* Skip Multiboot2 information fixed part. */
+        lea     (MB2_fixed_sizeof+MULTIBOOT2_TAG_ALIGN-1)(%rbx),%ecx
+        and     $~(MULTIBOOT2_TAG_ALIGN-1),%ecx
+
+.Lefi_mb2_tsize:
+        /* Check Multiboot2 information total size. */
+        mov     %ecx,%r8d
+        sub     %ebx,%r8d
+        cmp     %r8d,MB2_fixed_total_size(%rbx)
+        jbe     run_bs
+
+        /* Are EFI boot services available? */
+        cmpl    $MULTIBOOT2_TAG_TYPE_EFI_BS,MB2_tag_type(%rcx)
+        jne     .Lefi_mb2_st
+
+        /* We are on EFI platform and EFI boot services are available. */
+        incb    efi_platform(%rip)
+
+        /*
+         * Disable real mode and other legacy stuff which should not
+         * be run on EFI platforms.
+         */
+        incb    skip_realmode(%rip)
+        jmp     .Lefi_mb2_next_tag
+
+.Lefi_mb2_st:
+        /* Get EFI SystemTable address from Multiboot2 information. */
+        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64,MB2_tag_type(%rcx)
+        cmove   MB2_efi64_st(%rcx),%rsi
+        je      .Lefi_mb2_next_tag
+
+        /* Get EFI ImageHandle address from Multiboot2 information. */
+        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64_IH,MB2_tag_type(%rcx)
+        cmove   MB2_efi64_ih(%rcx),%rdi
+        je      .Lefi_mb2_next_tag
+
+        /* Is it the end of Multiboot2 information? */
+        cmpl    $MULTIBOOT2_TAG_TYPE_END,MB2_tag_type(%rcx)
+        je      run_bs
+
+.Lefi_mb2_next_tag:
+        /* Go to next Multiboot2 information tag. */
+        add     MB2_tag_size(%rcx),%ecx
+        add     $(MULTIBOOT2_TAG_ALIGN-1),%ecx
+        and     $~(MULTIBOOT2_TAG_ALIGN-1),%ecx
+        jmp     .Lefi_mb2_tsize
+
+run_bs:
+        /* Are EFI boot services available? */
+        cmpb    $0,efi_platform(%rip)
+        jnz     0f
+
+        /* Jump to .Lmb2_no_bs after switching CPU to x86_32 mode. */
+        lea     .Lmb2_no_bs(%rip),%edi
+        jmp     x86_32_switch
+
+0:
+        /* Is EFI SystemTable address provided by boot loader? */
+        test    %rsi,%rsi
+        jnz     1f
+
+        /* Jump to .Lmb2_no_st after switching CPU to x86_32 mode. */
+        lea     .Lmb2_no_st(%rip),%edi
+        jmp     x86_32_switch
+
+1:
+        /* Is EFI ImageHandle address provided by boot loader? */
+        test    %rdi,%rdi
+        jnz     2f
+
+        /* Jump to .Lmb2_no_ih after switching CPU to x86_32 mode. */
+        lea     .Lmb2_no_ih(%rip),%edi
+        jmp     x86_32_switch
+
+2:
+        push    %rax
+        push    %rdi
+
+        /*
+         * Initialize BSS (no nasty surprises!).
+         * It must be done earlier than in BIOS case
+         * because efi_multiboot2() touches it.
+         */
+        lea     __bss_start(%rip),%edi
+        lea     __bss_end(%rip),%ecx
+        sub     %edi,%ecx
+        shr     $3,%ecx
+        xor     %eax,%eax
+        rep stosq
+
+        pop     %rdi
+
+        /*
+         * efi_multiboot2() is called according to System V AMD64 ABI:
+         *   - IN: %rdi - EFI ImageHandle, %rsi - EFI SystemTable,
+         *   - OUT: %rax - highest usable memory address below 1 MiB;
+         *                 memory above this address is reserved for trampoline;
+         *                 memory below this address is used as a storage for
+         *                 mbi struct created in reloc().
+         *
+         * MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO tag is not provided
+         * on EFI platforms. Hence, it could not be used like
+         * on legacy BIOS platforms.
+         */
+        call    efi_multiboot2
+
+        /* Convert memory address to bytes/16 and store it in safe place. */
+        shr     $4,%eax
+        mov     %eax,%ecx
+
+        pop     %rax
+
+        /* Jump to trampoline_setup after switching CPU to x86_32 mode. */
+        lea     trampoline_setup(%rip),%edi
+
+x86_32_switch:
+        cli
+
+        /* Initialize GDTR. */
+        lgdt    gdt_boot_descr(%rip)
+
+        /* Reload code selector. */
+        pushq   $BOOT_CS32
+        lea     cs32_switch(%rip),%edx
+        push    %rdx
+        lretq
+
+        .code32
+
+cs32_switch:
+        /* Initialize basic data segments. */
+        mov     $BOOT_DS,%edx
+        mov     %edx,%ds
+        mov     %edx,%es
+        mov     %edx,%ss
+        /* %esp is initialized later. */
+
+        /* Load null descriptor to unused segment registers. */
+        xor     %edx,%edx
+        mov     %edx,%fs
+        mov     %edx,%gs
+
+        /* Disable paging. */
+        mov     %cr0,%edx
+        and     $(~X86_CR0_PG),%edx
+        mov     %edx,%cr0
+
+        /* Jump to earlier loaded address. */
+        jmp     *%edi
+
 __start:
         cld
         cli
@@ -157,7 +365,7 @@  __start:
 
         /* Not available? BDA value will be fine. */
         cmovnz  MB_mem_lower(%ebx),%edx
-        jmp     trampoline_setup
+        jmp     trampoline_bios_setup
 
 .Lmultiboot2_proto:
         /* Skip Multiboot2 information fixed part. */
@@ -169,24 +377,33 @@  __start:
         mov     %ecx,%edi
         sub     %ebx,%edi
         cmp     %edi,MB2_fixed_total_size(%ebx)
-        jbe     trampoline_setup
+        jbe     trampoline_bios_setup
 
         /* Get mem_lower from Multiboot2 information. */
         cmpl    $MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO,MB2_tag_type(%ecx)
         cmove   MB2_mem_lower(%ecx),%edx
-        je      trampoline_setup
+        je      .Lmb2_next_tag
+
+        /* EFI IA-32 platforms are not supported. */
+        cmpl    $MULTIBOOT2_TAG_TYPE_EFI32,MB2_tag_type(%ecx)
+        je      .Lmb2_efi_ia_32
+
+        /* Bootloader shutdown EFI x64 boot services. */
+        cmpl    $MULTIBOOT2_TAG_TYPE_EFI64,MB2_tag_type(%ecx)
+        je      .Lmb2_no_bs
 
         /* Is it the end of Multiboot2 information? */
         cmpl    $MULTIBOOT2_TAG_TYPE_END,MB2_tag_type(%ecx)
-        je      trampoline_setup
+        je      trampoline_bios_setup
 
+.Lmb2_next_tag:
         /* Go to next Multiboot2 information tag. */
         add     MB2_tag_size(%ecx),%ecx
         add     $(MULTIBOOT2_TAG_ALIGN-1),%ecx
         and     $~(MULTIBOOT2_TAG_ALIGN-1),%ecx
         jmp     .Lmb2_tsize
 
-trampoline_setup:
+trampoline_bios_setup:
         /* Set up trampoline segment 64k below EBDA */
         movzwl  0x40e,%ecx          /* EBDA segment */
         cmp     $0xa000,%ecx        /* sanity check (high) */
@@ -202,16 +419,19 @@  trampoline_setup:
          * multiboot structure (if available) and use the smallest.
          */
         cmp     $0x100,%edx         /* is the multiboot value too small? */
-        jb      2f                  /* if so, do not use it */
+        jb      trampoline_setup    /* if so, do not use it */
         shl     $10-4,%edx
         cmp     %ecx,%edx           /* compare with BDA value */
         cmovb   %edx,%ecx           /* and use the smaller */
 
-2:      /* Reserve 64kb for the trampoline */
+        /* Reserve 64kb for the trampoline. */
         sub     $0x1000,%ecx
 
         /* From arch/x86/smpboot.c: start_eip had better be page-aligned! */
         xor     %cl, %cl
+
+trampoline_setup:
+        /* Save trampoline address for later use. */
         shl     $4, %ecx
         mov     %ecx,sym_phys(trampoline_phys)
 
@@ -223,7 +443,14 @@  trampoline_setup:
         call    reloc
         mov     %eax,sym_phys(multiboot_ptr)
 
-        /* Initialize BSS (no nasty surprises!) */
+        /*
+         * Do not zero BSS on EFI platform here.
+         * It was initialized earlier.
+         */
+        cmpb    $0,sym_phys(efi_platform)
+        jnz     1f
+
+        /* Initialize BSS (no nasty surprises!). */
         mov     $sym_phys(__bss_start),%edi
         mov     $sym_phys(__bss_end),%ecx
         sub     %edi,%ecx
@@ -231,6 +458,7 @@  trampoline_setup:
         shr     $2,%ecx
         rep stosl
 
+1:
         /* Interrogate CPU extended features via CPUID. */
         mov     $0x80000000,%eax
         cpuid
@@ -282,8 +510,13 @@  trampoline_setup:
         cmp     $sym_phys(__trampoline_seg_stop),%edi
         jb      1b
 
+        /* Do not parse command line on EFI platform here. */
+        cmpb    $0,sym_phys(efi_platform)
+        jnz     1f
+
         call    cmdline_parse_early
 
+1:
         /* Switch to low-memory stack.  */
         mov     sym_phys(trampoline_phys),%edi
         lea     0x10000(%edi),%esp
diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
index 62c010e..dc857d8 100644
--- a/xen/arch/x86/efi/efi-boot.h
+++ b/xen/arch/x86/efi/efi-boot.h
@@ -146,6 +146,8 @@  static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable,
 {
     struct e820entry *e;
     unsigned int i;
+    /* Check for extra mem for mbi data if Xen is loaded via multiboot2 protocol. */
+    UINTN extra_mem = efi_enabled(EFI_LOADER) ? 0 : (64 << 10);
 
     /* Populate E820 table and check trampoline area availability. */
     e = e820map - 1;
@@ -168,7 +170,8 @@  static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable,
             /* fall through */
         case EfiConventionalMemory:
             if ( !trampoline_phys && desc->PhysicalStart + len <= 0x100000 &&
-                 len >= cfg.size && desc->PhysicalStart + len > cfg.addr )
+                 len >= cfg.size + extra_mem &&
+                 desc->PhysicalStart + len > cfg.addr )
                 cfg.addr = (desc->PhysicalStart + len - cfg.size) & PAGE_MASK;
             /* fall through */
         case EfiLoaderCode:
@@ -210,12 +213,14 @@  static void *__init efi_arch_allocate_mmap_buffer(UINTN map_size)
 
 static void __init efi_arch_pre_exit_boot(void)
 {
-    if ( !trampoline_phys )
-    {
-        if ( !cfg.addr )
-            blexit(L"No memory for trampoline");
+    if ( trampoline_phys )
+        return;
+
+    if ( !cfg.addr )
+        blexit(L"No memory for trampoline");
+
+    if ( efi_enabled(EFI_LOADER) )
         relocate_trampoline(cfg.addr);
-    }
 }
 
 static void __init noreturn efi_arch_post_exit_boot(void)
@@ -653,6 +658,43 @@  static bool_t __init efi_arch_use_config_file(EFI_SYSTEM_TABLE *SystemTable)
 
 static void efi_arch_flush_dcache_area(const void *vaddr, UINTN size) { }
 
+paddr_t __init efi_multiboot2(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable)
+{
+    EFI_GRAPHICS_OUTPUT_PROTOCOL *gop;
+    UINTN cols, gop_mode = ~0, rows;
+
+    __set_bit(EFI_BOOT, &efi_flags);
+    __set_bit(EFI_RS, &efi_flags);
+
+    efi_init(ImageHandle, SystemTable);
+
+    efi_console_set_mode();
+
+    if ( StdOut->QueryMode(StdOut, StdOut->Mode->Mode,
+                           &cols, &rows) == EFI_SUCCESS )
+        efi_arch_console_init(cols, rows);
+
+    gop = efi_get_gop();
+
+    if ( gop )
+        gop_mode = efi_find_gop_mode(gop, 0, 0, 0);
+
+    efi_arch_edd();
+    efi_arch_cpu();
+
+    efi_tables();
+    setup_efi_pci();
+    efi_variables();
+
+    if ( gop )
+        efi_set_gop_mode(gop, gop_mode);
+
+    efi_exit_boot(ImageHandle, SystemTable);
+
+    /* Return highest usable memory address below 1 MiB. */
+    return cfg.addr;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/efi/stub.c b/xen/arch/x86/efi/stub.c
index 4158124..6ea6aa1 100644
--- a/xen/arch/x86/efi/stub.c
+++ b/xen/arch/x86/efi/stub.c
@@ -3,6 +3,44 @@ 
 #include <xen/init.h>
 #include <xen/lib.h>
 #include <asm/page.h>
+#include <asm/efibind.h>
+#include <efi/efidef.h>
+#include <efi/eficapsule.h>
+#include <efi/eficon.h>
+#include <efi/efidevp.h>
+#include <efi/efiapi.h>
+
+/*
+ * Here we are in EFI stub. EFI calls are not supported due to lack
+ * of relevant functionality in compiler and/or linker.
+ *
+ * efi_multiboot2() is an exception. Please look below for more details.
+ */
+
+paddr_t __init noreturn efi_multiboot2(EFI_HANDLE ImageHandle,
+                                       EFI_SYSTEM_TABLE *SystemTable)
+{
+    CHAR16 *err = L"Xen does not have EFI code build in!!!\r\nSystem halted!!!\r\n";
+    SIMPLE_TEXT_OUTPUT_INTERFACE *StdErr;
+
+    StdErr = SystemTable->StdErr ? SystemTable->StdErr : SystemTable->ConOut;
+
+    /*
+     * Print error message and halt the system.
+     *
+     * We have to open code MS x64 calling convention
+     * in assembly because here this convention may
+     * not be directly supported by C compiler.
+     */
+    asm volatile(
+    "    call %2                      \n"
+    "0:  hlt                          \n"
+    "    jmp  0b                      \n"
+       : "+c" (StdErr), "+d" (err) : "g" (StdErr->OutputString)
+       : "rax", "r8", "r9", "r10", "r11", "memory");
+
+    unreachable();
+}
 
 bool efi_enabled(unsigned int feature)
 {
diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
index b437a8f..2a22659 100644
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -175,6 +175,8 @@  void __dummy__(void)
     OFFSET(MB2_tag_type, multiboot2_tag_t, type);
     OFFSET(MB2_tag_size, multiboot2_tag_t, size);
     OFFSET(MB2_mem_lower, multiboot2_tag_basic_meminfo_t, mem_lower);
+    OFFSET(MB2_efi64_st, multiboot2_tag_efi64_t, pointer);
+    OFFSET(MB2_efi64_ih, multiboot2_tag_efi64_ih_t, pointer);
     BLANK();
 
     OFFSET(DOMAIN_vm_assist, struct domain, vm_assist);
diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
index b0b1c9b..e0e2529 100644
--- a/xen/arch/x86/xen.lds.S
+++ b/xen/arch/x86/xen.lds.S
@@ -331,5 +331,5 @@  ASSERT(IS_ALIGNED(__init_end,   PAGE_SIZE), "__init_end misaligned")
 
 ASSERT(IS_ALIGNED(trampoline_start, 4), "trampoline_start misaligned")
 ASSERT(IS_ALIGNED(trampoline_end,   4), "trampoline_end misaligned")
-ASSERT(IS_ALIGNED(__bss_start,      4), "__bss_start misaligned")
-ASSERT(IS_ALIGNED(__bss_end,        4), "__bss_end misaligned")
+ASSERT(IS_ALIGNED(__bss_start,      8), "__bss_start misaligned")
+ASSERT(IS_ALIGNED(__bss_end,        8), "__bss_end misaligned")
diff --git a/xen/common/efi/boot.c b/xen/common/efi/boot.c
index 0a93e61..70ed836 100644
--- a/xen/common/efi/boot.c
+++ b/xen/common/efi/boot.c
@@ -79,6 +79,17 @@  static size_t wstrlen(const CHAR16 * s);
 static int set_color(u32 mask, int bpp, u8 *pos, u8 *sz);
 static bool_t match_guid(const EFI_GUID *guid1, const EFI_GUID *guid2);
 
+static void efi_init(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable);
+static void efi_console_set_mode(void);
+static EFI_GRAPHICS_OUTPUT_PROTOCOL *efi_get_gop(void);
+static UINTN efi_find_gop_mode(EFI_GRAPHICS_OUTPUT_PROTOCOL *gop,
+                               UINTN cols, UINTN rows, UINTN depth);
+static void efi_tables(void);
+static void setup_efi_pci(void);
+static void efi_variables(void);
+static void efi_set_gop_mode(EFI_GRAPHICS_OUTPUT_PROTOCOL *gop, UINTN gop_mode);
+static void efi_exit_boot(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable);
+
 static const EFI_BOOT_SERVICES *__initdata efi_bs;
 static UINT32 __initdata efi_bs_revision;
 static EFI_HANDLE __initdata efi_ih;