diff mbox series

[2/3] x86/boot: Refactor BIOS/PVH start

Message ID 20240910161612.242702-3-frediano.ziglio@cloud.com (mailing list archive)
State Superseded
Headers show
Series x86/boot: Reduce assembly code | expand

Commit Message

Frediano Ziglio Sept. 10, 2024, 4:16 p.m. UTC
The 2 code paths were sharing quite some common code, reuse it instead
of having duplications.
Use %ebp register to store boot type before running common code.

Signed-off-by: Frediano Ziglio <frediano.ziglio@cloud.com>
---
 xen/arch/x86/boot/head.S | 113 +++++++++++++++------------------------
 1 file changed, 44 insertions(+), 69 deletions(-)

Comments

Jan Beulich Sept. 15, 2024, 6:16 a.m. UTC | #1
On 10.09.2024 18:16, Frediano Ziglio wrote:
> --- a/xen/arch/x86/boot/head.S
> +++ b/xen/arch/x86/boot/head.S
> @@ -25,6 +25,9 @@
>  #define MB2_HT(name)      (MULTIBOOT2_HEADER_TAG_##name)
>  #define MB2_TT(name)      (MULTIBOOT2_TAG_TYPE_##name)
>  
> +#define BOOT_TYPE_BIOS 1
> +#define BOOT_TYPE_PVH 2

Did you consider using 0 and 1, to be able to use XOR on the BIOS
path and TEST for checking?

> @@ -409,13 +412,28 @@ cs32_switch:
>  ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .long sym_offs(__pvh_start))
>  
>  __pvh_start:
> -        cld
>          cli
> +        mov     $BOOT_TYPE_PVH, %ebp
> +        jmp     common_bios_pvh
> +#endif /* CONFIG_PVH_GUEST */
> +
> +__start:
> +        cli
> +        mov     $BOOT_TYPE_BIOS, %ebp
> +
> +common_bios_pvh:

I think labels like this one don't need to show up in the symbol
table, and hence would better start with .L.

> +        cld

So you fold the STDs but not the STIs, despite that not even having
been first on the PVH path. This decision wants explaining in the
description, even if just briefly.

> @@ -433,14 +451,9 @@ __pvh_start:
>          /* Set up stack. */
>          lea     STACK_SIZE - CPUINFO_sizeof + sym_esi(cpu0_stack), %esp
>  
> -        call    initialise_bss

I'm little puzzled: The previous patch moved it "as early as possible"
just for it to be moved to a later point again here?

> -        mov     %ebx, sym_esi(pvh_start_info_pa)
> -
> -        /* Force xen console.  Will revert to user choice in init code. */
> -        movb    $-1, sym_esi(opt_console_xen)
> -
> -        /* Prepare gdt and segments */
> +        /*
> +         * Initialize GDTR and basic data segments.
> +         */
>          add     %esi, sym_esi(gdt_boot_base)
>          lgdt    sym_esi(gdt_boot_descr)

No real need to change the comment? In any even it wants to remain a
single-line one.

> @@ -449,67 +462,44 @@ __pvh_start:
>          mov     %ecx, %es
>          mov     %ecx, %ss
>  
> -        /* Skip bootloader setup and bios setup, go straight to trampoline */
> -        movb    $1, sym_esi(pvh_boot)
> -        movb    $1, sym_esi(skip_realmode)
> -
> -        /* Set trampoline_phys to use mfn 1 to avoid having a mapping at VA 0 */
> -        movw    $0x1000, sym_esi(trampoline_phys)
> -        mov     (%ebx), %eax /* mov $XEN_HVM_START_MAGIC_VALUE, %eax */
> -        jmp     trampoline_setup
> -
> -#endif /* CONFIG_PVH_GUEST */
> +        /* Load null descriptor to unused segment registers. */
> +        xor     %ecx, %ecx
> +        mov     %ecx, %fs
> +        mov     %ecx, %gs

The comment said "descriptor", yes, but it's a null selector that
we load here. Perhaps worth adjusting as the comment is moved.

> -initialise_bss:
>          /*
>           * Initialise the BSS.
>           *
>           * !!! WARNING - also zeroes the current stack !!!
>           */
> -        pop     %ebp
>          mov     %eax, %edx
> -
>          lea     sym_esi(__bss_start), %edi
>          lea     sym_esi(__bss_end), %ecx
>          sub     %edi, %ecx
>          xor     %eax, %eax
>          shr     $2, %ecx
>          rep stosl
> -
>          mov     %edx, %eax
> -        jmp     *%ebp
> -
> -__start:
> -        cld
> -        cli
>  
> -        /*
> -         * Multiboot (both 1 and 2) specify the stack pointer as undefined
> -         * when entering in BIOS circumstances.  This is unhelpful for
> -         * relocatable images, where one call (i.e. push) is required to
> -         * calculate the image's load address.
> -         *
> -         * This early in boot, there is one area of memory we know about with
> -         * reasonable confidence that it isn't overlapped by Xen, and that's
> -         * the Multiboot info structure in %ebx.  Use it as a temporary stack.
> -         */
> -
> -        /* Preserve the field we're about to clobber. */
> -        mov     (%ebx), %edx
> -        lea     4(%ebx), %esp
> +#ifdef CONFIG_PVH_GUEST
> +        cmp     $BOOT_TYPE_PVH, %ebp
> +        jne     1f
>  
> -        /* Calculate the load base address. */
> -        call    1f
> -1:      pop     %esi
> -        sub     $sym_offs(1b), %esi
> +        mov     %ebx, sym_esi(pvh_start_info_pa)
>  
> -        /* Restore the clobbered field. */
> -        mov     %edx, (%ebx)
> +        /* Force xen console.  Will revert to user choice in init code. */
> +        movb    $-1, sym_esi(opt_console_xen)
>  
> -        /* Set up stack. */
> -        lea     STACK_SIZE - CPUINFO_sizeof + sym_esi(cpu0_stack), %esp
> +        /* Skip bootloader setup and bios setup, go straight to trampoline */
> +        movb    $1, sym_esi(pvh_boot)
> +        movb    $1, sym_esi(skip_realmode)
>  
> -        call    initialise_bss
> +        /* Set trampoline_phys to use mfn 1 to avoid having a mapping at VA 0 */
> +        movw    $0x1000, sym_esi(trampoline_phys)

I realize you merely move this, yet I question the use of MOVW here.
You use 32-bit operations e.g. to set %ebp; perhaps the same should
be done here, even if that means a 1 byte code size increase. In any
even this would preferably use PAGE_SIZE imo.

Jan
Frediano Ziglio Sept. 16, 2024, 8:02 a.m. UTC | #2
On Sun, Sep 15, 2024 at 7:16 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 10.09.2024 18:16, Frediano Ziglio wrote:
> > --- a/xen/arch/x86/boot/head.S
> > +++ b/xen/arch/x86/boot/head.S
> > @@ -25,6 +25,9 @@
> >  #define MB2_HT(name)      (MULTIBOOT2_HEADER_TAG_##name)
> >  #define MB2_TT(name)      (MULTIBOOT2_TAG_TYPE_##name)
> >
> > +#define BOOT_TYPE_BIOS 1
> > +#define BOOT_TYPE_PVH 2
>
> Did you consider using 0 and 1, to be able to use XOR on the BIOS
> path and TEST for checking?
>

Not clear what you are trying to achieve. Fewer bytes using the XOR? I
think the TEST in this case is only reducing readability, it's an
enumeration.
If you are concerns about code size I would use an 8 bit register (I
would say DL) and use EBP register to temporary save EAX, 8 bit
registers have usually tiny instructions, MOV has same size as XOR you
mentioned not loosing any readability or forcing to change values.

> > @@ -409,13 +412,28 @@ cs32_switch:
> >  ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .long sym_offs(__pvh_start))
> >
> >  __pvh_start:
> > -        cld
> >          cli
> > +        mov     $BOOT_TYPE_PVH, %ebp
> > +        jmp     common_bios_pvh
> > +#endif /* CONFIG_PVH_GUEST */
> > +
> > +__start:
> > +        cli
> > +        mov     $BOOT_TYPE_BIOS, %ebp
> > +
> > +common_bios_pvh:
>
> I think labels like this one don't need to show up in the symbol
> table, and hence would better start with .L.
>

Done

> > +        cld
>
> So you fold the STDs but not the STIs, despite that not even having
> been first on the PVH path. This decision wants explaining in the
> description, even if just briefly.
>

Just in case, I disable interrupts ASAP. Not that this should change
much the result.
Would you prefer to fold it too?
By "description" do you mean having an additional comment in the code
or something in the commit message?

> > @@ -433,14 +451,9 @@ __pvh_start:
> >          /* Set up stack. */
> >          lea     STACK_SIZE - CPUINFO_sizeof + sym_esi(cpu0_stack), %esp
> >
> > -        call    initialise_bss
>
> I'm little puzzled: The previous patch moved it "as early as possible"
> just for it to be moved to a later point again here?
>

The rationale is being able to use C, for that you need a stack,
correct segments and BSS.
Are you suggesting any change?

> > -        mov     %ebx, sym_esi(pvh_start_info_pa)
> > -
> > -        /* Force xen console.  Will revert to user choice in init code. */
> > -        movb    $-1, sym_esi(opt_console_xen)
> > -
> > -        /* Prepare gdt and segments */
> > +        /*
> > +         * Initialize GDTR and basic data segments.
> > +         */
> >          add     %esi, sym_esi(gdt_boot_base)
> >          lgdt    sym_esi(gdt_boot_descr)
>
> No real need to change the comment? In any even it wants to remain a
> single-line one.
>

It's not changed, it's the exact comment from the second copy of this
code (the BIOS path).
I think this comment is more clear than the first, I'll change to be one line.

> > @@ -449,67 +462,44 @@ __pvh_start:
> >          mov     %ecx, %es
> >          mov     %ecx, %ss
> >
> > -        /* Skip bootloader setup and bios setup, go straight to trampoline */
> > -        movb    $1, sym_esi(pvh_boot)
> > -        movb    $1, sym_esi(skip_realmode)
> > -
> > -        /* Set trampoline_phys to use mfn 1 to avoid having a mapping at VA 0 */
> > -        movw    $0x1000, sym_esi(trampoline_phys)
> > -        mov     (%ebx), %eax /* mov $XEN_HVM_START_MAGIC_VALUE, %eax */
> > -        jmp     trampoline_setup
> > -
> > -#endif /* CONFIG_PVH_GUEST */
> > +        /* Load null descriptor to unused segment registers. */
> > +        xor     %ecx, %ecx
> > +        mov     %ecx, %fs
> > +        mov     %ecx, %gs
>
> The comment said "descriptor", yes, but it's a null selector that
> we load here. Perhaps worth adjusting as the comment is moved.
>

I'll do.

> > -initialise_bss:
> >          /*
> >           * Initialise the BSS.
> >           *
> >           * !!! WARNING - also zeroes the current stack !!!
> >           */
> > -        pop     %ebp
> >          mov     %eax, %edx
> > -
> >          lea     sym_esi(__bss_start), %edi
> >          lea     sym_esi(__bss_end), %ecx
> >          sub     %edi, %ecx
> >          xor     %eax, %eax
> >          shr     $2, %ecx
> >          rep stosl
> > -
> >          mov     %edx, %eax
> > -        jmp     *%ebp
> > -
> > -__start:
> > -        cld
> > -        cli
> >
> > -        /*
> > -         * Multiboot (both 1 and 2) specify the stack pointer as undefined
> > -         * when entering in BIOS circumstances.  This is unhelpful for
> > -         * relocatable images, where one call (i.e. push) is required to
> > -         * calculate the image's load address.
> > -         *
> > -         * This early in boot, there is one area of memory we know about with
> > -         * reasonable confidence that it isn't overlapped by Xen, and that's
> > -         * the Multiboot info structure in %ebx.  Use it as a temporary stack.
> > -         */
> > -
> > -        /* Preserve the field we're about to clobber. */
> > -        mov     (%ebx), %edx
> > -        lea     4(%ebx), %esp
> > +#ifdef CONFIG_PVH_GUEST
> > +        cmp     $BOOT_TYPE_PVH, %ebp
> > +        jne     1f
> >
> > -        /* Calculate the load base address. */
> > -        call    1f
> > -1:      pop     %esi
> > -        sub     $sym_offs(1b), %esi
> > +        mov     %ebx, sym_esi(pvh_start_info_pa)
> >
> > -        /* Restore the clobbered field. */
> > -        mov     %edx, (%ebx)
> > +        /* Force xen console.  Will revert to user choice in init code. */
> > +        movb    $-1, sym_esi(opt_console_xen)
> >
> > -        /* Set up stack. */
> > -        lea     STACK_SIZE - CPUINFO_sizeof + sym_esi(cpu0_stack), %esp
> > +        /* Skip bootloader setup and bios setup, go straight to trampoline */
> > +        movb    $1, sym_esi(pvh_boot)
> > +        movb    $1, sym_esi(skip_realmode)
> >
> > -        call    initialise_bss
> > +        /* Set trampoline_phys to use mfn 1 to avoid having a mapping at VA 0 */
> > +        movw    $0x1000, sym_esi(trampoline_phys)
>
> I realize you merely move this, yet I question the use of MOVW here.
> You use 32-bit operations e.g. to set %ebp; perhaps the same should
> be done here, even if that means a 1 byte code size increase. In any
> even this would preferably use PAGE_SIZE imo.
>

So movl instead of movw and PAGE_SIZE instead of 0x1000. I'll do.

> Jan

Frediano
Jan Beulich Sept. 16, 2024, 11:58 a.m. UTC | #3
On 16.09.2024 10:02, Frediano Ziglio wrote:
> On Sun, Sep 15, 2024 at 7:16 AM Jan Beulich <jbeulich@suse.com> wrote:
>>
>> On 10.09.2024 18:16, Frediano Ziglio wrote:
>>> --- a/xen/arch/x86/boot/head.S
>>> +++ b/xen/arch/x86/boot/head.S
>>> @@ -25,6 +25,9 @@
>>>  #define MB2_HT(name)      (MULTIBOOT2_HEADER_TAG_##name)
>>>  #define MB2_TT(name)      (MULTIBOOT2_TAG_TYPE_##name)
>>>
>>> +#define BOOT_TYPE_BIOS 1
>>> +#define BOOT_TYPE_PVH 2
>>
>> Did you consider using 0 and 1, to be able to use XOR on the BIOS
>> path and TEST for checking?
>>
> 
> Not clear what you are trying to achieve. Fewer bytes using the XOR? I
> think the TEST in this case is only reducing readability, it's an
> enumeration.

Except that in practice we don't really need an enum here; a boolean
would suffice.

> If you are concerns about code size I would use an 8 bit register (I
> would say DL) and use EBP register to temporary save EAX, 8 bit
> registers have usually tiny instructions, MOV has same size as XOR you
> mentioned not loosing any readability or forcing to change values.

No, it's not code size alone. It's larger code size for no good reason.
Using 8-bit ops is an option when you're truly code size constrained,
yet that's not the case here. Hence my take is - use the cheapest insns
possible.

>>> +        cld
>>
>> So you fold the STDs but not the STIs, despite that not even having
>> been first on the PVH path. This decision wants explaining in the
>> description, even if just briefly.
> 
> Just in case, I disable interrupts ASAP. Not that this should change
> much the result.
> Would you prefer to fold it too?

That or explain the difference.

> By "description" do you mean having an additional comment in the code
> or something in the commit message?

Sorry, I use "description" as a common synonym for "commit message".

>>> @@ -433,14 +451,9 @@ __pvh_start:
>>>          /* Set up stack. */
>>>          lea     STACK_SIZE - CPUINFO_sizeof + sym_esi(cpu0_stack), %esp
>>>
>>> -        call    initialise_bss
>>
>> I'm little puzzled: The previous patch moved it "as early as possible"
>> just for it to be moved to a later point again here?
>>
> 
> The rationale is being able to use C, for that you need a stack,
> correct segments and BSS.
> Are you suggesting any change?

I'm trying to understand the sum effect of both changes, which appear
to be moving in opposite direction.

Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
index de118d72f2..a91413184c 100644
--- a/xen/arch/x86/boot/head.S
+++ b/xen/arch/x86/boot/head.S
@@ -25,6 +25,9 @@ 
 #define MB2_HT(name)      (MULTIBOOT2_HEADER_TAG_##name)
 #define MB2_TT(name)      (MULTIBOOT2_TAG_TYPE_##name)
 
+#define BOOT_TYPE_BIOS 1
+#define BOOT_TYPE_PVH 2
+
         .macro mb2ht_args arg:req, args:vararg
         .long \arg
         .ifnb \args
@@ -409,13 +412,28 @@  cs32_switch:
 ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .long sym_offs(__pvh_start))
 
 __pvh_start:
-        cld
         cli
+        mov     $BOOT_TYPE_PVH, %ebp
+        jmp     common_bios_pvh
+#endif /* CONFIG_PVH_GUEST */
+
+__start:
+        cli
+        mov     $BOOT_TYPE_BIOS, %ebp
+
+common_bios_pvh:
+        cld
 
         /*
-         * We need one call (i.e. push) to determine the load address.  See
-         * __start for a discussion on how to do this safely using the PVH
-         * info structure.
+         * Multiboot (both 1 and 2) and PVH specify the stack pointer as
+         * undefined.  This is unhelpful for relocatable images, where one
+         * call (i.e. push) is required to calculate the image's load address.
+         *
+         * Durig BIOS boot, there is one area of memory we know about with
+         * reasonable confidence that it isn't overlapped by Xen, and that's
+         * the Multiboot info structure in %ebx.  Use it as a temporary stack.
+         *
+         * During PVH boot use info structure in %ebx.
          */
 
         /* Preserve the field we're about to clobber. */
@@ -433,14 +451,9 @@  __pvh_start:
         /* Set up stack. */
         lea     STACK_SIZE - CPUINFO_sizeof + sym_esi(cpu0_stack), %esp
 
-        call    initialise_bss
-
-        mov     %ebx, sym_esi(pvh_start_info_pa)
-
-        /* Force xen console.  Will revert to user choice in init code. */
-        movb    $-1, sym_esi(opt_console_xen)
-
-        /* Prepare gdt and segments */
+        /*
+         * Initialize GDTR and basic data segments.
+         */
         add     %esi, sym_esi(gdt_boot_base)
         lgdt    sym_esi(gdt_boot_descr)
 
@@ -449,67 +462,44 @@  __pvh_start:
         mov     %ecx, %es
         mov     %ecx, %ss
 
-        /* Skip bootloader setup and bios setup, go straight to trampoline */
-        movb    $1, sym_esi(pvh_boot)
-        movb    $1, sym_esi(skip_realmode)
-
-        /* Set trampoline_phys to use mfn 1 to avoid having a mapping at VA 0 */
-        movw    $0x1000, sym_esi(trampoline_phys)
-        mov     (%ebx), %eax /* mov $XEN_HVM_START_MAGIC_VALUE, %eax */
-        jmp     trampoline_setup
-
-#endif /* CONFIG_PVH_GUEST */
+        /* Load null descriptor to unused segment registers. */
+        xor     %ecx, %ecx
+        mov     %ecx, %fs
+        mov     %ecx, %gs
 
-initialise_bss:
         /*
          * Initialise the BSS.
          *
          * !!! WARNING - also zeroes the current stack !!!
          */
-        pop     %ebp
         mov     %eax, %edx
-
         lea     sym_esi(__bss_start), %edi
         lea     sym_esi(__bss_end), %ecx
         sub     %edi, %ecx
         xor     %eax, %eax
         shr     $2, %ecx
         rep stosl
-
         mov     %edx, %eax
-        jmp     *%ebp
-
-__start:
-        cld
-        cli
 
-        /*
-         * Multiboot (both 1 and 2) specify the stack pointer as undefined
-         * when entering in BIOS circumstances.  This is unhelpful for
-         * relocatable images, where one call (i.e. push) is required to
-         * calculate the image's load address.
-         *
-         * This early in boot, there is one area of memory we know about with
-         * reasonable confidence that it isn't overlapped by Xen, and that's
-         * the Multiboot info structure in %ebx.  Use it as a temporary stack.
-         */
-
-        /* Preserve the field we're about to clobber. */
-        mov     (%ebx), %edx
-        lea     4(%ebx), %esp
+#ifdef CONFIG_PVH_GUEST
+        cmp     $BOOT_TYPE_PVH, %ebp
+        jne     1f
 
-        /* Calculate the load base address. */
-        call    1f
-1:      pop     %esi
-        sub     $sym_offs(1b), %esi
+        mov     %ebx, sym_esi(pvh_start_info_pa)
 
-        /* Restore the clobbered field. */
-        mov     %edx, (%ebx)
+        /* Force xen console.  Will revert to user choice in init code. */
+        movb    $-1, sym_esi(opt_console_xen)
 
-        /* Set up stack. */
-        lea     STACK_SIZE - CPUINFO_sizeof + sym_esi(cpu0_stack), %esp
+        /* Skip bootloader setup and bios setup, go straight to trampoline */
+        movb    $1, sym_esi(pvh_boot)
+        movb    $1, sym_esi(skip_realmode)
 
-        call    initialise_bss
+        /* Set trampoline_phys to use mfn 1 to avoid having a mapping at VA 0 */
+        movw    $0x1000, sym_esi(trampoline_phys)
+        mov     (%ebx), %eax /* mov $XEN_HVM_START_MAGIC_VALUE, %eax */
+        jmp     trampoline_setup
+1:
+#endif /* CONFIG_PVH_GUEST */
 
         /* Bootloaders may set multiboot{1,2}.mem_lower to a nonzero value. */
         xor     %edx,%edx
@@ -568,22 +558,7 @@  __start:
 trampoline_bios_setup:
         /*
          * Called on legacy BIOS platforms only.
-         *
-         * Initialize GDTR and basic data segments.
          */
-        add     %esi,sym_esi(gdt_boot_base)
-        lgdt    sym_esi(gdt_boot_descr)
-
-        mov     $BOOT_DS,%ecx
-        mov     %ecx,%ds
-        mov     %ecx,%es
-        mov     %ecx,%ss
-        /* %esp is initialized later. */
-
-        /* Load null descriptor to unused segment registers. */
-        xor     %ecx,%ecx
-        mov     %ecx,%fs
-        mov     %ecx,%gs
 
         /* Set up trampoline segment 64k below EBDA */
         movzwl  0x40e,%ecx          /* EBDA segment */