diff mbox series

[v2,1/3] xen/ppc: Set up a basic C environment

Message ID 1afe27097c5e1b55dcffa9464dc0cd0c1038a23e.1687466822.git.sanastasio@raptorengineering.com (mailing list archive)
State Superseded
Headers show
Series Early serial on Power | expand

Commit Message

Shawn Anastasio June 22, 2023, 8:57 p.m. UTC
Update ppc64/head.S to set up an initial boot stack, zero the .bss
section, and jump to C.

Also refactor the endian fixup trampoline into its own macro, since it
will need to be used in multiple places, including every time we make a
call into firmware (see next commit).

Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com>
---
 xen/arch/ppc/Makefile                |  1 +
 xen/arch/ppc/include/asm/asm-defns.h | 36 ++++++++++++++++++
 xen/arch/ppc/ppc64/head.S            | 55 ++++++++++++++++++----------
 xen/arch/ppc/setup.c                 | 13 +++++++
 4 files changed, 85 insertions(+), 20 deletions(-)
 create mode 100644 xen/arch/ppc/include/asm/asm-defns.h
 create mode 100644 xen/arch/ppc/setup.c

Comments

Andrew Cooper June 22, 2023, 10:49 p.m. UTC | #1
On 22/06/2023 9:57 pm, Shawn Anastasio wrote:
> Update ppc64/head.S to set up an initial boot stack, zero the .bss
> section, and jump to C.
>
> Also refactor the endian fixup trampoline into its own macro, since it
> will need to be used in multiple places, including every time we make a
> call into firmware (see next commit).
>
> Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com>

Thank you for making this change - it definitely simplifies things.

> ---
>  xen/arch/ppc/Makefile                |  1 +
>  xen/arch/ppc/include/asm/asm-defns.h | 36 ++++++++++++++++++
>  xen/arch/ppc/ppc64/head.S            | 55 ++++++++++++++++++----------
>  xen/arch/ppc/setup.c                 | 13 +++++++
>  4 files changed, 85 insertions(+), 20 deletions(-)
>  create mode 100644 xen/arch/ppc/include/asm/asm-defns.h
>  create mode 100644 xen/arch/ppc/setup.c
>
> diff --git a/xen/arch/ppc/Makefile b/xen/arch/ppc/Makefile
> index 98220648af..fdbcd730d0 100644
> --- a/xen/arch/ppc/Makefile
> +++ b/xen/arch/ppc/Makefile
> @@ -1,4 +1,5 @@
>  obj-$(CONFIG_PPC64) += ppc64/
> +obj-y += setup.o
>  
>  $(TARGET): $(TARGET)-syms
>  	cp -f $< $@
> diff --git a/xen/arch/ppc/include/asm/asm-defns.h b/xen/arch/ppc/include/asm/asm-defns.h
> new file mode 100644
> index 0000000000..3db2063a39
> --- /dev/null
> +++ b/xen/arch/ppc/include/asm/asm-defns.h
> @@ -0,0 +1,36 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +#ifndef _ASM_PPC_ASM_DEFNS_H
> +#define _ASM_PPC_ASM_DEFNS_H
> +
> +/*
> + * Load a 64-bit immediate value into the specified GPR.
> + */
> +#define LOAD_IMM64(reg, val)                                                 \
> +    lis reg, (val) @highest;                                                 \
> +    ori reg, reg, (val) @higher;                                             \
> +    rldicr reg, reg, 32, 31;                                                 \
> +    oris reg, reg, (val) @h;                                                 \
> +    ori reg, reg, (val) @l;
> +
> +/*
> + * Depending on how we were booted, the CPU could be running in either
> + * Little Endian or Big Endian mode. The following trampoline from Linux
> + * cleverly uses an instruction that encodes to a NOP if the CPU's
> + * endianness matches the assumption of the assembler (LE, in our case)
> + * or a branch to code that performs the endian switch in the other case.
> + */
> +#define FIXUP_ENDIAN                                                           \
> +    tdi 0, 0, 0x48;   /* Reverse endian of b . + 8          */                 \
> +    b . + 44;         /* Skip trampoline if endian is good  */                 \
> +    .long 0xa600607d; /* mfmsr r11                          */                 \
> +    .long 0x01006b69; /* xori r11,r11,1                     */                 \
> +    .long 0x00004039; /* li r10,0                           */                 \
> +    .long 0x6401417d; /* mtmsrd r10,1                       */                 \
> +    .long 0x05009f42; /* bcl 20,31,$+4                      */                 \
> +    .long 0xa602487d; /* mflr r10                           */                 \
> +    .long 0x14004a39; /* addi r10,r10,20                    */                 \
> +    .long 0xa6035a7d; /* mtsrr0 r10                         */                 \
> +    .long 0xa6037b7d; /* mtsrr1 r11                         */                 \
> +    .long 0x2400004c  /* rfid                               */
> +
> +#endif /* _ASM_PPC_ASM_DEFNS_H */
> diff --git a/xen/arch/ppc/ppc64/head.S b/xen/arch/ppc/ppc64/head.S
> index 2fcefb40d8..a7db5b7de2 100644
> --- a/xen/arch/ppc/ppc64/head.S
> +++ b/xen/arch/ppc/ppc64/head.S
> @@ -1,30 +1,45 @@
>  /* SPDX-License-Identifier: GPL-2.0-or-later */
>  
> +#include <asm/asm-defns.h>
> +
>      .section .text.header, "ax", %progbits
>  
>  ENTRY(start)
>      /*
> -     * Depending on how we were booted, the CPU could be running in either
> -     * Little Endian or Big Endian mode. The following trampoline from Linux
> -     * cleverly uses an instruction that encodes to a NOP if the CPU's
> -     * endianness matches the assumption of the assembler (LE, in our case)
> -     * or a branch to code that performs the endian switch in the other case.
> +     * NOTE: argument registers (r3-r9) must be preserved until the C entrypoint
>       */
> -    tdi 0, 0, 0x48    /* Reverse endian of b . + 8          */
> -    b . + 44          /* Skip trampoline if endian is good  */
> -    .long 0xa600607d  /* mfmsr r11                          */
> -    .long 0x01006b69  /* xori r11,r11,1                     */
> -    .long 0x00004039  /* li r10,0                           */
> -    .long 0x6401417d  /* mtmsrd r10,1                       */
> -    .long 0x05009f42  /* bcl 20,31,$+4                      */
> -    .long 0xa602487d  /* mflr r10                           */
> -    .long 0x14004a39  /* addi r10,r10,20                    */
> -    .long 0xa6035a7d  /* mtsrr0 r10                         */
> -    .long 0xa6037b7d  /* mtsrr1 r11                         */
> -    .long 0x2400004c  /* rfid                               */
> -
> -    /* Now that the endianness is confirmed, continue */
> -1:  b 1b
> +    FIXUP_ENDIAN
> +
> +    /* set up the initial stack */
> +    LOAD_IMM64(%r1, cpu0_boot_stack)
> +    li %r11, 0
> +    std %r11, 0(%r1)

I've done a bit of reading around, and it's rather sad that things prior
to Power10 don't have PC-relative addressing.  So the LOAD_IMM64()'s do
look necessary for the stack and bss.  I guess that means we can't
sensibly be position independent in head.S?

Also, why store 0 onto the stack ?

> +
> +    /* clear .bss */
> +    LOAD_IMM64(%r14, __bss_start)
> +    LOAD_IMM64(%r15, __bss_end)
> +1:
> +    std %r11, 0(%r14)
> +    addi %r14, %r14, 8
> +    cmpld %r14, %r15
> +    blt 1b

This loop is correct, except for the corner case of this patch alone,
where the BSS is empty and this will write one word out-of-bounds.

For RISC-V, we put a temporary "char bss_tmp;" in setup.c, and I suggest
you do the same here, deleting it at a later point when there's real
data in the bss.

> +
> +    /* call the C entrypoint */
> +    LOAD_IMM64(%r12, start_xen)
> +    mtctr %r12
> +    bctrl

Why is this a LOAD_IMM64(), and not just:

    b start_xen

?  From the same reading around, PPC64 seems to have +/- 32M addressing
for branches, and the entire x86 hypervisor (.init included) is within 3M.

> +
> +    /* should never return */
> +    trap
>  
>      .size start, . - start
>      .type start, %function
> +
> +    .section .init.data, "aw", @progbits
> +
> +    /* Early init stack */
> +    .p2align 4
> +cpu0_boot_stack_bottom:
> +    .space STACK_SIZE
> +cpu0_boot_stack:
> +    .space STACK_FRAME_OVERHEAD

Why the extra space beyond the stack?

Also, I'd put cpu0_boot_stack in C, and just LOAD_IMM64(x,
cpu0_boot_stack + STACK_SIZE)

> diff --git a/xen/arch/ppc/setup.c b/xen/arch/ppc/setup.c
> new file mode 100644
> index 0000000000..4d1b72fa23
> --- /dev/null
> +++ b/xen/arch/ppc/setup.c
> @@ -0,0 +1,13 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +#include <xen/init.h>
> +
> +void __init noreturn start_xen(unsigned long r3, unsigned long r4,
> +                               unsigned long r5, unsigned long r6,
> +                               unsigned long r7)
> +{
> +    for ( ;; )
> +        /* Set current hardware thread to very low priority */
> +        asm volatile("or %r31, %r31, %r31");

Is there something magic about the OR instruction, or something magic
about %r31?

For a RISC architecture, this seems subtle.

~Andrew
Shawn Anastasio June 23, 2023, 1:26 a.m. UTC | #2
On 6/22/23 5:49 PM, Andrew Cooper wrote:
> On 22/06/2023 9:57 pm, Shawn Anastasio wrote:
>> Update ppc64/head.S to set up an initial boot stack, zero the .bss
>> section, and jump to C.
>>
>> Also refactor the endian fixup trampoline into its own macro, since it
>> will need to be used in multiple places, including every time we make a
>> call into firmware (see next commit).
>>
>> Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com>
> 
> Thank you for making this change - it definitely simplifies things.

No problem.

> I've done a bit of reading around, and it's rather sad that things prior
> to Power10 don't have PC-relative addressing.  So the LOAD_IMM64()'s do
> look necessary for the stack and bss.  I guess that means we can't
> sensibly be position independent in head.S?

Prior to the introduction of pcrel loads/stores in P10, PIC is achieved
using a Table of Contents (TOC) whose base address is kept in r2 and can
be used as a relative base to address other symbols. I don't have -fPIC
enabled in this series yet (it's in the series I was hoping to submit
after this one), so for now I'm just loading the addresses as immediates
directly.

> Also, why store 0 onto the stack ?

On the ELFv2 ABI which we're using here, sp+0 is reserved for the "back
chain" pointer which is used to store the address of the caller's stack
frame and is used to support backtraces.

At the top of the stack, we need to make sure the first back chain
pointer is zero to ensure that anything walking the stack via these
pointers eventually terminates.

>> +
>> +    /* clear .bss */
>> +    LOAD_IMM64(%r14, __bss_start)
>> +    LOAD_IMM64(%r15, __bss_end)
>> +1:
>> +    std %r11, 0(%r14)
>> +    addi %r14, %r14, 8
>> +    cmpld %r14, %r15
>> +    blt 1b
> 
> This loop is correct, except for the corner case of this patch alone,
> where the BSS is empty and this will write one word out-of-bounds.
> 
> For RISC-V, we put a temporary "char bss_tmp;" in setup.c, and I suggest
> you do the same here, deleting it at a later point when there's real
> data in the bss.

Good catch. I actually introduce a .bss variable in patch 2 of this
series, so perhaps it would make the most sense for me to move this loop
to that patch?

Also it might make sense to have an assert in the linker script checking
that sizeof(.bss) > 0, though past early bring-up an empty .bss is
probably a pretty unlikely scenario...

>> +
>> +    /* call the C entrypoint */
>> +    LOAD_IMM64(%r12, start_xen)
>> +    mtctr %r12
>> +    bctrl
> 
> Why is this a LOAD_IMM64(), and not just:
> 
>     b start_xen
> 
> ?  From the same reading around, PPC64 seems to have +/- 32M addressing
> for branches, and the entire x86 hypervisor (.init included) is within 3M.

Good question. You're right that here it's entirely unnecessary. Once we
enable -fPIC, though, the calling convention for functions changes a bit
and necessitates that in certain scenarios r12 contains the entrypoint
of the function being called.

For reference, the reason this is needed is because each function
contains a prologue that calculates the base address of its
aforementioned TOC as a relative offset from its entrypoint, and for
that to work, the entrypoint needs to be contained in r12.

There is a short path that can be taken if the TOC pointer in r2 is
already set up and the calling function is in the same module as the
function being called (and thus they are known to share a TOC), but for
head.S simply taking the long path is an easy way to ensure the callee
has a valid TOC pointer.

In any case, its inclusion in this patch before -fPIC is enabled was an
oversight on my part and I'll change it to a `bl start_xen`.

>> +
>> +    /* should never return */
>> +    trap
>>  
>>      .size start, . - start
>>      .type start, %function
>> +
>> +    .section .init.data, "aw", @progbits
>> +
>> +    /* Early init stack */
>> +    .p2align 4
>> +cpu0_boot_stack_bottom:
>> +    .space STACK_SIZE
>> +cpu0_boot_stack:
>> +    .space STACK_FRAME_OVERHEAD
> 
> Why the extra space beyond the stack?

It's space for the aforementioned back chain pointer as well as a
few other things that callees are allowed to store in their caller's
stack frame. For example, routines are allowed to store the return
address from their caller at sp+16 (unlike x86, our "call" instruction
doesn't do that for you).

The ELFv2 specification contains a nice diagram of the stack frame on
page 31 (figure 2.18, section 2.2.2.1):
https://wiki.raptorcs.com/w/images/7/70/Leabi-20170510.pdf

> Also, I'd put cpu0_boot_stack in C, and just LOAD_IMM64(x,
> cpu0_boot_stack + STACK_SIZE)

Sure, I can do that.

>> diff --git a/xen/arch/ppc/setup.c b/xen/arch/ppc/setup.c
>> new file mode 100644
>> index 0000000000..4d1b72fa23
>> --- /dev/null
>> +++ b/xen/arch/ppc/setup.c
>> @@ -0,0 +1,13 @@
>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>> +#include <xen/init.h>
>> +
>> +void __init noreturn start_xen(unsigned long r3, unsigned long r4,
>> +                               unsigned long r5, unsigned long r6,
>> +                               unsigned long r7)
>> +{
>> +    for ( ;; )
>> +        /* Set current hardware thread to very low priority */
>> +        asm volatile("or %r31, %r31, %r31");
> 
> Is there something magic about the OR instruction, or something magic
> about %r31?

Using the OR instruction with all three operands equal is of course a
no-op, but when using certain registers it can have a separate magic
side effect.

`or r31,31,31` is one such form that sets the Program Priority Register
to "very low" priority. Of course here where we don't have SMP going
there's not much point in using this over the standard side effect-less
no-op (`or r0,r0,r0` or just `nop`).

For a table of these magic OR forms, you can see page 836 of the Power
ISA 3.0B:
https://wiki.raptorcs.com/w/images/c/cb/PowerISA_public.v3.0B.pdf

> ~Andrew

Thanks,
Shawn
Jan Beulich June 23, 2023, 6:34 a.m. UTC | #3
On 23.06.2023 03:26, Shawn Anastasio wrote:
> On 6/22/23 5:49 PM, Andrew Cooper wrote:
>> On 22/06/2023 9:57 pm, Shawn Anastasio wrote:
>>> --- /dev/null
>>> +++ b/xen/arch/ppc/setup.c
>>> @@ -0,0 +1,13 @@
>>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>>> +#include <xen/init.h>
>>> +
>>> +void __init noreturn start_xen(unsigned long r3, unsigned long r4,
>>> +                               unsigned long r5, unsigned long r6,
>>> +                               unsigned long r7)
>>> +{
>>> +    for ( ;; )
>>> +        /* Set current hardware thread to very low priority */
>>> +        asm volatile("or %r31, %r31, %r31");
>>
>> Is there something magic about the OR instruction, or something magic
>> about %r31?
> 
> Using the OR instruction with all three operands equal is of course a
> no-op, but when using certain registers it can have a separate magic
> side effect.
> 
> `or r31,31,31` is one such form that sets the Program Priority Register
> to "very low" priority. Of course here where we don't have SMP going
> there's not much point in using this over the standard side effect-less
> no-op (`or r0,r0,r0` or just `nop`).
> 
> For a table of these magic OR forms, you can see page 836 of the Power
> ISA 3.0B:
> https://wiki.raptorcs.com/w/images/c/cb/PowerISA_public.v3.0B.pdf

I have 3.1 to hand, and it looks like they were dropped from there?
Otherwise I was meaning to say that it's a shame gas doesn't support
these.

Anyway - I think you want to put this behind a macro named after the
pseudo.

Finally, as a nit: Style above is lacking several blanks. One
between the two semicolons, and a total of three in the asm().

Jan
Shawn Anastasio June 23, 2023, 2:41 p.m. UTC | #4
On 6/23/23 1:34 AM, Jan Beulich wrote:
> On 23.06.2023 03:26, Shawn Anastasio wrote:
>> On 6/22/23 5:49 PM, Andrew Cooper wrote:
>>> On 22/06/2023 9:57 pm, Shawn Anastasio wrote:
>>>> --- /dev/null
>>>> +++ b/xen/arch/ppc/setup.c
>>>> @@ -0,0 +1,13 @@
>>>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>>>> +#include <xen/init.h>
>>>> +
>>>> +void __init noreturn start_xen(unsigned long r3, unsigned long r4,
>>>> +                               unsigned long r5, unsigned long r6,
>>>> +                               unsigned long r7)
>>>> +{
>>>> +    for ( ;; )
>>>> +        /* Set current hardware thread to very low priority */
>>>> +        asm volatile("or %r31, %r31, %r31");
>>>
>>> Is there something magic about the OR instruction, or something magic
>>> about %r31?
>>
>> Using the OR instruction with all three operands equal is of course a
>> no-op, but when using certain registers it can have a separate magic
>> side effect.
>>
>> `or r31,31,31` is one such form that sets the Program Priority Register
>> to "very low" priority. Of course here where we don't have SMP going
>> there's not much point in using this over the standard side effect-less
>> no-op (`or r0,r0,r0` or just `nop`).
>>
>> For a table of these magic OR forms, you can see page 836 of the Power
>> ISA 3.0B:
>> https://wiki.raptorcs.com/w/images/c/cb/PowerISA_public.v3.0B.pdf
> 
> I have 3.1 to hand, and it looks like they were dropped from there?
> Otherwise I was meaning to say that it's a shame gas doesn't support
> these.

No, they're still present in ISA 3.1B. See page 1084, Book II Chapter
3 Section 2.

> Anyway - I think you want to put this behind a macro named after the
> pseudo.

Sure, that makes sense to me.

> Finally, as a nit: Style above is lacking several blanks. One
> between the two semicolons, and a total of three in the asm().

Just to be sure, would the following be correct?

    for ( ; ; )
        /* Set current hardware thread to very low priority */
        asm volatile ( "or %r31, %r31, %r31" );

Not including the refactor of that instruction to a macro, of course.

> Jan

Thanks,
Shawn
Jan Beulich June 23, 2023, 4:59 p.m. UTC | #5
On 23.06.2023 16:41, Shawn Anastasio wrote:
> On 6/23/23 1:34 AM, Jan Beulich wrote:
>> On 23.06.2023 03:26, Shawn Anastasio wrote:
>>> On 6/22/23 5:49 PM, Andrew Cooper wrote:
>>>> On 22/06/2023 9:57 pm, Shawn Anastasio wrote:
>>>>> --- /dev/null
>>>>> +++ b/xen/arch/ppc/setup.c
>>>>> @@ -0,0 +1,13 @@
>>>>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>>>>> +#include <xen/init.h>
>>>>> +
>>>>> +void __init noreturn start_xen(unsigned long r3, unsigned long r4,
>>>>> +                               unsigned long r5, unsigned long r6,
>>>>> +                               unsigned long r7)
>>>>> +{
>>>>> +    for ( ;; )
>>>>> +        /* Set current hardware thread to very low priority */
>>>>> +        asm volatile("or %r31, %r31, %r31");
>>>>
>>>> Is there something magic about the OR instruction, or something magic
>>>> about %r31?
>>>
>>> Using the OR instruction with all three operands equal is of course a
>>> no-op, but when using certain registers it can have a separate magic
>>> side effect.
>>>
>>> `or r31,31,31` is one such form that sets the Program Priority Register
>>> to "very low" priority. Of course here where we don't have SMP going
>>> there's not much point in using this over the standard side effect-less
>>> no-op (`or r0,r0,r0` or just `nop`).
>>>
>>> For a table of these magic OR forms, you can see page 836 of the Power
>>> ISA 3.0B:
>>> https://wiki.raptorcs.com/w/images/c/cb/PowerISA_public.v3.0B.pdf
>>
>> I have 3.1 to hand, and it looks like they were dropped from there?
>> Otherwise I was meaning to say that it's a shame gas doesn't support
>> these.
> 
> No, they're still present in ISA 3.1B. See page 1084, Book II Chapter
> 3 Section 2.

Ah, I see. I was searching for the pseudo mnemonics that I had
found elsewhere at some point (which are what I would hope gas
could support, but then of course they first need to be
mentioned in the doc).

>> Finally, as a nit: Style above is lacking several blanks. One
>> between the two semicolons, and a total of three in the asm().
> 
> Just to be sure, would the following be correct?
> 
>     for ( ; ; )
>         /* Set current hardware thread to very low priority */
>         asm volatile ( "or %r31, %r31, %r31" );
> 
> Not including the refactor of that instruction to a macro, of course.

Yes, this looks correct now. Our style is perhaps a little
unusual; at least I'm not aware of another project using the
same, although I have the vague feeling that someone once
mentioned a possible origin.

Jan
Shawn Anastasio July 6, 2023, 3:43 p.m. UTC | #6
On 6/22/23 8:26 PM, Shawn Anastasio wrote:
> On 6/22/23 5:49 PM, Andrew Cooper wrote:
>> On 22/06/2023 9:57 pm, Shawn Anastasio wrote:
>>> Update ppc64/head.S to set up an initial boot stack, zero the .bss
>>> section, and jump to C.
>>>
>>> Also refactor the endian fixup trampoline into its own macro, since it
>>> will need to be used in multiple places, including every time we make a
>>> call into firmware (see next commit).
>>>
>>> Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com>
>>
>> Thank you for making this change - it definitely simplifies things.
> 
> No problem.
> 
>> I've done a bit of reading around, and it's rather sad that things prior
>> to Power10 don't have PC-relative addressing.  So the LOAD_IMM64()'s do
>> look necessary for the stack and bss.  I guess that means we can't
>> sensibly be position independent in head.S?
> 
> Prior to the introduction of pcrel loads/stores in P10, PIC is achieved
> using a Table of Contents (TOC) whose base address is kept in r2 and can
> be used as a relative base to address other symbols. I don't have -fPIC
> enabled in this series yet (it's in the series I was hoping to submit
> after this one), so for now I'm just loading the addresses as immediates
> directly.
> 
>> Also, why store 0 onto the stack ?
> 
> On the ELFv2 ABI which we're using here, sp+0 is reserved for the "back
> chain" pointer which is used to store the address of the caller's stack
> frame and is used to support backtraces.
> 
> At the top of the stack, we need to make sure the first back chain
> pointer is zero to ensure that anything walking the stack via these
> pointers eventually terminates.
> 
>>> +
>>> +    /* clear .bss */
>>> +    LOAD_IMM64(%r14, __bss_start)
>>> +    LOAD_IMM64(%r15, __bss_end)
>>> +1:
>>> +    std %r11, 0(%r14)
>>> +    addi %r14, %r14, 8
>>> +    cmpld %r14, %r15
>>> +    blt 1b
>>
>> This loop is correct, except for the corner case of this patch alone,
>> where the BSS is empty and this will write one word out-of-bounds.
>>
>> For RISC-V, we put a temporary "char bss_tmp;" in setup.c, and I suggest
>> you do the same here, deleting it at a later point when there's real
>> data in the bss.
> 
> Good catch. I actually introduce a .bss variable in patch 2 of this
> series, so perhaps it would make the most sense for me to move this loop
> to that patch?
> 
> Also it might make sense to have an assert in the linker script checking
> that sizeof(.bss) > 0, though past early bring-up an empty .bss is
> probably a pretty unlikely scenario...
> 
>>> +
>>> +    /* call the C entrypoint */
>>> +    LOAD_IMM64(%r12, start_xen)
>>> +    mtctr %r12
>>> +    bctrl
>>
>> Why is this a LOAD_IMM64(), and not just:
>>
>>     b start_xen
>>
>> ?  From the same reading around, PPC64 seems to have +/- 32M addressing
>> for branches, and the entire x86 hypervisor (.init included) is within 3M.
> 
> Good question. You're right that here it's entirely unnecessary. Once we
> enable -fPIC, though, the calling convention for functions changes a bit
> and necessitates that in certain scenarios r12 contains the entrypoint
> of the function being called.

Turns out I was actually wrong here -- changing the indirect load +
branch to a direct branch does actually break the code here, but for a
reason other than what I anticipated. Even with PIC disabled, r2 needs
to contain a valid TOC pointer. The function address doesn't need to be
contained in r12 to calculate it since it's an absolute address rather
than function-relative, but using a simple direct branch here causes the
linker to skip past the TOC calculation prologue in `start_xen` as an
optimization, since it assumes that r2 is already set up. Since we
haven't set up r2, though, this results in the program using a
garbage TOC pointer at run-time.

I'll just set up the TOC before making the call in `start` to fix this.

Thanks,
Shawn
diff mbox series

Patch

diff --git a/xen/arch/ppc/Makefile b/xen/arch/ppc/Makefile
index 98220648af..fdbcd730d0 100644
--- a/xen/arch/ppc/Makefile
+++ b/xen/arch/ppc/Makefile
@@ -1,4 +1,5 @@ 
 obj-$(CONFIG_PPC64) += ppc64/
+obj-y += setup.o
 
 $(TARGET): $(TARGET)-syms
 	cp -f $< $@
diff --git a/xen/arch/ppc/include/asm/asm-defns.h b/xen/arch/ppc/include/asm/asm-defns.h
new file mode 100644
index 0000000000..3db2063a39
--- /dev/null
+++ b/xen/arch/ppc/include/asm/asm-defns.h
@@ -0,0 +1,36 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_PPC_ASM_DEFNS_H
+#define _ASM_PPC_ASM_DEFNS_H
+
+/*
+ * Load a 64-bit immediate value into the specified GPR.
+ */
+#define LOAD_IMM64(reg, val)                                                 \
+    lis reg, (val) @highest;                                                 \
+    ori reg, reg, (val) @higher;                                             \
+    rldicr reg, reg, 32, 31;                                                 \
+    oris reg, reg, (val) @h;                                                 \
+    ori reg, reg, (val) @l;
+
+/*
+ * Depending on how we were booted, the CPU could be running in either
+ * Little Endian or Big Endian mode. The following trampoline from Linux
+ * cleverly uses an instruction that encodes to a NOP if the CPU's
+ * endianness matches the assumption of the assembler (LE, in our case)
+ * or a branch to code that performs the endian switch in the other case.
+ */
+#define FIXUP_ENDIAN                                                           \
+    tdi 0, 0, 0x48;   /* Reverse endian of b . + 8          */                 \
+    b . + 44;         /* Skip trampoline if endian is good  */                 \
+    .long 0xa600607d; /* mfmsr r11                          */                 \
+    .long 0x01006b69; /* xori r11,r11,1                     */                 \
+    .long 0x00004039; /* li r10,0                           */                 \
+    .long 0x6401417d; /* mtmsrd r10,1                       */                 \
+    .long 0x05009f42; /* bcl 20,31,$+4                      */                 \
+    .long 0xa602487d; /* mflr r10                           */                 \
+    .long 0x14004a39; /* addi r10,r10,20                    */                 \
+    .long 0xa6035a7d; /* mtsrr0 r10                         */                 \
+    .long 0xa6037b7d; /* mtsrr1 r11                         */                 \
+    .long 0x2400004c  /* rfid                               */
+
+#endif /* _ASM_PPC_ASM_DEFNS_H */
diff --git a/xen/arch/ppc/ppc64/head.S b/xen/arch/ppc/ppc64/head.S
index 2fcefb40d8..a7db5b7de2 100644
--- a/xen/arch/ppc/ppc64/head.S
+++ b/xen/arch/ppc/ppc64/head.S
@@ -1,30 +1,45 @@ 
 /* SPDX-License-Identifier: GPL-2.0-or-later */
 
+#include <asm/asm-defns.h>
+
     .section .text.header, "ax", %progbits
 
 ENTRY(start)
     /*
-     * Depending on how we were booted, the CPU could be running in either
-     * Little Endian or Big Endian mode. The following trampoline from Linux
-     * cleverly uses an instruction that encodes to a NOP if the CPU's
-     * endianness matches the assumption of the assembler (LE, in our case)
-     * or a branch to code that performs the endian switch in the other case.
+     * NOTE: argument registers (r3-r9) must be preserved until the C entrypoint
      */
-    tdi 0, 0, 0x48    /* Reverse endian of b . + 8          */
-    b . + 44          /* Skip trampoline if endian is good  */
-    .long 0xa600607d  /* mfmsr r11                          */
-    .long 0x01006b69  /* xori r11,r11,1                     */
-    .long 0x00004039  /* li r10,0                           */
-    .long 0x6401417d  /* mtmsrd r10,1                       */
-    .long 0x05009f42  /* bcl 20,31,$+4                      */
-    .long 0xa602487d  /* mflr r10                           */
-    .long 0x14004a39  /* addi r10,r10,20                    */
-    .long 0xa6035a7d  /* mtsrr0 r10                         */
-    .long 0xa6037b7d  /* mtsrr1 r11                         */
-    .long 0x2400004c  /* rfid                               */
-
-    /* Now that the endianness is confirmed, continue */
-1:  b 1b
+    FIXUP_ENDIAN
+
+    /* set up the initial stack */
+    LOAD_IMM64(%r1, cpu0_boot_stack)
+    li %r11, 0
+    std %r11, 0(%r1)
+
+    /* clear .bss */
+    LOAD_IMM64(%r14, __bss_start)
+    LOAD_IMM64(%r15, __bss_end)
+1:
+    std %r11, 0(%r14)
+    addi %r14, %r14, 8
+    cmpld %r14, %r15
+    blt 1b
+
+    /* call the C entrypoint */
+    LOAD_IMM64(%r12, start_xen)
+    mtctr %r12
+    bctrl
+
+    /* should never return */
+    trap
 
     .size start, . - start
     .type start, %function
+
+    .section .init.data, "aw", @progbits
+
+    /* Early init stack */
+    .p2align 4
+cpu0_boot_stack_bottom:
+    .space STACK_SIZE
+cpu0_boot_stack:
+    .space STACK_FRAME_OVERHEAD
diff --git a/xen/arch/ppc/setup.c b/xen/arch/ppc/setup.c
new file mode 100644
index 0000000000..4d1b72fa23
--- /dev/null
+++ b/xen/arch/ppc/setup.c
@@ -0,0 +1,13 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#include <xen/init.h>
+
+void __init noreturn start_xen(unsigned long r3, unsigned long r4,
+                               unsigned long r5, unsigned long r6,
+                               unsigned long r7)
+{
+    for ( ;; )
+        /* Set current hardware thread to very low priority */
+        asm volatile("or %r31, %r31, %r31");
+
+    unreachable();
+}