Message ID | 1afe27097c5e1b55dcffa9464dc0cd0c1038a23e.1687466822.git.sanastasio@raptorengineering.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Early serial on Power | expand |
On 22/06/2023 9:57 pm, Shawn Anastasio wrote: > Update ppc64/head.S to set up an initial boot stack, zero the .bss > section, and jump to C. > > Also refactor the endian fixup trampoline into its own macro, since it > will need to be used in multiple places, including every time we make a > call into firmware (see next commit). > > Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> Thankyou for making this change - it definitely simplifies things. > --- > xen/arch/ppc/Makefile | 1 + > xen/arch/ppc/include/asm/asm-defns.h | 36 ++++++++++++++++++ > xen/arch/ppc/ppc64/head.S | 55 ++++++++++++++++++---------- > xen/arch/ppc/setup.c | 13 +++++++ > 4 files changed, 85 insertions(+), 20 deletions(-) > create mode 100644 xen/arch/ppc/include/asm/asm-defns.h > create mode 100644 xen/arch/ppc/setup.c > > diff --git a/xen/arch/ppc/Makefile b/xen/arch/ppc/Makefile > index 98220648af..fdbcd730d0 100644 > --- a/xen/arch/ppc/Makefile > +++ b/xen/arch/ppc/Makefile > @@ -1,4 +1,5 @@ > obj-$(CONFIG_PPC64) += ppc64/ > +obj-y += setup.o > > $(TARGET): $(TARGET)-syms > cp -f $< $@ > diff --git a/xen/arch/ppc/include/asm/asm-defns.h b/xen/arch/ppc/include/asm/asm-defns.h > new file mode 100644 > index 0000000000..3db2063a39 > --- /dev/null > +++ b/xen/arch/ppc/include/asm/asm-defns.h > @@ -0,0 +1,36 @@ > +/* SPDX-License-Identifier: GPL-2.0-or-later */ > +#ifndef _ASM_PPC_ASM_DEFNS_H > +#define _ASM_PPC_ASM_DEFNS_H > + > +/* > + * Load a 64-bit immediate value into the specified GPR. > + */ > +#define LOAD_IMM64(reg, val) \ > + lis reg, (val) @highest; \ > + ori reg, reg, (val) @higher; \ > + rldicr reg, reg, 32, 31; \ > + oris reg, reg, (val) @h; \ > + ori reg, reg, (val) @l; > + > +/* > + * Depending on how we were booted, the CPU could be running in either > + * Little Endian or Big Endian mode. The following trampoline from Linux > + * cleverly uses an instruction that encodes to a NOP if the CPU's > + * endianness matches the assumption of the assembler (LE, in our case) > + * or a branch to code that performs the endian switch in the other case. > + */ > +#define FIXUP_ENDIAN \ > + tdi 0, 0, 0x48; /* Reverse endian of b . + 8 */ \ > + b . + 44; /* Skip trampoline if endian is good */ \ > + .long 0xa600607d; /* mfmsr r11 */ \ > + .long 0x01006b69; /* xori r11,r11,1 */ \ > + .long 0x00004039; /* li r10,0 */ \ > + .long 0x6401417d; /* mtmsrd r10,1 */ \ > + .long 0x05009f42; /* bcl 20,31,$+4 */ \ > + .long 0xa602487d; /* mflr r10 */ \ > + .long 0x14004a39; /* addi r10,r10,20 */ \ > + .long 0xa6035a7d; /* mtsrr0 r10 */ \ > + .long 0xa6037b7d; /* mtsrr1 r11 */ \ > + .long 0x2400004c /* rfid */ > + > +#endif /* _ASM_PPC_ASM_DEFNS_H */ > diff --git a/xen/arch/ppc/ppc64/head.S b/xen/arch/ppc/ppc64/head.S > index 2fcefb40d8..a7db5b7de2 100644 > --- a/xen/arch/ppc/ppc64/head.S > +++ b/xen/arch/ppc/ppc64/head.S > @@ -1,30 +1,45 @@ > /* SPDX-License-Identifier: GPL-2.0-or-later */ > > +#include <asm/asm-defns.h> > + > .section .text.header, "ax", %progbits > > ENTRY(start) > /* > - * Depending on how we were booted, the CPU could be running in either > - * Little Endian or Big Endian mode. The following trampoline from Linux > - * cleverly uses an instruction that encodes to a NOP if the CPU's > - * endianness matches the assumption of the assembler (LE, in our case) > - * or a branch to code that performs the endian switch in the other case. > + * NOTE: argument registers (r3-r9) must be preserved until the C entrypoint > */ > - tdi 0, 0, 0x48 /* Reverse endian of b . + 8 */ > - b . + 44 /* Skip trampoline if endian is good */ > - .long 0xa600607d /* mfmsr r11 */ > - .long 0x01006b69 /* xori r11,r11,1 */ > - .long 0x00004039 /* li r10,0 */ > - .long 0x6401417d /* mtmsrd r10,1 */ > - .long 0x05009f42 /* bcl 20,31,$+4 */ > - .long 0xa602487d /* mflr r10 */ > - .long 0x14004a39 /* addi r10,r10,20 */ > - .long 0xa6035a7d /* mtsrr0 r10 */ > - .long 0xa6037b7d /* mtsrr1 r11 */ > - .long 0x2400004c /* rfid */ > - > - /* Now that the endianness is confirmed, continue */ > -1: b 1b > + FIXUP_ENDIAN > + > + /* set up the initial stack */ > + LOAD_IMM64(%r1, cpu0_boot_stack) > + li %r11, 0 > + std %r11, 0(%r1) I've done a bit of reading around, and it's rather sad that things prior to Power10 don't have PC-relative addressing. So the LOAD_IMM64()'s do look necessary for the stack and bss. I guess that means we can't be sensibly be position independent in head.S? Also, why store 0 onto the stack ? > + > + /* clear .bss */ > + LOAD_IMM64(%r14, __bss_start) > + LOAD_IMM64(%r15, __bss_end) > +1: > + std %r11, 0(%r14) > + addi %r14, %r14, 8 > + cmpld %r14, %r15 > + blt 1b This loop is correct, except for the corner case of this patch alone, where the BSS is empty and this will write one word out-of-bounds. For RISC-V, we put a temporary "char bss_tmp;" in setup.c, and I suggest you do the same here, deleting it at a later point when there's real data in the bss. > + > + /* call the C entrypoint */ > + LOAD_IMM64(%r12, start_xen) > + mtctr %r12 > + bctrl Why is this a LOAD_IMM64(), and not just: b start_xen ? From the same reading around, PPC64 seems to have +/- 32M addressing for branches, and the entire x86 hypervisor (.init included) is within 3M. > + > + /* should never return */ > + trap > > .size start, . - start > .type start, %function > + > + .section .init.data, "aw", @progbits > + > + /* Early init stack */ > + .p2align 4 > +cpu0_boot_stack_bottom: > + .space STACK_SIZE > +cpu0_boot_stack: > + .space STACK_FRAME_OVERHEAD Why the extra space beyond the stack? Also, I'd put cpu0_boot_stack in C, and just LOAD_IMM64(x, cpu0_boot_stack + STACK_SIZE) > diff --git a/xen/arch/ppc/setup.c b/xen/arch/ppc/setup.c > new file mode 100644 > index 0000000000..4d1b72fa23 > --- /dev/null > +++ b/xen/arch/ppc/setup.c > @@ -0,0 +1,13 @@ > +/* SPDX-License-Identifier: GPL-2.0-or-later */ > +#include <xen/init.h> > + > +void __init noreturn start_xen(unsigned long r3, unsigned long r4, > + unsigned long r5, unsigned long r6, > + unsigned long r7) > +{ > + for ( ;; ) > + /* Set current hardware thread to very low priority */ > + asm volatile("or %r31, %r31, %r31"); Is there something magic about the OR instruction, or something magic about %r31? For a RISC architecture, this seems subtle. ~Andrew
On 6/22/23 5:49 PM, Andrew Cooper wrote: > On 22/06/2023 9:57 pm, Shawn Anastasio wrote: >> Update ppc64/head.S to set up an initial boot stack, zero the .bss >> section, and jump to C. >> >> Also refactor the endian fixup trampoline into its own macro, since it >> will need to be used in multiple places, including every time we make a >> call into firmware (see next commit). >> >> Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> > > Thankyou for making this change - it definitely simplifies things. No problem. > I've done a bit of reading around, and it's rather sad that things prior > to Power10 don't have PC-relative addressing. So the LOAD_IMM64()'s do > look necessary for the stack and bss. I guess that means we can't be > sensibly be position independent in head.S? Prior to the introduction of pcrel loads/stores in P10, PIC is achieved using a Table of Contents (TOC) whose base address is kept in r2 and can be used as a relative base to address other symbols. I don't have -fPIC enabled in this series yet (it's in the series I was hoping to submit after this one), so for now I'm just loading the addresses as immediates directly. > Also, why store 0 onto the stack ? On the ELFv2 ABI which we're using here, sp+0 is reserved for the "back chain" pointer which is used to store the address of the caller's stack frame and is used to support backtraces. At the top of the stack, we need to make sure the first back chain pointer is zero to ensure that anything walking the stack via these pointers eventually terminates. >> + >> + /* clear .bss */ >> + LOAD_IMM64(%r14, __bss_start) >> + LOAD_IMM64(%r15, __bss_end) >> +1: >> + std %r11, 0(%r14) >> + addi %r14, %r14, 8 >> + cmpld %r14, %r15 >> + blt 1b > > This loop is correct, except for the corner case of this patch alone, > where the BSS is empty and this will write one word out-of-bounds. > > For RISC-V, we put a temporary "char bss_tmp;" in setup.c, and I suggest > you do the same here, deleting it at a later point when there's real > data in the bss. Good catch. I actually introduce a .bss variable in patch 2 of this series, so perhaps it would make the most sense for me to move this loop to that patch? Also it might make sense to have an assert in the linker script checking that sizeof(.bss) > 0, though past early bring-up an empty .bss is probably a pretty unlikely scenario... >> + >> + /* call the C entrypoint */ >> + LOAD_IMM64(%r12, start_xen) >> + mtctr %r12 >> + bctrl > > Why is this a LOAD_IMM64(), and not just: > > b start_xen > > ? From the same reading around, PPC64 seems to have +/- 32M addressing > for branches, and the entire x86 hypervisor (.init included) is within 3M. Good question. You're right that here it's entirely unnecessary. Once we enable -fPIC, though, the calling convention for functions changes a bit and necessitates that in certain scenarios r12 contains the entrypoint of the function being called. For reference, the reason this is needed is because each function contains a prologue that calculates the base address of its aforementioned TOC as a relative offset from its entrypoint, and for that to work, the entrypoint needs to be contained in r12. There is a short path that can be taken if the TOC pointer in r2 is already set up and the calling function is in the same module as the function being called (and thus they are known to share a TOC), but for head.S simply taking the long path is an easy way to ensure the callee has a valid TOC pointer. In any case, its inclusion in this patch before -fPIC is enabled was an oversight on my part and I'll change it to a `bl start_xen`. >> + >> + /* should never return */ >> + trap >> >> .size start, . - start >> .type start, %function >> + >> + .section .init.data, "aw", @progbits >> + >> + /* Early init stack */ >> + .p2align 4 >> +cpu0_boot_stack_bottom: >> + .space STACK_SIZE >> +cpu0_boot_stack: >> + .space STACK_FRAME_OVERHEAD > > Why the extra space beyond the stack? It's space for the aforementioned back chain pointer as well as a few other things that callees are allowed to store in their caller's stack frame. For example, routines are allowed to store the return address from their caller at sp+16 (unlike x86, our "call" instruction doesn't do that for you). The ELFv2 specification contains a nice diagram of the stack frame on page 31 (figure 2.18, section 2.2.2.1): https://wiki.raptorcs.com/w/images/7/70/Leabi-20170510.pdf > Also, I'd put cpu0_boot_stack in C, and just LOAD_IMM64(x, > cpu0_boot_stack + STACK_SIZE) Sure, I can do that. >> diff --git a/xen/arch/ppc/setup.c b/xen/arch/ppc/setup.c >> new file mode 100644 >> index 0000000000..4d1b72fa23 >> --- /dev/null >> +++ b/xen/arch/ppc/setup.c >> @@ -0,0 +1,13 @@ >> +/* SPDX-License-Identifier: GPL-2.0-or-later */ >> +#include <xen/init.h> >> + >> +void __init noreturn start_xen(unsigned long r3, unsigned long r4, >> + unsigned long r5, unsigned long r6, >> + unsigned long r7) >> +{ >> + for ( ;; ) >> + /* Set current hardware thread to very low priority */ >> + asm volatile("or %r31, %r31, %r31"); > > Is there something magic about the OR instruction, or something magic > about %r31? Using the OR instruction with all three operands equal is of course a no-op, but when using certain registers it can have a separate magic side effect. `or r31,31,31` is one such form that sets the Program Priority Register to "very low" priority. Of course here where we don't have SMP going there's not much point in using this over the standard side effect-less no-op (`or r0,r0,r0` or just `nop`). For a table of these magic OR forms, you can see page 836 of the Power ISA 3.0B: https://wiki.raptorcs.com/w/images/c/cb/PowerISA_public.v3.0B.pdf > ~Andrew Thanks, Shawn
On 23.06.2023 03:26, Shawn Anastasio wrote: > On 6/22/23 5:49 PM, Andrew Cooper wrote: >> On 22/06/2023 9:57 pm, Shawn Anastasio wrote: >>> --- /dev/null >>> +++ b/xen/arch/ppc/setup.c >>> @@ -0,0 +1,13 @@ >>> +/* SPDX-License-Identifier: GPL-2.0-or-later */ >>> +#include <xen/init.h> >>> + >>> +void __init noreturn start_xen(unsigned long r3, unsigned long r4, >>> + unsigned long r5, unsigned long r6, >>> + unsigned long r7) >>> +{ >>> + for ( ;; ) >>> + /* Set current hardware thread to very low priority */ >>> + asm volatile("or %r31, %r31, %r31"); >> >> Is there something magic about the OR instruction, or something magic >> about %r31? > > Using the OR instruction with all three operands equal is of course a > no-op, but when using certain registers it can have a separate magic > side effect. > > `or r31,31,31` is one such form that sets the Program Priority Register > to "very low" priority. Of course here where we don't have SMP going > there's not much point in using this over the standard side effect-less > no-op (`or r0,r0,r0` or just `nop`). > > For a table of these magic OR forms, you can see page 836 of the Power > ISA 3.0B: > https://wiki.raptorcs.com/w/images/c/cb/PowerISA_public.v3.0B.pdf I have 3.1 to hand, and it looks like they were dropped from there? Otherwise I was meaning to say that it's a shame gas doesn't support these. Anyway - I think you want to put this behind a macro named after the pseudo. Finally, as a nit: Style above is lacking several blanks. One between the two semicolons, and a total of three in the asm(). Jan
On 6/23/23 1:34 AM, Jan Beulich wrote: > On 23.06.2023 03:26, Shawn Anastasio wrote: >> On 6/22/23 5:49 PM, Andrew Cooper wrote: >>> On 22/06/2023 9:57 pm, Shawn Anastasio wrote: >>>> --- /dev/null >>>> +++ b/xen/arch/ppc/setup.c >>>> @@ -0,0 +1,13 @@ >>>> +/* SPDX-License-Identifier: GPL-2.0-or-later */ >>>> +#include <xen/init.h> >>>> + >>>> +void __init noreturn start_xen(unsigned long r3, unsigned long r4, >>>> + unsigned long r5, unsigned long r6, >>>> + unsigned long r7) >>>> +{ >>>> + for ( ;; ) >>>> + /* Set current hardware thread to very low priority */ >>>> + asm volatile("or %r31, %r31, %r31"); >>> >>> Is there something magic about the OR instruction, or something magic >>> about %r31? >> >> Using the OR instruction with all three operands equal is of course a >> no-op, but when using certain registers it can have a separate magic >> side effect. >> >> `or r31,31,31` is one such form that sets the Program Priority Register >> to "very low" priority. Of course here where we don't have SMP going >> there's not much point in using this over the standard side effect-less >> no-op (`or r0,r0,r0` or just `nop`). >> >> For a table of these magic OR forms, you can see page 836 of the Power >> ISA 3.0B: >> https://wiki.raptorcs.com/w/images/c/cb/PowerISA_public.v3.0B.pdf > > I have 3.1 to hand, and it looks like they were dropped from there? > Otherwise I was meaning to say that it's a shame gas doesn't support > these. No, they're still present in ISA 3.1B. See page 1084, Book II Chapter 3 Section 2. > Anyway - I think you want to put this behind a macro named after the > pseudo. Sure, that makes sense to me. > Finally, as a nit: Style above is lacking several blanks. One > between the two semicolons, and a total of three in the asm(). Just to be sure, would the following be correct? for ( ; ; ) /* Set current hardware thread to very low priority */ asm volatile ( "or %r31, %r31, %r31" ); Not including the refactor of that instruction to a macro, of course. > Jan Thanks, Shawn
On 23.06.2023 16:41, Shawn Anastasio wrote: > On 6/23/23 1:34 AM, Jan Beulich wrote: >> On 23.06.2023 03:26, Shawn Anastasio wrote: >>> On 6/22/23 5:49 PM, Andrew Cooper wrote: >>>> On 22/06/2023 9:57 pm, Shawn Anastasio wrote: >>>>> --- /dev/null >>>>> +++ b/xen/arch/ppc/setup.c >>>>> @@ -0,0 +1,13 @@ >>>>> +/* SPDX-License-Identifier: GPL-2.0-or-later */ >>>>> +#include <xen/init.h> >>>>> + >>>>> +void __init noreturn start_xen(unsigned long r3, unsigned long r4, >>>>> + unsigned long r5, unsigned long r6, >>>>> + unsigned long r7) >>>>> +{ >>>>> + for ( ;; ) >>>>> + /* Set current hardware thread to very low priority */ >>>>> + asm volatile("or %r31, %r31, %r31"); >>>> >>>> Is there something magic about the OR instruction, or something magic >>>> about %r31? >>> >>> Using the OR instruction with all three operands equal is of course a >>> no-op, but when using certain registers it can have a separate magic >>> side effect. >>> >>> `or r31,31,31` is one such form that sets the Program Priority Register >>> to "very low" priority. Of course here where we don't have SMP going >>> there's not much point in using this over the standard side effect-less >>> no-op (`or r0,r0,r0` or just `nop`). >>> >>> For a table of these magic OR forms, you can see page 836 of the Power >>> ISA 3.0B: >>> https://wiki.raptorcs.com/w/images/c/cb/PowerISA_public.v3.0B.pdf >> >> I have 3.1 to hand, and it looks like they were dropped from there? >> Otherwise I was meaning to say that it's a shame gas doesn't support >> these. > > No, they're still present in ISA 3.1B. See page 1084, Book II Chapter > 3 Section 2. Ah, I see. I was searching for the pseudo mnemonics that I had found elsewhere at some point (which are what I would hope gas could support, but then of course they first need to be mentioned in the doc). >> Finally, as a nit: Style above is lacking several blanks. One >> between the two semicolons, and a total of three in the asm(). > > Just to be sure, would the following be correct? > > for ( ; ; ) > /* Set current hardware thread to very low priority */ > asm volatile ( "or %r31, %r31, %r31" ); > > Not including the refactor of that instruction to a macro, of course. Yes, this looks correct now. Our style is perhaps a little unusual; at least I'm not aware of another project using the same, although I have the vague feeling that someone once mentioned a possible origin. Jan
On 6/22/23 8:26 PM, Shawn Anastasio wrote: > On 6/22/23 5:49 PM, Andrew Cooper wrote: >> On 22/06/2023 9:57 pm, Shawn Anastasio wrote: >>> Update ppc64/head.S to set up an initial boot stack, zero the .bss >>> section, and jump to C. >>> >>> Also refactor the endian fixup trampoline into its own macro, since it >>> will need to be used in multiple places, including every time we make a >>> call into firmware (see next commit). >>> >>> Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> >> >> Thankyou for making this change - it definitely simplifies things. > > No problem. > >> I've done a bit of reading around, and it's rather sad that things prior >> to Power10 don't have PC-relative addressing. So the LOAD_IMM64()'s do >> look necessary for the stack and bss. I guess that means we can't be >> sensibly be position independent in head.S? > > Prior to the introduction of pcrel loads/stores in P10, PIC is achieved > using a Table of Contents (TOC) whose base address is kept in r2 and can > be used as a relative base to address other symbols. I don't have -fPIC > enabled in this series yet (it's in the series I was hoping to submit > after this one), so for now I'm just loading the addresses as immediates > directly. > >> Also, why store 0 onto the stack ? > > On the ELFv2 ABI which we're using here, sp+0 is reserved for the "back > chain" pointer which is used to store the address of the caller's stack > frame and is used to support backtraces. > > At the top of the stack, we need to make sure the first back chain > pointer is zero to ensure that anything walking the stack via these > pointers eventually terminates. > >>> + >>> + /* clear .bss */ >>> + LOAD_IMM64(%r14, __bss_start) >>> + LOAD_IMM64(%r15, __bss_end) >>> +1: >>> + std %r11, 0(%r14) >>> + addi %r14, %r14, 8 >>> + cmpld %r14, %r15 >>> + blt 1b >> >> This loop is correct, except for the corner case of this patch alone, >> where the BSS is empty and this will write one word out-of-bounds. >> >> For RISC-V, we put a temporary "char bss_tmp;" in setup.c, and I suggest >> you do the same here, deleting it at a later point when there's real >> data in the bss. > > Good catch. I actually introduce a .bss variable in patch 2 of this > series, so perhaps it would make the most sense for me to move this loop > to that patch? > > Also it might make sense to have an assert in the linker script checking > that sizeof(.bss) > 0, though past early bring-up an empty .bss is > probably a pretty unlikely scenario... > >>> + >>> + /* call the C entrypoint */ >>> + LOAD_IMM64(%r12, start_xen) >>> + mtctr %r12 >>> + bctrl >> >> Why is this a LOAD_IMM64(), and not just: >> >> b start_xen >> >> ? From the same reading around, PPC64 seems to have +/- 32M addressing >> for branches, and the entire x86 hypervisor (.init included) is within 3M. > > Good question. You're right that here it's entirely unnecessary. Once we > enable -fPIC, though, the calling convention for functions changes a bit > and necessitates that in certain scenarios r12 contains the entrypoint > of the function being called. Turns out I was actually wrong here -- changing the indirect load + branch to a direct branch does actually break the code here, but for a reason other than what I anticipated. Even with PIC disabled, r2 needs to contain a valid TOC pointer. The function address doesn't need to be contained in r12 to calculate it since it's an absolute address rather than function-relative, but using a simple direct branch here causes the linker to skip past the TOC calculation prologue in `start_xen` as an optimization, since it assumes that r2 is already set up. Since we haven't set up r2, though, this results in the program using a garbage TOC pointer at run-time. I'll just set up the TOC before making the call in `start` to fix this. Thanks, Shawn
diff --git a/xen/arch/ppc/Makefile b/xen/arch/ppc/Makefile index 98220648af..fdbcd730d0 100644 --- a/xen/arch/ppc/Makefile +++ b/xen/arch/ppc/Makefile @@ -1,4 +1,5 @@ obj-$(CONFIG_PPC64) += ppc64/ +obj-y += setup.o $(TARGET): $(TARGET)-syms cp -f $< $@ diff --git a/xen/arch/ppc/include/asm/asm-defns.h b/xen/arch/ppc/include/asm/asm-defns.h new file mode 100644 index 0000000000..3db2063a39 --- /dev/null +++ b/xen/arch/ppc/include/asm/asm-defns.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_PPC_ASM_DEFNS_H +#define _ASM_PPC_ASM_DEFNS_H + +/* + * Load a 64-bit immediate value into the specified GPR. + */ +#define LOAD_IMM64(reg, val) \ + lis reg, (val) @highest; \ + ori reg, reg, (val) @higher; \ + rldicr reg, reg, 32, 31; \ + oris reg, reg, (val) @h; \ + ori reg, reg, (val) @l; + +/* + * Depending on how we were booted, the CPU could be running in either + * Little Endian or Big Endian mode. The following trampoline from Linux + * cleverly uses an instruction that encodes to a NOP if the CPU's + * endianness matches the assumption of the assembler (LE, in our case) + * or a branch to code that performs the endian switch in the other case. + */ +#define FIXUP_ENDIAN \ + tdi 0, 0, 0x48; /* Reverse endian of b . + 8 */ \ + b . + 44; /* Skip trampoline if endian is good */ \ + .long 0xa600607d; /* mfmsr r11 */ \ + .long 0x01006b69; /* xori r11,r11,1 */ \ + .long 0x00004039; /* li r10,0 */ \ + .long 0x6401417d; /* mtmsrd r10,1 */ \ + .long 0x05009f42; /* bcl 20,31,$+4 */ \ + .long 0xa602487d; /* mflr r10 */ \ + .long 0x14004a39; /* addi r10,r10,20 */ \ + .long 0xa6035a7d; /* mtsrr0 r10 */ \ + .long 0xa6037b7d; /* mtsrr1 r11 */ \ + .long 0x2400004c /* rfid */ + +#endif /* _ASM_PPC_ASM_DEFNS_H */ diff --git a/xen/arch/ppc/ppc64/head.S b/xen/arch/ppc/ppc64/head.S index 2fcefb40d8..a7db5b7de2 100644 --- a/xen/arch/ppc/ppc64/head.S +++ b/xen/arch/ppc/ppc64/head.S @@ -1,30 +1,45 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +#include <asm/asm-defns.h> + .section .text.header, "ax", %progbits ENTRY(start) /* - * Depending on how we were booted, the CPU could be running in either - * Little Endian or Big Endian mode. The following trampoline from Linux - * cleverly uses an instruction that encodes to a NOP if the CPU's - * endianness matches the assumption of the assembler (LE, in our case) - * or a branch to code that performs the endian switch in the other case. + * NOTE: argument registers (r3-r9) must be preserved until the C entrypoint */ - tdi 0, 0, 0x48 /* Reverse endian of b . + 8 */ - b . + 44 /* Skip trampoline if endian is good */ - .long 0xa600607d /* mfmsr r11 */ - .long 0x01006b69 /* xori r11,r11,1 */ - .long 0x00004039 /* li r10,0 */ - .long 0x6401417d /* mtmsrd r10,1 */ - .long 0x05009f42 /* bcl 20,31,$+4 */ - .long 0xa602487d /* mflr r10 */ - .long 0x14004a39 /* addi r10,r10,20 */ - .long 0xa6035a7d /* mtsrr0 r10 */ - .long 0xa6037b7d /* mtsrr1 r11 */ - .long 0x2400004c /* rfid */ - - /* Now that the endianness is confirmed, continue */ -1: b 1b + FIXUP_ENDIAN + + /* set up the initial stack */ + LOAD_IMM64(%r1, cpu0_boot_stack) + li %r11, 0 + std %r11, 0(%r1) + + /* clear .bss */ + LOAD_IMM64(%r14, __bss_start) + LOAD_IMM64(%r15, __bss_end) +1: + std %r11, 0(%r14) + addi %r14, %r14, 8 + cmpld %r14, %r15 + blt 1b + + /* call the C entrypoint */ + LOAD_IMM64(%r12, start_xen) + mtctr %r12 + bctrl + + /* should never return */ + trap .size start, . - start .type start, %function + + .section .init.data, "aw", @progbits + + /* Early init stack */ + .p2align 4 +cpu0_boot_stack_bottom: + .space STACK_SIZE +cpu0_boot_stack: + .space STACK_FRAME_OVERHEAD diff --git a/xen/arch/ppc/setup.c b/xen/arch/ppc/setup.c new file mode 100644 index 0000000000..4d1b72fa23 --- /dev/null +++ b/xen/arch/ppc/setup.c @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#include <xen/init.h> + +void __init noreturn start_xen(unsigned long r3, unsigned long r4, + unsigned long r5, unsigned long r6, + unsigned long r7) +{ + for ( ;; ) + /* Set current hardware thread to very low priority */ + asm volatile("or %r31, %r31, %r31"); + + unreachable(); +}
Update ppc64/head.S to set up an initial boot stack, zero the .bss section, and jump to C. Also refactor the endian fixup trampoline into its own macro, since it will need to be used in multiple places, including every time we make a call into firmware (see next commit). Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> --- xen/arch/ppc/Makefile | 1 + xen/arch/ppc/include/asm/asm-defns.h | 36 ++++++++++++++++++ xen/arch/ppc/ppc64/head.S | 55 ++++++++++++++++++---------- xen/arch/ppc/setup.c | 13 +++++++ 4 files changed, 85 insertions(+), 20 deletions(-) create mode 100644 xen/arch/ppc/include/asm/asm-defns.h create mode 100644 xen/arch/ppc/setup.c