diff mbox series

[v3,1/3] xen/ppc: Set up a basic C environment

Message ID 8c79f498ffcb0837ff18640dbaf5b66f79186574.1688670018.git.sanastasio@raptorengineering.com (mailing list archive)
State Superseded
Headers show
Series Early serial on Power | expand

Commit Message

Shawn Anastasio July 6, 2023, 7:04 p.m. UTC
Update ppc64/head.S to set up an initial boot stack, zero the .bss
section, and jump to C.

Also refactor the endian fixup trampoline into its own macro, since it
will need to be used in multiple places, including every time we make a
call into firmware (see next commit).

Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com>
---
 xen/arch/ppc/Makefile                |  2 ++
 xen/arch/ppc/include/asm/asm-defns.h | 40 ++++++++++++++++++++++++++++
 xen/arch/ppc/include/asm/config.h    |  2 +-
 xen/arch/ppc/ppc64/head.S            | 40 ++++++++++++++--------------
 xen/arch/ppc/setup.c                 | 19 +++++++++++++
 5 files changed, 82 insertions(+), 21 deletions(-)
 create mode 100644 xen/arch/ppc/include/asm/asm-defns.h
 create mode 100644 xen/arch/ppc/setup.c

Comments

Jan Beulich July 17, 2023, 3:38 p.m. UTC | #1
On 06.07.2023 21:04, Shawn Anastasio wrote:
> --- a/xen/arch/ppc/include/asm/config.h
> +++ b/xen/arch/ppc/include/asm/config.h
> @@ -43,7 +43,7 @@
>  
>  #define SMP_CACHE_BYTES (1 << 6)
>  
> -#define STACK_ORDER 2
> +#define STACK_ORDER 0
>  #define STACK_SIZE  (PAGE_SIZE << STACK_ORDER)

In which way is this related to the change at hand? Aren't you going to
need to undo this rather sooner than later?

> --- a/xen/arch/ppc/ppc64/head.S
> +++ b/xen/arch/ppc/ppc64/head.S
> @@ -1,30 +1,30 @@
>  /* SPDX-License-Identifier: GPL-2.0-or-later */
>  
> +#include <asm/asm-defns.h>
> +
>      .section .text.header, "ax", %progbits
>  
>  ENTRY(start)
>      /*
> -     * Depending on how we were booted, the CPU could be running in either
> -     * Little Endian or Big Endian mode. The following trampoline from Linux
> -     * cleverly uses an instruction that encodes to a NOP if the CPU's
> -     * endianness matches the assumption of the assembler (LE, in our case)
> -     * or a branch to code that performs the endian switch in the other case.
> +     * NOTE: argument registers (r3-r9) must be preserved until the C entrypoint
>       */
> -    tdi 0, 0, 0x48    /* Reverse endian of b . + 8          */
> -    b . + 44          /* Skip trampoline if endian is good  */
> -    .long 0xa600607d  /* mfmsr r11                          */
> -    .long 0x01006b69  /* xori r11,r11,1                     */
> -    .long 0x00004039  /* li r10,0                           */
> -    .long 0x6401417d  /* mtmsrd r10,1                       */
> -    .long 0x05009f42  /* bcl 20,31,$+4                      */
> -    .long 0xa602487d  /* mflr r10                           */
> -    .long 0x14004a39  /* addi r10,r10,20                    */
> -    .long 0xa6035a7d  /* mtsrr0 r10                         */
> -    .long 0xa6037b7d  /* mtsrr1 r11                         */
> -    .long 0x2400004c  /* rfid                               */
> -
> -    /* Now that the endianness is confirmed, continue */
> -1:  b 1b
> +    FIXUP_ENDIAN
> +
> +    /* set up the TOC pointer */
> +    LOAD_IMM32(%r2, .TOC.)
> +
> +    /* set up the initial stack */
> +    LOAD_IMM32(%r1, cpu0_boot_stack)

Wouldn't this (and perhaps also .TOC.) better be calculated in a
PC-relative manner? Or is the plan to have Xen linked to an address
below 4Gb?

> +    li %r11, 0
> +    stdu %r11, -STACK_FRAME_OVERHEAD(%r1)
> +
> +    /* call the C entrypoint */
> +    bl start_xen
> +
> +    /* should never return */
> +    trap
>  
>      .size start, . - start
>      .type start, %function
> +
> +    .section .init.data, "aw", @progbits

What's this good for when no data follows?

> --- /dev/null
> +++ b/xen/arch/ppc/setup.c
> @@ -0,0 +1,19 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +#include <xen/init.h>
> +
> +/* Xen stack for bringing up the first CPU. */
> +unsigned char __initdata cpu0_boot_stack[STACK_SIZE] __aligned(STACK_SIZE);

This yields the entire array as zero-initialized. At which point I
don't see a need for the store in head.S.

> +/* Macro to adjust thread priority for hardware multithreading */
> +#define HMT_very_low()  asm volatile (" or %r31, %r31, %r31 ")

Nit: Style. Wants to be

#define HMT_very_low()  asm volatile ( "or %r31, %r31, %r31" )

Jan
Shawn Anastasio July 17, 2023, 4 p.m. UTC | #2
On 7/17/23 10:38 AM, Jan Beulich wrote:
> On 06.07.2023 21:04, Shawn Anastasio wrote:
>> --- a/xen/arch/ppc/include/asm/config.h
>> +++ b/xen/arch/ppc/include/asm/config.h
>> @@ -43,7 +43,7 @@
>>  
>>  #define SMP_CACHE_BYTES (1 << 6)
>>  
>> -#define STACK_ORDER 2
>> +#define STACK_ORDER 0
>>  #define STACK_SIZE  (PAGE_SIZE << STACK_ORDER)
> 
> In which way is this related to the change at hand? Aren't you going to
> need to undo this rather sooner than later?

I noticed the stack order being too large when I moved the stack
declaration to .c per Andrew's recommendation for v2. Since we're using
64k pages, I don't see why the stack would need to be this big. A quick
look at ARM shows they have a stack order of 3, which would yield a
stack size of just 32k.

>> --- a/xen/arch/ppc/ppc64/head.S
>> +++ b/xen/arch/ppc/ppc64/head.S
>> @@ -1,30 +1,30 @@
>>  /* SPDX-License-Identifier: GPL-2.0-or-later */
>>  
>> +#include <asm/asm-defns.h>
>> +
>>      .section .text.header, "ax", %progbits
>>  
>>  ENTRY(start)
>>      /*
>> -     * Depending on how we were booted, the CPU could be running in either
>> -     * Little Endian or Big Endian mode. The following trampoline from Linux
>> -     * cleverly uses an instruction that encodes to a NOP if the CPU's
>> -     * endianness matches the assumption of the assembler (LE, in our case)
>> -     * or a branch to code that performs the endian switch in the other case.
>> +     * NOTE: argument registers (r3-r9) must be preserved until the C entrypoint
>>       */
>> -    tdi 0, 0, 0x48    /* Reverse endian of b . + 8          */
>> -    b . + 44          /* Skip trampoline if endian is good  */
>> -    .long 0xa600607d  /* mfmsr r11                          */
>> -    .long 0x01006b69  /* xori r11,r11,1                     */
>> -    .long 0x00004039  /* li r10,0                           */
>> -    .long 0x6401417d  /* mtmsrd r10,1                       */
>> -    .long 0x05009f42  /* bcl 20,31,$+4                      */
>> -    .long 0xa602487d  /* mflr r10                           */
>> -    .long 0x14004a39  /* addi r10,r10,20                    */
>> -    .long 0xa6035a7d  /* mtsrr0 r10                         */
>> -    .long 0xa6037b7d  /* mtsrr1 r11                         */
>> -    .long 0x2400004c  /* rfid                               */
>> -
>> -    /* Now that the endianness is confirmed, continue */
>> -1:  b 1b
>> +    FIXUP_ENDIAN
>> +
>> +    /* set up the TOC pointer */
>> +    LOAD_IMM32(%r2, .TOC.)
>> +
>> +    /* set up the initial stack */
>> +    LOAD_IMM32(%r1, cpu0_boot_stack)
> 
> Wouldn't this (and perhaps also .TOC.) better be calculated in a
> PC-relative manner? Or is the plan to have Xen linked to an address
> below 4Gb?

As mentioned previously, I am planning to enable the PIC code model in
my next series in order to accommodate booting on the PowerNV firmware
type which has a different load address. That patch will change the
initial TOC load to a simulated PC-relative one (pre-POWER10 doesn't
have proper PC-relative loads/stores) and the rest to TOC-relative.

>> +    li %r11, 0
>> +    stdu %r11, -STACK_FRAME_OVERHEAD(%r1)
>> +
>> +    /* call the C entrypoint */
>> +    bl start_xen
>> +
>> +    /* should never return */
>> +    trap
>>  
>>      .size start, . - start
>>      .type start, %function
>> +
>> +    .section .init.data, "aw", @progbits
> 
> What's this good for when no data follows?

Good catch. With the stack no longer declared in head.S this is
unnecessary. Will remove in v3.

>> --- /dev/null
>> +++ b/xen/arch/ppc/setup.c
>> @@ -0,0 +1,19 @@
>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>> +#include <xen/init.h>
>> +
>> +/* Xen stack for bringing up the first CPU. */
>> +unsigned char __initdata cpu0_boot_stack[STACK_SIZE] __aligned(STACK_SIZE);
> 
> This yields the entire array as zero-initialized. At which point I
> don't see a need for the store in head.S.

Okay, fair enough. Given that the array is zero-initialized the stdu
could be replaced with an `addi %r1, %r1, -STACK_FRAME_OVERHEAD`, and
the load of zero to %r11 could be deferred to the second patch in this
series where it's used in the .bss clearing loop.

That said I don't really see the harm with keeping the standard
idiomatic `stdu` for the initial stack frame setup. Other than
performance, which isn't a concern here in early setup code.

>> +/* Macro to adjust thread priority for hardware multithreading */
>> +#define HMT_very_low()  asm volatile (" or %r31, %r31, %r31 ")
> 
> Nit: Style. Wants to be
> 
> #define HMT_very_low()  asm volatile ( "or %r31, %r31, %r31" )

Will fix in v3.

> Jan

Thanks,
Shawn
Jan Beulich July 17, 2023, 4:24 p.m. UTC | #3
On 17.07.2023 18:00, Shawn Anastasio wrote:
> On 7/17/23 10:38 AM, Jan Beulich wrote:
>> On 06.07.2023 21:04, Shawn Anastasio wrote:
>>> --- a/xen/arch/ppc/include/asm/config.h
>>> +++ b/xen/arch/ppc/include/asm/config.h
>>> @@ -43,7 +43,7 @@
>>>  
>>>  #define SMP_CACHE_BYTES (1 << 6)
>>>  
>>> -#define STACK_ORDER 2
>>> +#define STACK_ORDER 0
>>>  #define STACK_SIZE  (PAGE_SIZE << STACK_ORDER)
>>
>> In which way is this related to the change at hand? Aren't you going to
>> need to undo this rather sooner than later?
> 
> I noticed the stack order being too large when I moved the stack
> declaration to .c per Andrew's recommendation for v2. Since we're using
> 64k pages, I don't see why the stack would need to be this big. A quick
> look at ARM shows they have a stack order of 3, which would yield a
> stack size of just 32k.

Oh, I forgot page size is 64k. May I suggest to have a BUILD_BUG_ON()
somewhere, such that switching to e.g. 4k pages (the need for which
cannot be ruled out yet) will make obvious that an adjustment is
necessary. (Alternatively accommodate this case here right away.)

>>> --- a/xen/arch/ppc/ppc64/head.S
>>> +++ b/xen/arch/ppc/ppc64/head.S
>>> @@ -1,30 +1,30 @@
>>>  /* SPDX-License-Identifier: GPL-2.0-or-later */
>>>  
>>> +#include <asm/asm-defns.h>
>>> +
>>>      .section .text.header, "ax", %progbits
>>>  
>>>  ENTRY(start)
>>>      /*
>>> -     * Depending on how we were booted, the CPU could be running in either
>>> -     * Little Endian or Big Endian mode. The following trampoline from Linux
>>> -     * cleverly uses an instruction that encodes to a NOP if the CPU's
>>> -     * endianness matches the assumption of the assembler (LE, in our case)
>>> -     * or a branch to code that performs the endian switch in the other case.
>>> +     * NOTE: argument registers (r3-r9) must be preserved until the C entrypoint
>>>       */
>>> -    tdi 0, 0, 0x48    /* Reverse endian of b . + 8          */
>>> -    b . + 44          /* Skip trampoline if endian is good  */
>>> -    .long 0xa600607d  /* mfmsr r11                          */
>>> -    .long 0x01006b69  /* xori r11,r11,1                     */
>>> -    .long 0x00004039  /* li r10,0                           */
>>> -    .long 0x6401417d  /* mtmsrd r10,1                       */
>>> -    .long 0x05009f42  /* bcl 20,31,$+4                      */
>>> -    .long 0xa602487d  /* mflr r10                           */
>>> -    .long 0x14004a39  /* addi r10,r10,20                    */
>>> -    .long 0xa6035a7d  /* mtsrr0 r10                         */
>>> -    .long 0xa6037b7d  /* mtsrr1 r11                         */
>>> -    .long 0x2400004c  /* rfid                               */
>>> -
>>> -    /* Now that the endianness is confirmed, continue */
>>> -1:  b 1b
>>> +    FIXUP_ENDIAN
>>> +
>>> +    /* set up the TOC pointer */
>>> +    LOAD_IMM32(%r2, .TOC.)
>>> +
>>> +    /* set up the initial stack */
>>> +    LOAD_IMM32(%r1, cpu0_boot_stack)
>>
>> Wouldn't this (and perhaps also .TOC.) better be calculated in a
>> PC-relative manner? Or is the plan to have Xen linked to an address
>> below 4Gb?
> 
> As mentioned previously, I am planning to enable the PIC code model in
> my next series in order to accommodate booting on the PowerNV firmware
> type which has a different load address. That patch will change the
> initial TOC load to a simulated PC-relative one (pre-POWER10 doesn't
> have proper PC-relative loads/stores) and the rest to TOC-relative.

Okay. Perhaps worth mentioning in the description, so the question
won't need asking again. What about addresses being confined to 32
bits, though?

>>> --- /dev/null
>>> +++ b/xen/arch/ppc/setup.c
>>> @@ -0,0 +1,19 @@
>>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>>> +#include <xen/init.h>
>>> +
>>> +/* Xen stack for bringing up the first CPU. */
>>> +unsigned char __initdata cpu0_boot_stack[STACK_SIZE] __aligned(STACK_SIZE);
>>
>> This yields the entire array as zero-initialized. At which point I
>> don't see a need for the store in head.S.
> 
> Okay, fair enough. Given that the array is zero-initialized the stdu
> could be replaced with an `addi %r1, %r1, -STACK_FRAME_OVERHEAD`, and
> the load of zero to %r11 could be deferred to the second patch in this
> series where it's used in the .bss clearing loop.
> 
> That said I don't really see the harm with keeping the standard
> idiomatic `stdu` for the initial stack frame setup. Other than
> performance, which isn't a concern here in early setup code.

I'm not going to insist that you switch, but as you can see this can
raise questions.

Jan
diff mbox series

Patch

diff --git a/xen/arch/ppc/Makefile b/xen/arch/ppc/Makefile
index 98220648af..530fba2121 100644
--- a/xen/arch/ppc/Makefile
+++ b/xen/arch/ppc/Makefile
@@ -1,5 +1,7 @@ 
 obj-$(CONFIG_PPC64) += ppc64/
 
+obj-y += setup.o
+
 $(TARGET): $(TARGET)-syms
 	cp -f $< $@
 
diff --git a/xen/arch/ppc/include/asm/asm-defns.h b/xen/arch/ppc/include/asm/asm-defns.h
new file mode 100644
index 0000000000..6ea35f6edb
--- /dev/null
+++ b/xen/arch/ppc/include/asm/asm-defns.h
@@ -0,0 +1,40 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_PPC_ASM_DEFNS_H
+#define _ASM_PPC_ASM_DEFNS_H
+
+/*
+ * Load a 64-bit immediate value into the specified GPR.
+ */
+#define LOAD_IMM64(reg, val)                                                 \
+    lis reg, (val) @highest;                                                 \
+    ori reg, reg, (val) @higher;                                             \
+    rldicr reg, reg, 32, 31;                                                 \
+    oris reg, reg, (val) @h;                                                 \
+    ori reg, reg, (val) @l;
+
+#define LOAD_IMM32(reg, val)                                                 \
+    lis reg, (val) @h;                                                       \
+    ori reg, reg, (val) @l;                                                  \
+
+/*
+ * Depending on how we were booted, the CPU could be running in either
+ * Little Endian or Big Endian mode. The following trampoline from Linux
+ * cleverly uses an instruction that encodes to a NOP if the CPU's
+ * endianness matches the assumption of the assembler (LE, in our case)
+ * or a branch to code that performs the endian switch in the other case.
+ */
+#define FIXUP_ENDIAN                                                           \
+    tdi 0, 0, 0x48;   /* Reverse endian of b . + 8          */                 \
+    b . + 44;         /* Skip trampoline if endian is good  */                 \
+    .long 0xa600607d; /* mfmsr r11                          */                 \
+    .long 0x01006b69; /* xori r11,r11,1                     */                 \
+    .long 0x00004039; /* li r10,0                           */                 \
+    .long 0x6401417d; /* mtmsrd r10,1                       */                 \
+    .long 0x05009f42; /* bcl 20,31,$+4                      */                 \
+    .long 0xa602487d; /* mflr r10                           */                 \
+    .long 0x14004a39; /* addi r10,r10,20                    */                 \
+    .long 0xa6035a7d; /* mtsrr0 r10                         */                 \
+    .long 0xa6037b7d; /* mtsrr1 r11                         */                 \
+    .long 0x2400004c  /* rfid                               */
+
+#endif /* _ASM_PPC_ASM_DEFNS_H */
diff --git a/xen/arch/ppc/include/asm/config.h b/xen/arch/ppc/include/asm/config.h
index b9a6814f00..01ca5d0803 100644
--- a/xen/arch/ppc/include/asm/config.h
+++ b/xen/arch/ppc/include/asm/config.h
@@ -43,7 +43,7 @@ 
 
 #define SMP_CACHE_BYTES (1 << 6)
 
-#define STACK_ORDER 2
+#define STACK_ORDER 0
 #define STACK_SIZE  (PAGE_SIZE << STACK_ORDER)
 
 /* 288 bytes below the stack pointer must be preserved by interrupt handlers */
diff --git a/xen/arch/ppc/ppc64/head.S b/xen/arch/ppc/ppc64/head.S
index 2fcefb40d8..7d848611cc 100644
--- a/xen/arch/ppc/ppc64/head.S
+++ b/xen/arch/ppc/ppc64/head.S
@@ -1,30 +1,30 @@ 
 /* SPDX-License-Identifier: GPL-2.0-or-later */
 
+#include <asm/asm-defns.h>
+
     .section .text.header, "ax", %progbits
 
 ENTRY(start)
     /*
-     * Depending on how we were booted, the CPU could be running in either
-     * Little Endian or Big Endian mode. The following trampoline from Linux
-     * cleverly uses an instruction that encodes to a NOP if the CPU's
-     * endianness matches the assumption of the assembler (LE, in our case)
-     * or a branch to code that performs the endian switch in the other case.
+     * NOTE: argument registers (r3-r9) must be preserved until the C entrypoint
      */
-    tdi 0, 0, 0x48    /* Reverse endian of b . + 8          */
-    b . + 44          /* Skip trampoline if endian is good  */
-    .long 0xa600607d  /* mfmsr r11                          */
-    .long 0x01006b69  /* xori r11,r11,1                     */
-    .long 0x00004039  /* li r10,0                           */
-    .long 0x6401417d  /* mtmsrd r10,1                       */
-    .long 0x05009f42  /* bcl 20,31,$+4                      */
-    .long 0xa602487d  /* mflr r10                           */
-    .long 0x14004a39  /* addi r10,r10,20                    */
-    .long 0xa6035a7d  /* mtsrr0 r10                         */
-    .long 0xa6037b7d  /* mtsrr1 r11                         */
-    .long 0x2400004c  /* rfid                               */
-
-    /* Now that the endianness is confirmed, continue */
-1:  b 1b
+    FIXUP_ENDIAN
+
+    /* set up the TOC pointer */
+    LOAD_IMM32(%r2, .TOC.)
+
+    /* set up the initial stack */
+    LOAD_IMM32(%r1, cpu0_boot_stack)
+    li %r11, 0
+    stdu %r11, -STACK_FRAME_OVERHEAD(%r1)
+
+    /* call the C entrypoint */
+    bl start_xen
+
+    /* should never return */
+    trap
 
     .size start, . - start
     .type start, %function
+
+    .section .init.data, "aw", @progbits
diff --git a/xen/arch/ppc/setup.c b/xen/arch/ppc/setup.c
new file mode 100644
index 0000000000..73106474b2
--- /dev/null
+++ b/xen/arch/ppc/setup.c
@@ -0,0 +1,19 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#include <xen/init.h>
+
+/* Xen stack for bringing up the first CPU. */
+unsigned char __initdata cpu0_boot_stack[STACK_SIZE] __aligned(STACK_SIZE);
+
+/* Macro to adjust thread priority for hardware multithreading */
+#define HMT_very_low()  asm volatile (" or %r31, %r31, %r31 ")
+
+void __init noreturn start_xen(unsigned long r3, unsigned long r4,
+                               unsigned long r5, unsigned long r6,
+                               unsigned long r7)
+{
+    for ( ; ; )
+        /* Set current hardware thread to very low priority */
+        HMT_very_low();
+
+    unreachable();
+}