diff mbox series

[v4,2/4] xen: Add files needed for minimal ppc64le build

Message ID 97a72e26edafb1d7b3a583755f015d04066c1e53.1686936278.git.sanastasio@raptorengineering.com (mailing list archive)
State Superseded
Headers show
Series Initial support for Power | expand

Commit Message

Shawn Anastasio June 16, 2023, 5:48 p.m. UTC
Add the build system changes required to build for ppc64le (POWER8+).
As of now the resulting image simply boots to an infinite loop.

$ make XEN_TARGET_ARCH=ppc64 -C xen openpower_defconfig
$ make XEN_TARGET_ARCH=ppc64 SUBSYSTEMS=xen -C xen build

This port targets POWER8+ CPUs running in Little Endian mode specifically,
and does not boot on older machines. Additionally, this initial skeleton
only implements the PaPR/pseries boot protocol which allows it to be
booted in a standard QEMU virtual machine:

$ qemu-system-ppc64 -M pseries-5.2 -m 256M -kernel xen/xen

Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com>
---
 config/ppc64.mk                          |   5 +
 xen/Makefile                             |   5 +-
 xen/arch/ppc/Kconfig                     |  42 ++++++
 xen/arch/ppc/Kconfig.debug               |   0
 xen/arch/ppc/Makefile                    |  16 +++
 xen/arch/ppc/Rules.mk                    |   0
 xen/arch/ppc/arch.mk                     |  12 ++
 xen/arch/ppc/configs/openpower_defconfig |  13 ++
 xen/arch/ppc/include/asm/config.h        |  63 +++++++++
 xen/arch/ppc/include/asm/page-bits.h     |   7 +
 xen/arch/ppc/ppc64/Makefile              |   1 +
 xen/arch/ppc/ppc64/asm-offsets.c         |   0
 xen/arch/ppc/ppc64/head.S                |  27 ++++
 xen/arch/ppc/xen.lds.S                   | 172 +++++++++++++++++++++++
 14 files changed, 361 insertions(+), 2 deletions(-)
 create mode 100644 config/ppc64.mk
 create mode 100644 xen/arch/ppc/Kconfig
 create mode 100644 xen/arch/ppc/Kconfig.debug
 create mode 100644 xen/arch/ppc/Makefile
 create mode 100644 xen/arch/ppc/Rules.mk
 create mode 100644 xen/arch/ppc/arch.mk
 create mode 100644 xen/arch/ppc/configs/openpower_defconfig
 create mode 100644 xen/arch/ppc/include/asm/config.h
 create mode 100644 xen/arch/ppc/include/asm/page-bits.h
 create mode 100644 xen/arch/ppc/ppc64/Makefile
 create mode 100644 xen/arch/ppc/ppc64/asm-offsets.c
 create mode 100644 xen/arch/ppc/ppc64/head.S
 create mode 100644 xen/arch/ppc/xen.lds.S

Comments

Andrew Cooper June 16, 2023, 8:24 p.m. UTC | #1
On 16/06/2023 6:48 pm, Shawn Anastasio wrote:
> Add the build system changes required to build for ppc64le (POWER8+).
> As of now the resulting image simply boots to an infinite loop.
>
> $ make XEN_TARGET_ARCH=ppc64 -C xen openpower_defconfig
> $ make XEN_TARGET_ARCH=ppc64 SUBSYSTEMS=xen -C xen build

I think the first of these isn't needed, given the config ARCH_DEFCONFIG
default.  I'd suggest dropping it.

On the second, what is the SUBSYSTEMS=xen?  It's not needed given the
stripped down build system, but I don't see why we'd ever be compiling
Xen with some kind of subsystem configuration for something else.

> diff --git a/config/ppc64.mk b/config/ppc64.mk
> new file mode 100644
> index 0000000000..597f0668c3
> --- /dev/null
> +++ b/config/ppc64.mk
> @@ -0,0 +1,5 @@
> +CONFIG_PPC := y
> +CONFIG_PPC64 := y
> +CONFIG_PPC_$(XEN_OS) := y

I know you're copying the existing architectures, but I'm pretty certain
these $(XEN_OS) expressions are pretty bogus.  The userspace stuff in
tools/ may need to know the host OS it's being built for, but Xen really
doesn't.

I'm pretty sure it will compile with this dropped, so please do.  I'll
see about patching it out of the other architectures.

> diff --git a/xen/arch/ppc/Kconfig b/xen/arch/ppc/Kconfig
> new file mode 100644
> index 0000000000..a0a70adef4
> --- /dev/null
> +++ b/xen/arch/ppc/Kconfig
> @@ -0,0 +1,42 @@
> +config PPC
> +	def_bool y
> +
> +config PPC64
> +	def_bool y
> +	select 64BIT
> +
> +config ARCH_DEFCONFIG
> +	string
> +	default "arch/ppc/configs/openpower_defconfig"
> +
> +menu "Architecture Features"
> +
> +source "arch/Kconfig"
> +
> +endmenu
> +
> +menu "ISA Selection"
> +
> +choice
> +	prompt "Base ISA"
> +	default POWER_ISA_2_07B if PPC64
> +	help
> +	  This selects the base ISA version that Xen will target.
> +
> +config POWER_ISA_2_07B
> +	bool "Power ISA 2.07B"
> +	help
> +	  Target version 2.07B of the Power ISA (POWER8)
> +
> +config POWER_ISA_3_00
> +	bool "Power ISA 3.00"
> +	help
> +	  Target version 3.00 of the Power ISA (POWER9)

For both of these, it will be helpful for anyone who isn't as
PPC-knowledgeable if the POWER8/9 was in the title too, seeing as
they're the most common name.

But as I'm a noob here too, how different are Power8 and 9?  Given they
share a head.S, they're presumably not too disjoint in terms of ISA.

While being able to target a specific CPU is something we're trying to
retrofit to Xen, by default we do expect it to run on as broad a set of
systems as possible.

If that's not feasible, then fine, but if it is, it ought to be the
default.  Which might be as simple as saying "or later" somewhere in
this text, or might be a giant can of worms that I shouldn't open...
 
> diff --git a/xen/arch/ppc/include/asm/page-bits.h b/xen/arch/ppc/include/asm/page-bits.h
> new file mode 100644
> index 0000000000..4c01bf9716
> --- /dev/null
> +++ b/xen/arch/ppc/include/asm/page-bits.h
> @@ -0,0 +1,7 @@
> +#ifndef __PPC_PAGE_BITS_H__
> +#define __PPC_PAGE_BITS_H__
> +
> +#define PAGE_SHIFT              16 /* 64 KiB Pages */
> +#define PADDR_BITS              48

Is 64k the minimum granularity?  Or is 4k an option?

I ask because Xen has some very short sighted ABIs which we're still
working on removing.  There are still quite a few expectations of
PAGE_SHIFT being 12.

To be clear, we're looking to fix all of these ABIs, but I suspect it
will be an easier lift to begin with at 4k.  (Or perhaps the right thing
is to double down and just get them fixed.)

> +
> +#endif /* __PPC_PAGE_BITS_H__ */
> diff --git a/xen/arch/ppc/ppc64/Makefile b/xen/arch/ppc/ppc64/Makefile
> new file mode 100644
> index 0000000000..3340058c08
> --- /dev/null
> +++ b/xen/arch/ppc/ppc64/Makefile
> @@ -0,0 +1 @@
> +obj-y += head.o
> diff --git a/xen/arch/ppc/ppc64/asm-offsets.c b/xen/arch/ppc/ppc64/asm-offsets.c
> new file mode 100644
> index 0000000000..e69de29bb2
> diff --git a/xen/arch/ppc/ppc64/head.S b/xen/arch/ppc/ppc64/head.S
> new file mode 100644
> index 0000000000..0b289c713a
> --- /dev/null
> +++ b/xen/arch/ppc/ppc64/head.S
> @@ -0,0 +1,27 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +
> +.section .text.header, "ax", %progbits
> +
> +ENTRY(start)
> +    /*
> +     * Depending on how we were booted, the CPU could be running in either
> +     * Little Endian or Big Endian mode. The following trampoline from Linux
> +     * cleverly uses an instruction that encodes to a NOP if the CPU's
> +     * endianness matches the assumption of the assembler (LE, in our case)
> +     * or a branch to code that performs the endian switch in the other case.
> +     */
> +    tdi 0, 0, 0x48    /* Reverse endian of b . + 8          */
> +    b . + 44          /* Skip trampoline if endian is good  */
> +    .long 0xa600607d  /* mfmsr r11                          */
> +    .long 0x01006b69  /* xori r11,r11,1                     */
> +    .long 0x00004039  /* li r10,0                           */
> +    .long 0x6401417d  /* mtmsrd r10,1                       */
> +    .long 0x05009f42  /* bcl 20,31,$+4                      */
> +    .long 0xa602487d  /* mflr r10                           */
> +    .long 0x14004a39  /* addi r10,r10,20                    */
> +    .long 0xa6035a7d  /* mtsrr0 r10                         */
> +    .long 0xa6037b7d  /* mtsrr1 r11                         */
> +    .long 0x2400004c  /* rfid                               */
> +
> +    /* Now that the endianness is confirmed, continue */
> +1:  b 1b

.size start, . - start
.type start, %function

Lets get the ELF metadata right from the start.

> diff --git a/xen/arch/ppc/xen.lds.S b/xen/arch/ppc/xen.lds.S
> new file mode 100644
> index 0000000000..a72e519c6a
> --- /dev/null
> +++ b/xen/arch/ppc/xen.lds.S
> @@ -0,0 +1,172 @@
> <snip>
> +    DISCARD_SECTIONS
> +
> +    STABS_DEBUG_SECTIONS
> +
> +    ELF_DETAILS_SECTIONS
> +}

In the other architectures, we now assert that sections such as .got are
empty, because we've had enough bugs in the past.

I'd recommend doing the same from the outset for all the dynamic
relocation sections, unless you're expecting to have to support them?

~Andrew
Andrew Cooper June 16, 2023, 8:27 p.m. UTC | #2
On 16/06/2023 6:48 pm, Shawn Anastasio wrote:
> diff --git a/xen/arch/ppc/ppc64/head.S b/xen/arch/ppc/ppc64/head.S
> new file mode 100644
> index 0000000000..0b289c713a
> --- /dev/null
> +++ b/xen/arch/ppc/ppc64/head.S
> @@ -0,0 +1,27 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +
> +.section .text.header, "ax", %progbits
> +
> +ENTRY(start)
> +    /*
> +     * Depending on how we were booted, the CPU could be running in either
> +     * Little Endian or Big Endian mode. The following trampoline from Linux
> +     * cleverly uses an instruction that encodes to a NOP if the CPU's
> +     * endianness matches the assumption of the assembler (LE, in our case)
> +     * or a branch to code that performs the endian switch in the other case.
> +     */

Sorry, I also meant to ask.  How prevalent is Big Endian in practice in
the Power world?

It's another area (like 4k pages) where I expect there to be plenty of
fun to be had with the codebase.

~Andrew
Julien Grall June 16, 2023, 8:39 p.m. UTC | #3
Hi,

On 16/06/2023 21:24, Andrew Cooper wrote:
>> --- /dev/null
>> +++ b/xen/arch/ppc/ppc64/head.S
>> @@ -0,0 +1,27 @@
>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>> +
>> +.section .text.header, "ax", %progbits
>> +
>> +ENTRY(start)
>> +    /*
>> +     * Depending on how we were booted, the CPU could be running in either
>> +     * Little Endian or Big Endian mode. The following trampoline from Linux
>> +     * cleverly uses an instruction that encodes to a NOP if the CPU's
>> +     * endianness matches the assumption of the assembler (LE, in our case)
>> +     * or a branch to code that performs the endian switch in the other case.
>> +     */
>> +    tdi 0, 0, 0x48    /* Reverse endian of b . + 8          */
>> +    b . + 44          /* Skip trampoline if endian is good  */
>> +    .long 0xa600607d  /* mfmsr r11                          */
>> +    .long 0x01006b69  /* xori r11,r11,1                     */
>> +    .long 0x00004039  /* li r10,0                           */
>> +    .long 0x6401417d  /* mtmsrd r10,1                       */
>> +    .long 0x05009f42  /* bcl 20,31,$+4                      */
>> +    .long 0xa602487d  /* mflr r10                           */
>> +    .long 0x14004a39  /* addi r10,r10,20                    */
>> +    .long 0xa6035a7d  /* mtsrr0 r10                         */
>> +    .long 0xa6037b7d  /* mtsrr1 r11                         */
>> +    .long 0x2400004c  /* rfid                               */
>> +
>> +    /* Now that the endianness is confirmed, continue */
>> +1:  b 1b
> 
> .size start, . - start
> .type start, %function

Shouldn't we introduce ENDPROC()/END() to avoid open-coding these two 
lines everywhere?

Cheers,
Shawn Anastasio June 16, 2023, 9 p.m. UTC | #4
On 6/16/23 3:27 PM, Andrew Cooper wrote:
> Sorry, I also meant to ask.  How prevalent is Big Endian in practice in
> the Power world?

Modern systems support operating in either endianness, but historically
most operating systems targeted Big Endian-only, and some older systems
didn't support Little Endian at all.

These days Little Endian is more prevalent (at least in the Open Source
world), and many Linux distributions only target LE. Despite this though,
most firmware on Power systems still operates in Big Endian mode
exclusively,
so it's the responsibility of the kernel to handle the switch to LE at
its entrypoint.

The FW being BE also needs to be considered whenever the kernel calls
into firmware, since it's the responsibility of the kernel to switch to BE
before making the call, and also to switch itself back to LE after the FW
routine returns. Typically it's just handled by sprinkling this trampoline
(via a macro) at all of the entry/return points.

> It's another area (like 4k pages) where I expect there to be plenty of
> fun to be had with the codebase.

Hopefully the choice to run in Little Endian mode will reduce the number
of pain points encountered.

Also it's worth mentioning that both 4k and 64k pages are supported,
though 64k is probably the more common choice.

> ~Andrew

Thanks,
Shawn
Jan Beulich June 19, 2023, 8:01 a.m. UTC | #5
On 16.06.2023 22:39, Julien Grall wrote:
> On 16/06/2023 21:24, Andrew Cooper wrote:
>>> --- /dev/null
>>> +++ b/xen/arch/ppc/ppc64/head.S
>>> @@ -0,0 +1,27 @@
>>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>>> +
>>> +.section .text.header, "ax", %progbits
>>> +
>>> +ENTRY(start)
>>> +    /*
>>> +     * Depending on how we were booted, the CPU could be running in either
>>> +     * Little Endian or Big Endian mode. The following trampoline from Linux
>>> +     * cleverly uses an instruction that encodes to a NOP if the CPU's
>>> +     * endianness matches the assumption of the assembler (LE, in our case)
>>> +     * or a branch to code that performs the endian switch in the other case.
>>> +     */
>>> +    tdi 0, 0, 0x48    /* Reverse endian of b . + 8          */
>>> +    b . + 44          /* Skip trampoline if endian is good  */
>>> +    .long 0xa600607d  /* mfmsr r11                          */
>>> +    .long 0x01006b69  /* xori r11,r11,1                     */
>>> +    .long 0x00004039  /* li r10,0                           */
>>> +    .long 0x6401417d  /* mtmsrd r10,1                       */
>>> +    .long 0x05009f42  /* bcl 20,31,$+4                      */
>>> +    .long 0xa602487d  /* mflr r10                           */
>>> +    .long 0x14004a39  /* addi r10,r10,20                    */
>>> +    .long 0xa6035a7d  /* mtsrr0 r10                         */
>>> +    .long 0xa6037b7d  /* mtsrr1 r11                         */
>>> +    .long 0x2400004c  /* rfid                               */
>>> +
>>> +    /* Now that the endianness is confirmed, continue */
>>> +1:  b 1b
>>
>> .size start, . - start
>> .type start, %function
> 
> Shouldn't we introduce ENDPROC()/END() to avoid open-coding these two 
> lines everywhere?

A proposal for this is pending, see "x86: annotate entry points with
type and size". That proposal extends to asking whether to use this
scheme uniformly, not just for x86.

Jan
Jan Beulich June 19, 2023, 8:02 a.m. UTC | #6
On 16.06.2023 22:27, Andrew Cooper wrote:
> On 16/06/2023 6:48 pm, Shawn Anastasio wrote:
>> diff --git a/xen/arch/ppc/ppc64/head.S b/xen/arch/ppc/ppc64/head.S
>> new file mode 100644
>> index 0000000000..0b289c713a
>> --- /dev/null
>> +++ b/xen/arch/ppc/ppc64/head.S
>> @@ -0,0 +1,27 @@
>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>> +
>> +.section .text.header, "ax", %progbits
>> +
>> +ENTRY(start)
>> +    /*
>> +     * Depending on how we were booted, the CPU could be running in either
>> +     * Little Endian or Big Endian mode. The following trampoline from Linux
>> +     * cleverly uses an instruction that encodes to a NOP if the CPU's
>> +     * endianness matches the assumption of the assembler (LE, in our case)
>> +     * or a branch to code that performs the endian switch in the other case.
>> +     */
> 
> Sorry, I also meant to ask.  How prevalent is Big Endian in practice in
> the Power world?
> 
> It's another area (like 4k pages) where I expect there to be plenty of
> fun to be had with the codebase.

Why? I'd expect "fun" if ppc was meaning to run in big-endian mode.

Jan
Shawn Anastasio June 19, 2023, 3:49 p.m. UTC | #7
On 6/16/23 3:24 PM, Andrew Cooper wrote:
> On 16/06/2023 6:48 pm, Shawn Anastasio wrote:
>> Add the build system changes required to build for ppc64le (POWER8+).
>> As of now the resulting image simply boots to an infinite loop.
>>
>> $ make XEN_TARGET_ARCH=ppc64 -C xen openpower_defconfig
>> $ make XEN_TARGET_ARCH=ppc64 SUBSYSTEMS=xen -C xen build
> 
> I think the first of these isn't needed, given the config ARCH_DEFCONFIG
> default.  I'd suggest dropping it.

It seems like the build system expects an `$(ARCH)_defconfig` present if
you don't manually specify a defconfig target. I see riscv64 has a
tiny64_defconfig and a riscv64_defconfig that are idential, probably for
this same reason. Would you like me to take the same approach of
duplicating openpower_defconfig to ppc64_defconfig?

> On the second, what is the SUBSYSTEMS=xen?  It's not needed given the
> stripped down build system, but I don't see why we'd ever be compiling
> Xen with some kind of subsystem configuration for something else.

This was a remnant of the old head.o-only TARGET build command that I
got from the initial riscv commit. You're correct that it's unnecessary
now and I'll drop it from the commit message.

>> diff --git a/config/ppc64.mk b/config/ppc64.mk
>> new file mode 100644
>> index 0000000000..597f0668c3
>> --- /dev/null
>> +++ b/config/ppc64.mk
>> @@ -0,0 +1,5 @@
>> +CONFIG_PPC := y
>> +CONFIG_PPC64 := y
>> +CONFIG_PPC_$(XEN_OS) := y
> 
> I know you're copying the existing architectures, but I'm pretty certain
> these $(XEN_OS) expressions are pretty bogus.  The userspace stuff in
> tools/ may need to know the host OS it's being built for, but Xen really
> doesn't.
> 
> I'm pretty sure it will compile with this dropped, so please do.  I'll
> see about patching it out of the other architectures.

Sure, I'll drop this in v5.

>> diff --git a/xen/arch/ppc/Kconfig b/xen/arch/ppc/Kconfig
>> new file mode 100644
>> index 0000000000..a0a70adef4
>> --- /dev/null
>> +++ b/xen/arch/ppc/Kconfig
>> @@ -0,0 +1,42 @@
>> +config PPC
>> +	def_bool y
>> +
>> +config PPC64
>> +	def_bool y
>> +	select 64BIT
>> +
>> +config ARCH_DEFCONFIG
>> +	string
>> +	default "arch/ppc/configs/openpower_defconfig"
>> +
>> +menu "Architecture Features"
>> +
>> +source "arch/Kconfig"
>> +
>> +endmenu
>> +
>> +menu "ISA Selection"
>> +
>> +choice
>> +	prompt "Base ISA"
>> +	default POWER_ISA_2_07B if PPC64
>> +	help
>> +	  This selects the base ISA version that Xen will target.
>> +
>> +config POWER_ISA_2_07B
>> +	bool "Power ISA 2.07B"
>> +	help
>> +	  Target version 2.07B of the Power ISA (POWER8)
>> +
>> +config POWER_ISA_3_00
>> +	bool "Power ISA 3.00"
>> +	help
>> +	  Target version 3.00 of the Power ISA (POWER9)
> 
> For both of these, it will be helpful for anyone who isn't as
> PPC-knowledgeable if the POWER8/9 was in the title too, seeing as
> they're the most common name.
> 
> But as I'm a noob here too, how different are Power8 and 9?  Given they
> share a head.S, they're presumably not too disjoint in terms of ISA.
>
> While being able to target a specific CPU is something we're trying to
> retrofit to Xen, by default we do expect it to run on as broad a set of
> systems as possible.

They're not that different, and a kernel built for POWER8 should just
work on POWER9. The intent was to specify a baseline feature set that
guards whether, i.e., POWER9/ISA3.0-only features should be built.

> If that's not feasible, then fine, but if it is, it ought to be the
> default.  Which might be as simple as saying "or later" somewhere in
> this text, or might be a giant can of worms that I shouldn't open...

Originally the help text for the two ISA config options ended in a "+"
but that was deemed ambiguous. Would adding "or later" to the help text
for the two options clarify it sufficiently?

>> diff --git a/xen/arch/ppc/include/asm/page-bits.h b/xen/arch/ppc/include/asm/page-bits.h
>> new file mode 100644
>> index 0000000000..4c01bf9716
>> --- /dev/null
>> +++ b/xen/arch/ppc/include/asm/page-bits.h
>> @@ -0,0 +1,7 @@
>> +#ifndef __PPC_PAGE_BITS_H__
>> +#define __PPC_PAGE_BITS_H__
>> +
>> +#define PAGE_SHIFT              16 /* 64 KiB Pages */
>> +#define PADDR_BITS              48
> 
> Is 64k the minimum granularity?  Or is 4k an option?

Both 4K and 64K are supported by the hardware.

> I ask because Xen has some very short sighted ABIs which we're still
> working on removing.  There are still quite a few expectations of
> PAGE_SHIFT being 12.
> 
> To be clear, we're looking to fix all of these ABIs, but I suspect it
> will be an easier lift to begin with at 4k.  (Or perhaps the right thing
> is to double down and just get them fixed.)

Interesting. Given this I'm inclined to go with 4k just to reduce pain
points during initial bring up, though supporting 64k would definitely
be desirable going forward.

>> +
>> +#endif /* __PPC_PAGE_BITS_H__ */
>> diff --git a/xen/arch/ppc/ppc64/Makefile b/xen/arch/ppc/ppc64/Makefile
>> new file mode 100644
>> index 0000000000..3340058c08
>> --- /dev/null
>> +++ b/xen/arch/ppc/ppc64/Makefile
>> @@ -0,0 +1 @@
>> +obj-y += head.o
>> diff --git a/xen/arch/ppc/ppc64/asm-offsets.c b/xen/arch/ppc/ppc64/asm-offsets.c
>> new file mode 100644
>> index 0000000000..e69de29bb2
>> diff --git a/xen/arch/ppc/ppc64/head.S b/xen/arch/ppc/ppc64/head.S
>> new file mode 100644
>> index 0000000000..0b289c713a
>> --- /dev/null
>> +++ b/xen/arch/ppc/ppc64/head.S
>> @@ -0,0 +1,27 @@
>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>> +
>> +.section .text.header, "ax", %progbits
>> +
>> +ENTRY(start)
>> +    /*
>> +     * Depending on how we were booted, the CPU could be running in either
>> +     * Little Endian or Big Endian mode. The following trampoline from Linux
>> +     * cleverly uses an instruction that encodes to a NOP if the CPU's
>> +     * endianness matches the assumption of the assembler (LE, in our case)
>> +     * or a branch to code that performs the endian switch in the other case.
>> +     */
>> +    tdi 0, 0, 0x48    /* Reverse endian of b . + 8          */
>> +    b . + 44          /* Skip trampoline if endian is good  */
>> +    .long 0xa600607d  /* mfmsr r11                          */
>> +    .long 0x01006b69  /* xori r11,r11,1                     */
>> +    .long 0x00004039  /* li r10,0                           */
>> +    .long 0x6401417d  /* mtmsrd r10,1                       */
>> +    .long 0x05009f42  /* bcl 20,31,$+4                      */
>> +    .long 0xa602487d  /* mflr r10                           */
>> +    .long 0x14004a39  /* addi r10,r10,20                    */
>> +    .long 0xa6035a7d  /* mtsrr0 r10                         */
>> +    .long 0xa6037b7d  /* mtsrr1 r11                         */
>> +    .long 0x2400004c  /* rfid                               */
>> +
>> +    /* Now that the endianness is confirmed, continue */
>> +1:  b 1b
> 
> .size start, . - start
> .type start, %function
> 
> Lets get the ELF metadata right from the start.

Good point. Following the example in the Power ELFv2 ABI
specification [1] for function type annotation, I'll place

.type start, @function

in the ENTRY macro. It's not clear what the difference between %function
and @function are in this context (my toolchain seems to accept both and
produce the same ELF metadata), but the latter is more idiomatic in
Power assembly. The same goes for its placement before the entrypoint
vs. after the last instruction.

As for the size annotation, I'll follow Julien's suggestion and
introduce an END macro.

>> diff --git a/xen/arch/ppc/xen.lds.S b/xen/arch/ppc/xen.lds.S
>> new file mode 100644
>> index 0000000000..a72e519c6a
>> --- /dev/null
>> +++ b/xen/arch/ppc/xen.lds.S
>> @@ -0,0 +1,172 @@
>> <snip>
>> +    DISCARD_SECTIONS
>> +
>> +    STABS_DEBUG_SECTIONS
>> +
>> +    ELF_DETAILS_SECTIONS
>> +}
> 
> In the other architectures, we now assert that sections such as .got are
> empty, because we've had enough bugs in the past.
> 
> I'd recommend doing the same from the outset for all the dynamic
> relocation sections, unless you're expecting to have to support them?

No plans on supporting dynamic relocation (for now), so I can go ahead
and add these assertions.

> ~Andrew

Thanks,
Shawn

[1] Page 77 https://wiki.raptorcs.com/w/images/7/70/Leabi-20170510.pdf
Jan Beulich June 19, 2023, 4:07 p.m. UTC | #8
On 19.06.2023 17:49, Shawn Anastasio wrote:
> On 6/16/23 3:24 PM, Andrew Cooper wrote:
>> On 16/06/2023 6:48 pm, Shawn Anastasio wrote:
>>> Add the build system changes required to build for ppc64le (POWER8+).
>>> As of now the resulting image simply boots to an infinite loop.
>>>
>>> $ make XEN_TARGET_ARCH=ppc64 -C xen openpower_defconfig
>>> $ make XEN_TARGET_ARCH=ppc64 SUBSYSTEMS=xen -C xen build
>>
>> I think the first of these isn't needed, given the config ARCH_DEFCONFIG
>> default.  I'd suggest dropping it.
> 
> It seems like the build system expects an `$(ARCH)_defconfig` present if
> you don't manually specify a defconfig target. I see riscv64 has a
> tiny64_defconfig and a riscv64_defconfig that are idential, probably for
> this same reason. Would you like me to take the same approach of
> duplicating openpower_defconfig to ppc64_defconfig?

It's a symlink for RISC-V iirc, so wants to be that same way for PPC
then (for the time being, again like there).

>>> --- /dev/null
>>> +++ b/xen/arch/ppc/ppc64/head.S
>>> @@ -0,0 +1,27 @@
>>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>>> +
>>> +.section .text.header, "ax", %progbits
>>> +
>>> +ENTRY(start)
>>> +    /*
>>> +     * Depending on how we were booted, the CPU could be running in either
>>> +     * Little Endian or Big Endian mode. The following trampoline from Linux
>>> +     * cleverly uses an instruction that encodes to a NOP if the CPU's
>>> +     * endianness matches the assumption of the assembler (LE, in our case)
>>> +     * or a branch to code that performs the endian switch in the other case.
>>> +     */
>>> +    tdi 0, 0, 0x48    /* Reverse endian of b . + 8          */
>>> +    b . + 44          /* Skip trampoline if endian is good  */
>>> +    .long 0xa600607d  /* mfmsr r11                          */
>>> +    .long 0x01006b69  /* xori r11,r11,1                     */
>>> +    .long 0x00004039  /* li r10,0                           */
>>> +    .long 0x6401417d  /* mtmsrd r10,1                       */
>>> +    .long 0x05009f42  /* bcl 20,31,$+4                      */
>>> +    .long 0xa602487d  /* mflr r10                           */
>>> +    .long 0x14004a39  /* addi r10,r10,20                    */
>>> +    .long 0xa6035a7d  /* mtsrr0 r10                         */
>>> +    .long 0xa6037b7d  /* mtsrr1 r11                         */
>>> +    .long 0x2400004c  /* rfid                               */
>>> +
>>> +    /* Now that the endianness is confirmed, continue */
>>> +1:  b 1b
>>
>> .size start, . - start
>> .type start, %function
>>
>> Lets get the ELF metadata right from the start.
> 
> Good point. Following the example in the Power ELFv2 ABI
> specification [1] for function type annotation, I'll place
> 
> .type start, @function
> 
> in the ENTRY macro. It's not clear what the difference between %function
> and @function are in this context (my toolchain seems to accept both and
> produce the same ELF metadata), but the latter is more idiomatic in
> Power assembly. The same goes for its placement before the entrypoint
> vs. after the last instruction.

% and @ are entirely the same here, except for targets like arm-*,
where @ starts a comment.

>>> --- /dev/null
>>> +++ b/xen/arch/ppc/xen.lds.S
>>> @@ -0,0 +1,172 @@
>>> <snip>
>>> +    DISCARD_SECTIONS
>>> +
>>> +    STABS_DEBUG_SECTIONS
>>> +
>>> +    ELF_DETAILS_SECTIONS
>>> +}
>>
>> In the other architectures, we now assert that sections such as .got are
>> empty, because we've had enough bugs in the past.
>>
>> I'd recommend doing the same from the outset for all the dynamic
>> relocation sections, unless you're expecting to have to support them?
> 
> No plans on supporting dynamic relocation (for now), so I can go ahead
> and add these assertions.

How would you ever use dynamic relocations? You have no loader to
process them for you. (Yes, there can of course be relocation free
head.S code which deals with relocations, but doing such in assembly
is likely not the best idea. Yet as soon as you enter C code you're
at risk of hitting a place requiring such a relocation, perhaps
simply because of a "careless" function call on some infrequently
used code path.)

Jan
Andrew Cooper June 19, 2023, 4:25 p.m. UTC | #9
On 19/06/2023 4:49 pm, Shawn Anastasio wrote:
> On 6/16/23 3:24 PM, Andrew Cooper wrote:
>> On 16/06/2023 6:48 pm, Shawn Anastasio wrote:
>>> Add the build system changes required to build for ppc64le (POWER8+).
>>> As of now the resulting image simply boots to an infinite loop.
>>>
>>> $ make XEN_TARGET_ARCH=ppc64 -C xen openpower_defconfig
>>> $ make XEN_TARGET_ARCH=ppc64 SUBSYSTEMS=xen -C xen build
>> I think the first of these isn't needed, given the config ARCH_DEFCONFIG
>> default.  I'd suggest dropping it.
> It seems like the build system expects an `$(ARCH)_defconfig` present if
> you don't manually specify a defconfig target. I see riscv64 has a
> tiny64_defconfig and a riscv64_defconfig that are idential, probably for
> this same reason. Would you like me to take the same approach of
> duplicating openpower_defconfig to ppc64_defconfig?

Or just rename openpower_defconfig to ppc64_defconfig ?

Is there any reason to keep openpower differently?

>> If that's not feasible, then fine, but if it is, it ought to be the
>> default.  Which might be as simple as saying "or later" somewhere in
>> this text, or might be a giant can of worms that I shouldn't open...
> Originally the help text for the two ISA config options ended in a "+"
> but that was deemed ambiguous. Would adding "or later" to the help text
> for the two options clarify it sufficiently?

Yeah, that would definitely help.

>>> diff --git a/xen/arch/ppc/include/asm/page-bits.h b/xen/arch/ppc/include/asm/page-bits.h
>>> new file mode 100644
>>> index 0000000000..4c01bf9716
>>> --- /dev/null
>>> +++ b/xen/arch/ppc/include/asm/page-bits.h
>>> @@ -0,0 +1,7 @@
>>> +#ifndef __PPC_PAGE_BITS_H__
>>> +#define __PPC_PAGE_BITS_H__
>>> +
>>> +#define PAGE_SHIFT              16 /* 64 KiB Pages */
>>> +#define PADDR_BITS              48
>> Is 64k the minimum granularity?  Or is 4k an option?
> Both 4K and 64K are supported by the hardware.
>
>> I ask because Xen has some very short sighted ABIs which we're still
>> working on removing.  There are still quite a few expectations of
>> PAGE_SHIFT being 12.
>>
>> To be clear, we're looking to fix all of these ABIs, but I suspect it
>> will be an easier lift to begin with at 4k.  (Or perhaps the right thing
>> is to double down and just get them fixed.)
> Interesting. Given this I'm inclined to go with 4k just to reduce pain
> points during initial bring up, though supporting 64k would definitely
> be desirable going forward.

Maybe keep as 64k for now, with 4k as a backup if major problems are
encountered?

I honestly don't know how much of Xen's common code is non-4k-clean, and
this is the best opportunity to find out.

>>> +
>>> +#endif /* __PPC_PAGE_BITS_H__ */
>>> diff --git a/xen/arch/ppc/ppc64/Makefile b/xen/arch/ppc/ppc64/Makefile
>>> new file mode 100644
>>> index 0000000000..3340058c08
>>> --- /dev/null
>>> +++ b/xen/arch/ppc/ppc64/Makefile
>>> @@ -0,0 +1 @@
>>> +obj-y += head.o
>>> diff --git a/xen/arch/ppc/ppc64/asm-offsets.c b/xen/arch/ppc/ppc64/asm-offsets.c
>>> new file mode 100644
>>> index 0000000000..e69de29bb2
>>> diff --git a/xen/arch/ppc/ppc64/head.S b/xen/arch/ppc/ppc64/head.S
>>> new file mode 100644
>>> index 0000000000..0b289c713a
>>> --- /dev/null
>>> +++ b/xen/arch/ppc/ppc64/head.S
>>> @@ -0,0 +1,27 @@
>>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>>> +
>>> +.section .text.header, "ax", %progbits
>>> +
>>> +ENTRY(start)
>>> +    /*
>>> +     * Depending on how we were booted, the CPU could be running in either
>>> +     * Little Endian or Big Endian mode. The following trampoline from Linux
>>> +     * cleverly uses an instruction that encodes to a NOP if the CPU's
>>> +     * endianness matches the assumption of the assembler (LE, in our case)
>>> +     * or a branch to code that performs the endian switch in the other case.
>>> +     */
>>> +    tdi 0, 0, 0x48    /* Reverse endian of b . + 8          */
>>> +    b . + 44          /* Skip trampoline if endian is good  */
>>> +    .long 0xa600607d  /* mfmsr r11                          */
>>> +    .long 0x01006b69  /* xori r11,r11,1                     */
>>> +    .long 0x00004039  /* li r10,0                           */
>>> +    .long 0x6401417d  /* mtmsrd r10,1                       */
>>> +    .long 0x05009f42  /* bcl 20,31,$+4                      */
>>> +    .long 0xa602487d  /* mflr r10                           */
>>> +    .long 0x14004a39  /* addi r10,r10,20                    */
>>> +    .long 0xa6035a7d  /* mtsrr0 r10                         */
>>> +    .long 0xa6037b7d  /* mtsrr1 r11                         */
>>> +    .long 0x2400004c  /* rfid                               */
>>> +
>>> +    /* Now that the endianness is confirmed, continue */
>>> +1:  b 1b
>> .size start, . - start
>> .type start, %function
>>
>> Lets get the ELF metadata right from the start.
> Good point. Following the example in the Power ELFv2 ABI
> specification [1] for function type annotation, I'll place
>
> .type start, @function
>
> in the ENTRY macro. It's not clear what the difference between %function
> and @function are in this context (my toolchain seems to accept both and
> produce the same ELF metadata), but the latter is more idiomatic in
> Power assembly. The same goes for its placement before the entrypoint
> vs. after the last instruction.
>
> As for the size annotation, I'll follow Julien's suggestion and
> introduce an END macro.

There are reasons why this doesn't exist/work yet, and that's a swamp
you swamp you absolutely don't want to wade into at this point.

Just leave the raw annotations for now, and they'll be cleaned up when
the bugs with the various proposed options have been fixed.

~Andrew
Shawn Anastasio June 19, 2023, 4:29 p.m. UTC | #10
On 6/19/23 11:07 AM, Jan Beulich wrote:
>> It seems like the build system expects an `$(ARCH)_defconfig` present if
>> you don't manually specify a defconfig target. I see riscv64 has a
>> tiny64_defconfig and a riscv64_defconfig that are idential, probably for
>> this same reason. Would you like me to take the same approach of
>> duplicating openpower_defconfig to ppc64_defconfig?
> 
> It's a symlink for RISC-V iirc, so wants to be that same way for PPC
> then (for the time being, again like there).

Ah, you're right, riscv uses a symlink. I'll use that approach here as
well then.

>> Good point. Following the example in the Power ELFv2 ABI
>> specification [1] for function type annotation, I'll place
>>
>> .type start, @function
>>
>> in the ENTRY macro. It's not clear what the difference between %function
>> and @function are in this context (my toolchain seems to accept both and
>> produce the same ELF metadata), but the latter is more idiomatic in
>> Power assembly. The same goes for its placement before the entrypoint
>> vs. after the last instruction.
> 
> % and @ are entirely the same here, except for targets like arm-*,
> where @ starts a comment.

Okay, thanks for the confirmation. Is there a preference for which one
is used? I'd lean towards @ just for the sake of remaining idiomatic,
but since they're equivalent it doesn't really matter from my
perspective.

>> No plans on supporting dynamic relocation (for now), so I can go ahead
>> and add these assertions.
> 
> How would you ever use dynamic relocations? You have no loader to
> process them for you. (Yes, there can of course be relocation free
> head.S code which deals with relocations, but doing such in assembly
> is likely not the best idea. Yet as soon as you enter C code you're
> at risk of hitting a place requiring such a relocation, perhaps
> simply because of a "careless" function call on some infrequently
> used code path.)

Right, those concerns are definitely something we'd have to deal with if
we ever wanted to add support for relocations (and a large part of the
reason why I said I don't have plans to support it right now).

As an aside, I'll note that Linux uses the approach of doing the
relocation fixups entirely in assembly before the C entrypoint, but
that's obviously not a super desirable approach from a correctness or
maintainability standpoint.

> Jan

Thanks,
Shawn
Julien Grall June 19, 2023, 4:44 p.m. UTC | #11
Hi,

On 19/06/2023 17:25, Andrew Cooper wrote:
> On 19/06/2023 4:49 pm, Shawn Anastasio wrote:
>> On 6/16/23 3:24 PM, Andrew Cooper wrote:
>>> On 16/06/2023 6:48 pm, Shawn Anastasio wrote:
>>>> Add the build system changes required to build for ppc64le (POWER8+).
>>>> As of now the resulting image simply boots to an infinite loop.
>>>>
>>>> $ make XEN_TARGET_ARCH=ppc64 -C xen openpower_defconfig
>>>> $ make XEN_TARGET_ARCH=ppc64 SUBSYSTEMS=xen -C xen build
>>> I think the first of these isn't needed, given the config ARCH_DEFCONFIG
>>> default.  I'd suggest dropping it.
>> It seems like the build system expects an `$(ARCH)_defconfig` present if
>> you don't manually specify a defconfig target. I see riscv64 has a
>> tiny64_defconfig and a riscv64_defconfig that are idential, probably for
>> this same reason. Would you like me to take the same approach of
>> duplicating openpower_defconfig to ppc64_defconfig?
> 
> Or just rename openpower_defconfig to ppc64_defconfig ?
> 
> Is there any reason to keep openpower differently?
> 
>>> If that's not feasible, then fine, but if it is, it ought to be the
>>> default.  Which might be as simple as saying "or later" somewhere in
>>> this text, or might be a giant can of worms that I shouldn't open...
>> Originally the help text for the two ISA config options ended in a "+"
>> but that was deemed ambiguous. Would adding "or later" to the help text
>> for the two options clarify it sufficiently?
> 
> Yeah, that would definitely help.
> 
>>>> diff --git a/xen/arch/ppc/include/asm/page-bits.h b/xen/arch/ppc/include/asm/page-bits.h
>>>> new file mode 100644
>>>> index 0000000000..4c01bf9716
>>>> --- /dev/null
>>>> +++ b/xen/arch/ppc/include/asm/page-bits.h
>>>> @@ -0,0 +1,7 @@
>>>> +#ifndef __PPC_PAGE_BITS_H__
>>>> +#define __PPC_PAGE_BITS_H__
>>>> +
>>>> +#define PAGE_SHIFT              16 /* 64 KiB Pages */
>>>> +#define PADDR_BITS              48
>>> Is 64k the minimum granularity?  Or is 4k an option?
>> Both 4K and 64K are supported by the hardware.
>>
>>> I ask because Xen has some very short sighted ABIs which we're still
>>> working on removing.  There are still quite a few expectations of
>>> PAGE_SHIFT being 12.
>>>
>>> To be clear, we're looking to fix all of these ABIs, but I suspect it
>>> will be an easier lift to begin with at 4k.  (Or perhaps the right thing
>>> is to double down and just get them fixed.)
>> Interesting. Given this I'm inclined to go with 4k just to reduce pain
>> points during initial bring up, though supporting 64k would definitely
>> be desirable going forward.
> 
> Maybe keep as 64k for now, with 4k as a backup if major problems are
> encountered?
> 
> I honestly don't know how much of Xen's common code is non-4k-clean, and
> this is the best opportunity to find out.

The hypervisor itself is probably alright. For the tools and the kernel, 
we did some work a few years ago so the code don't assume the kernel and 
the hypervisor are using the same page size.

The tools and kernel have hardcoded expectation for the tools. Have a 
look at XC_PAGE_SIZE (tools) and XEN_PAGE_SHIFT (linux).

There was an attempt from Costin Lupu a couple of years ago to clean-up 
the definition (see [1]) but this was never merged. I can't remember why...

Now regarding the default page-size for PPC. At the moment, the 
page-size of the ABI is tie to the hypervisor.

For the ABI, it is best to use the bigger size (i.e. 64KB) because with 
just some rework in the hypervisor, you could run the same guest image 
on both a 4KB and 64KB.

Therefore, I think the 64KB size for the hypervisor is probably the 
better choice for the initial port. This will avoid some external ABI 
issue in the future if you want to support more page-size (we have the 
problem on Arm as we started with 4KB). You would also not rely on a 
newer ABI.

Cheers,

[1] 
https://lore.kernel.org/xen-devel/cover.1628519855.git.costin.lupu@cs.pub.ro/
Jan Beulich June 20, 2023, 6:43 a.m. UTC | #12
On 19.06.2023 18:29, Shawn Anastasio wrote:
> On 6/19/23 11:07 AM, Jan Beulich wrote:
>>> Good point. Following the example in the Power ELFv2 ABI
>>> specification [1] for function type annotation, I'll place
>>>
>>> .type start, @function
>>>
>>> in the ENTRY macro. It's not clear what the difference between %function
>>> and @function are in this context (my toolchain seems to accept both and
>>> produce the same ELF metadata), but the latter is more idiomatic in
>>> Power assembly. The same goes for its placement before the entrypoint
>>> vs. after the last instruction.
>>
>> % and @ are entirely the same here, except for targets like arm-*,
>> where @ starts a comment.
> 
> Okay, thanks for the confirmation. Is there a preference for which one
> is used? I'd lean towards @ just for the sake of remaining idiomatic,
> but since they're equivalent it doesn't really matter from my
> perspective.

In arch specific code your fine to use what you think is a better fit.
There's a proposal though to make these ELF annotations arch-independent,
and if we went that route, we'd have to use % (or allow arch overrides)
to fit all architectures.

Jan
Jan Beulich June 20, 2023, 6:49 a.m. UTC | #13
On 19.06.2023 18:44, Julien Grall wrote:
> Hi,
> 
> On 19/06/2023 17:25, Andrew Cooper wrote:
>> On 19/06/2023 4:49 pm, Shawn Anastasio wrote:
>>> On 6/16/23 3:24 PM, Andrew Cooper wrote:
>>>> On 16/06/2023 6:48 pm, Shawn Anastasio wrote:
>>>>> Add the build system changes required to build for ppc64le (POWER8+).
>>>>> As of now the resulting image simply boots to an infinite loop.
>>>>>
>>>>> $ make XEN_TARGET_ARCH=ppc64 -C xen openpower_defconfig
>>>>> $ make XEN_TARGET_ARCH=ppc64 SUBSYSTEMS=xen -C xen build
>>>> I think the first of these isn't needed, given the config ARCH_DEFCONFIG
>>>> default.  I'd suggest dropping it.
>>> It seems like the build system expects an `$(ARCH)_defconfig` present if
>>> you don't manually specify a defconfig target. I see riscv64 has a
>>> tiny64_defconfig and a riscv64_defconfig that are idential, probably for
>>> this same reason. Would you like me to take the same approach of
>>> duplicating openpower_defconfig to ppc64_defconfig?
>>
>> Or just rename openpower_defconfig to ppc64_defconfig ?
>>
>> Is there any reason to keep openpower differently?
>>
>>>> If that's not feasible, then fine, but if it is, it ought to be the
>>>> default.  Which might be as simple as saying "or later" somewhere in
>>>> this text, or might be a giant can of worms that I shouldn't open...
>>> Originally the help text for the two ISA config options ended in a "+"
>>> but that was deemed ambiguous. Would adding "or later" to the help text
>>> for the two options clarify it sufficiently?
>>
>> Yeah, that would definitely help.
>>
>>>>> diff --git a/xen/arch/ppc/include/asm/page-bits.h b/xen/arch/ppc/include/asm/page-bits.h
>>>>> new file mode 100644
>>>>> index 0000000000..4c01bf9716
>>>>> --- /dev/null
>>>>> +++ b/xen/arch/ppc/include/asm/page-bits.h
>>>>> @@ -0,0 +1,7 @@
>>>>> +#ifndef __PPC_PAGE_BITS_H__
>>>>> +#define __PPC_PAGE_BITS_H__
>>>>> +
>>>>> +#define PAGE_SHIFT              16 /* 64 KiB Pages */
>>>>> +#define PADDR_BITS              48
>>>> Is 64k the minimum granularity?  Or is 4k an option?
>>> Both 4K and 64K are supported by the hardware.
>>>
>>>> I ask because Xen has some very short sighted ABIs which we're still
>>>> working on removing.  There are still quite a few expectations of
>>>> PAGE_SHIFT being 12.
>>>>
>>>> To be clear, we're looking to fix all of these ABIs, but I suspect it
>>>> will be an easier lift to begin with at 4k.  (Or perhaps the right thing
>>>> is to double down and just get them fixed.)
>>> Interesting. Given this I'm inclined to go with 4k just to reduce pain
>>> points during initial bring up, though supporting 64k would definitely
>>> be desirable going forward.
>>
>> Maybe keep as 64k for now, with 4k as a backup if major problems are
>> encountered?
>>
>> I honestly don't know how much of Xen's common code is non-4k-clean, and
>> this is the best opportunity to find out.
> 
> The hypervisor itself is probably alright. For the tools and the kernel, 
> we did some work a few years ago so the code don't assume the kernel and 
> the hypervisor are using the same page size.
> 
> The tools and kernel have hardcoded expectation for the tools. Have a 
> look at XC_PAGE_SIZE (tools) and XEN_PAGE_SHIFT (linux).
> 
> There was an attempt from Costin Lupu a couple of years ago to clean-up 
> the definition (see [1]) but this was never merged. I can't remember why...
> 
> Now regarding the default page-size for PPC. At the moment, the 
> page-size of the ABI is tie to the hypervisor.
> 
> For the ABI, it is best to use the bigger size (i.e. 64KB) because with 
> just some rework in the hypervisor, you could run the same guest image 
> on both a 4KB and 64KB.
> 
> Therefore, I think the 64KB size for the hypervisor is probably the 
> better choice for the initial port. This will avoid some external ABI 
> issue in the future if you want to support more page-size (we have the 
> problem on Arm as we started with 4KB). You would also not rely on a 
> newer ABI.

Just one remark here for consideration: Grant v2's sub-page grants use
a 16-bit length and page offset. There would be a corner case which
isn't representable (offset 0 length 64k).

Jan
Jan Beulich June 20, 2023, 10:15 a.m. UTC | #14
On 16.06.2023 19:48, Shawn Anastasio wrote:
> --- /dev/null
> +++ b/xen/arch/ppc/Kconfig
> @@ -0,0 +1,42 @@
> +config PPC
> +	def_bool y
> +
> +config PPC64
> +	def_bool y
> +	select 64BIT
> +
> +config ARCH_DEFCONFIG
> +	string
> +	default "arch/ppc/configs/openpower_defconfig"
> +
> +menu "Architecture Features"
> +
> +source "arch/Kconfig"
> +
> +endmenu
> +
> +menu "ISA Selection"
> +
> +choice
> +	prompt "Base ISA"
> +	default POWER_ISA_2_07B if PPC64

I think the "if" here is at best confusing. If / when ppc32 support
is added, a potentially different default here would make necessary
adjustments, yet imo would not be very likely to introduce this very
"if".

> --- /dev/null
> +++ b/xen/arch/ppc/arch.mk
> @@ -0,0 +1,12 @@
> +########################################
> +# Power-specific definitions
> +
> +ppc-march-$(CONFIG_POWER_ISA_2_07B) := power8
> +ppc-march-$(CONFIG_POWER_ISA_3_00) := power9
> +
> +CFLAGS += -m64 -mlittle-endian -mcpu=$(ppc-march-y)
> +CFLAGS += -mstrict-align -mcmodel=large -mabi=elfv2 -mno-altivec -mno-vsx

Just for my own education: Besides the expected effect, -mstrict-align
also appears to imply -mbit-align, which I'm not sure is intended here.
Could you clarify the intentions for me?

As to -mabi=elfv2, it looks as if this limits us to gcc12 and newer.
That's fine, but I think it wants pointing out in ./README (which has
a section for this kind of information).

> --- /dev/null
> +++ b/xen/arch/ppc/include/asm/config.h
> @@ -0,0 +1,63 @@
> +#ifndef __PPC_CONFIG_H__
> +#define __PPC_CONFIG_H__
> +
> +#include <xen/const.h>
> +#include <xen/page-size.h>
> +
> +#if defined(CONFIG_PPC64)
> +#define LONG_BYTEORDER 3
> +#define ELFSIZE        64
> +#define MAX_VIRT_CPUS  1024u
> +#else
> +#error "Unsupported PowerPC variant"
> +#endif
> +
> +#define BYTES_PER_LONG (1 << LONG_BYTEORDER)
> +#define BITS_PER_LONG  (BYTES_PER_LONG << 3)
> +#define POINTER_ALIGN  BYTES_PER_LONG
> +
> +#define BITS_PER_LLONG 64
> +
> +/* xen_ulong_t is always 64 bits */
> +#define BITS_PER_XEN_ULONG 64
> +
> +#define CONFIG_PPC_L1_CACHE_SHIFT  7
> +#define CONFIG_PAGEALLOC_MAX_ORDER 18
> +#define CONFIG_DOMU_MAX_ORDER      9
> +#define CONFIG_HWDOM_MAX_ORDER     10
> +
> +#define OPT_CONSOLE_STR "dtuart"
> +#define INVALID_VCPU_ID MAX_VIRT_CPUS
> +
> +/* Linkage for PPC */
> +#ifdef __ASSEMBLY__
> +#define ALIGN .align 2

I think I did ask for the same on RISC-V (yet sadly it's still .align
there): .align is notoriously ambiguous. May I ask that you use .p2align
(which I think is what is meant here, else .balign)?

Jan
Shawn Anastasio June 20, 2023, 3:28 p.m. UTC | #15
On 6/20/23 5:15 AM, Jan Beulich wrote:
> On 16.06.2023 19:48, Shawn Anastasio wrote:
>> --- /dev/null
>> +++ b/xen/arch/ppc/Kconfig
>> @@ -0,0 +1,42 @@
>> +config PPC
>> +	def_bool y
>> +
>> +config PPC64
>> +	def_bool y
>> +	select 64BIT
>> +
>> +config ARCH_DEFCONFIG
>> +	string
>> +	default "arch/ppc/configs/openpower_defconfig"
>> +
>> +menu "Architecture Features"
>> +
>> +source "arch/Kconfig"
>> +
>> +endmenu
>> +
>> +menu "ISA Selection"
>> +
>> +choice
>> +	prompt "Base ISA"
>> +	default POWER_ISA_2_07B if PPC64
> 
> I think the "if" here is at best confusing. If / when ppc32 support
> is added, a potentially different default here would make necessary
> adjustments, yet imo would not be very likely to introduce this very
> "if".

Fair enough, I'll drop the if.

>> --- /dev/null
>> +++ b/xen/arch/ppc/arch.mk
>> @@ -0,0 +1,12 @@
>> +########################################
>> +# Power-specific definitions
>> +
>> +ppc-march-$(CONFIG_POWER_ISA_2_07B) := power8
>> +ppc-march-$(CONFIG_POWER_ISA_3_00) := power9
>> +
>> +CFLAGS += -m64 -mlittle-endian -mcpu=$(ppc-march-y)
>> +CFLAGS += -mstrict-align -mcmodel=large -mabi=elfv2 -mno-altivec -mno-vsx
> 
> Just for my own education: Besides the expected effect, -mstrict-align
> also appears to imply -mbit-align, which I'm not sure is intended here.
> Could you clarify the intentions for me?

If I'm reading the GCC documentation right, -mbit-align is the default
behavior regardless of -mstrict-align. The intention was simply to avoid
the generation of unaligned memory accesses in the kernel, since they
are only handled in hardware up to a certain width.

> As to -mabi=elfv2, it looks as if this limits us to gcc12 and newer.
> That's fine, but I think it wants pointing out in ./README (which has
> a section for this kind of information).

This is not the case. The ELFv2 ABI was released in 2014 and has been
the de facto standard on ppc64le since its inception. I know that ELFv2
support in GCC dates back to at least version 6 (the earliest I can
remember using), but I believe it goes back even further than that.

>> --- /dev/null
>> +++ b/xen/arch/ppc/include/asm/config.h
>> @@ -0,0 +1,63 @@
>> +#ifndef __PPC_CONFIG_H__
>> +#define __PPC_CONFIG_H__
>> +
>> +#include <xen/const.h>
>> +#include <xen/page-size.h>
>> +
>> +#if defined(CONFIG_PPC64)
>> +#define LONG_BYTEORDER 3
>> +#define ELFSIZE        64
>> +#define MAX_VIRT_CPUS  1024u
>> +#else
>> +#error "Unsupported PowerPC variant"
>> +#endif
>> +
>> +#define BYTES_PER_LONG (1 << LONG_BYTEORDER)
>> +#define BITS_PER_LONG  (BYTES_PER_LONG << 3)
>> +#define POINTER_ALIGN  BYTES_PER_LONG
>> +
>> +#define BITS_PER_LLONG 64
>> +
>> +/* xen_ulong_t is always 64 bits */
>> +#define BITS_PER_XEN_ULONG 64
>> +
>> +#define CONFIG_PPC_L1_CACHE_SHIFT  7
>> +#define CONFIG_PAGEALLOC_MAX_ORDER 18
>> +#define CONFIG_DOMU_MAX_ORDER      9
>> +#define CONFIG_HWDOM_MAX_ORDER     10
>> +
>> +#define OPT_CONSOLE_STR "dtuart"
>> +#define INVALID_VCPU_ID MAX_VIRT_CPUS
>> +
>> +/* Linkage for PPC */
>> +#ifdef __ASSEMBLY__
>> +#define ALIGN .align 2
> 
> I think I did ask for the same on RISC-V (yet sadly it's still .align
> there): .align is notoriously ambiguous. May I ask that you use .p2align
> (which I think is what is meant here, else .balign)?

Sure, will do.

> Jan

Thanks,
Shawn
diff mbox series

Patch

diff --git a/config/ppc64.mk b/config/ppc64.mk
new file mode 100644
index 0000000000..597f0668c3
--- /dev/null
+++ b/config/ppc64.mk
@@ -0,0 +1,5 @@ 
+CONFIG_PPC := y
+CONFIG_PPC64 := y
+CONFIG_PPC_$(XEN_OS) := y
+
+CONFIG_XEN_INSTALL_SUFFIX :=
diff --git a/xen/Makefile b/xen/Makefile
index e89fc461fc..db5454fb58 100644
--- a/xen/Makefile
+++ b/xen/Makefile
@@ -38,7 +38,7 @@  EFI_MOUNTPOINT ?= $(BOOT_DIR)/efi
 ARCH=$(XEN_TARGET_ARCH)
 SRCARCH=$(shell echo $(ARCH) | \
           sed -e 's/x86.*/x86/' -e s'/arm\(32\|64\)/arm/g' \
-              -e s'/riscv.*/riscv/g')
+              -e s'/riscv.*/riscv/g' -e s'/ppc.*/ppc/g')
 export ARCH SRCARCH
 
 # Allow someone to change their config file
@@ -244,7 +244,7 @@  include $(XEN_ROOT)/Config.mk
 export TARGET_SUBARCH  := $(XEN_TARGET_ARCH)
 export TARGET_ARCH     := $(shell echo $(XEN_TARGET_ARCH) | \
                             sed -e 's/x86.*/x86/' -e s'/arm\(32\|64\)/arm/g' \
-                                -e s'/riscv.*/riscv/g')
+                                -e s'/riscv.*/riscv/g' -e s'/ppc.*/ppc/g')
 
 export CONFIG_SHELL := $(SHELL)
 export CC CXX LD NM OBJCOPY OBJDUMP ADDR2LINE
@@ -563,6 +563,7 @@  _clean:
 	$(Q)$(MAKE) $(clean)=xsm
 	$(Q)$(MAKE) $(clean)=crypto
 	$(Q)$(MAKE) $(clean)=arch/arm
+	$(Q)$(MAKE) $(clean)=arch/ppc
 	$(Q)$(MAKE) $(clean)=arch/riscv
 	$(Q)$(MAKE) $(clean)=arch/x86
 	$(Q)$(MAKE) $(clean)=test
diff --git a/xen/arch/ppc/Kconfig b/xen/arch/ppc/Kconfig
new file mode 100644
index 0000000000..a0a70adef4
--- /dev/null
+++ b/xen/arch/ppc/Kconfig
@@ -0,0 +1,42 @@ 
+config PPC
+	def_bool y
+
+config PPC64
+	def_bool y
+	select 64BIT
+
+config ARCH_DEFCONFIG
+	string
+	default "arch/ppc/configs/openpower_defconfig"
+
+menu "Architecture Features"
+
+source "arch/Kconfig"
+
+endmenu
+
+menu "ISA Selection"
+
+choice
+	prompt "Base ISA"
+	default POWER_ISA_2_07B if PPC64
+	help
+	  This selects the base ISA version that Xen will target.
+
+config POWER_ISA_2_07B
+	bool "Power ISA 2.07B"
+	help
+	  Target version 2.07B of the Power ISA (POWER8)
+
+config POWER_ISA_3_00
+	bool "Power ISA 3.00"
+	help
+	  Target version 3.00 of the Power ISA (POWER9)
+
+endchoice
+
+endmenu
+
+source "common/Kconfig"
+
+source "drivers/Kconfig"
diff --git a/xen/arch/ppc/Kconfig.debug b/xen/arch/ppc/Kconfig.debug
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/xen/arch/ppc/Makefile b/xen/arch/ppc/Makefile
new file mode 100644
index 0000000000..98220648af
--- /dev/null
+++ b/xen/arch/ppc/Makefile
@@ -0,0 +1,16 @@ 
+obj-$(CONFIG_PPC64) += ppc64/
+
+$(TARGET): $(TARGET)-syms
+	cp -f $< $@
+
+$(TARGET)-syms: $(objtree)/prelink.o $(obj)/xen.lds
+	$(LD) $(XEN_LDFLAGS) -T $(obj)/xen.lds -N $< $(build_id_linker) -o $@
+	$(NM) -pa --format=sysv $@ \
+		| $(objtree)/tools/symbols --all-symbols --xensyms --sysv --sort \
+		> $@.map
+
+$(obj)/xen.lds: $(src)/xen.lds.S FORCE
+	$(call if_changed_dep,cpp_lds_S)
+
+.PHONY: include
+include:
diff --git a/xen/arch/ppc/Rules.mk b/xen/arch/ppc/Rules.mk
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/xen/arch/ppc/arch.mk b/xen/arch/ppc/arch.mk
new file mode 100644
index 0000000000..36830457c6
--- /dev/null
+++ b/xen/arch/ppc/arch.mk
@@ -0,0 +1,12 @@ 
+########################################
+# Power-specific definitions
+
+ppc-march-$(CONFIG_POWER_ISA_2_07B) := power8
+ppc-march-$(CONFIG_POWER_ISA_3_00) := power9
+
+CFLAGS += -m64 -mlittle-endian -mcpu=$(ppc-march-y)
+CFLAGS += -mstrict-align -mcmodel=large -mabi=elfv2 -mno-altivec -mno-vsx
+
+# TODO: Drop override when more of the build is working
+override ALL_OBJS-y = arch/$(TARGET_ARCH)/built_in.o
+override ALL_LIBS-y =
diff --git a/xen/arch/ppc/configs/openpower_defconfig b/xen/arch/ppc/configs/openpower_defconfig
new file mode 100644
index 0000000000..8783eb3488
--- /dev/null
+++ b/xen/arch/ppc/configs/openpower_defconfig
@@ -0,0 +1,13 @@ 
+# CONFIG_SCHED_CREDIT is not set
+# CONFIG_SCHED_RTDS is not set
+# CONFIG_SCHED_NULL is not set
+# CONFIG_SCHED_ARINC653 is not set
+# CONFIG_TRACEBUFFER is not set
+# CONFIG_HYPFS is not set
+# CONFIG_GRANT_TABLE is not set
+# CONFIG_SPECULATIVE_HARDEN_ARRAY is not set
+
+CONFIG_PPC64=y
+CONFIG_DEBUG=y
+CONFIG_DEBUG_INFO=y
+CONFIG_EXPERT=y
diff --git a/xen/arch/ppc/include/asm/config.h b/xen/arch/ppc/include/asm/config.h
new file mode 100644
index 0000000000..7a2862ef7a
--- /dev/null
+++ b/xen/arch/ppc/include/asm/config.h
@@ -0,0 +1,63 @@ 
+#ifndef __PPC_CONFIG_H__
+#define __PPC_CONFIG_H__
+
+#include <xen/const.h>
+#include <xen/page-size.h>
+
+#if defined(CONFIG_PPC64)
+#define LONG_BYTEORDER 3
+#define ELFSIZE        64
+#define MAX_VIRT_CPUS  1024u
+#else
+#error "Unsupported PowerPC variant"
+#endif
+
+#define BYTES_PER_LONG (1 << LONG_BYTEORDER)
+#define BITS_PER_LONG  (BYTES_PER_LONG << 3)
+#define POINTER_ALIGN  BYTES_PER_LONG
+
+#define BITS_PER_LLONG 64
+
+/* xen_ulong_t is always 64 bits */
+#define BITS_PER_XEN_ULONG 64
+
+#define CONFIG_PPC_L1_CACHE_SHIFT  7
+#define CONFIG_PAGEALLOC_MAX_ORDER 18
+#define CONFIG_DOMU_MAX_ORDER      9
+#define CONFIG_HWDOM_MAX_ORDER     10
+
+#define OPT_CONSOLE_STR "dtuart"
+#define INVALID_VCPU_ID MAX_VIRT_CPUS
+
+/* Linkage for PPC */
+#ifdef __ASSEMBLY__
+#define ALIGN .align 2
+
+#define ENTRY(name)                                                            \
+    .globl name;                                                               \
+    ALIGN;                                                                     \
+    name:
+#endif
+
+#define XEN_VIRT_START _AT(UL, 0x400000)
+
+#define SMP_CACHE_BYTES (1 << 6)
+
+#define STACK_ORDER 2
+#define STACK_SIZE  (PAGE_SIZE << STACK_ORDER)
+
+/* 288 bytes below the stack pointer must be preserved by interrupt handlers */
+#define STACK_VOLATILE_AREA 288
+
+/* size of minimum stack frame; C code can write into the caller's stack */
+#define STACK_FRAME_OVERHEAD 32
+
+#endif /* __PPC_CONFIG_H__ */
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/ppc/include/asm/page-bits.h b/xen/arch/ppc/include/asm/page-bits.h
new file mode 100644
index 0000000000..4c01bf9716
--- /dev/null
+++ b/xen/arch/ppc/include/asm/page-bits.h
@@ -0,0 +1,7 @@ 
+#ifndef __PPC_PAGE_BITS_H__
+#define __PPC_PAGE_BITS_H__
+
+#define PAGE_SHIFT              16 /* 64 KiB Pages */
+#define PADDR_BITS              48
+
+#endif /* __PPC_PAGE_BITS_H__ */
diff --git a/xen/arch/ppc/ppc64/Makefile b/xen/arch/ppc/ppc64/Makefile
new file mode 100644
index 0000000000..3340058c08
--- /dev/null
+++ b/xen/arch/ppc/ppc64/Makefile
@@ -0,0 +1 @@ 
+obj-y += head.o
diff --git a/xen/arch/ppc/ppc64/asm-offsets.c b/xen/arch/ppc/ppc64/asm-offsets.c
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/xen/arch/ppc/ppc64/head.S b/xen/arch/ppc/ppc64/head.S
new file mode 100644
index 0000000000..0b289c713a
--- /dev/null
+++ b/xen/arch/ppc/ppc64/head.S
@@ -0,0 +1,27 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+.section .text.header, "ax", %progbits
+
+ENTRY(start)
+    /*
+     * Depending on how we were booted, the CPU could be running in either
+     * Little Endian or Big Endian mode. The following trampoline from Linux
+     * cleverly uses an instruction that encodes to a NOP if the CPU's
+     * endianness matches the assumption of the assembler (LE, in our case)
+     * or a branch to code that performs the endian switch in the other case.
+     */
+    tdi 0, 0, 0x48    /* Reverse endian of b . + 8          */
+    b . + 44          /* Skip trampoline if endian is good  */
+    .long 0xa600607d  /* mfmsr r11                          */
+    .long 0x01006b69  /* xori r11,r11,1                     */
+    .long 0x00004039  /* li r10,0                           */
+    .long 0x6401417d  /* mtmsrd r10,1                       */
+    .long 0x05009f42  /* bcl 20,31,$+4                      */
+    .long 0xa602487d  /* mflr r10                           */
+    .long 0x14004a39  /* addi r10,r10,20                    */
+    .long 0xa6035a7d  /* mtsrr0 r10                         */
+    .long 0xa6037b7d  /* mtsrr1 r11                         */
+    .long 0x2400004c  /* rfid                               */
+
+    /* Now that the endianness is confirmed, continue */
+1:  b 1b
diff --git a/xen/arch/ppc/xen.lds.S b/xen/arch/ppc/xen.lds.S
new file mode 100644
index 0000000000..a72e519c6a
--- /dev/null
+++ b/xen/arch/ppc/xen.lds.S
@@ -0,0 +1,172 @@ 
+#include <xen/xen.lds.h>
+
+#undef ENTRY
+#undef ALIGN
+
+OUTPUT_ARCH(powerpc:common64)
+ENTRY(start)
+
+PHDRS
+{
+    text PT_LOAD ;
+#if defined(BUILD_ID)
+    note PT_NOTE ;
+#endif
+}
+
+/**
+ * OF's base load address is 0x400000 (XEN_VIRT_START).
+ * By defining sections this way, we can keep our virtual address base at 0x400000
+ * while keeping the physical base at 0x0.
+ *
+ * Without this, OF incorrectly loads .text at 0x400000 + 0x400000 = 0x800000.
+ * Taken from x86/xen.lds.S
+ */
+#ifdef CONFIG_LD_IS_GNU
+# define DECL_SECTION(x) x : AT(ADDR(#x) - XEN_VIRT_START)
+#else
+# define DECL_SECTION(x) x : AT(ADDR(x) - XEN_VIRT_START)
+#endif
+
+SECTIONS
+{
+    . = XEN_VIRT_START;
+
+    DECL_SECTION(.text) {
+        _stext = .;            /* Text section */
+        *(.text.header)
+
+        *(.text.cold)
+        *(.text.unlikely .text.*_unlikely .text.unlikely.*)
+
+        *(.text)
+#ifdef CONFIG_CC_SPLIT_SECTIONS
+        *(.text.*)
+#endif
+
+        *(.fixup)
+        *(.gnu.warning)
+        . = ALIGN(POINTER_ALIGN);
+        _etext = .;             /* End of text section */
+    } :text
+
+    . = ALIGN(PAGE_SIZE);
+    DECL_SECTION(.rodata) {
+        _srodata = .;          /* Read-only data */
+        *(.rodata)
+        *(.rodata.*)
+        *(.data.rel.ro)
+        *(.data.rel.ro.*)
+
+        VPCI_ARRAY
+
+        . = ALIGN(POINTER_ALIGN);
+        _erodata = .;        /* End of read-only data */
+    } :text
+
+    #if defined(BUILD_ID)
+    . = ALIGN(4);
+    DECL_SECTION(.note.gnu.build-id) {
+        __note_gnu_build_id_start = .;
+        *(.note.gnu.build-id)
+        __note_gnu_build_id_end = .;
+    } :note :text
+    #endif
+    _erodata = .;                /* End of read-only data */
+
+    . = ALIGN(PAGE_SIZE);
+    DECL_SECTION(.data.ro_after_init) {
+        __ro_after_init_start = .;
+        *(.data.ro_after_init)
+        . = ALIGN(PAGE_SIZE);
+        __ro_after_init_end = .;
+    } : text
+
+    DECL_SECTION(.data.read_mostly) {
+        *(.data.read_mostly)
+    } :text
+
+    . = ALIGN(PAGE_SIZE);
+    DECL_SECTION(.data) {                    /* Data */
+        *(.data.page_aligned)
+        . = ALIGN(8);
+        __start_schedulers_array = .;
+        *(.data.schedulers)
+        __end_schedulers_array = .;
+
+        HYPFS_PARAM
+
+        *(.data .data.*)
+        CONSTRUCTORS
+    } :text
+
+    . = ALIGN(PAGE_SIZE);             /* Init code and data */
+    __init_begin = .;
+    DECL_SECTION(.init.text) {
+        _sinittext = .;
+        *(.init.text)
+        _einittext = .;
+        . = ALIGN(PAGE_SIZE);        /* Avoid mapping alt insns executable */
+    } :text
+
+    . = ALIGN(PAGE_SIZE);
+    DECL_SECTION(.init.data) {
+        *(.init.rodata)
+        *(.init.rodata.*)
+
+        . = ALIGN(POINTER_ALIGN);
+        __setup_start = .;
+        *(.init.setup)
+        __setup_end = .;
+
+        __initcall_start = .;
+        *(.initcallpresmp.init)
+        __presmp_initcall_end = .;
+        *(.initcall1.init)
+        __initcall_end = .;
+
+        LOCK_PROFILE_DATA
+
+        *(.init.data)
+        *(.init.data.rel)
+        *(.init.data.rel.*)
+
+        . = ALIGN(8);
+        __ctors_start = .;
+        *(.ctors)
+        *(.init_array)
+        *(SORT(.init_array.*))
+        __ctors_end = .;
+    } :text
+    . = ALIGN(POINTER_ALIGN);
+    __init_end = .;
+
+    DECL_SECTION(.bss) {                     /* BSS */
+        __bss_start = .;
+        *(.bss.stack_aligned)
+        *(.bss.page_aligned)
+        . = ALIGN(PAGE_SIZE);
+        __per_cpu_start = .;
+        *(.bss.percpu.page_aligned)
+        *(.bss.percpu)
+        . = ALIGN(SMP_CACHE_BYTES);
+        *(.bss.percpu.read_mostly)
+        . = ALIGN(SMP_CACHE_BYTES);
+        __per_cpu_data_end = .;
+        *(.bss .bss.*)
+        . = ALIGN(POINTER_ALIGN);
+        __bss_end = .;
+    } :text
+    _end = . ;
+
+    /* Section for the device tree blob (if any). */
+    DECL_SECTION(.dtb) { *(.dtb) } :text
+
+    DWARF2_DEBUG_SECTIONS
+
+    DISCARD_SECTIONS
+
+    STABS_DEBUG_SECTIONS
+
+    ELF_DETAILS_SECTIONS
+}