Message ID | 335ce2a18f8cce679dd8b30d11560989131b4337.1690579561.git.sanastasio@raptorengineering.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | xen/ppc: Add PowerNV bare metal support | expand |
On 28.07.2023 23:35, Shawn Anastasio wrote: > --- a/xen/arch/ppc/ppc64/head.S > +++ b/xen/arch/ppc/ppc64/head.S > @@ -1,9 +1,11 @@ > /* SPDX-License-Identifier: GPL-2.0-or-later */ > > #include <asm/asm-defns.h> > +#include <asm/asm-offsets.h> > > .section .text.header, "ax", %progbits > > + > ENTRY(start) Nit: Stray change? > @@ -11,16 +13,19 @@ ENTRY(start) > FIXUP_ENDIAN > > /* set up the TOC pointer */ > - LOAD_IMM32(%r2, .TOC.) > + bcl 20, 31, .+4 Could you use a label name instead of .+4? Aiui you really mean > +1: mflr %r12 ... "1f" there? Jan > + addis %r2, %r12, .TOC.-1b@ha > + addi %r2, %r2, .TOC.-1b@l > > /* set up the initial stack */ > - LOAD_IMM32(%r1, cpu0_boot_stack) > + LOAD_REG_ADDR(%r1, cpu0_boot_stack) > li %r11, 0 > stdu %r11, -STACK_FRAME_OVERHEAD(%r1) > > /* clear .bss */ > - LOAD_IMM32(%r14, __bss_start) > - LOAD_IMM32(%r15, __bss_end) > + LOAD_REG_ADDR(%r14, __bss_start) > + LOAD_REG_ADDR(%r15, __bss_end) > 1: > std %r11, 0(%r14) > addi %r14, %r14, 8
On 7/31/23 10:58 AM, Jan Beulich wrote: > On 28.07.2023 23:35, Shawn Anastasio wrote: >> --- a/xen/arch/ppc/ppc64/head.S >> +++ b/xen/arch/ppc/ppc64/head.S >> @@ -1,9 +1,11 @@ >> /* SPDX-License-Identifier: GPL-2.0-or-later */ >> >> #include <asm/asm-defns.h> >> +#include <asm/asm-offsets.h> >> >> .section .text.header, "ax", %progbits >> >> + >> ENTRY(start) > > Nit: Stray change? > >> @@ -11,16 +13,19 @@ ENTRY(start) >> FIXUP_ENDIAN >> >> /* set up the TOC pointer */ >> - LOAD_IMM32(%r2, .TOC.) >> + bcl 20, 31, .+4 > > Could you use a label name instead of .+4? Aiui you really mean > >> +1: mflr %r12 > > ... "1f" there? Yes, good point. I'll point out that this form of the `bcl` instruction is specifically defined in the ISA specification as the recommended way to obtain the address of the next instruction, and hardware implementations presumably optimize it. Using a label instead of +4 would of course be fine as long as the label immediately follows the bcl, but if the label was elsewhere then the optimization that the ISA allows for this specific instruction might not be hit. Just something that should be kept in mind in case this code is ever refactored. I'll change it to 1f in v2. > > Jan Thanks, Shawn
On 31.07.2023 21:09, Shawn Anastasio wrote: > On 7/31/23 10:58 AM, Jan Beulich wrote: >> On 28.07.2023 23:35, Shawn Anastasio wrote: >>> --- a/xen/arch/ppc/ppc64/head.S >>> +++ b/xen/arch/ppc/ppc64/head.S >>> @@ -1,9 +1,11 @@ >>> /* SPDX-License-Identifier: GPL-2.0-or-later */ >>> >>> #include <asm/asm-defns.h> >>> +#include <asm/asm-offsets.h> >>> >>> .section .text.header, "ax", %progbits >>> >>> + >>> ENTRY(start) >> >> Nit: Stray change? >> >>> @@ -11,16 +13,19 @@ ENTRY(start) >>> FIXUP_ENDIAN >>> >>> /* set up the TOC pointer */ >>> - LOAD_IMM32(%r2, .TOC.) >>> + bcl 20, 31, .+4 >> >> Could you use a label name instead of .+4? Aiui you really mean >> >>> +1: mflr %r12 >> >> ... "1f" there? > > Yes, good point. I'll point out that this form of the `bcl` instruction > is specifically defined in the ISA specification as the recommended > way to obtain the address of the next instruction, and hardware > implementations presumably optimize it. Using a label instead of +4 > would of course be fine as long as the label immediately follows the > bcl, but if the label was elsewhere then the optimization that the ISA > allows for this specific instruction might not be hit. Just something > that should be kept in mind in case this code is ever refactored. Hmm, not really. If there was refactoring, the calculations using 1b would also need adjusting. That's why it's relevant that all "sides" agree on using one and the same label. (A similar "optimization" exists in x86 as well, just fyi, so we have precedent of that.) > I'll change it to 1f in v2. Thanks. Jan
On 28.07.2023 23:35, Shawn Anastasio wrote: > @@ -11,16 +13,19 @@ ENTRY(start) > FIXUP_ENDIAN > > /* set up the TOC pointer */ > - LOAD_IMM32(%r2, .TOC.) > + bcl 20, 31, .+4 > +1: mflr %r12 > + addis %r2, %r12, .TOC.-1b@ha > + addi %r2, %r2, .TOC.-1b@l > > /* set up the initial stack */ > - LOAD_IMM32(%r1, cpu0_boot_stack) > + LOAD_REG_ADDR(%r1, cpu0_boot_stack) Question: Would perhaps make sense to use %sp and %rtoc in place of %r1 and %r2 (not just here of course)? Jan
On 8/1/23 7:20 AM, Jan Beulich wrote: > On 28.07.2023 23:35, Shawn Anastasio wrote: >> @@ -11,16 +13,19 @@ ENTRY(start) >> FIXUP_ENDIAN >> >> /* set up the TOC pointer */ >> - LOAD_IMM32(%r2, .TOC.) >> + bcl 20, 31, .+4 >> +1: mflr %r12 >> + addis %r2, %r12, .TOC.-1b@ha >> + addi %r2, %r2, .TOC.-1b@l >> >> /* set up the initial stack */ >> - LOAD_IMM32(%r1, cpu0_boot_stack) >> + LOAD_REG_ADDR(%r1, cpu0_boot_stack) > > Question: Would perhaps make sense to use %sp and %rtoc in place of > %r1 and %r2 (not just here of course)? In my opinion, usage of the aliased register names ends up making the code less clear.
diff --git a/xen/arch/ppc/arch.mk b/xen/arch/ppc/arch.mk index 7eec22c283..0183b9ac6a 100644 --- a/xen/arch/ppc/arch.mk +++ b/xen/arch/ppc/arch.mk @@ -5,7 +5,7 @@ ppc-march-$(CONFIG_POWER_ISA_2_07B) := power8 ppc-march-$(CONFIG_POWER_ISA_3_00) := power9 CFLAGS += -m64 -mlittle-endian -mcpu=$(ppc-march-y) -CFLAGS += -mstrict-align -mcmodel=large -mabi=elfv2 -mno-altivec -mno-vsx +CFLAGS += -mstrict-align -mcmodel=medium -mabi=elfv2 -fPIC -mno-altivec -mno-vsx -msoft-float LDFLAGS += -m elf64lppc diff --git a/xen/arch/ppc/include/asm/asm-defns.h b/xen/arch/ppc/include/asm/asm-defns.h index 35b1c89d4e..5821f9024d 100644 --- a/xen/arch/ppc/include/asm/asm-defns.h +++ b/xen/arch/ppc/include/asm/asm-defns.h @@ -16,6 +16,13 @@ lis reg, (val) @h; \ ori reg, reg, (val) @l; \ +/* + * Load the address of a symbol from the TOC into the specified GPR. + */ +#define LOAD_REG_ADDR(reg,name) \ + addis reg,%r2,name@toc@ha; \ + addi reg,reg,name@toc@l + /* * Depending on how we were booted, the CPU could be running in either * Little Endian or Big Endian mode. The following trampoline from Linux diff --git a/xen/arch/ppc/include/asm/config.h b/xen/arch/ppc/include/asm/config.h index cb27d2781e..d060f0dca7 100644 --- a/xen/arch/ppc/include/asm/config.h +++ b/xen/arch/ppc/include/asm/config.h @@ -39,7 +39,7 @@ name: #endif -#define XEN_VIRT_START _AT(UL, 0x400000) +#define XEN_VIRT_START _AT(UL, 0xc000000000000000) #define SMP_CACHE_BYTES (1 << 6) diff --git a/xen/arch/ppc/ppc64/head.S b/xen/arch/ppc/ppc64/head.S index 02ff520458..5ac2dad2ee 100644 --- a/xen/arch/ppc/ppc64/head.S +++ b/xen/arch/ppc/ppc64/head.S @@ -1,9 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ #include <asm/asm-defns.h> +#include <asm/asm-offsets.h> .section .text.header, "ax", %progbits + ENTRY(start) /* * NOTE: argument registers (r3-r9) must be preserved until the C entrypoint @@ -11,16 +13,19 @@ ENTRY(start) FIXUP_ENDIAN /* set up the TOC pointer */ - LOAD_IMM32(%r2, .TOC.) + bcl 20, 31, .+4 +1: mflr %r12 + addis %r2, %r12, .TOC.-1b@ha + addi %r2, %r2, .TOC.-1b@l /* set up the initial stack */ - LOAD_IMM32(%r1, cpu0_boot_stack) + LOAD_REG_ADDR(%r1, cpu0_boot_stack) li %r11, 0 stdu %r11, -STACK_FRAME_OVERHEAD(%r1) /* clear .bss */ - LOAD_IMM32(%r14, __bss_start) - LOAD_IMM32(%r15, __bss_end) + LOAD_REG_ADDR(%r14, __bss_start) + LOAD_REG_ADDR(%r15, __bss_end) 1: std %r11, 0(%r14) addi %r14, %r14, 8 diff --git a/xen/arch/ppc/xen.lds.S b/xen/arch/ppc/xen.lds.S index c628cc0e5c..2fa81d5a83 100644 --- a/xen/arch/ppc/xen.lds.S +++ b/xen/arch/ppc/xen.lds.S @@ -15,25 +15,12 @@ PHDRS #endif } -/** - * OpenFirmware's base load address is 0x400000 (XEN_VIRT_START). - * By defining sections this way, we can keep our virtual address base at 0x400000 - * while keeping the physical base at 0x0. - * - * Otherwise, OpenFirmware incorrectly loads .text at 0x400000 + 0x400000 = 0x800000. - * Taken from x86/xen.lds.S - */ -#ifdef CONFIG_LD_IS_GNU -# define DECL_SECTION(x) x : AT(ADDR(#x) - XEN_VIRT_START) -#else -# define DECL_SECTION(x) x : AT(ADDR(x) - XEN_VIRT_START) -#endif - SECTIONS { . = XEN_VIRT_START; + _start = .; - DECL_SECTION(.text) { + .text : { _stext = .; /* Text section */ *(.text.header) @@ -52,7 +39,7 @@ SECTIONS } :text . = ALIGN(PAGE_SIZE); - DECL_SECTION(.rodata) { + .rodata : { _srodata = .; /* Read-only data */ *(.rodata) *(.rodata.*) @@ -67,7 +54,7 @@ SECTIONS #if defined(BUILD_ID) . = ALIGN(4); - DECL_SECTION(.note.gnu.build-id) { + .note.gnu.build-id : { __note_gnu_build_id_start = .; *(.note.gnu.build-id) __note_gnu_build_id_end = .; @@ -76,19 +63,19 @@ SECTIONS _erodata = .; /* End of read-only data */ . = ALIGN(PAGE_SIZE); - DECL_SECTION(.data.ro_after_init) { + .data.ro_after_init : { __ro_after_init_start = .; *(.data.ro_after_init) . = ALIGN(PAGE_SIZE); __ro_after_init_end = .; - } : text + } :text - DECL_SECTION(.data.read_mostly) { + .data.read_mostly : { *(.data.read_mostly) } :text . = ALIGN(PAGE_SIZE); - DECL_SECTION(.data) { /* Data */ + .data : { /* Data */ *(.data.page_aligned) . = ALIGN(8); __start_schedulers_array = .; @@ -103,7 +90,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); /* Init code and data */ __init_begin = .; - DECL_SECTION(.init.text) { + .init.text : { _sinittext = .; *(.init.text) _einittext = .; @@ -111,7 +98,7 @@ SECTIONS } :text . = ALIGN(PAGE_SIZE); - DECL_SECTION(.init.data) { + .init.data : { *(.init.rodata) *(.init.rodata.*) @@ -140,18 +127,18 @@ SECTIONS __ctors_end = .; } :text - DECL_SECTION(.got) { - *(.got) + .got : { + *(.got .toc) } :text - DECL_SECTION(.got.plt) { + .got.plt : { *(.got.plt) } :text . = ALIGN(POINTER_ALIGN); __init_end = .; - DECL_SECTION(.bss) { /* BSS */ + .bss : { /* BSS */ __bss_start = .; *(.bss.stack_aligned) *(.bss.page_aligned) @@ -167,10 +154,11 @@ SECTIONS . = ALIGN(POINTER_ALIGN); __bss_end = .; } :text + _end = . ; /* Section for the device tree blob (if any). */ - DECL_SECTION(.dtb) { *(.dtb) } :text + .dtb : { *(.dtb) } :text DWARF2_DEBUG_SECTIONS
Switch Xen to the medium PIC code model on Power. Among other things, this allows us to be load address agnostic and will open the door to booting on bare metal PowerNV systems that don't use OpenFirmware. Also update XEN_VIRT_START to 0xc000000000000000, which is equivalent to address 0x0 when the MMU is off. This prevents Open Firmware from loading Xen at an offset from its base load address, so the DECL_SECTION hack in xen.lds.S is no longer required. Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> --- xen/arch/ppc/arch.mk | 2 +- xen/arch/ppc/include/asm/asm-defns.h | 7 +++++ xen/arch/ppc/include/asm/config.h | 2 +- xen/arch/ppc/ppc64/head.S | 13 +++++--- xen/arch/ppc/xen.lds.S | 44 ++++++++++------------------ 5 files changed, 34 insertions(+), 34 deletions(-)