Message ID | 20200706150205.22053-1-geert+renesas@glider.be (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [PATCH/RFC,v7] ARM: boot: Obtain start of physical memory from DTB | expand |
On Mon, 6 Jul 2020 at 18:02, Geert Uytterhoeven <geert+renesas@glider.be> wrote: > > Currently, the start address of physical memory is obtained by masking > the program counter with a fixed mask of 0xf8000000. This mask value > was chosen as a balance between the requirements of different platforms. > However, this does require that the start address of physical memory is > a multiple of 128 MiB, precluding booting Linux on platforms where this > requirement is not fulfilled. > > Fix this limitation by obtaining the start address from the DTB instead, > if available (either explicitly passed, or appended to the kernel). > Fall back to the traditional method when needed. > > This allows to boot Linux on r7s9210/rza2mevb using the 64 MiB of SDRAM > on the RZA2MEVB sub board, which is located at 0x0C000000 (CS3 space), > i.e. not at a multiple of 128 MiB. > > Suggested-by: Nicolas Pitre <nico@fluxnic.net> > Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be> > Reviewed-by: Nicolas Pitre <nico@fluxnic.net> > Reviewed-by: Ard Biesheuvel <ardb@kernel.org> > Tested-by: Marek Szyprowski <m.szyprowski@samsung.com> > Tested-by: Dmitry Osipenko <digetx@gmail.com> > Cc: Lukasz Stelmach <l.stelmach@samsung.com> > --- > Marked as RFC, because: > 1. This is known to break crashkernel support, as the memory used by > the crashkernel is not marked reserved in DT (yet), > 2. Russell won't apply this for v5.9 anyway, > Would it help if we make this behavior dependent on a simple heuristic, e.g., if (round_up(load_address, 128M) >= dram_end) use dram_start from DT else use round_up(load_address, 128M) That way, the fix is guaranteed to only take effect for systems that cannot even boot otherwise, which fixes the crashkernel case, as well as other potential regressions due to the load address of the core kernel changing for existing boards. > v7: > - Rebase on top of commits 161e04a5bae58a65 ("ARM: decompressor: split > off _edata and stack base into separate object") and > c7c06b8843c0aa65 ("kbuild: remove cc-option test of > -fno-stack-protector") in next-20200706. > > v6: > - Rebase on top of commit 7ae4a78daacf240a ("ARM: 8969/1: > decompressor: simplify libfdt builds"), > - Include <linux/libfdt.h> instead of <libfdt.h>, > > v5: > - Add Tested-by, Reviewed-by, > - Round up start of memory to satisfy 16 MiB alignment rule, > > v4: > - Fix stack location after commit 184bf653a7a452c1 ("ARM: > decompressor: factor out routine to obtain the inflated image > size"), > > v3: > - Add Reviewed-by, > - Fix ATAGs with appended DTB, > - Add Tested-by, > > v2: > - Use "cmp r0, #-1", instead of "cmn r0, #1", > - Add missing stack setup, > - Support appended DTB. > --- > arch/arm/boot/compressed/Makefile | 5 +- > arch/arm/boot/compressed/fdt_get_mem_start.c | 56 ++++++++++++++++++++ > arch/arm/boot/compressed/head.S | 52 +++++++++++++++++- > 3 files changed, 111 insertions(+), 2 deletions(-) > create mode 100644 arch/arm/boot/compressed/fdt_get_mem_start.c > > diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile > index b1147b7f2c8d372e..b1c09faf276e9193 100644 > --- a/arch/arm/boot/compressed/Makefile > +++ b/arch/arm/boot/compressed/Makefile > @@ -81,10 +81,13 @@ libfdt_objs := fdt_rw.o fdt_ro.o fdt_wip.o fdt.o > ifeq ($(CONFIG_ARM_ATAG_DTB_COMPAT),y) > OBJS += $(libfdt_objs) atags_to_fdt.o > endif > +ifeq ($(CONFIG_USE_OF),y) > +OBJS += $(libfdt_objs) fdt_get_mem_start.o > +endif > > # -fstack-protector-strong triggers protection checks in this code, > # but it is being used too early to link to meaningful stack_chk logic. > -$(foreach o, $(libfdt_objs) atags_to_fdt.o, \ > +$(foreach o, $(libfdt_objs) atags_to_fdt.o fdt_get_mem_start.o, \ > $(eval CFLAGS_$(o) := -I $(srctree)/scripts/dtc/libfdt -fno-stack-protector)) > > # These were previously generated C files. When you are building the kernel > diff --git a/arch/arm/boot/compressed/fdt_get_mem_start.c b/arch/arm/boot/compressed/fdt_get_mem_start.c > new file mode 100644 > index 0000000000000000..ae71fde731b869d7 > --- /dev/null > +++ b/arch/arm/boot/compressed/fdt_get_mem_start.c > @@ -0,0 +1,56 @@ > +// SPDX-License-Identifier: GPL-2.0-only > + > +#include <linux/kernel.h> > +#include <linux/libfdt.h> > +#include <linux/sizes.h> > + > +static const void *getprop(const void *fdt, const char *node_path, > + const char *property) > +{ > + int offset = fdt_path_offset(fdt, node_path); > + > + if (offset == -FDT_ERR_NOTFOUND) > + return NULL; > + > + return fdt_getprop(fdt, offset, property, NULL); > +} > + > +static uint32_t get_addr_size(const void *fdt) > +{ > + const __be32 *addr_len = getprop(fdt, "/", "#address-cells"); > + > + if (!addr_len) { > + /* default */ > + return 1; > + } > + > + return fdt32_to_cpu(*addr_len); > +} > + > +/* > + * Get the start of physical memory > + */ > + > +unsigned long fdt_get_mem_start(const void *fdt) > +{ > + uint32_t addr_size, mem_start; > + const __be32 *memory; > + > + if (!fdt) > + return -1; > + > + if (*(__be32 *)fdt != cpu_to_fdt32(FDT_MAGIC)) > + return -1; > + > + /* Find the first memory node */ > + memory = getprop(fdt, "/memory", "reg"); > + if (!memory) > + return -1; > + > + /* There may be multiple cells on LPAE platforms */ > + addr_size = get_addr_size(fdt); > + > + mem_start = fdt32_to_cpu(memory[addr_size - 1]); > + /* Must be a multiple of 16 MiB for phys/virt patching */ > + return round_up(mem_start, SZ_16M); > +} > diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S > index 434a16982e344fe4..802621756ac0480b 100644 > --- a/arch/arm/boot/compressed/head.S > +++ b/arch/arm/boot/compressed/head.S > @@ -254,8 +254,56 @@ not_angel: > .text > > #ifdef CONFIG_AUTO_ZRELADDR > +#ifdef CONFIG_USE_OF > /* > - * Find the start of physical memory. As we are executing > + * Find the start of physical memory. > + * Try the DTB first, if available. > + */ > + adr r0, LC1 > + ldr sp, [r0] @ get stack location > + add sp, sp, r0 @ apply relocation > + > +#ifdef CONFIG_ARM_APPENDED_DTB > + /* > + * Look for an appended DTB. If found, use it and > + * move stack away from it. > + */ > + ldr r6, [r0, #4] @ get &_edata > + add r6, r6, r0 @ relocate it > + ldmia r6, {r0, r5} @ get DTB signature and size > +#ifndef __ARMEB__ > + ldr r1, =0xedfe0dd0 @ sig is 0xd00dfeed big endian > + /* convert DTB size to little endian */ > + eor r2, r5, r5, ror #16 > + bic r2, r2, #0x00ff0000 > + mov r5, r5, ror #8 > + eor r5, r5, r2, lsr #8 > +#else > + ldr r1, =0xd00dfeed > +#endif > + cmp r0, r1 @ do we have a DTB there? > + bne 1f > + > + /* preserve 64-bit alignment */ > + add r5, r5, #7 > + bic r5, r5, #7 > + add sp, sp, r5 @ if so, move stack above DTB > + mov r0, r6 @ and extract memory start from DTB > + b 2f > + > +1: > +#endif /* CONFIG_ARM_APPENDED_DTB */ > + > + mov r0, r8 > +2: > + bl fdt_get_mem_start > + mov r4, r0 > + cmp r0, #-1 > + bne 1f > +#endif /* CONFIG_USE_OF */ > + > + /* > + * Fall back to the traditional method. As we are executing > * without the MMU on, we are in the physical address space. > * We just need to get rid of any offset by aligning the > * address. > @@ -273,6 +321,8 @@ not_angel: > */ > mov r4, pc > and r4, r4, #0xf8000000 > + > +1: > /* Determine final kernel image address. */ > add r4, r4, #TEXT_OFFSET > #else > -- > 2.17.1 >
Hi Ard, On Tue, Jul 7, 2020 at 8:50 AM Ard Biesheuvel <ardb@kernel.org> wrote: > On Mon, 6 Jul 2020 at 18:02, Geert Uytterhoeven <geert+renesas@glider.be> wrote: > > Currently, the start address of physical memory is obtained by masking > > the program counter with a fixed mask of 0xf8000000. This mask value > > was chosen as a balance between the requirements of different platforms. > > However, this does require that the start address of physical memory is > > a multiple of 128 MiB, precluding booting Linux on platforms where this > > requirement is not fulfilled. > > > > Fix this limitation by obtaining the start address from the DTB instead, > > if available (either explicitly passed, or appended to the kernel). > > Fall back to the traditional method when needed. > > > > This allows to boot Linux on r7s9210/rza2mevb using the 64 MiB of SDRAM > > on the RZA2MEVB sub board, which is located at 0x0C000000 (CS3 space), > > i.e. not at a multiple of 128 MiB. > > > > Suggested-by: Nicolas Pitre <nico@fluxnic.net> > > Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be> > > Reviewed-by: Nicolas Pitre <nico@fluxnic.net> > > Reviewed-by: Ard Biesheuvel <ardb@kernel.org> > > Tested-by: Marek Szyprowski <m.szyprowski@samsung.com> > > Tested-by: Dmitry Osipenko <digetx@gmail.com> > > Cc: Lukasz Stelmach <l.stelmach@samsung.com> > > --- > > Marked as RFC, because: > > 1. This is known to break crashkernel support, as the memory used by > > the crashkernel is not marked reserved in DT (yet), > > 2. Russell won't apply this for v5.9 anyway, > > > > Would it help if we make this behavior dependent on a simple heuristic, e.g., > > if (round_up(load_address, 128M) >= dram_end) > use dram_start from DT > else > use round_up(load_address, 128M) > > That way, the fix is guaranteed to only take effect for systems that > cannot even boot otherwise, which fixes the crashkernel case, as well > as other potential regressions due to the load address of the core > kernel changing for existing boards. Thanks for your suggestion! 1. Shouldn't the calculation use round_down() instead of round_up()? 2. Likewise, "round_down(load_address, 128M) < dram_start from DT"? Gr{oetje,eeting}s, Geert
On Tue, 7 Jul 2020 at 10:39, Geert Uytterhoeven <geert@linux-m68k.org> wrote: > > Hi Ard, > > On Tue, Jul 7, 2020 at 8:50 AM Ard Biesheuvel <ardb@kernel.org> wrote: > > On Mon, 6 Jul 2020 at 18:02, Geert Uytterhoeven <geert+renesas@glider.be> wrote: > > > Currently, the start address of physical memory is obtained by masking > > > the program counter with a fixed mask of 0xf8000000. This mask value > > > was chosen as a balance between the requirements of different platforms. > > > However, this does require that the start address of physical memory is > > > a multiple of 128 MiB, precluding booting Linux on platforms where this > > > requirement is not fulfilled. > > > > > > Fix this limitation by obtaining the start address from the DTB instead, > > > if available (either explicitly passed, or appended to the kernel). > > > Fall back to the traditional method when needed. > > > > > > This allows to boot Linux on r7s9210/rza2mevb using the 64 MiB of SDRAM > > > on the RZA2MEVB sub board, which is located at 0x0C000000 (CS3 space), > > > i.e. not at a multiple of 128 MiB. > > > > > > Suggested-by: Nicolas Pitre <nico@fluxnic.net> > > > Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be> > > > Reviewed-by: Nicolas Pitre <nico@fluxnic.net> > > > Reviewed-by: Ard Biesheuvel <ardb@kernel.org> > > > Tested-by: Marek Szyprowski <m.szyprowski@samsung.com> > > > Tested-by: Dmitry Osipenko <digetx@gmail.com> > > > Cc: Lukasz Stelmach <l.stelmach@samsung.com> > > > --- > > > Marked as RFC, because: > > > 1. This is known to break crashkernel support, as the memory used by > > > the crashkernel is not marked reserved in DT (yet), > > > 2. Russell won't apply this for v5.9 anyway, > > > > > > > Would it help if we make this behavior dependent on a simple heuristic, e.g., > > > > if (round_up(load_address, 128M) >= dram_end) > > use dram_start from DT > > else > > use round_up(load_address, 128M) > > > > That way, the fix is guaranteed to only take effect for systems that > > cannot even boot otherwise, which fixes the crashkernel case, as well > > as other potential regressions due to the load address of the core > > kernel changing for existing boards. > > Thanks for your suggestion! > 1. Shouldn't the calculation use round_down() instead of round_up()? > 2. Likewise, "round_down(load_address, 128M) < dram_start from DT"? > No. What the code does today is round *up* to a multiple of 128 MB, and only when that leads to a problem, we should use the DT provided memory regions.
Hi Ard, On Tue, Jul 7, 2020 at 9:45 AM Ard Biesheuvel <ardb@kernel.org> wrote: > On Tue, 7 Jul 2020 at 10:39, Geert Uytterhoeven <geert@linux-m68k.org> wrote: > > On Tue, Jul 7, 2020 at 8:50 AM Ard Biesheuvel <ardb@kernel.org> wrote: > > > On Mon, 6 Jul 2020 at 18:02, Geert Uytterhoeven <geert+renesas@glider.be> wrote: > > > > Currently, the start address of physical memory is obtained by masking > > > > the program counter with a fixed mask of 0xf8000000. This mask value > > > > was chosen as a balance between the requirements of different platforms. > > > > However, this does require that the start address of physical memory is > > > > a multiple of 128 MiB, precluding booting Linux on platforms where this > > > > requirement is not fulfilled. > > > > > > > > Fix this limitation by obtaining the start address from the DTB instead, > > > > if available (either explicitly passed, or appended to the kernel). > > > > Fall back to the traditional method when needed. > > > > > > > > This allows to boot Linux on r7s9210/rza2mevb using the 64 MiB of SDRAM > > > > on the RZA2MEVB sub board, which is located at 0x0C000000 (CS3 space), > > > > i.e. not at a multiple of 128 MiB. > > > > > > > > Suggested-by: Nicolas Pitre <nico@fluxnic.net> > > > > Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be> > > > > Reviewed-by: Nicolas Pitre <nico@fluxnic.net> > > > > Reviewed-by: Ard Biesheuvel <ardb@kernel.org> > > > > Tested-by: Marek Szyprowski <m.szyprowski@samsung.com> > > > > Tested-by: Dmitry Osipenko <digetx@gmail.com> > > > > Cc: Lukasz Stelmach <l.stelmach@samsung.com> > > > > --- > > > > Marked as RFC, because: > > > > 1. This is known to break crashkernel support, as the memory used by > > > > the crashkernel is not marked reserved in DT (yet), > > > > 2. Russell won't apply this for v5.9 anyway, > > > > > > > > > > Would it help if we make this behavior dependent on a simple heuristic, e.g., > > > > > > if (round_up(load_address, 128M) >= dram_end) > > > use dram_start from DT > > > else > > > use round_up(load_address, 128M) > > > > > > That way, the fix is guaranteed to only take effect for systems that > > > cannot even boot otherwise, which fixes the crashkernel case, as well > > > as other potential regressions due to the load address of the core > > > kernel changing for existing boards. > > > > Thanks for your suggestion! > > 1. Shouldn't the calculation use round_down() instead of round_up()? > > 2. Likewise, "round_down(load_address, 128M) < dram_start from DT"? > > > > No. > > What the code does today is round *up* to a multiple of 128 MB, and > only when that leads to a problem, we should use the DT provided > memory regions. mov r4, pc and r4, r4, #0xf8000000 Surely this is rounding down, isn't it? add r4, r4, #TEXT_OFFSET Followed by adding a small number (typically 0x00008000). On RZA2MEVB with 64 MiB of RAM, the result lies below dram_start. BTW, how to obtain dram_end? From DT again? Do we trust it, as we apparently cannot trust dram_start in some configurations. Do I need more coffee? Gr{oetje,eeting}s, Geert
On Tue, 7 Jul 2020 at 10:58, Geert Uytterhoeven <geert@linux-m68k.org> wrote: > > Hi Ard, > > On Tue, Jul 7, 2020 at 9:45 AM Ard Biesheuvel <ardb@kernel.org> wrote: > > On Tue, 7 Jul 2020 at 10:39, Geert Uytterhoeven <geert@linux-m68k.org> wrote: > > > On Tue, Jul 7, 2020 at 8:50 AM Ard Biesheuvel <ardb@kernel.org> wrote: > > > > On Mon, 6 Jul 2020 at 18:02, Geert Uytterhoeven <geert+renesas@glider.be> wrote: > > > > > Currently, the start address of physical memory is obtained by masking > > > > > the program counter with a fixed mask of 0xf8000000. This mask value > > > > > was chosen as a balance between the requirements of different platforms. > > > > > However, this does require that the start address of physical memory is > > > > > a multiple of 128 MiB, precluding booting Linux on platforms where this > > > > > requirement is not fulfilled. > > > > > > > > > > Fix this limitation by obtaining the start address from the DTB instead, > > > > > if available (either explicitly passed, or appended to the kernel). > > > > > Fall back to the traditional method when needed. > > > > > > > > > > This allows to boot Linux on r7s9210/rza2mevb using the 64 MiB of SDRAM > > > > > on the RZA2MEVB sub board, which is located at 0x0C000000 (CS3 space), > > > > > i.e. not at a multiple of 128 MiB. > > > > > > > > > > Suggested-by: Nicolas Pitre <nico@fluxnic.net> > > > > > Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be> > > > > > Reviewed-by: Nicolas Pitre <nico@fluxnic.net> > > > > > Reviewed-by: Ard Biesheuvel <ardb@kernel.org> > > > > > Tested-by: Marek Szyprowski <m.szyprowski@samsung.com> > > > > > Tested-by: Dmitry Osipenko <digetx@gmail.com> > > > > > Cc: Lukasz Stelmach <l.stelmach@samsung.com> > > > > > --- > > > > > Marked as RFC, because: > > > > > 1. This is known to break crashkernel support, as the memory used by > > > > > the crashkernel is not marked reserved in DT (yet), > > > > > 2. Russell won't apply this for v5.9 anyway, > > > > > > > > > > > > > Would it help if we make this behavior dependent on a simple heuristic, e.g., > > > > > > > > if (round_up(load_address, 128M) >= dram_end) > > > > use dram_start from DT > > > > else > > > > use round_up(load_address, 128M) > > > > > > > > That way, the fix is guaranteed to only take effect for systems that > > > > cannot even boot otherwise, which fixes the crashkernel case, as well > > > > as other potential regressions due to the load address of the core > > > > kernel changing for existing boards. > > > > > > Thanks for your suggestion! > > > 1. Shouldn't the calculation use round_down() instead of round_up()? > > > 2. Likewise, "round_down(load_address, 128M) < dram_start from DT"? > > > > > > > No. > > > > What the code does today is round *up* to a multiple of 128 MB, and > > only when that leads to a problem, we should use the DT provided > > memory regions. > > mov r4, pc > and r4, r4, #0xf8000000 > > Surely this is rounding down, isn't it? > Yes you are right. > add r4, r4, #TEXT_OFFSET > > Followed by adding a small number (typically 0x00008000). > > On RZA2MEVB with 64 MiB of RAM, the result lies below dram_start. Yes, but in the general case, this is not true. Platforms that manage to boot using the current arrangement will do so by putting the decompressor above the first 128 MB aligned boundary covered by DRAM (and lose access to any memory below it via the linear mapping, but this memory could still be used via a no-map reserved-memory node AFAIK.) > BTW, how to obtain dram_end? From DT again? Do we trust it, as we > apparently cannot trust dram_start in some configurations. > > Do I need more coffee? > Maybe we both do :-) AIUI, the reason we cannot trust dram_start is because of the crashkernel case, i.e., the kernel may have deliberately been put high up in memory, and the expectation is that the load address is derived by rounding down the load address of the decompressor. Hence my suggestion to round *up* and compare with dram_end: if round_up(load_address, 128M) >= dram_end holds, it is guaranteed that no address exists in memory from which we could round down and arrive at a valid DRAM address. This would mean that your change will only affect platforms that were unable to boot to begin with, and not affect any other weird configurations including crashkernels etc
On Tue, 7 Jul 2020 at 11:35, Ard Biesheuvel <ardb@kernel.org> wrote: > > On Tue, 7 Jul 2020 at 10:58, Geert Uytterhoeven <geert@linux-m68k.org> wrote: > > > > Hi Ard, > > > > On Tue, Jul 7, 2020 at 9:45 AM Ard Biesheuvel <ardb@kernel.org> wrote: > > > On Tue, 7 Jul 2020 at 10:39, Geert Uytterhoeven <geert@linux-m68k.org> wrote: > > > > On Tue, Jul 7, 2020 at 8:50 AM Ard Biesheuvel <ardb@kernel.org> wrote: > > > > > On Mon, 6 Jul 2020 at 18:02, Geert Uytterhoeven <geert+renesas@glider.be> wrote: > > > > > > Currently, the start address of physical memory is obtained by masking > > > > > > the program counter with a fixed mask of 0xf8000000. This mask value > > > > > > was chosen as a balance between the requirements of different platforms. > > > > > > However, this does require that the start address of physical memory is > > > > > > a multiple of 128 MiB, precluding booting Linux on platforms where this > > > > > > requirement is not fulfilled. > > > > > > > > > > > > Fix this limitation by obtaining the start address from the DTB instead, > > > > > > if available (either explicitly passed, or appended to the kernel). > > > > > > Fall back to the traditional method when needed. > > > > > > > > > > > > This allows to boot Linux on r7s9210/rza2mevb using the 64 MiB of SDRAM > > > > > > on the RZA2MEVB sub board, which is located at 0x0C000000 (CS3 space), > > > > > > i.e. not at a multiple of 128 MiB. > > > > > > > > > > > > Suggested-by: Nicolas Pitre <nico@fluxnic.net> > > > > > > Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be> > > > > > > Reviewed-by: Nicolas Pitre <nico@fluxnic.net> > > > > > > Reviewed-by: Ard Biesheuvel <ardb@kernel.org> > > > > > > Tested-by: Marek Szyprowski <m.szyprowski@samsung.com> > > > > > > Tested-by: Dmitry Osipenko <digetx@gmail.com> > > > > > > Cc: Lukasz Stelmach <l.stelmach@samsung.com> > > > > > > --- > > > > > > Marked as RFC, because: > > > > > > 1. This is known to break crashkernel support, as the memory used by > > > > > > the crashkernel is not marked reserved in DT (yet), > > > > > > 2. Russell won't apply this for v5.9 anyway, > > > > > > > > > > > > > > > > Would it help if we make this behavior dependent on a simple heuristic, e.g., > > > > > > > > > > if (round_up(load_address, 128M) >= dram_end) > > > > > use dram_start from DT > > > > > else > > > > > use round_up(load_address, 128M) > > > > > > > > > > That way, the fix is guaranteed to only take effect for systems that > > > > > cannot even boot otherwise, which fixes the crashkernel case, as well > > > > > as other potential regressions due to the load address of the core > > > > > kernel changing for existing boards. > > > > > > > > Thanks for your suggestion! > > > > 1. Shouldn't the calculation use round_down() instead of round_up()? > > > > 2. Likewise, "round_down(load_address, 128M) < dram_start from DT"? > > > > > > > > > > No. > > > > > > What the code does today is round *up* to a multiple of 128 MB, and > > > only when that leads to a problem, we should use the DT provided > > > memory regions. > > > > mov r4, pc > > and r4, r4, #0xf8000000 > > > > Surely this is rounding down, isn't it? > > > > Yes you are right. > > > add r4, r4, #TEXT_OFFSET > > > > Followed by adding a small number (typically 0x00008000). > > > > On RZA2MEVB with 64 MiB of RAM, the result lies below dram_start. > > Yes, but in the general case, this is not true. Platforms that manage > to boot using the current arrangement will do so by putting the > decompressor above the first 128 MB aligned boundary covered by DRAM > (and lose access to any memory below it via the linear mapping, but > this memory could still be used via a no-map reserved-memory node > AFAIK.) > > > BTW, how to obtain dram_end? From DT again? Do we trust it, as we > > apparently cannot trust dram_start in some configurations. > > > > Do I need more coffee? > > > > Maybe we both do :-) > > AIUI, the reason we cannot trust dram_start is because of the > crashkernel case, i.e., the kernel may have deliberately been put high > up in memory, and the expectation is that the load address is derived > by rounding down the load address of the decompressor. > > Hence my suggestion to round *up* and compare with dram_end: if > round_up(load_address, 128M) >= dram_end holds, it is guaranteed that > no address exists in memory from which we could round down and arrive > at a valid DRAM address. This would mean that your change will only > affect platforms that were unable to boot to begin with, and not > affect any other weird configurations including crashkernels etc Uhm maybe not ... Time to get that coffee...
On Tue, 7 Jul 2020 at 11:40, Ard Biesheuvel <ardb@kernel.org> wrote: > > On Tue, 7 Jul 2020 at 11:35, Ard Biesheuvel <ardb@kernel.org> wrote: > > > > On Tue, 7 Jul 2020 at 10:58, Geert Uytterhoeven <geert@linux-m68k.org> wrote: > > > > > > Hi Ard, > > > > > > On Tue, Jul 7, 2020 at 9:45 AM Ard Biesheuvel <ardb@kernel.org> wrote: > > > > On Tue, 7 Jul 2020 at 10:39, Geert Uytterhoeven <geert@linux-m68k.org> wrote: > > > > > On Tue, Jul 7, 2020 at 8:50 AM Ard Biesheuvel <ardb@kernel.org> wrote: > > > > > > On Mon, 6 Jul 2020 at 18:02, Geert Uytterhoeven <geert+renesas@glider.be> wrote: > > > > > > > Currently, the start address of physical memory is obtained by masking > > > > > > > the program counter with a fixed mask of 0xf8000000. This mask value > > > > > > > was chosen as a balance between the requirements of different platforms. > > > > > > > However, this does require that the start address of physical memory is > > > > > > > a multiple of 128 MiB, precluding booting Linux on platforms where this > > > > > > > requirement is not fulfilled. > > > > > > > > > > > > > > Fix this limitation by obtaining the start address from the DTB instead, > > > > > > > if available (either explicitly passed, or appended to the kernel). > > > > > > > Fall back to the traditional method when needed. > > > > > > > > > > > > > > This allows to boot Linux on r7s9210/rza2mevb using the 64 MiB of SDRAM > > > > > > > on the RZA2MEVB sub board, which is located at 0x0C000000 (CS3 space), > > > > > > > i.e. not at a multiple of 128 MiB. > > > > > > > > > > > > > > Suggested-by: Nicolas Pitre <nico@fluxnic.net> > > > > > > > Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be> > > > > > > > Reviewed-by: Nicolas Pitre <nico@fluxnic.net> > > > > > > > Reviewed-by: Ard Biesheuvel <ardb@kernel.org> > > > > > > > Tested-by: Marek Szyprowski <m.szyprowski@samsung.com> > > > > > > > Tested-by: Dmitry Osipenko <digetx@gmail.com> > > > > > > > Cc: Lukasz Stelmach <l.stelmach@samsung.com> > > > > > > > --- > > > > > > > Marked as RFC, because: > > > > > > > 1. This is known to break crashkernel support, as the memory used by > > > > > > > the crashkernel is not marked reserved in DT (yet), > > > > > > > 2. Russell won't apply this for v5.9 anyway, > > > > > > > > > > > > > > > > > > > Would it help if we make this behavior dependent on a simple heuristic, e.g., > > > > > > > > > > > > if (round_up(load_address, 128M) >= dram_end) > > > > > > use dram_start from DT > > > > > > else > > > > > > use round_up(load_address, 128M) > > > > > > > > > > > > That way, the fix is guaranteed to only take effect for systems that > > > > > > cannot even boot otherwise, which fixes the crashkernel case, as well > > > > > > as other potential regressions due to the load address of the core > > > > > > kernel changing for existing boards. > > > > > > > > > > Thanks for your suggestion! > > > > > 1. Shouldn't the calculation use round_down() instead of round_up()? > > > > > 2. Likewise, "round_down(load_address, 128M) < dram_start from DT"? > > > > > > > > > > > > > No. > > > > > > > > What the code does today is round *up* to a multiple of 128 MB, and > > > > only when that leads to a problem, we should use the DT provided > > > > memory regions. > > > > > > mov r4, pc > > > and r4, r4, #0xf8000000 > > > > > > Surely this is rounding down, isn't it? > > > > > > > Yes you are right. > > > > > add r4, r4, #TEXT_OFFSET > > > > > > Followed by adding a small number (typically 0x00008000). > > > > > > On RZA2MEVB with 64 MiB of RAM, the result lies below dram_start. > > > > Yes, but in the general case, this is not true. Platforms that manage > > to boot using the current arrangement will do so by putting the > > decompressor above the first 128 MB aligned boundary covered by DRAM > > (and lose access to any memory below it via the linear mapping, but > > this memory could still be used via a no-map reserved-memory node > > AFAIK.) > > > > > BTW, how to obtain dram_end? From DT again? Do we trust it, as we > > > apparently cannot trust dram_start in some configurations. > > > > > > Do I need more coffee? > > > > > > > Maybe we both do :-) > > > > AIUI, the reason we cannot trust dram_start is because of the > > crashkernel case, i.e., the kernel may have deliberately been put high > > up in memory, and the expectation is that the load address is derived > > by rounding down the load address of the decompressor. > > > > Hence my suggestion to round *up* and compare with dram_end: if > > round_up(load_address, 128M) >= dram_end holds, it is guaranteed that > > no address exists in memory from which we could round down and arrive > > at a valid DRAM address. This would mean that your change will only > > affect platforms that were unable to boot to begin with, and not > > affect any other weird configurations including crashkernels etc > > Uhm maybe not ... > > Time to get that coffee... OK so we know that the memory base should be a 16 MB aligned value >= dram_start. This holds for the crashkernel as well, although in that case, the memory base is much higher than dram_start, and not right above it. So how about if we *only* use the DT dram_start as the memory base if it is aligned to 16 MB, and if rounding down from the load_address produces an address that is below it? That should cover your use case, but very conservatively, reducing the likelihood of regressions.
On Mon, Jul 6, 2020 at 5:03 PM Geert Uytterhoeven <geert+renesas@glider.be> wrote: > Currently, the start address of physical memory is obtained by masking > the program counter with a fixed mask of 0xf8000000. This mask value > was chosen as a balance between the requirements of different platforms. > However, this does require that the start address of physical memory is > a multiple of 128 MiB, precluding booting Linux on platforms where this > requirement is not fulfilled. > > Fix this limitation by obtaining the start address from the DTB instead, > if available (either explicitly passed, or appended to the kernel). > Fall back to the traditional method when needed. > > This allows to boot Linux on r7s9210/rza2mevb using the 64 MiB of SDRAM > on the RZA2MEVB sub board, which is located at 0x0C000000 (CS3 space), > i.e. not at a multiple of 128 MiB. > > Suggested-by: Nicolas Pitre <nico@fluxnic.net> > Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be> > Reviewed-by: Nicolas Pitre <nico@fluxnic.net> > Reviewed-by: Ard Biesheuvel <ardb@kernel.org> > Tested-by: Marek Szyprowski <m.szyprowski@samsung.com> > Tested-by: Dmitry Osipenko <digetx@gmail.com> > Cc: Lukasz Stelmach <l.stelmach@samsung.com> I tried to test this on the APQ8060 Qualcomm board. This is an odd beast, because physical memory starts at 0x40200000 which is 8MiB aligned, not even 16 MiB. Oddly this *works* with the mainline kernel, giving the following boot crawl: [ 0.000000] cma: Reserved 256 MiB at 0x50000000 [ 0.000000] Zone ranges: [ 0.000000] Normal [mem 0x0000000040200000-0x000000005fffffff] [ 0.000000] HighMem empty [ 0.000000] Movable zone start for each node [ 0.000000] Early memory node ranges [ 0.000000] node 0: [mem 0x0000000040200000-0x0000000042dfffff] [ 0.000000] node 0: [mem 0x0000000048000000-0x000000005fffffff] [ 0.000000] Initmem setup node 0 [mem 0x0000000040200000-0x000000005fffffff] It crashes so hard with this patch that I don't even get earlydebug messages. (Scary!) I also tried to simply load the kernel to 0x50000000 which solved an issue I had with KASan in the past, but it doesn't help. The first memblock is at 0x40200000 after all. Any hints at what may be going wrong here? No panic though - I know this platform is a stress test, but it'd be nice not to regress it. Yours, Linus Walleij
On Mon, Jul 20, 2020 at 11:45 AM Linus Walleij <linus.walleij@linaro.org> wrote: > > On Mon, Jul 6, 2020 at 5:03 PM Geert Uytterhoeven > > I tried to test this on the APQ8060 Qualcomm board. This is an odd beast, > because physical memory starts at 0x40200000 which is 8MiB aligned, > not even 16 MiB. Oddly this *works* with the mainline kernel, giving > the following boot crawl: > > [ 0.000000] cma: Reserved 256 MiB at 0x50000000 > [ 0.000000] Zone ranges: > [ 0.000000] Normal [mem 0x0000000040200000-0x000000005fffffff] > [ 0.000000] HighMem empty > [ 0.000000] Movable zone start for each node > [ 0.000000] Early memory node ranges > [ 0.000000] node 0: [mem 0x0000000040200000-0x0000000042dfffff] > [ 0.000000] node 0: [mem 0x0000000048000000-0x000000005fffffff] > [ 0.000000] Initmem setup node 0 [mem 0x0000000040200000-0x000000005fffffff] > > It crashes so hard with this patch that I don't even get earlydebug > messages. (Scary!) > > I also tried to simply load the kernel to 0x50000000 which solved > an issue I had with KASan in the past, but it doesn't help. The > first memblock is at 0x40200000 after all. > > Any hints at what may be going wrong here? > > No panic though - I know this platform is a stress test, but it'd be > nice not to regress it. No idea what /exactly/ is going wrong, but I would point out that this is one of the platforms that is handled as a special case in the Makefile when setting TEXT_OFFSET: # Text offset. This list is sorted numerically by address in order to # provide a means to avoid/resolve conflicts in multi-arch kernels. textofs-y := 0x00008000 # We don't want the htc bootloader to corrupt kernel during resume textofs-$(CONFIG_PM_H1940) := 0x00108000 # RTD1195 has Boot ROM at start of address space textofs-$(CONFIG_ARCH_REALTEK) := 0x00108000 # SA1111 DMA bug: we don't want the kernel to live in precious DMA-able memory ifeq ($(CONFIG_ARCH_SA1100),y) textofs-$(CONFIG_SA1111) := 0x00208000 endif textofs-$(CONFIG_ARCH_IPQ40XX) := 0x00208000 textofs-$(CONFIG_ARCH_MSM8X60) := 0x00208000 textofs-$(CONFIG_ARCH_MSM8960) := 0x00208000 textofs-$(CONFIG_ARCH_MESON) := 0x00208000 textofs-$(CONFIG_ARCH_AXXIA) := 0x00308000 TEXT_OFFSET := $(textofs-y) Arnd
On Mon, Jul 20, 2020 at 11:53 AM Arnd Bergmann <arnd@arndb.de> wrote: > No idea what /exactly/ is going wrong, but I would point out that this is one > of the platforms that is handled as a special case in the Makefile when > setting TEXT_OFFSET: (...) > textofs-$(CONFIG_ARCH_IPQ40XX) := 0x00208000 > textofs-$(CONFIG_ARCH_MSM8X60) := 0x00208000 > textofs-$(CONFIG_ARCH_MSM8960) := 0x00208000 But what on earth is this? I just deleted this and the platform boots just as well. It was originally added by Stephen in commit 9e775ad19f52d70a53797b4d0eb740c52b0a9567 "ARM: 7012/1: Set proper TEXT_OFFSET for newer MSMs" to patch around memblocks in the board files in mach-msm/* These boardfile hacks that seem to relate to this textofs are now *GONE* but this is still here! Laura, Stephen, Bjorn: can't we just delete these QCOM textofs things so as to clean out some confusion? Or is my APQ8060 odd once again and the rest of the world crashes if we remove this? Yours, Linus Walleij
Quoting Linus Walleij (2020-07-21 05:58:59) > On Mon, Jul 20, 2020 at 11:53 AM Arnd Bergmann <arnd@arndb.de> wrote: > > > No idea what /exactly/ is going wrong, but I would point out that this is one > > of the platforms that is handled as a special case in the Makefile when > > setting TEXT_OFFSET: > (...) > > textofs-$(CONFIG_ARCH_IPQ40XX) := 0x00208000 > > textofs-$(CONFIG_ARCH_MSM8X60) := 0x00208000 > > textofs-$(CONFIG_ARCH_MSM8960) := 0x00208000 > > But what on earth is this? I just deleted this and the platform > boots just as well. We need to shift the kernel text to start 2MB beyond the start of memory because there is the shared memory region used to communicate with other processors in the SoC there. It took a while for us to convince other OS folks in the company to put shared memory somewhere else besides the start of RAM, but eventually we won that battle. Does your booted kernel have its text section at the start of RAM or is it offset by 2MB without this change? Check out /proc/iomem to see where the kernel text is in relation to the start of RAM. I think the problem is the decompressor would have to parse the reserved memory sections in DT to figure out that it shouldn't decompress over shared memory, and changing the decompressor to do that was deemed "hard". Does this patch series resolve that? > > It was originally added by Stephen in > commit 9e775ad19f52d70a53797b4d0eb740c52b0a9567 That was almost a decade ago! Don't remind me of these things ;-) > "ARM: 7012/1: Set proper TEXT_OFFSET for newer MSMs" > to patch around memblocks in the board files in > mach-msm/* These boardfile hacks that seem to relate to this > textofs are now *GONE* but this is still here! > > Laura, Stephen, Bjorn: can't we just delete these QCOM > textofs things so as to clean out some confusion? > > Or is my APQ8060 odd once again and the rest of the world > crashes if we remove this? >
Hi Stephen, On Thu, Jul 23, 2020 at 3:19 AM Stephen Boyd <sboyd@kernel.org> wrote: > Quoting Linus Walleij (2020-07-21 05:58:59) > > On Mon, Jul 20, 2020 at 11:53 AM Arnd Bergmann <arnd@arndb.de> wrote: > > > No idea what /exactly/ is going wrong, but I would point out that this is one > > > of the platforms that is handled as a special case in the Makefile when > > > setting TEXT_OFFSET: > > (...) > > > textofs-$(CONFIG_ARCH_IPQ40XX) := 0x00208000 > > > textofs-$(CONFIG_ARCH_MSM8X60) := 0x00208000 > > > textofs-$(CONFIG_ARCH_MSM8960) := 0x00208000 > > > > But what on earth is this? I just deleted this and the platform > > boots just as well. > > We need to shift the kernel text to start 2MB beyond the start of memory > because there is the shared memory region used to communicate with other > processors in the SoC there. It took a while for us to convince other OS > folks in the company to put shared memory somewhere else besides the > start of RAM, but eventually we won that battle. > > Does your booted kernel have its text section at the start of RAM or is > it offset by 2MB without this change? Check out /proc/iomem to see where > the kernel text is in relation to the start of RAM. I think the problem > is the decompressor would have to parse the reserved memory sections in > DT to figure out that it shouldn't decompress over shared memory, and > changing the decompressor to do that was deemed "hard". Does this patch > series resolve that? As this patch adds C code to extract the start of memory from DT, it should be quite easy to add code to filter out regions marked reserved in DT. In fact that would be a prerequisite for making this work with crashkernel support (+ making the crashkernel code mark its memory as reserved in DT). Gr{oetje,eeting}s, Geert
Quoting Geert Uytterhoeven (2020-08-03 03:18:08) > Hi Stephen, > > On Thu, Jul 23, 2020 at 3:19 AM Stephen Boyd <sboyd@kernel.org> wrote: > > Quoting Linus Walleij (2020-07-21 05:58:59) > > > On Mon, Jul 20, 2020 at 11:53 AM Arnd Bergmann <arnd@arndb.de> wrote: > > > > No idea what /exactly/ is going wrong, but I would point out that this is one > > > > of the platforms that is handled as a special case in the Makefile when > > > > setting TEXT_OFFSET: > > > (...) > > > > textofs-$(CONFIG_ARCH_IPQ40XX) := 0x00208000 > > > > textofs-$(CONFIG_ARCH_MSM8X60) := 0x00208000 > > > > textofs-$(CONFIG_ARCH_MSM8960) := 0x00208000 > > > > > > But what on earth is this? I just deleted this and the platform > > > boots just as well. > > > > We need to shift the kernel text to start 2MB beyond the start of memory > > because there is the shared memory region used to communicate with other > > processors in the SoC there. It took a while for us to convince other OS > > folks in the company to put shared memory somewhere else besides the > > start of RAM, but eventually we won that battle. > > > > Does your booted kernel have its text section at the start of RAM or is > > it offset by 2MB without this change? Check out /proc/iomem to see where > > the kernel text is in relation to the start of RAM. I think the problem > > is the decompressor would have to parse the reserved memory sections in > > DT to figure out that it shouldn't decompress over shared memory, and > > changing the decompressor to do that was deemed "hard". Does this patch > > series resolve that? > > As this patch adds C code to extract the start of memory from DT, it > should be quite easy to add code to filter out regions marked reserved > in DT. In fact that would be a prerequisite for making this work with > crashkernel support (+ making the crashkernel code mark its memory as > reserved in DT). > Cool. Sounds like that may need to be done then before it works on these particular qcom platforms.
On Thu, Jul 23, 2020 at 3:19 AM Stephen Boyd <sboyd@kernel.org> wrote: > > > textofs-$(CONFIG_ARCH_IPQ40XX) := 0x00208000 > > > textofs-$(CONFIG_ARCH_MSM8X60) := 0x00208000 > > > textofs-$(CONFIG_ARCH_MSM8960) := 0x00208000 > > > > But what on earth is this? I just deleted this and the platform > > boots just as well. > > We need to shift the kernel text to start 2MB beyond the start of memory > because there is the shared memory region used to communicate with other > processors in the SoC there. It took a while for us to convince other OS > folks in the company to put shared memory somewhere else besides the > start of RAM, but eventually we won that battle. > > Does your booted kernel have its text section at the start of RAM or is > it offset by 2MB without this change? Check out /proc/iomem to see where > the kernel text is in relation to the start of RAM. The memory on this machine starts at 0x40200000 since the effect of the current code is to take pc &= 0xf8000000 and that results in 0x40000000 and then this adds textofs 0x00208000 to that resulting in 0x40208000 for the kernel physical RAM. Which is what we want to achieve since the RAM starts at 0x40200000. But TEXT_OFFSET is also used inside the kernel to offset the virtual memory. This means that when we set up the virtual memory split, the kernel virtual memory is also bumped by these 2 MB so the virtual memory starts at 0xC0208000 instead of 0xC0008000 as is normal. It looks weird to me but maybe someone can explain how logical that is? Yours, Linus Walleij
On Fri, 14 Aug 2020 at 16:03, Linus Walleij <linus.walleij@linaro.org> wrote: > > On Thu, Jul 23, 2020 at 3:19 AM Stephen Boyd <sboyd@kernel.org> wrote: > > > > > textofs-$(CONFIG_ARCH_IPQ40XX) := 0x00208000 > > > > textofs-$(CONFIG_ARCH_MSM8X60) := 0x00208000 > > > > textofs-$(CONFIG_ARCH_MSM8960) := 0x00208000 > > > > > > But what on earth is this? I just deleted this and the platform > > > boots just as well. > > > > We need to shift the kernel text to start 2MB beyond the start of memory > > because there is the shared memory region used to communicate with other > > processors in the SoC there. It took a while for us to convince other OS > > folks in the company to put shared memory somewhere else besides the > > start of RAM, but eventually we won that battle. > > > > Does your booted kernel have its text section at the start of RAM or is > > it offset by 2MB without this change? Check out /proc/iomem to see where > > the kernel text is in relation to the start of RAM. > > The memory on this machine starts at 0x40200000 since the effect > of the current code is to take pc &= 0xf8000000 and that results in > 0x40000000 and then this adds textofs 0x00208000 to that > resulting in 0x40208000 for the kernel physical RAM. Which > is what we want to achieve since the RAM starts at > 0x40200000. > > But TEXT_OFFSET is also used inside the kernel to offset the > virtual memory. This means that when we set up the virtual > memory split, the kernel virtual memory is also bumped by > these 2 MB so the virtual memory starts at 0xC0208000 > instead of 0xC0008000 as is normal. > > It looks weird to me but maybe someone can explain how > logical that is? > The ARM mm code assumes that the relative alignment between PA and VA is 16 MB, so if we skip 2 MB in the physical space, we must do the same in the virtual space.
Quoting Linus Walleij (2020-08-14 07:03:41) > On Thu, Jul 23, 2020 at 3:19 AM Stephen Boyd <sboyd@kernel.org> wrote: > > > > > textofs-$(CONFIG_ARCH_IPQ40XX) := 0x00208000 > > > > textofs-$(CONFIG_ARCH_MSM8X60) := 0x00208000 > > > > textofs-$(CONFIG_ARCH_MSM8960) := 0x00208000 > > > > > > But what on earth is this? I just deleted this and the platform > > > boots just as well. > > > > We need to shift the kernel text to start 2MB beyond the start of memory > > because there is the shared memory region used to communicate with other > > processors in the SoC there. It took a while for us to convince other OS > > folks in the company to put shared memory somewhere else besides the > > start of RAM, but eventually we won that battle. > > > > Does your booted kernel have its text section at the start of RAM or is > > it offset by 2MB without this change? Check out /proc/iomem to see where > > the kernel text is in relation to the start of RAM. > > The memory on this machine starts at 0x40200000 since the effect > of the current code is to take pc &= 0xf8000000 and that results in > 0x40000000 and then this adds textofs 0x00208000 to that > resulting in 0x40208000 for the kernel physical RAM. Which > is what we want to achieve since the RAM starts at > 0x40200000. The bootloader is telling the kernel that memory starts at 0x40200000 but in reality RAM or DDR starts at 0x40000000 and the first 2MB are reserved for shared memory. In the old days the bootloader would remove the shared memory region from the memory layout and update ATAGs to indicate that memory started at 0x40200000. > > But TEXT_OFFSET is also used inside the kernel to offset the > virtual memory. This means that when we set up the virtual > memory split, the kernel virtual memory is also bumped by > these 2 MB so the virtual memory starts at 0xC0208000 > instead of 0xC0008000 as is normal. > > It looks weird to me but maybe someone can explain how > logical that is? Yes, that's intentional. I believe that's because it will map the first 2MB of memmory otherwise with the wrong attributes. The kernel needs to map shared memory as non-cacheable or something like that so that communication to the modem isn't going through the cache and needing constant cleaning. Hope it helps! If not, we can probably dig up mailing list discussions on this.
On Fri, Aug 14, 2020 at 04:03:41PM +0200, Linus Walleij wrote: > But TEXT_OFFSET is also used inside the kernel to offset the > virtual memory. This means that when we set up the virtual > memory split, the kernel virtual memory is also bumped by > these 2 MB so the virtual memory starts at 0xC0208000 > instead of 0xC0008000 as is normal. No. Virtual memory starts without the 32KiB offset. The L1 swapper page table starts 16KiB below (or slightly more if LPAE) for example.
On Fri, Aug 14, 2020 at 04:06:03PM +0200, Ard Biesheuvel wrote: > On Fri, 14 Aug 2020 at 16:03, Linus Walleij <linus.walleij@linaro.org> wrote: > > > > On Thu, Jul 23, 2020 at 3:19 AM Stephen Boyd <sboyd@kernel.org> wrote: > > > > > > > textofs-$(CONFIG_ARCH_IPQ40XX) := 0x00208000 > > > > > textofs-$(CONFIG_ARCH_MSM8X60) := 0x00208000 > > > > > textofs-$(CONFIG_ARCH_MSM8960) := 0x00208000 > > > > > > > > But what on earth is this? I just deleted this and the platform > > > > boots just as well. > > > > > > We need to shift the kernel text to start 2MB beyond the start of memory > > > because there is the shared memory region used to communicate with other > > > processors in the SoC there. It took a while for us to convince other OS > > > folks in the company to put shared memory somewhere else besides the > > > start of RAM, but eventually we won that battle. > > > > > > Does your booted kernel have its text section at the start of RAM or is > > > it offset by 2MB without this change? Check out /proc/iomem to see where > > > the kernel text is in relation to the start of RAM. > > > > The memory on this machine starts at 0x40200000 since the effect > > of the current code is to take pc &= 0xf8000000 and that results in > > 0x40000000 and then this adds textofs 0x00208000 to that > > resulting in 0x40208000 for the kernel physical RAM. Which > > is what we want to achieve since the RAM starts at > > 0x40200000. > > > > But TEXT_OFFSET is also used inside the kernel to offset the > > virtual memory. This means that when we set up the virtual > > memory split, the kernel virtual memory is also bumped by > > these 2 MB so the virtual memory starts at 0xC0208000 > > instead of 0xC0008000 as is normal. > > > > It looks weird to me but maybe someone can explain how > > logical that is? > > > > The ARM mm code assumes that the relative alignment between PA and VA > is 16 MB, so if we skip 2 MB in the physical space, we must do the > same in the virtual space. That is a good thing; it makes the virtual to physical translations easy - we only have facility for offsets with bits 23..0 clear to make the code rewriting simple.
On Sat, Aug 15, 2020 at 11:16 AM Russell King - ARM Linux admin <linux@armlinux.org.uk> wrote: > On Fri, Aug 14, 2020 at 04:03:41PM +0200, Linus Walleij wrote: > > But TEXT_OFFSET is also used inside the kernel to offset the > > virtual memory. This means that when we set up the virtual > > memory split, the kernel virtual memory is also bumped by > > these 2 MB so the virtual memory starts at 0xC0208000 > > instead of 0xC0008000 as is normal. > > No. Virtual memory starts without the 32KiB offset. The L1 swapper > page table starts 16KiB below (or slightly more if LPAE) for example. Thanks Russell, I am struggling to learn these things. Slow learner. I'm reading through the assembler line-by-line and trying to learn things from the ground up now, let's see if I manage to get these things eventually. Best regards, Linus Walleij
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index b1147b7f2c8d372e..b1c09faf276e9193 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -81,10 +81,13 @@ libfdt_objs := fdt_rw.o fdt_ro.o fdt_wip.o fdt.o ifeq ($(CONFIG_ARM_ATAG_DTB_COMPAT),y) OBJS += $(libfdt_objs) atags_to_fdt.o endif +ifeq ($(CONFIG_USE_OF),y) +OBJS += $(libfdt_objs) fdt_get_mem_start.o +endif # -fstack-protector-strong triggers protection checks in this code, # but it is being used too early to link to meaningful stack_chk logic. -$(foreach o, $(libfdt_objs) atags_to_fdt.o, \ +$(foreach o, $(libfdt_objs) atags_to_fdt.o fdt_get_mem_start.o, \ $(eval CFLAGS_$(o) := -I $(srctree)/scripts/dtc/libfdt -fno-stack-protector)) # These were previously generated C files. When you are building the kernel diff --git a/arch/arm/boot/compressed/fdt_get_mem_start.c b/arch/arm/boot/compressed/fdt_get_mem_start.c new file mode 100644 index 0000000000000000..ae71fde731b869d7 --- /dev/null +++ b/arch/arm/boot/compressed/fdt_get_mem_start.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/kernel.h> +#include <linux/libfdt.h> +#include <linux/sizes.h> + +static const void *getprop(const void *fdt, const char *node_path, + const char *property) +{ + int offset = fdt_path_offset(fdt, node_path); + + if (offset == -FDT_ERR_NOTFOUND) + return NULL; + + return fdt_getprop(fdt, offset, property, NULL); +} + +static uint32_t get_addr_size(const void *fdt) +{ + const __be32 *addr_len = getprop(fdt, "/", "#address-cells"); + + if (!addr_len) { + /* default */ + return 1; + } + + return fdt32_to_cpu(*addr_len); +} + +/* + * Get the start of physical memory + */ + +unsigned long fdt_get_mem_start(const void *fdt) +{ + uint32_t addr_size, mem_start; + const __be32 *memory; + + if (!fdt) + return -1; + + if (*(__be32 *)fdt != cpu_to_fdt32(FDT_MAGIC)) + return -1; + + /* Find the first memory node */ + memory = getprop(fdt, "/memory", "reg"); + if (!memory) + return -1; + + /* There may be multiple cells on LPAE platforms */ + addr_size = get_addr_size(fdt); + + mem_start = fdt32_to_cpu(memory[addr_size - 1]); + /* Must be a multiple of 16 MiB for phys/virt patching */ + return round_up(mem_start, SZ_16M); +} diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 434a16982e344fe4..802621756ac0480b 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -254,8 +254,56 @@ not_angel: .text #ifdef CONFIG_AUTO_ZRELADDR +#ifdef CONFIG_USE_OF /* - * Find the start of physical memory. As we are executing + * Find the start of physical memory. + * Try the DTB first, if available. + */ + adr r0, LC1 + ldr sp, [r0] @ get stack location + add sp, sp, r0 @ apply relocation + +#ifdef CONFIG_ARM_APPENDED_DTB + /* + * Look for an appended DTB. If found, use it and + * move stack away from it. + */ + ldr r6, [r0, #4] @ get &_edata + add r6, r6, r0 @ relocate it + ldmia r6, {r0, r5} @ get DTB signature and size +#ifndef __ARMEB__ + ldr r1, =0xedfe0dd0 @ sig is 0xd00dfeed big endian + /* convert DTB size to little endian */ + eor r2, r5, r5, ror #16 + bic r2, r2, #0x00ff0000 + mov r5, r5, ror #8 + eor r5, r5, r2, lsr #8 +#else + ldr r1, =0xd00dfeed +#endif + cmp r0, r1 @ do we have a DTB there? + bne 1f + + /* preserve 64-bit alignment */ + add r5, r5, #7 + bic r5, r5, #7 + add sp, sp, r5 @ if so, move stack above DTB + mov r0, r6 @ and extract memory start from DTB + b 2f + +1: +#endif /* CONFIG_ARM_APPENDED_DTB */ + + mov r0, r8 +2: + bl fdt_get_mem_start + mov r4, r0 + cmp r0, #-1 + bne 1f +#endif /* CONFIG_USE_OF */ + + /* + * Fall back to the traditional method. As we are executing * without the MMU on, we are in the physical address space. * We just need to get rid of any offset by aligning the * address. @@ -273,6 +321,8 @@ not_angel: */ mov r4, pc and r4, r4, #0xf8000000 + +1: /* Determine final kernel image address. */ add r4, r4, #TEXT_OFFSET #else