Message ID | 20201012142410.308002-1-mick@ics.forth.gr (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | RISC-V: Add kernel image sections to the resource tree | expand |
On Mon, 12 Oct 2020 07:24:10 PDT (-0700), mick@ics.forth.gr wrote: > This patch (previously part of my kexec/kdump series) populates > /proc/iomem with the various sections of the kernel image. We need > this for kexec-tools to be able to prepare the crashkernel image > for kdump to work. Since resource tree initialization is not > related to memory initialization I added the code to kernel/setup.c > and removed the original code (derived from the arm64 tree) from > mm/init.c. > > Signed-off-by: Nick Kossifidis <mick@ics.forth.gr> > --- > arch/riscv/kernel/setup.c | 160 ++++++++++++++++++++++++++++++++++++++ > arch/riscv/mm/init.c | 27 ------- > 2 files changed, 160 insertions(+), 27 deletions(-) > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c > index 2c6dd3293..450f0142f 100644 > --- a/arch/riscv/kernel/setup.c > +++ b/arch/riscv/kernel/setup.c > @@ -4,6 +4,8 @@ > * Chen Liqin <liqin.chen@sunplusct.com> > * Lennox Wu <lennox.wu@sunplusct.com> > * Copyright (C) 2012 Regents of the University of California > + * Copyright (C) 2020 FORTH-ICS/CARV > + * Nick Kossifidis <mick@ics.forth.gr> > */ > > #include <linux/init.h> > @@ -48,6 +50,163 @@ atomic_t hart_lottery __section(.sdata); > unsigned long boot_cpu_hartid; > static DEFINE_PER_CPU(struct cpu, cpu_devices); > > +/* > + * Place kernel memory regions on the resource tree so that > + * kexec-tools can retrieve them from /proc/iomem. While there > + * also add "System RAM" regions for compatibility with other > + * archs, and the rest of the known regions for completeness. > + */ > +static struct resource code_res = { .name = "Kernel code", }; > +static struct resource data_res = { .name = "Kernel data", }; > +static struct resource rodata_res = { .name = "Kernel rodata", }; > +static struct resource bss_res = { .name = "Kernel bss", }; > + > +static int __init add_resource(struct resource *parent, > + struct resource *res) > +{ > + int ret = 0; > + > + ret = insert_resource(parent, res); > + if (ret < 0) { > + pr_err("Failed to add a %s resource at %llx\n", > + res->name, (unsigned long long) res->start); > + return ret; > + } > + > + return 1; > +} > + > +static int __init add_kernel_resources(struct resource *res) > +{ > + int ret = 0; > + > + /* > + * The memory region of the kernel image is continuous and > + * was reserved on setup_bootmem, find it here and register > + * it as a resource, then register the various segments of > + * the image as child nodes > + */ > + if (!(res->start <= code_res.start && res->end >= data_res.end)) > + return 0; > + > + res->name = "Kernel image"; > + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > + > + /* > + * We removed a part of this region on setup_bootmem so > + * we need to expand the resource for the bss to fit in. > + */ > + res->end = bss_res.end; > + > + ret = add_resource(&iomem_resource, res); > + if (ret < 0) > + return ret; > + > + ret = add_resource(res, &code_res); > + if (ret < 0) > + return ret; > + > + ret = add_resource(res, &rodata_res); > + if (ret < 0) > + return ret; > + > + ret = add_resource(res, &data_res); > + if (ret < 0) > + return ret; > + > + ret = add_resource(res, &bss_res); > + > + return ret; > +} > + > +static void __init init_resources(void) > +{ > + struct memblock_region *region = NULL; > + struct resource *res = NULL; > + int ret = 0; > + > + code_res.start = __pa_symbol(_text); > + code_res.end = __pa_symbol(_etext) - 1; > + code_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > + > + rodata_res.start = __pa_symbol(__start_rodata); > + rodata_res.end = __pa_symbol(__end_rodata) - 1; > + rodata_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > + > + data_res.start = __pa_symbol(_data); > + data_res.end = __pa_symbol(_edata) - 1; > + data_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > + > + bss_res.start = __pa_symbol(__bss_start); > + bss_res.end = __pa_symbol(__bss_stop) - 1; > + bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > + > + /* > + * Start by adding the reserved regions, if they overlap > + * with /memory regions, insert_resource later on will take > + * care of it. > + */ > + for_each_memblock(reserved, region) { > + res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); > + if (!res) > + panic("%s: Failed to allocate %zu bytes\n", __func__, > + sizeof(struct resource)); > + > + res->name = "Reserved"; > + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; > + res->start = __pfn_to_phys(memblock_region_reserved_base_pfn(region)); > + res->end = __pfn_to_phys(memblock_region_reserved_end_pfn(region)) - 1; > + > + ret = add_kernel_resources(res); > + if (ret < 0) > + goto error; > + else if (ret) > + continue; > + > + /* > + * Ignore any other reserved regions within > + * system memory. > + */ > + if (memblock_is_memory(res->start)) > + continue; > + > + ret = add_resource(&iomem_resource, res); > + if (ret < 0) > + goto error; > + } > + > + /* Add /memory regions to the resource tree */ > + for_each_memblock(memory, region) { > + res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); > + if (!res) > + panic("%s: Failed to allocate %zu bytes\n", __func__, > + sizeof(struct resource)); > + > + if (unlikely(memblock_is_nomap(region))) { > + res->name = "Reserved"; > + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; > + } else { > + res->name = "System RAM"; > + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > + } > + > + res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); > + res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; > + > + ret = add_resource(&iomem_resource, res); > + if (ret < 0) > + goto error; > + } > + > + return; > + > + error: > + memblock_free((phys_addr_t) res, sizeof(struct resource)); > + /* Better an empty resource tree than an inconsistent one */ > + release_child_resources(&iomem_resource); > +} > + > + > void __init parse_dtb(void) > { > if (early_init_dt_scan(dtb_early_va)) > @@ -73,6 +232,7 @@ void __init setup_arch(char **cmdline_p) > > setup_bootmem(); > paging_init(); > + init_resources(); > #if IS_ENABLED(CONFIG_BUILTIN_DTB) > unflatten_and_copy_device_tree(); > #else > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c > index f750e012d..05d6cf0c2 100644 > --- a/arch/riscv/mm/init.c > +++ b/arch/riscv/mm/init.c > @@ -541,39 +541,12 @@ void mark_rodata_ro(void) > } > #endif > > -static void __init resource_init(void) > -{ > - struct memblock_region *region; > - > - for_each_memblock(memory, region) { > - struct resource *res; > - > - res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); > - if (!res) > - panic("%s: Failed to allocate %zu bytes\n", __func__, > - sizeof(struct resource)); > - > - if (memblock_is_nomap(region)) { > - res->name = "reserved"; > - res->flags = IORESOURCE_MEM; > - } else { > - res->name = "System RAM"; > - res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > - } > - res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); > - res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; > - > - request_resource(&iomem_resource, res); > - } > -} > - > void __init paging_init(void) > { > setup_vm_final(); > sparse_init(); > setup_zero_page(); > zone_sizes_init(); > - resource_init(); > } > > #ifdef CONFIG_SPARSEMEM_VMEMMAP Thanks, this is on for-next. I had to fix up a few things, LMK if I screwed anything up.
On Thu, Nov 5, 2020 at 10:37 AM Palmer Dabbelt <palmer@dabbelt.com> wrote: > > On Mon, 12 Oct 2020 07:24:10 PDT (-0700), mick@ics.forth.gr wrote: > > This patch (previously part of my kexec/kdump series) populates > > /proc/iomem with the various sections of the kernel image. We need > > this for kexec-tools to be able to prepare the crashkernel image > > for kdump to work. Since resource tree initialization is not > > related to memory initialization I added the code to kernel/setup.c > > and removed the original code (derived from the arm64 tree) from > > mm/init.c. > > > > Signed-off-by: Nick Kossifidis <mick@ics.forth.gr> > > --- > > arch/riscv/kernel/setup.c | 160 ++++++++++++++++++++++++++++++++++++++ > > arch/riscv/mm/init.c | 27 ------- > > 2 files changed, 160 insertions(+), 27 deletions(-) > > > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c > > index 2c6dd3293..450f0142f 100644 > > --- a/arch/riscv/kernel/setup.c > > +++ b/arch/riscv/kernel/setup.c > > @@ -4,6 +4,8 @@ > > * Chen Liqin <liqin.chen@sunplusct.com> > > * Lennox Wu <lennox.wu@sunplusct.com> > > * Copyright (C) 2012 Regents of the University of California > > + * Copyright (C) 2020 FORTH-ICS/CARV > > + * Nick Kossifidis <mick@ics.forth.gr> > > */ > > > > #include <linux/init.h> > > @@ -48,6 +50,163 @@ atomic_t hart_lottery __section(.sdata); > > unsigned long boot_cpu_hartid; > > static DEFINE_PER_CPU(struct cpu, cpu_devices); > > > > +/* > > + * Place kernel memory regions on the resource tree so that > > + * kexec-tools can retrieve them from /proc/iomem. While there > > + * also add "System RAM" regions for compatibility with other > > + * archs, and the rest of the known regions for completeness. > > + */ > > +static struct resource code_res = { .name = "Kernel code", }; > > +static struct resource data_res = { .name = "Kernel data", }; > > +static struct resource rodata_res = { .name = "Kernel rodata", }; > > +static struct resource bss_res = { .name = "Kernel bss", }; > > + > > +static int __init add_resource(struct resource *parent, > > + struct resource *res) > > +{ > > + int ret = 0; > > + > > + ret = insert_resource(parent, res); > > + if (ret < 0) { > > + pr_err("Failed to add a %s resource at %llx\n", > > + res->name, (unsigned long long) res->start); > > + return ret; > > + } > > + > > + return 1; > > +} > > + > > +static int __init add_kernel_resources(struct resource *res) > > +{ > > + int ret = 0; > > + > > + /* > > + * The memory region of the kernel image is continuous and > > + * was reserved on setup_bootmem, find it here and register > > + * it as a resource, then register the various segments of > > + * the image as child nodes > > + */ > > + if (!(res->start <= code_res.start && res->end >= data_res.end)) > > + return 0; > > + > > + res->name = "Kernel image"; > > + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > > + > > + /* > > + * We removed a part of this region on setup_bootmem so > > + * we need to expand the resource for the bss to fit in. > > + */ > > + res->end = bss_res.end; > > + > > + ret = add_resource(&iomem_resource, res); > > + if (ret < 0) > > + return ret; > > + > > + ret = add_resource(res, &code_res); > > + if (ret < 0) > > + return ret; > > + > > + ret = add_resource(res, &rodata_res); > > + if (ret < 0) > > + return ret; > > + > > + ret = add_resource(res, &data_res); > > + if (ret < 0) > > + return ret; > > + > > + ret = add_resource(res, &bss_res); > > + > > + return ret; > > +} > > + > > +static void __init init_resources(void) > > +{ > > + struct memblock_region *region = NULL; > > + struct resource *res = NULL; > > + int ret = 0; > > + > > + code_res.start = __pa_symbol(_text); > > + code_res.end = __pa_symbol(_etext) - 1; > > + code_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > > + > > + rodata_res.start = __pa_symbol(__start_rodata); > > + rodata_res.end = __pa_symbol(__end_rodata) - 1; > > + rodata_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > > + > > + data_res.start = __pa_symbol(_data); > > + data_res.end = __pa_symbol(_edata) - 1; > > + data_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > > + > > + bss_res.start = __pa_symbol(__bss_start); > > + bss_res.end = __pa_symbol(__bss_stop) - 1; > > + bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > > + > > + /* > > + * Start by adding the reserved regions, if they overlap > > + * with /memory regions, insert_resource later on will take > > + * care of it. > > + */ > > + for_each_memblock(reserved, region) { > > + res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); Is there a specific reason to invoke memblock_alloc while iterating reserved regions ? memblock_alloc also adds calls memblock_reserve. So we are modifying the reserved region entries while iterating it. It resulted in below warning for rv32. [ 0.000000] ------------[ cut here ]------------ [ 0.000000] WARNING: CPU: 0 PID: 0 at kernel/resource.c:795 __insert_resource+0x8e/0xd0 [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.10.0-00022-ge20097fb37e2-dirty #549 [ 0.000000] epc: c00125c2 ra : c001262c sp : c1c01f50 [ 0.000000] gp : c1d456e0 tp : c1c0a980 t0 : ffffcf20 [ 0.000000] t1 : 00000000 t2 : 00000000 s0 : c1c01f60 [ 0.000000] s1 : ffffcf00 a0 : ffffff00 a1 : c1c0c0c4 [ 0.000000] a2 : 80c12b15 a3 : 80402000 a4 : 80402000 [ 0.000000] a5 : c1c0c0c4 a6 : 80c12b15 a7 : f5faf600 [ 0.000000] s2 : c1c0c0c4 s3 : c1c0e000 s4 : c1009a80 [ 0.000000] s5 : c1c0c000 s6 : c1d48000 s7 : c1613b4c [ 0.000000] s8 : 00000fff s9 : 80000200 s10: c1613b40 [ 0.000000] s11: 00000000 t3 : c1d4a000 t4 : ffffffff [ 0.000000] t5 : c1008e38 t6 : 00000001 [ 0.000000] status: 00000100 badaddr: 00000000 cause: 00000003 [ 0.000000] irq event stamp: 0 [ 0.000000] hardirqs last enabled at (0): [<00000000>] 0x0 [ 0.000000] hardirqs last disabled at (0): [<00000000>] 0x0 [ 0.000000] softirqs last enabled at (0): [<00000000>] 0x0 [ 0.000000] softirqs last disabled at (0): [<00000000>] 0x0 [ 0.000000] random: get_random_bytes called from __warn+0xd8/0x11e with crng_init=0 [ 0.000000] ---[ end trace 0000000000000000 ]--- [ 0.000000] Failed to add a Kernel code resource at 80402000 Here is the fix that works: --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -127,7 +127,9 @@ static void __init init_resources(void) { struct memblock_region *region = NULL; struct resource *res = NULL; - int ret = 0; + int ret = 0, i = 0; + int num_mem_res; + struct resource *mem_res; code_res.start = __pa_symbol(_text); code_res.end = __pa_symbol(_etext) - 1; @@ -145,16 +147,17 @@ static void __init init_resources(void) bss_res.end = __pa_symbol(__bss_stop) - 1; bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + num_mem_res = memblock.memory.cnt + memblock.reserved.cnt; + mem_res = memblock_alloc(num_mem_res * sizeof(*mem_res), SMP_CACHE_BYTES); + if (!mem_res) + panic("%s: Failed to allocate %zu bytes\n", __func__, num_mem_res * sizeof(*mem_res)); /* * Start by adding the reserved regions, if they overlap * with /memory regions, insert_resource later on will take * care of it. */ for_each_reserved_mem_region(region) { - res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); - if (!res) - panic("%s: Failed to allocate %zu bytes\n", __func__, - sizeof(struct resource)); + res = &mem_res[i++]; res->name = "Reserved"; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; @@ -181,11 +184,8 @@ static void __init init_resources(void) /* Add /memory regions to the resource tree */ for_each_mem_region(region) { - res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); - if (!res) - panic("%s: Failed to allocate %zu bytes\n", __func__, - sizeof(struct resource)); + res = &mem_res[i++]; If this looks okay to you, I will send the patch. > > + if (!res) > > + panic("%s: Failed to allocate %zu bytes\n", __func__, > > + sizeof(struct resource)); > > + > > + res->name = "Reserved"; > > + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; > > + res->start = __pfn_to_phys(memblock_region_reserved_base_pfn(region)); > > + res->end = __pfn_to_phys(memblock_region_reserved_end_pfn(region)) - 1; > > + > > + ret = add_kernel_resources(res); > > + if (ret < 0) > > + goto error; > > + else if (ret) > > + continue; > > + > > + /* > > + * Ignore any other reserved regions within > > + * system memory. > > + */ > > + if (memblock_is_memory(res->start)) > > + continue; > > + Shouldn't we free the res in this case ? The allocated memblock is useless as it is not going to be added to the resource tree. > > + ret = add_resource(&iomem_resource, res); > > + if (ret < 0) > > + goto error; > > + } > > + > > + /* Add /memory regions to the resource tree */ > > + for_each_memblock(memory, region) { > > + res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); > > + if (!res) > > + panic("%s: Failed to allocate %zu bytes\n", __func__, > > + sizeof(struct resource)); > > + > > + if (unlikely(memblock_is_nomap(region))) { > > + res->name = "Reserved"; > > + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; > > + } else { > > + res->name = "System RAM"; > > + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > > + } > > + > > + res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); > > + res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; > > + > > + ret = add_resource(&iomem_resource, res); > > + if (ret < 0) > > + goto error; > > + } > > + > > + return; > > + > > + error: > > + memblock_free((phys_addr_t) res, sizeof(struct resource)); > > + /* Better an empty resource tree than an inconsistent one */ > > + release_child_resources(&iomem_resource); > > +} > > + > > + > > void __init parse_dtb(void) > > { > > if (early_init_dt_scan(dtb_early_va)) > > @@ -73,6 +232,7 @@ void __init setup_arch(char **cmdline_p) > > > > setup_bootmem(); > > paging_init(); > > + init_resources(); > > #if IS_ENABLED(CONFIG_BUILTIN_DTB) > > unflatten_and_copy_device_tree(); > > #else > > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c > > index f750e012d..05d6cf0c2 100644 > > --- a/arch/riscv/mm/init.c > > +++ b/arch/riscv/mm/init.c > > @@ -541,39 +541,12 @@ void mark_rodata_ro(void) > > } > > #endif > > > > -static void __init resource_init(void) > > -{ > > - struct memblock_region *region; > > - > > - for_each_memblock(memory, region) { > > - struct resource *res; > > - > > - res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); > > - if (!res) > > - panic("%s: Failed to allocate %zu bytes\n", __func__, > > - sizeof(struct resource)); > > - > > - if (memblock_is_nomap(region)) { > > - res->name = "reserved"; > > - res->flags = IORESOURCE_MEM; > > - } else { > > - res->name = "System RAM"; > > - res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > > - } > > - res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); > > - res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; > > - > > - request_resource(&iomem_resource, res); > > - } > > -} > > - > > void __init paging_init(void) > > { > > setup_vm_final(); > > sparse_init(); > > setup_zero_page(); > > zone_sizes_init(); > > - resource_init(); > > } > > > > #ifdef CONFIG_SPARSEMEM_VMEMMAP > > Thanks, this is on for-next. I had to fix up a few things, LMK if I screwed > anything up. > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
Στις 2020-12-19 01:52, Atish Patra έγραψε: > On Thu, Nov 5, 2020 at 10:37 AM Palmer Dabbelt <palmer@dabbelt.com> > wrote: >> >> On Mon, 12 Oct 2020 07:24:10 PDT (-0700), mick@ics.forth.gr wrote: >> > This patch (previously part of my kexec/kdump series) populates >> > /proc/iomem with the various sections of the kernel image. We need >> > this for kexec-tools to be able to prepare the crashkernel image >> > for kdump to work. Since resource tree initialization is not >> > related to memory initialization I added the code to kernel/setup.c >> > and removed the original code (derived from the arm64 tree) from >> > mm/init.c. >> > >> > Signed-off-by: Nick Kossifidis <mick@ics.forth.gr> >> > --- >> > arch/riscv/kernel/setup.c | 160 ++++++++++++++++++++++++++++++++++++++ >> > arch/riscv/mm/init.c | 27 ------- >> > 2 files changed, 160 insertions(+), 27 deletions(-) >> > >> > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c >> > index 2c6dd3293..450f0142f 100644 >> > --- a/arch/riscv/kernel/setup.c >> > +++ b/arch/riscv/kernel/setup.c >> > @@ -4,6 +4,8 @@ >> > * Chen Liqin <liqin.chen@sunplusct.com> >> > * Lennox Wu <lennox.wu@sunplusct.com> >> > * Copyright (C) 2012 Regents of the University of California >> > + * Copyright (C) 2020 FORTH-ICS/CARV >> > + * Nick Kossifidis <mick@ics.forth.gr> >> > */ >> > >> > #include <linux/init.h> >> > @@ -48,6 +50,163 @@ atomic_t hart_lottery __section(.sdata); >> > unsigned long boot_cpu_hartid; >> > static DEFINE_PER_CPU(struct cpu, cpu_devices); >> > >> > +/* >> > + * Place kernel memory regions on the resource tree so that >> > + * kexec-tools can retrieve them from /proc/iomem. While there >> > + * also add "System RAM" regions for compatibility with other >> > + * archs, and the rest of the known regions for completeness. >> > + */ >> > +static struct resource code_res = { .name = "Kernel code", }; >> > +static struct resource data_res = { .name = "Kernel data", }; >> > +static struct resource rodata_res = { .name = "Kernel rodata", }; >> > +static struct resource bss_res = { .name = "Kernel bss", }; >> > + >> > +static int __init add_resource(struct resource *parent, >> > + struct resource *res) >> > +{ >> > + int ret = 0; >> > + >> > + ret = insert_resource(parent, res); >> > + if (ret < 0) { >> > + pr_err("Failed to add a %s resource at %llx\n", >> > + res->name, (unsigned long long) res->start); >> > + return ret; >> > + } >> > + >> > + return 1; >> > +} >> > + >> > +static int __init add_kernel_resources(struct resource *res) >> > +{ >> > + int ret = 0; >> > + >> > + /* >> > + * The memory region of the kernel image is continuous and >> > + * was reserved on setup_bootmem, find it here and register >> > + * it as a resource, then register the various segments of >> > + * the image as child nodes >> > + */ >> > + if (!(res->start <= code_res.start && res->end >= data_res.end)) >> > + return 0; >> > + >> > + res->name = "Kernel image"; >> > + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; >> > + >> > + /* >> > + * We removed a part of this region on setup_bootmem so >> > + * we need to expand the resource for the bss to fit in. >> > + */ >> > + res->end = bss_res.end; >> > + >> > + ret = add_resource(&iomem_resource, res); >> > + if (ret < 0) >> > + return ret; >> > + >> > + ret = add_resource(res, &code_res); >> > + if (ret < 0) >> > + return ret; >> > + >> > + ret = add_resource(res, &rodata_res); >> > + if (ret < 0) >> > + return ret; >> > + >> > + ret = add_resource(res, &data_res); >> > + if (ret < 0) >> > + return ret; >> > + >> > + ret = add_resource(res, &bss_res); >> > + >> > + return ret; >> > +} >> > + >> > +static void __init init_resources(void) >> > +{ >> > + struct memblock_region *region = NULL; >> > + struct resource *res = NULL; >> > + int ret = 0; >> > + >> > + code_res.start = __pa_symbol(_text); >> > + code_res.end = __pa_symbol(_etext) - 1; >> > + code_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; >> > + >> > + rodata_res.start = __pa_symbol(__start_rodata); >> > + rodata_res.end = __pa_symbol(__end_rodata) - 1; >> > + rodata_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; >> > + >> > + data_res.start = __pa_symbol(_data); >> > + data_res.end = __pa_symbol(_edata) - 1; >> > + data_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; >> > + >> > + bss_res.start = __pa_symbol(__bss_start); >> > + bss_res.end = __pa_symbol(__bss_stop) - 1; >> > + bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; >> > + >> > + /* >> > + * Start by adding the reserved regions, if they overlap >> > + * with /memory regions, insert_resource later on will take >> > + * care of it. >> > + */ >> > + for_each_memblock(reserved, region) { >> > + res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); > > Is there a specific reason to invoke memblock_alloc while iterating > reserved regions ? > memblock_alloc also adds calls memblock_reserve. So we are modifying > the reserved region entries > while iterating it. It resulted in below warning for rv32. > > [ 0.000000] ------------[ cut here ]------------ > [ 0.000000] WARNING: CPU: 0 PID: 0 at kernel/resource.c:795 > __insert_resource+0x8e/0xd0 > [ 0.000000] Modules linked in: > [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted > 5.10.0-00022-ge20097fb37e2-dirty #549 > [ 0.000000] epc: c00125c2 ra : c001262c sp : c1c01f50 > [ 0.000000] gp : c1d456e0 tp : c1c0a980 t0 : ffffcf20 > [ 0.000000] t1 : 00000000 t2 : 00000000 s0 : c1c01f60 > [ 0.000000] s1 : ffffcf00 a0 : ffffff00 a1 : c1c0c0c4 > [ 0.000000] a2 : 80c12b15 a3 : 80402000 a4 : 80402000 > [ 0.000000] a5 : c1c0c0c4 a6 : 80c12b15 a7 : f5faf600 > [ 0.000000] s2 : c1c0c0c4 s3 : c1c0e000 s4 : c1009a80 > [ 0.000000] s5 : c1c0c000 s6 : c1d48000 s7 : c1613b4c > [ 0.000000] s8 : 00000fff s9 : 80000200 s10: c1613b40 > [ 0.000000] s11: 00000000 t3 : c1d4a000 t4 : ffffffff > [ 0.000000] t5 : c1008e38 t6 : 00000001 > [ 0.000000] status: 00000100 badaddr: 00000000 cause: 00000003 > [ 0.000000] irq event stamp: 0 > [ 0.000000] hardirqs last enabled at (0): [<00000000>] 0x0 > [ 0.000000] hardirqs last disabled at (0): [<00000000>] 0x0 > [ 0.000000] softirqs last enabled at (0): [<00000000>] 0x0 > [ 0.000000] softirqs last disabled at (0): [<00000000>] 0x0 > [ 0.000000] random: get_random_bytes called from __warn+0xd8/0x11e > with crng_init=0 > [ 0.000000] ---[ end trace 0000000000000000 ]--- > [ 0.000000] Failed to add a Kernel code resource at 80402000 > > Here is the fix that works: > --- a/arch/riscv/kernel/setup.c > +++ b/arch/riscv/kernel/setup.c > @@ -127,7 +127,9 @@ static void __init init_resources(void) > { > struct memblock_region *region = NULL; > struct resource *res = NULL; > - int ret = 0; > + int ret = 0, i = 0; > + int num_mem_res; > + struct resource *mem_res; > > code_res.start = __pa_symbol(_text); > code_res.end = __pa_symbol(_etext) - 1; > @@ -145,16 +147,17 @@ static void __init init_resources(void) > bss_res.end = __pa_symbol(__bss_stop) - 1; > bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > > + num_mem_res = memblock.memory.cnt + memblock.reserved.cnt; > + mem_res = memblock_alloc(num_mem_res * sizeof(*mem_res), > SMP_CACHE_BYTES); > + if (!mem_res) > + panic("%s: Failed to allocate %zu bytes\n", __func__, > num_mem_res * sizeof(*mem_res)); > /* > * Start by adding the reserved regions, if they overlap > * with /memory regions, insert_resource later on will take > * care of it. > */ > for_each_reserved_mem_region(region) { > - res = memblock_alloc(sizeof(struct resource), > SMP_CACHE_BYTES); > - if (!res) > - panic("%s: Failed to allocate %zu bytes\n", > __func__, > - sizeof(struct resource)); > + res = &mem_res[i++]; > > res->name = "Reserved"; > res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; > @@ -181,11 +184,8 @@ static void __init init_resources(void) > > /* Add /memory regions to the resource tree */ > for_each_mem_region(region) { > - res = memblock_alloc(sizeof(struct resource), > SMP_CACHE_BYTES); > - if (!res) > - panic("%s: Failed to allocate %zu bytes\n", > __func__, > - sizeof(struct resource)); > > + res = &mem_res[i++]; > > If this looks okay to you, I will send the patch. > The problem is that we don't want to include all reserved regions within /memory such as the device tree or initramfs, since they'll get modified and/or freed later on. So pre-allocating resources for all reserved regions doesn't seem the right thing to do. My goal here was to allocate a resource on each itteration and free it if not needed / on failure, I free the failed resource on failure but not if it's not needed as you noted. As for the issue of memblock_alloc() calling memblock_reserve(), we don't add any new reserved regions at this point directly through memblock_reserve(), and memblock_reserve() will run on neighboring regions that memblock_merge_regions() will later on merge together, so it'll work on a single growing region instead of creating new ones. The issue is that we call memblock_alloc() for the first time inside the loop and for_each_mem_region() macro uses pointer arithmetic on the initial array of reserved memblocks, we could use for_each_memblock_type() macro but it's not on memblock.h anymore OR we could call memblock_alloc() before the loop so that the array is not expanded while itterating. How about this, does it work for you ?: diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 1d85e9bf7..460cfddb7 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -127,8 +127,16 @@ static void __init init_resources(void) { struct memblock_region *region = NULL; struct resource *res = NULL; + struct resource *mem_resources = NULL; + size_t mem_resources_sz = 0; int ret = 0; + mem_resources_sz = memblock.memory.cnt * sizeof(struct resource); + mem_resources = memblock_alloc(mem_resources_sz, SMP_CACHE_BYTES); + if (!mem_resources) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(struct resource)); + code_res.start = __pa_symbol(_text); code_res.end = __pa_symbol(_etext) - 1; code_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; @@ -171,20 +179,21 @@ static void __init init_resources(void) * Ignore any other reserved regions within * system memory. */ - if (memblock_is_memory(res->start)) + if (memblock_is_memory(res->start)) { + memblock_free((phys_addr_t) res, sizeof(struct resource)); continue; + } ret = add_resource(&iomem_resource, res); - if (ret < 0) + if (ret < 0) { + memblock_free((phys_addr_t) res, sizeof(struct resource)); goto error; + } } /* Add /memory regions to the resource tree */ for_each_mem_region(region) { - res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); - if (!res) - panic("%s: Failed to allocate %zu bytes\n", __func__, - sizeof(struct resource)); + res = mem_resources++; if (unlikely(memblock_is_nomap(region))) { res->name = "Reserved"; @@ -205,9 +214,9 @@ static void __init init_resources(void) return; error: - memblock_free((phys_addr_t) res, sizeof(struct resource)); /* Better an empty resource tree than an inconsistent one */ release_child_resources(&iomem_resource); + memblock_free((phys_addr_t) mem_resources, mem_resources_sz); } Regards, Nick
On Tue, Dec 22, 2020 at 3:59 PM Nick Kossifidis <mick@ics.forth.gr> wrote: > > Στις 2020-12-19 01:52, Atish Patra έγραψε: > > On Thu, Nov 5, 2020 at 10:37 AM Palmer Dabbelt <palmer@dabbelt.com> > > wrote: > >> > >> On Mon, 12 Oct 2020 07:24:10 PDT (-0700), mick@ics.forth.gr wrote: > >> > This patch (previously part of my kexec/kdump series) populates > >> > /proc/iomem with the various sections of the kernel image. We need > >> > this for kexec-tools to be able to prepare the crashkernel image > >> > for kdump to work. Since resource tree initialization is not > >> > related to memory initialization I added the code to kernel/setup.c > >> > and removed the original code (derived from the arm64 tree) from > >> > mm/init.c. > >> > > >> > Signed-off-by: Nick Kossifidis <mick@ics.forth.gr> > >> > --- > >> > arch/riscv/kernel/setup.c | 160 ++++++++++++++++++++++++++++++++++++++ > >> > arch/riscv/mm/init.c | 27 ------- > >> > 2 files changed, 160 insertions(+), 27 deletions(-) > >> > > >> > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c > >> > index 2c6dd3293..450f0142f 100644 > >> > --- a/arch/riscv/kernel/setup.c > >> > +++ b/arch/riscv/kernel/setup.c > >> > @@ -4,6 +4,8 @@ > >> > * Chen Liqin <liqin.chen@sunplusct.com> > >> > * Lennox Wu <lennox.wu@sunplusct.com> > >> > * Copyright (C) 2012 Regents of the University of California > >> > + * Copyright (C) 2020 FORTH-ICS/CARV > >> > + * Nick Kossifidis <mick@ics.forth.gr> > >> > */ > >> > > >> > #include <linux/init.h> > >> > @@ -48,6 +50,163 @@ atomic_t hart_lottery __section(.sdata); > >> > unsigned long boot_cpu_hartid; > >> > static DEFINE_PER_CPU(struct cpu, cpu_devices); > >> > > >> > +/* > >> > + * Place kernel memory regions on the resource tree so that > >> > + * kexec-tools can retrieve them from /proc/iomem. While there > >> > + * also add "System RAM" regions for compatibility with other > >> > + * archs, and the rest of the known regions for completeness. > >> > + */ > >> > +static struct resource code_res = { .name = "Kernel code", }; > >> > +static struct resource data_res = { .name = "Kernel data", }; > >> > +static struct resource rodata_res = { .name = "Kernel rodata", }; > >> > +static struct resource bss_res = { .name = "Kernel bss", }; > >> > + > >> > +static int __init add_resource(struct resource *parent, > >> > + struct resource *res) > >> > +{ > >> > + int ret = 0; > >> > + > >> > + ret = insert_resource(parent, res); > >> > + if (ret < 0) { > >> > + pr_err("Failed to add a %s resource at %llx\n", > >> > + res->name, (unsigned long long) res->start); > >> > + return ret; > >> > + } > >> > + > >> > + return 1; > >> > +} > >> > + > >> > +static int __init add_kernel_resources(struct resource *res) > >> > +{ > >> > + int ret = 0; > >> > + > >> > + /* > >> > + * The memory region of the kernel image is continuous and > >> > + * was reserved on setup_bootmem, find it here and register > >> > + * it as a resource, then register the various segments of > >> > + * the image as child nodes > >> > + */ > >> > + if (!(res->start <= code_res.start && res->end >= data_res.end)) > >> > + return 0; > >> > + > >> > + res->name = "Kernel image"; > >> > + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > >> > + > >> > + /* > >> > + * We removed a part of this region on setup_bootmem so > >> > + * we need to expand the resource for the bss to fit in. > >> > + */ > >> > + res->end = bss_res.end; > >> > + > >> > + ret = add_resource(&iomem_resource, res); > >> > + if (ret < 0) > >> > + return ret; > >> > + > >> > + ret = add_resource(res, &code_res); > >> > + if (ret < 0) > >> > + return ret; > >> > + > >> > + ret = add_resource(res, &rodata_res); > >> > + if (ret < 0) > >> > + return ret; > >> > + > >> > + ret = add_resource(res, &data_res); > >> > + if (ret < 0) > >> > + return ret; > >> > + > >> > + ret = add_resource(res, &bss_res); > >> > + > >> > + return ret; > >> > +} > >> > + > >> > +static void __init init_resources(void) > >> > +{ > >> > + struct memblock_region *region = NULL; > >> > + struct resource *res = NULL; > >> > + int ret = 0; > >> > + > >> > + code_res.start = __pa_symbol(_text); > >> > + code_res.end = __pa_symbol(_etext) - 1; > >> > + code_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > >> > + > >> > + rodata_res.start = __pa_symbol(__start_rodata); > >> > + rodata_res.end = __pa_symbol(__end_rodata) - 1; > >> > + rodata_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > >> > + > >> > + data_res.start = __pa_symbol(_data); > >> > + data_res.end = __pa_symbol(_edata) - 1; > >> > + data_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > >> > + > >> > + bss_res.start = __pa_symbol(__bss_start); > >> > + bss_res.end = __pa_symbol(__bss_stop) - 1; > >> > + bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > >> > + > >> > + /* > >> > + * Start by adding the reserved regions, if they overlap > >> > + * with /memory regions, insert_resource later on will take > >> > + * care of it. > >> > + */ > >> > + for_each_memblock(reserved, region) { > >> > + res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); > > > > Is there a specific reason to invoke memblock_alloc while iterating > > reserved regions ? > > memblock_alloc also adds calls memblock_reserve. So we are modifying > > the reserved region entries > > while iterating it. It resulted in below warning for rv32. > > > > [ 0.000000] ------------[ cut here ]------------ > > [ 0.000000] WARNING: CPU: 0 PID: 0 at kernel/resource.c:795 > > __insert_resource+0x8e/0xd0 > > [ 0.000000] Modules linked in: > > [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted > > 5.10.0-00022-ge20097fb37e2-dirty #549 > > [ 0.000000] epc: c00125c2 ra : c001262c sp : c1c01f50 > > [ 0.000000] gp : c1d456e0 tp : c1c0a980 t0 : ffffcf20 > > [ 0.000000] t1 : 00000000 t2 : 00000000 s0 : c1c01f60 > > [ 0.000000] s1 : ffffcf00 a0 : ffffff00 a1 : c1c0c0c4 > > [ 0.000000] a2 : 80c12b15 a3 : 80402000 a4 : 80402000 > > [ 0.000000] a5 : c1c0c0c4 a6 : 80c12b15 a7 : f5faf600 > > [ 0.000000] s2 : c1c0c0c4 s3 : c1c0e000 s4 : c1009a80 > > [ 0.000000] s5 : c1c0c000 s6 : c1d48000 s7 : c1613b4c > > [ 0.000000] s8 : 00000fff s9 : 80000200 s10: c1613b40 > > [ 0.000000] s11: 00000000 t3 : c1d4a000 t4 : ffffffff > > [ 0.000000] t5 : c1008e38 t6 : 00000001 > > [ 0.000000] status: 00000100 badaddr: 00000000 cause: 00000003 > > [ 0.000000] irq event stamp: 0 > > [ 0.000000] hardirqs last enabled at (0): [<00000000>] 0x0 > > [ 0.000000] hardirqs last disabled at (0): [<00000000>] 0x0 > > [ 0.000000] softirqs last enabled at (0): [<00000000>] 0x0 > > [ 0.000000] softirqs last disabled at (0): [<00000000>] 0x0 > > [ 0.000000] random: get_random_bytes called from __warn+0xd8/0x11e > > with crng_init=0 > > [ 0.000000] ---[ end trace 0000000000000000 ]--- > > [ 0.000000] Failed to add a Kernel code resource at 80402000 > > > > Here is the fix that works: > > --- a/arch/riscv/kernel/setup.c > > +++ b/arch/riscv/kernel/setup.c > > @@ -127,7 +127,9 @@ static void __init init_resources(void) > > { > > struct memblock_region *region = NULL; > > struct resource *res = NULL; > > - int ret = 0; > > + int ret = 0, i = 0; > > + int num_mem_res; > > + struct resource *mem_res; > > > > code_res.start = __pa_symbol(_text); > > code_res.end = __pa_symbol(_etext) - 1; > > @@ -145,16 +147,17 @@ static void __init init_resources(void) > > bss_res.end = __pa_symbol(__bss_stop) - 1; > > bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > > > > + num_mem_res = memblock.memory.cnt + memblock.reserved.cnt; > > + mem_res = memblock_alloc(num_mem_res * sizeof(*mem_res), > > SMP_CACHE_BYTES); > > + if (!mem_res) > > + panic("%s: Failed to allocate %zu bytes\n", __func__, > > num_mem_res * sizeof(*mem_res)); > > /* > > * Start by adding the reserved regions, if they overlap > > * with /memory regions, insert_resource later on will take > > * care of it. > > */ > > for_each_reserved_mem_region(region) { > > - res = memblock_alloc(sizeof(struct resource), > > SMP_CACHE_BYTES); > > - if (!res) > > - panic("%s: Failed to allocate %zu bytes\n", > > __func__, > > - sizeof(struct resource)); > > + res = &mem_res[i++]; > > > > res->name = "Reserved"; > > res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; > > @@ -181,11 +184,8 @@ static void __init init_resources(void) > > > > /* Add /memory regions to the resource tree */ > > for_each_mem_region(region) { > > - res = memblock_alloc(sizeof(struct resource), > > SMP_CACHE_BYTES); > > - if (!res) > > - panic("%s: Failed to allocate %zu bytes\n", > > __func__, > > - sizeof(struct resource)); > > > > + res = &mem_res[i++]; > > > > If this looks okay to you, I will send the patch. > > > > The problem is that we don't want to include all reserved regions within > /memory such as the device tree or initramfs, since they'll get modified > and/or freed later on. So pre-allocating resources for all reserved > regions doesn't seem the right thing to do. My goal here was to allocate > a resource on each itteration and free it if not needed / on failure, I > free the failed resource on failure but not if it's not needed as you > noted. > > As for the issue of memblock_alloc() calling memblock_reserve(), we > don't add any new reserved regions at this point directly through > memblock_reserve(), and memblock_reserve() will run on neighboring > regions that memblock_merge_regions() will later on merge together, so > it'll work on a single growing region instead of creating new ones. The > issue is that we call memblock_alloc() for the first time inside the > loop and for_each_mem_region() macro uses pointer arithmetic on the > initial array of reserved memblocks, Yes. We also need to do that for_each_reserved_mem_region as well. Adding that to your diff will be exactly similar to mine. > for_each_memblock_type() macro but it's not on memblock.h anymore OR we > could call memblock_alloc() before the loop so that the array is not > expanded while itterating. > > How about this, does it work for you ?: > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c > index 1d85e9bf7..460cfddb7 100644 > --- a/arch/riscv/kernel/setup.c > +++ b/arch/riscv/kernel/setup.c > @@ -127,8 +127,16 @@ static void __init init_resources(void) > { > struct memblock_region *region = NULL; > struct resource *res = NULL; > + struct resource *mem_resources = NULL; > + size_t mem_resources_sz = 0; > int ret = 0; > > + mem_resources_sz = memblock.memory.cnt * sizeof(struct resource); > + mem_resources = memblock_alloc(mem_resources_sz, SMP_CACHE_BYTES); > + if (!mem_resources) > + panic("%s: Failed to allocate %zu bytes\n", __func__, > + sizeof(struct resource)); > + > code_res.start = __pa_symbol(_text); > code_res.end = __pa_symbol(_etext) - 1; > code_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > @@ -171,20 +179,21 @@ static void __init init_resources(void) > * Ignore any other reserved regions within > * system memory. > */ > - if (memblock_is_memory(res->start)) > + if (memblock_is_memory(res->start)) { > + memblock_free((phys_addr_t) res, sizeof(struct resource)); > continue; > + } > > ret = add_resource(&iomem_resource, res); > - if (ret < 0) > + if (ret < 0) { > + memblock_free((phys_addr_t) res, sizeof(struct resource)); > goto error; > + } > } > > /* Add /memory regions to the resource tree */ > for_each_mem_region(region) { > - res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); > - if (!res) > - panic("%s: Failed to allocate %zu bytes\n", __func__, > - sizeof(struct resource)); > + res = mem_resources++; > > if (unlikely(memblock_is_nomap(region))) { > res->name = "Reserved"; > @@ -205,9 +214,9 @@ static void __init init_resources(void) > return; > > error: > - memblock_free((phys_addr_t) res, sizeof(struct resource)); > /* Better an empty resource tree than an inconsistent one */ > release_child_resources(&iomem_resource); > + memblock_free((phys_addr_t) mem_resources, mem_resources_sz); > } > > Regards, > Nick > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
On Wed, Dec 23, 2020 at 12:02 PM Atish Patra <atishp@atishpatra.org> wrote: > > On Tue, Dec 22, 2020 at 3:59 PM Nick Kossifidis <mick@ics.forth.gr> wrote: > > > > Στις 2020-12-19 01:52, Atish Patra έγραψε: > > > On Thu, Nov 5, 2020 at 10:37 AM Palmer Dabbelt <palmer@dabbelt.com> > > > wrote: > > >> > > >> On Mon, 12 Oct 2020 07:24:10 PDT (-0700), mick@ics.forth.gr wrote: > > >> > This patch (previously part of my kexec/kdump series) populates > > >> > /proc/iomem with the various sections of the kernel image. We need > > >> > this for kexec-tools to be able to prepare the crashkernel image > > >> > for kdump to work. Since resource tree initialization is not > > >> > related to memory initialization I added the code to kernel/setup.c > > >> > and removed the original code (derived from the arm64 tree) from > > >> > mm/init.c. > > >> > > > >> > Signed-off-by: Nick Kossifidis <mick@ics.forth.gr> > > >> > --- > > >> > arch/riscv/kernel/setup.c | 160 ++++++++++++++++++++++++++++++++++++++ > > >> > arch/riscv/mm/init.c | 27 ------- > > >> > 2 files changed, 160 insertions(+), 27 deletions(-) > > >> > > > >> > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c > > >> > index 2c6dd3293..450f0142f 100644 > > >> > --- a/arch/riscv/kernel/setup.c > > >> > +++ b/arch/riscv/kernel/setup.c > > >> > @@ -4,6 +4,8 @@ > > >> > * Chen Liqin <liqin.chen@sunplusct.com> > > >> > * Lennox Wu <lennox.wu@sunplusct.com> > > >> > * Copyright (C) 2012 Regents of the University of California > > >> > + * Copyright (C) 2020 FORTH-ICS/CARV > > >> > + * Nick Kossifidis <mick@ics.forth.gr> > > >> > */ > > >> > > > >> > #include <linux/init.h> > > >> > @@ -48,6 +50,163 @@ atomic_t hart_lottery __section(.sdata); > > >> > unsigned long boot_cpu_hartid; > > >> > static DEFINE_PER_CPU(struct cpu, cpu_devices); > > >> > > > >> > +/* > > >> > + * Place kernel memory regions on the resource tree so that > > >> > + * kexec-tools can retrieve them from /proc/iomem. While there > > >> > + * also add "System RAM" regions for compatibility with other > > >> > + * archs, and the rest of the known regions for completeness. > > >> > + */ > > >> > +static struct resource code_res = { .name = "Kernel code", }; > > >> > +static struct resource data_res = { .name = "Kernel data", }; > > >> > +static struct resource rodata_res = { .name = "Kernel rodata", }; > > >> > +static struct resource bss_res = { .name = "Kernel bss", }; > > >> > + > > >> > +static int __init add_resource(struct resource *parent, > > >> > + struct resource *res) > > >> > +{ > > >> > + int ret = 0; > > >> > + > > >> > + ret = insert_resource(parent, res); > > >> > + if (ret < 0) { > > >> > + pr_err("Failed to add a %s resource at %llx\n", > > >> > + res->name, (unsigned long long) res->start); > > >> > + return ret; > > >> > + } > > >> > + > > >> > + return 1; > > >> > +} > > >> > + > > >> > +static int __init add_kernel_resources(struct resource *res) > > >> > +{ > > >> > + int ret = 0; > > >> > + > > >> > + /* > > >> > + * The memory region of the kernel image is continuous and > > >> > + * was reserved on setup_bootmem, find it here and register > > >> > + * it as a resource, then register the various segments of > > >> > + * the image as child nodes > > >> > + */ > > >> > + if (!(res->start <= code_res.start && res->end >= data_res.end)) > > >> > + return 0; > > >> > + > > >> > + res->name = "Kernel image"; > > >> > + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > > >> > + > > >> > + /* > > >> > + * We removed a part of this region on setup_bootmem so > > >> > + * we need to expand the resource for the bss to fit in. > > >> > + */ > > >> > + res->end = bss_res.end; > > >> > + > > >> > + ret = add_resource(&iomem_resource, res); > > >> > + if (ret < 0) > > >> > + return ret; > > >> > + > > >> > + ret = add_resource(res, &code_res); > > >> > + if (ret < 0) > > >> > + return ret; > > >> > + > > >> > + ret = add_resource(res, &rodata_res); > > >> > + if (ret < 0) > > >> > + return ret; > > >> > + > > >> > + ret = add_resource(res, &data_res); > > >> > + if (ret < 0) > > >> > + return ret; > > >> > + > > >> > + ret = add_resource(res, &bss_res); > > >> > + > > >> > + return ret; > > >> > +} > > >> > + > > >> > +static void __init init_resources(void) > > >> > +{ > > >> > + struct memblock_region *region = NULL; > > >> > + struct resource *res = NULL; > > >> > + int ret = 0; > > >> > + > > >> > + code_res.start = __pa_symbol(_text); > > >> > + code_res.end = __pa_symbol(_etext) - 1; > > >> > + code_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > > >> > + > > >> > + rodata_res.start = __pa_symbol(__start_rodata); > > >> > + rodata_res.end = __pa_symbol(__end_rodata) - 1; > > >> > + rodata_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > > >> > + > > >> > + data_res.start = __pa_symbol(_data); > > >> > + data_res.end = __pa_symbol(_edata) - 1; > > >> > + data_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > > >> > + > > >> > + bss_res.start = __pa_symbol(__bss_start); > > >> > + bss_res.end = __pa_symbol(__bss_stop) - 1; > > >> > + bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > > >> > + > > >> > + /* > > >> > + * Start by adding the reserved regions, if they overlap > > >> > + * with /memory regions, insert_resource later on will take > > >> > + * care of it. > > >> > + */ > > >> > + for_each_memblock(reserved, region) { > > >> > + res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); > > > > > > Is there a specific reason to invoke memblock_alloc while iterating > > > reserved regions ? > > > memblock_alloc also adds calls memblock_reserve. So we are modifying > > > the reserved region entries > > > while iterating it. It resulted in below warning for rv32. > > > > > > [ 0.000000] ------------[ cut here ]------------ > > > [ 0.000000] WARNING: CPU: 0 PID: 0 at kernel/resource.c:795 > > > __insert_resource+0x8e/0xd0 > > > [ 0.000000] Modules linked in: > > > [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted > > > 5.10.0-00022-ge20097fb37e2-dirty #549 > > > [ 0.000000] epc: c00125c2 ra : c001262c sp : c1c01f50 > > > [ 0.000000] gp : c1d456e0 tp : c1c0a980 t0 : ffffcf20 > > > [ 0.000000] t1 : 00000000 t2 : 00000000 s0 : c1c01f60 > > > [ 0.000000] s1 : ffffcf00 a0 : ffffff00 a1 : c1c0c0c4 > > > [ 0.000000] a2 : 80c12b15 a3 : 80402000 a4 : 80402000 > > > [ 0.000000] a5 : c1c0c0c4 a6 : 80c12b15 a7 : f5faf600 > > > [ 0.000000] s2 : c1c0c0c4 s3 : c1c0e000 s4 : c1009a80 > > > [ 0.000000] s5 : c1c0c000 s6 : c1d48000 s7 : c1613b4c > > > [ 0.000000] s8 : 00000fff s9 : 80000200 s10: c1613b40 > > > [ 0.000000] s11: 00000000 t3 : c1d4a000 t4 : ffffffff > > > [ 0.000000] t5 : c1008e38 t6 : 00000001 > > > [ 0.000000] status: 00000100 badaddr: 00000000 cause: 00000003 > > > [ 0.000000] irq event stamp: 0 > > > [ 0.000000] hardirqs last enabled at (0): [<00000000>] 0x0 > > > [ 0.000000] hardirqs last disabled at (0): [<00000000>] 0x0 > > > [ 0.000000] softirqs last enabled at (0): [<00000000>] 0x0 > > > [ 0.000000] softirqs last disabled at (0): [<00000000>] 0x0 > > > [ 0.000000] random: get_random_bytes called from __warn+0xd8/0x11e > > > with crng_init=0 > > > [ 0.000000] ---[ end trace 0000000000000000 ]--- > > > [ 0.000000] Failed to add a Kernel code resource at 80402000 > > > > > > Here is the fix that works: > > > --- a/arch/riscv/kernel/setup.c > > > +++ b/arch/riscv/kernel/setup.c > > > @@ -127,7 +127,9 @@ static void __init init_resources(void) > > > { > > > struct memblock_region *region = NULL; > > > struct resource *res = NULL; > > > - int ret = 0; > > > + int ret = 0, i = 0; > > > + int num_mem_res; > > > + struct resource *mem_res; > > > > > > code_res.start = __pa_symbol(_text); > > > code_res.end = __pa_symbol(_etext) - 1; > > > @@ -145,16 +147,17 @@ static void __init init_resources(void) > > > bss_res.end = __pa_symbol(__bss_stop) - 1; > > > bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > > > > > > + num_mem_res = memblock.memory.cnt + memblock.reserved.cnt; > > > + mem_res = memblock_alloc(num_mem_res * sizeof(*mem_res), > > > SMP_CACHE_BYTES); > > > + if (!mem_res) > > > + panic("%s: Failed to allocate %zu bytes\n", __func__, > > > num_mem_res * sizeof(*mem_res)); > > > /* > > > * Start by adding the reserved regions, if they overlap > > > * with /memory regions, insert_resource later on will take > > > * care of it. > > > */ > > > for_each_reserved_mem_region(region) { > > > - res = memblock_alloc(sizeof(struct resource), > > > SMP_CACHE_BYTES); > > > - if (!res) > > > - panic("%s: Failed to allocate %zu bytes\n", > > > __func__, > > > - sizeof(struct resource)); > > > + res = &mem_res[i++]; > > > > > > res->name = "Reserved"; > > > res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; > > > @@ -181,11 +184,8 @@ static void __init init_resources(void) > > > > > > /* Add /memory regions to the resource tree */ > > > for_each_mem_region(region) { > > > - res = memblock_alloc(sizeof(struct resource), > > > SMP_CACHE_BYTES); > > > - if (!res) > > > - panic("%s: Failed to allocate %zu bytes\n", > > > __func__, > > > - sizeof(struct resource)); > > > > > > + res = &mem_res[i++]; > > > > > > If this looks okay to you, I will send the patch. > > > > > > > The problem is that we don't want to include all reserved regions within > > /memory such as the device tree or initramfs, since they'll get modified > > and/or freed later on. So pre-allocating resources for all reserved > > regions doesn't seem the right thing to do. My goal here was to allocate > > a resource on each itteration and free it if not needed / on failure, I > > free the failed resource on failure but not if it's not needed as you > > noted. > > > > As for the issue of memblock_alloc() calling memblock_reserve(), we > > don't add any new reserved regions at this point directly through > > memblock_reserve(), and memblock_reserve() will run on neighboring > > regions that memblock_merge_regions() will later on merge together, so > > it'll work on a single growing region instead of creating new ones. Forgot to add this: The contiguous region memory allocation and merging happens for RV64 but doesn't happen for RV32. Here is a boot log with additional printk messages which clearly indicate that memory regions are not contiguous. [ 0.000000] memblock_reserve: [0xc03fff80-0xc03fffff] memblock_alloc_range_nid+0xa8/0x112 [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe [ 0.000000] memblock_reserve: [0xc03fff40-0xc03fff5f] memblock_alloc_range_nid+0xa8/0x112 [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt 5 [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe [ 0.000000] memblock_reserve: [0xc03fff00-0xc03fff1f] memblock_alloc_range_nid+0xa8/0x112 [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt 6 [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe [ 0.000000] memblock_reserve: [0xc03ffec0-0xc03ffedf] memblock_alloc_range_nid+0xa8/0x112 [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt 7 [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe [ 0.000000] memblock_reserve: [0xc03ffe80-0xc03ffe9f] memblock_alloc_range_nid+0xa8/0x112 [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt 8 [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe [ 0.000000] memblock_reserve: [0xc03ffe40-0xc03ffe5f] memblock_alloc_range_nid+0xa8/0x112 [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt 9 [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe [ 0.000000] memblock_reserve: [0xc03ffe00-0xc03ffe1f] memblock_alloc_range_nid+0xa8/0x112 [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt 10 [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe [ 0.000000] memblock_reserve: [0xc03ffdc0-0xc03ffddf] memblock_alloc_range_nid+0xa8/0x112 [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt 11 [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe [ 0.000000] memblock_reserve: [0xc03ffd80-0xc03ffd9f] memblock_alloc_range_nid+0xa8/0x112 [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt 12 [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe [ 0.000000] memblock_reserve: [0xc03ffd40-0xc03ffd5f] memblock_alloc_range_nid+0xa8/0x112 [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt 13 [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe [ 0.000000] memblock_reserve: [0xc03ffd00-0xc03ffd1f] memblock_alloc_range_nid+0xa8/0x112 [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt 14 I am looking into the memblock allocation code to find out if this is expected. > > issue is that we call memblock_alloc() for the first time inside the > > loop and for_each_mem_region() macro uses pointer arithmetic on the > > initial array of reserved memblocks, > > Yes. We also need to do that for_each_reserved_mem_region as well. > Adding that to your diff will be exactly similar to mine. > > > for_each_memblock_type() macro but it's not on memblock.h anymore OR we > > could call memblock_alloc() before the loop so that the array is not > > expanded while itterating. > > > > How about this, does it work for you ?: > > > > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c > > index 1d85e9bf7..460cfddb7 100644 > > --- a/arch/riscv/kernel/setup.c > > +++ b/arch/riscv/kernel/setup.c > > @@ -127,8 +127,16 @@ static void __init init_resources(void) > > { > > struct memblock_region *region = NULL; > > struct resource *res = NULL; > > + struct resource *mem_resources = NULL; > > + size_t mem_resources_sz = 0; > > int ret = 0; > > > > + mem_resources_sz = memblock.memory.cnt * sizeof(struct resource); > > + mem_resources = memblock_alloc(mem_resources_sz, SMP_CACHE_BYTES); > > + if (!mem_resources) > > + panic("%s: Failed to allocate %zu bytes\n", __func__, > > + sizeof(struct resource)); > > + > > code_res.start = __pa_symbol(_text); > > code_res.end = __pa_symbol(_etext) - 1; > > code_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > > @@ -171,20 +179,21 @@ static void __init init_resources(void) > > * Ignore any other reserved regions within > > * system memory. > > */ > > - if (memblock_is_memory(res->start)) > > + if (memblock_is_memory(res->start)) { > > + memblock_free((phys_addr_t) res, sizeof(struct resource)); > > continue; > > + } > > > > ret = add_resource(&iomem_resource, res); > > - if (ret < 0) > > + if (ret < 0) { > > + memblock_free((phys_addr_t) res, sizeof(struct resource)); > > goto error; > > + } > > } > > > > /* Add /memory regions to the resource tree */ > > for_each_mem_region(region) { > > - res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); > > - if (!res) > > - panic("%s: Failed to allocate %zu bytes\n", __func__, > > - sizeof(struct resource)); > > + res = mem_resources++; > > > > if (unlikely(memblock_is_nomap(region))) { > > res->name = "Reserved"; > > @@ -205,9 +214,9 @@ static void __init init_resources(void) > > return; > > > > error: > > - memblock_free((phys_addr_t) res, sizeof(struct resource)); > > /* Better an empty resource tree than an inconsistent one */ > > release_child_resources(&iomem_resource); > > + memblock_free((phys_addr_t) mem_resources, mem_resources_sz); > > } > > > > Regards, > > Nick > > > > _______________________________________________ > > linux-riscv mailing list > > linux-riscv@lists.infradead.org > > http://lists.infradead.org/mailman/listinfo/linux-riscv > > > > -- > Regards, > Atish
On Wed, Dec 23, 2020 at 1:17 PM Atish Patra <atishp@atishpatra.org> wrote: > > On Wed, Dec 23, 2020 at 12:02 PM Atish Patra <atishp@atishpatra.org> wrote: > > > > On Tue, Dec 22, 2020 at 3:59 PM Nick Kossifidis <mick@ics.forth.gr> wrote: > > > > > > Στις 2020-12-19 01:52, Atish Patra έγραψε: > > > > On Thu, Nov 5, 2020 at 10:37 AM Palmer Dabbelt <palmer@dabbelt.com> > > > > wrote: > > > >> > > > >> On Mon, 12 Oct 2020 07:24:10 PDT (-0700), mick@ics.forth.gr wrote: > > > >> > This patch (previously part of my kexec/kdump series) populates > > > >> > /proc/iomem with the various sections of the kernel image. We need > > > >> > this for kexec-tools to be able to prepare the crashkernel image > > > >> > for kdump to work. Since resource tree initialization is not > > > >> > related to memory initialization I added the code to kernel/setup.c > > > >> > and removed the original code (derived from the arm64 tree) from > > > >> > mm/init.c. > > > >> > > > > >> > Signed-off-by: Nick Kossifidis <mick@ics.forth.gr> > > > >> > --- > > > >> > arch/riscv/kernel/setup.c | 160 ++++++++++++++++++++++++++++++++++++++ > > > >> > arch/riscv/mm/init.c | 27 ------- > > > >> > 2 files changed, 160 insertions(+), 27 deletions(-) > > > >> > > > > >> > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c > > > >> > index 2c6dd3293..450f0142f 100644 > > > >> > --- a/arch/riscv/kernel/setup.c > > > >> > +++ b/arch/riscv/kernel/setup.c > > > >> > @@ -4,6 +4,8 @@ > > > >> > * Chen Liqin <liqin.chen@sunplusct.com> > > > >> > * Lennox Wu <lennox.wu@sunplusct.com> > > > >> > * Copyright (C) 2012 Regents of the University of California > > > >> > + * Copyright (C) 2020 FORTH-ICS/CARV > > > >> > + * Nick Kossifidis <mick@ics.forth.gr> > > > >> > */ > > > >> > > > > >> > #include <linux/init.h> > > > >> > @@ -48,6 +50,163 @@ atomic_t hart_lottery __section(.sdata); > > > >> > unsigned long boot_cpu_hartid; > > > >> > static DEFINE_PER_CPU(struct cpu, cpu_devices); > > > >> > > > > >> > +/* > > > >> > + * Place kernel memory regions on the resource tree so that > > > >> > + * kexec-tools can retrieve them from /proc/iomem. While there > > > >> > + * also add "System RAM" regions for compatibility with other > > > >> > + * archs, and the rest of the known regions for completeness. > > > >> > + */ > > > >> > +static struct resource code_res = { .name = "Kernel code", }; > > > >> > +static struct resource data_res = { .name = "Kernel data", }; > > > >> > +static struct resource rodata_res = { .name = "Kernel rodata", }; > > > >> > +static struct resource bss_res = { .name = "Kernel bss", }; > > > >> > + > > > >> > +static int __init add_resource(struct resource *parent, > > > >> > + struct resource *res) > > > >> > +{ > > > >> > + int ret = 0; > > > >> > + > > > >> > + ret = insert_resource(parent, res); > > > >> > + if (ret < 0) { > > > >> > + pr_err("Failed to add a %s resource at %llx\n", > > > >> > + res->name, (unsigned long long) res->start); > > > >> > + return ret; > > > >> > + } > > > >> > + > > > >> > + return 1; > > > >> > +} > > > >> > + > > > >> > +static int __init add_kernel_resources(struct resource *res) > > > >> > +{ > > > >> > + int ret = 0; > > > >> > + > > > >> > + /* > > > >> > + * The memory region of the kernel image is continuous and > > > >> > + * was reserved on setup_bootmem, find it here and register > > > >> > + * it as a resource, then register the various segments of > > > >> > + * the image as child nodes > > > >> > + */ > > > >> > + if (!(res->start <= code_res.start && res->end >= data_res.end)) > > > >> > + return 0; > > > >> > + > > > >> > + res->name = "Kernel image"; > > > >> > + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > > > >> > + > > > >> > + /* > > > >> > + * We removed a part of this region on setup_bootmem so > > > >> > + * we need to expand the resource for the bss to fit in. > > > >> > + */ > > > >> > + res->end = bss_res.end; > > > >> > + > > > >> > + ret = add_resource(&iomem_resource, res); > > > >> > + if (ret < 0) > > > >> > + return ret; > > > >> > + > > > >> > + ret = add_resource(res, &code_res); > > > >> > + if (ret < 0) > > > >> > + return ret; > > > >> > + > > > >> > + ret = add_resource(res, &rodata_res); > > > >> > + if (ret < 0) > > > >> > + return ret; > > > >> > + > > > >> > + ret = add_resource(res, &data_res); > > > >> > + if (ret < 0) > > > >> > + return ret; > > > >> > + > > > >> > + ret = add_resource(res, &bss_res); > > > >> > + > > > >> > + return ret; > > > >> > +} > > > >> > + > > > >> > +static void __init init_resources(void) > > > >> > +{ > > > >> > + struct memblock_region *region = NULL; > > > >> > + struct resource *res = NULL; > > > >> > + int ret = 0; > > > >> > + > > > >> > + code_res.start = __pa_symbol(_text); > > > >> > + code_res.end = __pa_symbol(_etext) - 1; > > > >> > + code_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > > > >> > + > > > >> > + rodata_res.start = __pa_symbol(__start_rodata); > > > >> > + rodata_res.end = __pa_symbol(__end_rodata) - 1; > > > >> > + rodata_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > > > >> > + > > > >> > + data_res.start = __pa_symbol(_data); > > > >> > + data_res.end = __pa_symbol(_edata) - 1; > > > >> > + data_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > > > >> > + > > > >> > + bss_res.start = __pa_symbol(__bss_start); > > > >> > + bss_res.end = __pa_symbol(__bss_stop) - 1; > > > >> > + bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > > > >> > + > > > >> > + /* > > > >> > + * Start by adding the reserved regions, if they overlap > > > >> > + * with /memory regions, insert_resource later on will take > > > >> > + * care of it. > > > >> > + */ > > > >> > + for_each_memblock(reserved, region) { > > > >> > + res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); > > > > > > > > Is there a specific reason to invoke memblock_alloc while iterating > > > > reserved regions ? > > > > memblock_alloc also adds calls memblock_reserve. So we are modifying > > > > the reserved region entries > > > > while iterating it. It resulted in below warning for rv32. > > > > > > > > [ 0.000000] ------------[ cut here ]------------ > > > > [ 0.000000] WARNING: CPU: 0 PID: 0 at kernel/resource.c:795 > > > > __insert_resource+0x8e/0xd0 > > > > [ 0.000000] Modules linked in: > > > > [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted > > > > 5.10.0-00022-ge20097fb37e2-dirty #549 > > > > [ 0.000000] epc: c00125c2 ra : c001262c sp : c1c01f50 > > > > [ 0.000000] gp : c1d456e0 tp : c1c0a980 t0 : ffffcf20 > > > > [ 0.000000] t1 : 00000000 t2 : 00000000 s0 : c1c01f60 > > > > [ 0.000000] s1 : ffffcf00 a0 : ffffff00 a1 : c1c0c0c4 > > > > [ 0.000000] a2 : 80c12b15 a3 : 80402000 a4 : 80402000 > > > > [ 0.000000] a5 : c1c0c0c4 a6 : 80c12b15 a7 : f5faf600 > > > > [ 0.000000] s2 : c1c0c0c4 s3 : c1c0e000 s4 : c1009a80 > > > > [ 0.000000] s5 : c1c0c000 s6 : c1d48000 s7 : c1613b4c > > > > [ 0.000000] s8 : 00000fff s9 : 80000200 s10: c1613b40 > > > > [ 0.000000] s11: 00000000 t3 : c1d4a000 t4 : ffffffff > > > > [ 0.000000] t5 : c1008e38 t6 : 00000001 > > > > [ 0.000000] status: 00000100 badaddr: 00000000 cause: 00000003 > > > > [ 0.000000] irq event stamp: 0 > > > > [ 0.000000] hardirqs last enabled at (0): [<00000000>] 0x0 > > > > [ 0.000000] hardirqs last disabled at (0): [<00000000>] 0x0 > > > > [ 0.000000] softirqs last enabled at (0): [<00000000>] 0x0 > > > > [ 0.000000] softirqs last disabled at (0): [<00000000>] 0x0 > > > > [ 0.000000] random: get_random_bytes called from __warn+0xd8/0x11e > > > > with crng_init=0 > > > > [ 0.000000] ---[ end trace 0000000000000000 ]--- > > > > [ 0.000000] Failed to add a Kernel code resource at 80402000 > > > > > > > > Here is the fix that works: > > > > --- a/arch/riscv/kernel/setup.c > > > > +++ b/arch/riscv/kernel/setup.c > > > > @@ -127,7 +127,9 @@ static void __init init_resources(void) > > > > { > > > > struct memblock_region *region = NULL; > > > > struct resource *res = NULL; > > > > - int ret = 0; > > > > + int ret = 0, i = 0; > > > > + int num_mem_res; > > > > + struct resource *mem_res; > > > > > > > > code_res.start = __pa_symbol(_text); > > > > code_res.end = __pa_symbol(_etext) - 1; > > > > @@ -145,16 +147,17 @@ static void __init init_resources(void) > > > > bss_res.end = __pa_symbol(__bss_stop) - 1; > > > > bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > > > > > > > > + num_mem_res = memblock.memory.cnt + memblock.reserved.cnt; > > > > + mem_res = memblock_alloc(num_mem_res * sizeof(*mem_res), > > > > SMP_CACHE_BYTES); > > > > + if (!mem_res) > > > > + panic("%s: Failed to allocate %zu bytes\n", __func__, > > > > num_mem_res * sizeof(*mem_res)); > > > > /* > > > > * Start by adding the reserved regions, if they overlap > > > > * with /memory regions, insert_resource later on will take > > > > * care of it. > > > > */ > > > > for_each_reserved_mem_region(region) { > > > > - res = memblock_alloc(sizeof(struct resource), > > > > SMP_CACHE_BYTES); > > > > - if (!res) > > > > - panic("%s: Failed to allocate %zu bytes\n", > > > > __func__, > > > > - sizeof(struct resource)); > > > > + res = &mem_res[i++]; > > > > > > > > res->name = "Reserved"; > > > > res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; > > > > @@ -181,11 +184,8 @@ static void __init init_resources(void) > > > > > > > > /* Add /memory regions to the resource tree */ > > > > for_each_mem_region(region) { > > > > - res = memblock_alloc(sizeof(struct resource), > > > > SMP_CACHE_BYTES); > > > > - if (!res) > > > > - panic("%s: Failed to allocate %zu bytes\n", > > > > __func__, > > > > - sizeof(struct resource)); > > > > > > > > + res = &mem_res[i++]; > > > > > > > > If this looks okay to you, I will send the patch. > > > > > > > > > > The problem is that we don't want to include all reserved regions within > > > /memory such as the device tree or initramfs, since they'll get modified > > > and/or freed later on. So pre-allocating resources for all reserved > > > regions doesn't seem the right thing to do. My goal here was to allocate > > > a resource on each itteration and free it if not needed / on failure, I > > > free the failed resource on failure but not if it's not needed as you > > > noted. > > > > > > As for the issue of memblock_alloc() calling memblock_reserve(), we > > > don't add any new reserved regions at this point directly through > > > memblock_reserve(), and memblock_reserve() will run on neighboring > > > regions that memblock_merge_regions() will later on merge together, so > > > it'll work on a single growing region instead of creating new ones. > > Forgot to add this: > The contiguous region memory allocation and merging happens for RV64 > but doesn't happen for RV32. > Here is a boot log with additional printk messages which clearly > indicate that memory regions are not contiguous. > > [ 0.000000] memblock_reserve: [0xc03fff80-0xc03fffff] > memblock_alloc_range_nid+0xa8/0x112 > [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 > from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe > [ 0.000000] memblock_reserve: [0xc03fff40-0xc03fff5f] > memblock_alloc_range_nid+0xa8/0x112 > [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt > 5 > [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 > from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe > [ 0.000000] memblock_reserve: [0xc03fff00-0xc03fff1f] > memblock_alloc_range_nid+0xa8/0x112 > [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt > 6 > [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 > from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe > [ 0.000000] memblock_reserve: [0xc03ffec0-0xc03ffedf] > memblock_alloc_range_nid+0xa8/0x112 > [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt > 7 > [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 > from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe > [ 0.000000] memblock_reserve: [0xc03ffe80-0xc03ffe9f] > memblock_alloc_range_nid+0xa8/0x112 > [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt > 8 > [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 > from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe > [ 0.000000] memblock_reserve: [0xc03ffe40-0xc03ffe5f] > memblock_alloc_range_nid+0xa8/0x112 > [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt > 9 > [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 > from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe > [ 0.000000] memblock_reserve: [0xc03ffe00-0xc03ffe1f] > memblock_alloc_range_nid+0xa8/0x112 > [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt > 10 > [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 > from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe > [ 0.000000] memblock_reserve: [0xc03ffdc0-0xc03ffddf] > memblock_alloc_range_nid+0xa8/0x112 > [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt > 11 > [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 > from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe > [ 0.000000] memblock_reserve: [0xc03ffd80-0xc03ffd9f] > memblock_alloc_range_nid+0xa8/0x112 > [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt > 12 > [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 > from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe > [ 0.000000] memblock_reserve: [0xc03ffd40-0xc03ffd5f] > memblock_alloc_range_nid+0xa8/0x112 > [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt > 13 > [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 > from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe > [ 0.000000] memblock_reserve: [0xc03ffd00-0xc03ffd1f] > memblock_alloc_range_nid+0xa8/0x112 > [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt > 14 > > I am looking into the memblock allocation code to find out if this is expected. > Ahh found it. memblock_align is called with SMP_CACHE_BYTES (64) bytes alignment. Using a 32byte alignment solves the memory resource warning I was seeing on RV32 boot. I still prefer one time allocation before the loop instead of memblock_alloc invocation in every iteration (both mem_region & reserved_mem_region). This is happening during boot time. The less work it does, it is better. Let me know if you think there are any downsides to this approach. Otherwise, I will revise my patch and send it. > > > issue is that we call memblock_alloc() for the first time inside the > > > loop and for_each_mem_region() macro uses pointer arithmetic on the > > > initial array of reserved memblocks, > > > > Yes. We also need to do that for_each_reserved_mem_region as well. > > Adding that to your diff will be exactly similar to mine. > > > > > for_each_memblock_type() macro but it's not on memblock.h anymore OR we > > > could call memblock_alloc() before the loop so that the array is not > > > expanded while itterating. > > > > > > How about this, does it work for you ?: > > > > > > > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c > > > index 1d85e9bf7..460cfddb7 100644 > > > --- a/arch/riscv/kernel/setup.c > > > +++ b/arch/riscv/kernel/setup.c > > > @@ -127,8 +127,16 @@ static void __init init_resources(void) > > > { > > > struct memblock_region *region = NULL; > > > struct resource *res = NULL; > > > + struct resource *mem_resources = NULL; > > > + size_t mem_resources_sz = 0; > > > int ret = 0; > > > > > > + mem_resources_sz = memblock.memory.cnt * sizeof(struct resource); > > > + mem_resources = memblock_alloc(mem_resources_sz, SMP_CACHE_BYTES); > > > + if (!mem_resources) > > > + panic("%s: Failed to allocate %zu bytes\n", __func__, > > > + sizeof(struct resource)); > > > + > > > code_res.start = __pa_symbol(_text); > > > code_res.end = __pa_symbol(_etext) - 1; > > > code_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > > > @@ -171,20 +179,21 @@ static void __init init_resources(void) > > > * Ignore any other reserved regions within > > > * system memory. > > > */ > > > - if (memblock_is_memory(res->start)) > > > + if (memblock_is_memory(res->start)) { > > > + memblock_free((phys_addr_t) res, sizeof(struct resource)); > > > continue; > > > + } > > > > > > ret = add_resource(&iomem_resource, res); > > > - if (ret < 0) > > > + if (ret < 0) { > > > + memblock_free((phys_addr_t) res, sizeof(struct resource)); > > > goto error; > > > + } > > > } > > > > > > /* Add /memory regions to the resource tree */ > > > for_each_mem_region(region) { > > > - res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); > > > - if (!res) > > > - panic("%s: Failed to allocate %zu bytes\n", __func__, > > > - sizeof(struct resource)); > > > + res = mem_resources++; > > > > > > if (unlikely(memblock_is_nomap(region))) { > > > res->name = "Reserved"; > > > @@ -205,9 +214,9 @@ static void __init init_resources(void) > > > return; > > > > > > error: > > > - memblock_free((phys_addr_t) res, sizeof(struct resource)); > > > /* Better an empty resource tree than an inconsistent one */ > > > release_child_resources(&iomem_resource); > > > + memblock_free((phys_addr_t) mem_resources, mem_resources_sz); > > > } > > > > > > Regards, > > > Nick > > > > > > _______________________________________________ > > > linux-riscv mailing list > > > linux-riscv@lists.infradead.org > > > http://lists.infradead.org/mailman/listinfo/linux-riscv > > > > > > > > -- > > Regards, > > Atish > > > > -- > Regards, > Atish
Στις 2020-12-24 01:32, Atish Patra έγραψε: > On Wed, Dec 23, 2020 at 1:17 PM Atish Patra <atishp@atishpatra.org> > wrote: >> >> On Wed, Dec 23, 2020 at 12:02 PM Atish Patra <atishp@atishpatra.org> >> wrote: >> > >> > On Tue, Dec 22, 2020 at 3:59 PM Nick Kossifidis <mick@ics.forth.gr> wrote: >> > > >> > > Στις 2020-12-19 01:52, Atish Patra έγραψε: >> > > > On Thu, Nov 5, 2020 at 10:37 AM Palmer Dabbelt <palmer@dabbelt.com> >> > > > wrote: >> > > >> >> > > >> On Mon, 12 Oct 2020 07:24:10 PDT (-0700), mick@ics.forth.gr wrote: >> > > >> > This patch (previously part of my kexec/kdump series) populates >> > > >> > /proc/iomem with the various sections of the kernel image. We need >> > > >> > this for kexec-tools to be able to prepare the crashkernel image >> > > >> > for kdump to work. Since resource tree initialization is not >> > > >> > related to memory initialization I added the code to kernel/setup.c >> > > >> > and removed the original code (derived from the arm64 tree) from >> > > >> > mm/init.c. >> > > >> > >> > > >> > Signed-off-by: Nick Kossifidis <mick@ics.forth.gr> >> > > >> > --- >> > > >> > arch/riscv/kernel/setup.c | 160 ++++++++++++++++++++++++++++++++++++++ >> > > >> > arch/riscv/mm/init.c | 27 ------- >> > > >> > 2 files changed, 160 insertions(+), 27 deletions(-) >> > > >> > >> > > >> > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c >> > > >> > index 2c6dd3293..450f0142f 100644 >> > > >> > --- a/arch/riscv/kernel/setup.c >> > > >> > +++ b/arch/riscv/kernel/setup.c >> > > >> > @@ -4,6 +4,8 @@ >> > > >> > * Chen Liqin <liqin.chen@sunplusct.com> >> > > >> > * Lennox Wu <lennox.wu@sunplusct.com> >> > > >> > * Copyright (C) 2012 Regents of the University of California >> > > >> > + * Copyright (C) 2020 FORTH-ICS/CARV >> > > >> > + * Nick Kossifidis <mick@ics.forth.gr> >> > > >> > */ >> > > >> > >> > > >> > #include <linux/init.h> >> > > >> > @@ -48,6 +50,163 @@ atomic_t hart_lottery __section(.sdata); >> > > >> > unsigned long boot_cpu_hartid; >> > > >> > static DEFINE_PER_CPU(struct cpu, cpu_devices); >> > > >> > >> > > >> > +/* >> > > >> > + * Place kernel memory regions on the resource tree so that >> > > >> > + * kexec-tools can retrieve them from /proc/iomem. While there >> > > >> > + * also add "System RAM" regions for compatibility with other >> > > >> > + * archs, and the rest of the known regions for completeness. >> > > >> > + */ >> > > >> > +static struct resource code_res = { .name = "Kernel code", }; >> > > >> > +static struct resource data_res = { .name = "Kernel data", }; >> > > >> > +static struct resource rodata_res = { .name = "Kernel rodata", }; >> > > >> > +static struct resource bss_res = { .name = "Kernel bss", }; >> > > >> > + >> > > >> > +static int __init add_resource(struct resource *parent, >> > > >> > + struct resource *res) >> > > >> > +{ >> > > >> > + int ret = 0; >> > > >> > + >> > > >> > + ret = insert_resource(parent, res); >> > > >> > + if (ret < 0) { >> > > >> > + pr_err("Failed to add a %s resource at %llx\n", >> > > >> > + res->name, (unsigned long long) res->start); >> > > >> > + return ret; >> > > >> > + } >> > > >> > + >> > > >> > + return 1; >> > > >> > +} >> > > >> > + >> > > >> > +static int __init add_kernel_resources(struct resource *res) >> > > >> > +{ >> > > >> > + int ret = 0; >> > > >> > + >> > > >> > + /* >> > > >> > + * The memory region of the kernel image is continuous and >> > > >> > + * was reserved on setup_bootmem, find it here and register >> > > >> > + * it as a resource, then register the various segments of >> > > >> > + * the image as child nodes >> > > >> > + */ >> > > >> > + if (!(res->start <= code_res.start && res->end >= data_res.end)) >> > > >> > + return 0; >> > > >> > + >> > > >> > + res->name = "Kernel image"; >> > > >> > + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; >> > > >> > + >> > > >> > + /* >> > > >> > + * We removed a part of this region on setup_bootmem so >> > > >> > + * we need to expand the resource for the bss to fit in. >> > > >> > + */ >> > > >> > + res->end = bss_res.end; >> > > >> > + >> > > >> > + ret = add_resource(&iomem_resource, res); >> > > >> > + if (ret < 0) >> > > >> > + return ret; >> > > >> > + >> > > >> > + ret = add_resource(res, &code_res); >> > > >> > + if (ret < 0) >> > > >> > + return ret; >> > > >> > + >> > > >> > + ret = add_resource(res, &rodata_res); >> > > >> > + if (ret < 0) >> > > >> > + return ret; >> > > >> > + >> > > >> > + ret = add_resource(res, &data_res); >> > > >> > + if (ret < 0) >> > > >> > + return ret; >> > > >> > + >> > > >> > + ret = add_resource(res, &bss_res); >> > > >> > + >> > > >> > + return ret; >> > > >> > +} >> > > >> > + >> > > >> > +static void __init init_resources(void) >> > > >> > +{ >> > > >> > + struct memblock_region *region = NULL; >> > > >> > + struct resource *res = NULL; >> > > >> > + int ret = 0; >> > > >> > + >> > > >> > + code_res.start = __pa_symbol(_text); >> > > >> > + code_res.end = __pa_symbol(_etext) - 1; >> > > >> > + code_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; >> > > >> > + >> > > >> > + rodata_res.start = __pa_symbol(__start_rodata); >> > > >> > + rodata_res.end = __pa_symbol(__end_rodata) - 1; >> > > >> > + rodata_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; >> > > >> > + >> > > >> > + data_res.start = __pa_symbol(_data); >> > > >> > + data_res.end = __pa_symbol(_edata) - 1; >> > > >> > + data_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; >> > > >> > + >> > > >> > + bss_res.start = __pa_symbol(__bss_start); >> > > >> > + bss_res.end = __pa_symbol(__bss_stop) - 1; >> > > >> > + bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; >> > > >> > + >> > > >> > + /* >> > > >> > + * Start by adding the reserved regions, if they overlap >> > > >> > + * with /memory regions, insert_resource later on will take >> > > >> > + * care of it. >> > > >> > + */ >> > > >> > + for_each_memblock(reserved, region) { >> > > >> > + res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); >> > > > >> > > > Is there a specific reason to invoke memblock_alloc while iterating >> > > > reserved regions ? >> > > > memblock_alloc also adds calls memblock_reserve. So we are modifying >> > > > the reserved region entries >> > > > while iterating it. It resulted in below warning for rv32. >> > > > >> > > > [ 0.000000] ------------[ cut here ]------------ >> > > > [ 0.000000] WARNING: CPU: 0 PID: 0 at kernel/resource.c:795 >> > > > __insert_resource+0x8e/0xd0 >> > > > [ 0.000000] Modules linked in: >> > > > [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted >> > > > 5.10.0-00022-ge20097fb37e2-dirty #549 >> > > > [ 0.000000] epc: c00125c2 ra : c001262c sp : c1c01f50 >> > > > [ 0.000000] gp : c1d456e0 tp : c1c0a980 t0 : ffffcf20 >> > > > [ 0.000000] t1 : 00000000 t2 : 00000000 s0 : c1c01f60 >> > > > [ 0.000000] s1 : ffffcf00 a0 : ffffff00 a1 : c1c0c0c4 >> > > > [ 0.000000] a2 : 80c12b15 a3 : 80402000 a4 : 80402000 >> > > > [ 0.000000] a5 : c1c0c0c4 a6 : 80c12b15 a7 : f5faf600 >> > > > [ 0.000000] s2 : c1c0c0c4 s3 : c1c0e000 s4 : c1009a80 >> > > > [ 0.000000] s5 : c1c0c000 s6 : c1d48000 s7 : c1613b4c >> > > > [ 0.000000] s8 : 00000fff s9 : 80000200 s10: c1613b40 >> > > > [ 0.000000] s11: 00000000 t3 : c1d4a000 t4 : ffffffff >> > > > [ 0.000000] t5 : c1008e38 t6 : 00000001 >> > > > [ 0.000000] status: 00000100 badaddr: 00000000 cause: 00000003 >> > > > [ 0.000000] irq event stamp: 0 >> > > > [ 0.000000] hardirqs last enabled at (0): [<00000000>] 0x0 >> > > > [ 0.000000] hardirqs last disabled at (0): [<00000000>] 0x0 >> > > > [ 0.000000] softirqs last enabled at (0): [<00000000>] 0x0 >> > > > [ 0.000000] softirqs last disabled at (0): [<00000000>] 0x0 >> > > > [ 0.000000] random: get_random_bytes called from __warn+0xd8/0x11e >> > > > with crng_init=0 >> > > > [ 0.000000] ---[ end trace 0000000000000000 ]--- >> > > > [ 0.000000] Failed to add a Kernel code resource at 80402000 >> > > > >> > > > Here is the fix that works: >> > > > --- a/arch/riscv/kernel/setup.c >> > > > +++ b/arch/riscv/kernel/setup.c >> > > > @@ -127,7 +127,9 @@ static void __init init_resources(void) >> > > > { >> > > > struct memblock_region *region = NULL; >> > > > struct resource *res = NULL; >> > > > - int ret = 0; >> > > > + int ret = 0, i = 0; >> > > > + int num_mem_res; >> > > > + struct resource *mem_res; >> > > > >> > > > code_res.start = __pa_symbol(_text); >> > > > code_res.end = __pa_symbol(_etext) - 1; >> > > > @@ -145,16 +147,17 @@ static void __init init_resources(void) >> > > > bss_res.end = __pa_symbol(__bss_stop) - 1; >> > > > bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; >> > > > >> > > > + num_mem_res = memblock.memory.cnt + memblock.reserved.cnt; >> > > > + mem_res = memblock_alloc(num_mem_res * sizeof(*mem_res), >> > > > SMP_CACHE_BYTES); >> > > > + if (!mem_res) >> > > > + panic("%s: Failed to allocate %zu bytes\n", __func__, >> > > > num_mem_res * sizeof(*mem_res)); >> > > > /* >> > > > * Start by adding the reserved regions, if they overlap >> > > > * with /memory regions, insert_resource later on will take >> > > > * care of it. >> > > > */ >> > > > for_each_reserved_mem_region(region) { >> > > > - res = memblock_alloc(sizeof(struct resource), >> > > > SMP_CACHE_BYTES); >> > > > - if (!res) >> > > > - panic("%s: Failed to allocate %zu bytes\n", >> > > > __func__, >> > > > - sizeof(struct resource)); >> > > > + res = &mem_res[i++]; >> > > > >> > > > res->name = "Reserved"; >> > > > res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; >> > > > @@ -181,11 +184,8 @@ static void __init init_resources(void) >> > > > >> > > > /* Add /memory regions to the resource tree */ >> > > > for_each_mem_region(region) { >> > > > - res = memblock_alloc(sizeof(struct resource), >> > > > SMP_CACHE_BYTES); >> > > > - if (!res) >> > > > - panic("%s: Failed to allocate %zu bytes\n", >> > > > __func__, >> > > > - sizeof(struct resource)); >> > > > >> > > > + res = &mem_res[i++]; >> > > > >> > > > If this looks okay to you, I will send the patch. >> > > > >> > > >> > > The problem is that we don't want to include all reserved regions within >> > > /memory such as the device tree or initramfs, since they'll get modified >> > > and/or freed later on. So pre-allocating resources for all reserved >> > > regions doesn't seem the right thing to do. My goal here was to allocate >> > > a resource on each itteration and free it if not needed / on failure, I >> > > free the failed resource on failure but not if it's not needed as you >> > > noted. >> > > >> > > As for the issue of memblock_alloc() calling memblock_reserve(), we >> > > don't add any new reserved regions at this point directly through >> > > memblock_reserve(), and memblock_reserve() will run on neighboring >> > > regions that memblock_merge_regions() will later on merge together, so >> > > it'll work on a single growing region instead of creating new ones. >> >> Forgot to add this: >> The contiguous region memory allocation and merging happens for RV64 >> but doesn't happen for RV32. >> Here is a boot log with additional printk messages which clearly >> indicate that memory regions are not contiguous. >> >> [ 0.000000] memblock_reserve: [0xc03fff80-0xc03fffff] >> memblock_alloc_range_nid+0xa8/0x112 >> [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 >> from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe >> [ 0.000000] memblock_reserve: [0xc03fff40-0xc03fff5f] >> memblock_alloc_range_nid+0xa8/0x112 >> [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt >> 5 >> [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 >> from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe >> [ 0.000000] memblock_reserve: [0xc03fff00-0xc03fff1f] >> memblock_alloc_range_nid+0xa8/0x112 >> [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt >> 6 >> [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 >> from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe >> [ 0.000000] memblock_reserve: [0xc03ffec0-0xc03ffedf] >> memblock_alloc_range_nid+0xa8/0x112 >> [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt >> 7 >> [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 >> from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe >> [ 0.000000] memblock_reserve: [0xc03ffe80-0xc03ffe9f] >> memblock_alloc_range_nid+0xa8/0x112 >> [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt >> 8 >> [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 >> from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe >> [ 0.000000] memblock_reserve: [0xc03ffe40-0xc03ffe5f] >> memblock_alloc_range_nid+0xa8/0x112 >> [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt >> 9 >> [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 >> from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe >> [ 0.000000] memblock_reserve: [0xc03ffe00-0xc03ffe1f] >> memblock_alloc_range_nid+0xa8/0x112 >> [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt >> 10 >> [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 >> from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe >> [ 0.000000] memblock_reserve: [0xc03ffdc0-0xc03ffddf] >> memblock_alloc_range_nid+0xa8/0x112 >> [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt >> 11 >> [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 >> from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe >> [ 0.000000] memblock_reserve: [0xc03ffd80-0xc03ffd9f] >> memblock_alloc_range_nid+0xa8/0x112 >> [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt >> 12 >> [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 >> from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe >> [ 0.000000] memblock_reserve: [0xc03ffd40-0xc03ffd5f] >> memblock_alloc_range_nid+0xa8/0x112 >> [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt >> 13 >> [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 >> from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe >> [ 0.000000] memblock_reserve: [0xc03ffd00-0xc03ffd1f] >> memblock_alloc_range_nid+0xa8/0x112 >> [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt >> 14 >> >> I am looking into the memblock allocation code to find out if this is >> expected. >> > > Ahh found it. memblock_align is called with SMP_CACHE_BYTES (64) bytes > alignment. > Using a 32byte alignment solves the memory resource warning I was > seeing on RV32 boot. > > I still prefer one time allocation before the loop instead of > memblock_alloc invocation > in every iteration (both mem_region & reserved_mem_region). This is > happening during boot time. > The less work it does, it is better. Let me know if you think there > are any downsides to this approach. > > Otherwise, I will revise my patch and send it. > ACK, we'll loose some memory for the reserved regions we'll ignore but it's safer / simpler that way. Thanks for debugging this, I guess we'll need a patch for the SMP_CACHE_BYTES issue, we'll need a cache.h.
On Wed, Dec 23, 2020 at 5:17 PM Nick Kossifidis <mick@ics.forth.gr> wrote: > > Στις 2020-12-24 01:32, Atish Patra έγραψε: > > On Wed, Dec 23, 2020 at 1:17 PM Atish Patra <atishp@atishpatra.org> > > wrote: > >> > >> On Wed, Dec 23, 2020 at 12:02 PM Atish Patra <atishp@atishpatra.org> > >> wrote: > >> > > >> > On Tue, Dec 22, 2020 at 3:59 PM Nick Kossifidis <mick@ics.forth.gr> wrote: > >> > > > >> > > Στις 2020-12-19 01:52, Atish Patra έγραψε: > >> > > > On Thu, Nov 5, 2020 at 10:37 AM Palmer Dabbelt <palmer@dabbelt.com> > >> > > > wrote: > >> > > >> > >> > > >> On Mon, 12 Oct 2020 07:24:10 PDT (-0700), mick@ics.forth.gr wrote: > >> > > >> > This patch (previously part of my kexec/kdump series) populates > >> > > >> > /proc/iomem with the various sections of the kernel image. We need > >> > > >> > this for kexec-tools to be able to prepare the crashkernel image > >> > > >> > for kdump to work. Since resource tree initialization is not > >> > > >> > related to memory initialization I added the code to kernel/setup.c > >> > > >> > and removed the original code (derived from the arm64 tree) from > >> > > >> > mm/init.c. > >> > > >> > > >> > > >> > Signed-off-by: Nick Kossifidis <mick@ics.forth.gr> > >> > > >> > --- > >> > > >> > arch/riscv/kernel/setup.c | 160 ++++++++++++++++++++++++++++++++++++++ > >> > > >> > arch/riscv/mm/init.c | 27 ------- > >> > > >> > 2 files changed, 160 insertions(+), 27 deletions(-) > >> > > >> > > >> > > >> > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c > >> > > >> > index 2c6dd3293..450f0142f 100644 > >> > > >> > --- a/arch/riscv/kernel/setup.c > >> > > >> > +++ b/arch/riscv/kernel/setup.c > >> > > >> > @@ -4,6 +4,8 @@ > >> > > >> > * Chen Liqin <liqin.chen@sunplusct.com> > >> > > >> > * Lennox Wu <lennox.wu@sunplusct.com> > >> > > >> > * Copyright (C) 2012 Regents of the University of California > >> > > >> > + * Copyright (C) 2020 FORTH-ICS/CARV > >> > > >> > + * Nick Kossifidis <mick@ics.forth.gr> > >> > > >> > */ > >> > > >> > > >> > > >> > #include <linux/init.h> > >> > > >> > @@ -48,6 +50,163 @@ atomic_t hart_lottery __section(.sdata); > >> > > >> > unsigned long boot_cpu_hartid; > >> > > >> > static DEFINE_PER_CPU(struct cpu, cpu_devices); > >> > > >> > > >> > > >> > +/* > >> > > >> > + * Place kernel memory regions on the resource tree so that > >> > > >> > + * kexec-tools can retrieve them from /proc/iomem. While there > >> > > >> > + * also add "System RAM" regions for compatibility with other > >> > > >> > + * archs, and the rest of the known regions for completeness. > >> > > >> > + */ > >> > > >> > +static struct resource code_res = { .name = "Kernel code", }; > >> > > >> > +static struct resource data_res = { .name = "Kernel data", }; > >> > > >> > +static struct resource rodata_res = { .name = "Kernel rodata", }; > >> > > >> > +static struct resource bss_res = { .name = "Kernel bss", }; > >> > > >> > + > >> > > >> > +static int __init add_resource(struct resource *parent, > >> > > >> > + struct resource *res) > >> > > >> > +{ > >> > > >> > + int ret = 0; > >> > > >> > + > >> > > >> > + ret = insert_resource(parent, res); > >> > > >> > + if (ret < 0) { > >> > > >> > + pr_err("Failed to add a %s resource at %llx\n", > >> > > >> > + res->name, (unsigned long long) res->start); > >> > > >> > + return ret; > >> > > >> > + } > >> > > >> > + > >> > > >> > + return 1; > >> > > >> > +} > >> > > >> > + > >> > > >> > +static int __init add_kernel_resources(struct resource *res) > >> > > >> > +{ > >> > > >> > + int ret = 0; > >> > > >> > + > >> > > >> > + /* > >> > > >> > + * The memory region of the kernel image is continuous and > >> > > >> > + * was reserved on setup_bootmem, find it here and register > >> > > >> > + * it as a resource, then register the various segments of > >> > > >> > + * the image as child nodes > >> > > >> > + */ > >> > > >> > + if (!(res->start <= code_res.start && res->end >= data_res.end)) > >> > > >> > + return 0; > >> > > >> > + > >> > > >> > + res->name = "Kernel image"; > >> > > >> > + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > >> > > >> > + > >> > > >> > + /* > >> > > >> > + * We removed a part of this region on setup_bootmem so > >> > > >> > + * we need to expand the resource for the bss to fit in. > >> > > >> > + */ > >> > > >> > + res->end = bss_res.end; > >> > > >> > + > >> > > >> > + ret = add_resource(&iomem_resource, res); > >> > > >> > + if (ret < 0) > >> > > >> > + return ret; > >> > > >> > + > >> > > >> > + ret = add_resource(res, &code_res); > >> > > >> > + if (ret < 0) > >> > > >> > + return ret; > >> > > >> > + > >> > > >> > + ret = add_resource(res, &rodata_res); > >> > > >> > + if (ret < 0) > >> > > >> > + return ret; > >> > > >> > + > >> > > >> > + ret = add_resource(res, &data_res); > >> > > >> > + if (ret < 0) > >> > > >> > + return ret; > >> > > >> > + > >> > > >> > + ret = add_resource(res, &bss_res); > >> > > >> > + > >> > > >> > + return ret; > >> > > >> > +} > >> > > >> > + > >> > > >> > +static void __init init_resources(void) > >> > > >> > +{ > >> > > >> > + struct memblock_region *region = NULL; > >> > > >> > + struct resource *res = NULL; > >> > > >> > + int ret = 0; > >> > > >> > + > >> > > >> > + code_res.start = __pa_symbol(_text); > >> > > >> > + code_res.end = __pa_symbol(_etext) - 1; > >> > > >> > + code_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > >> > > >> > + > >> > > >> > + rodata_res.start = __pa_symbol(__start_rodata); > >> > > >> > + rodata_res.end = __pa_symbol(__end_rodata) - 1; > >> > > >> > + rodata_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > >> > > >> > + > >> > > >> > + data_res.start = __pa_symbol(_data); > >> > > >> > + data_res.end = __pa_symbol(_edata) - 1; > >> > > >> > + data_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > >> > > >> > + > >> > > >> > + bss_res.start = __pa_symbol(__bss_start); > >> > > >> > + bss_res.end = __pa_symbol(__bss_stop) - 1; > >> > > >> > + bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > >> > > >> > + > >> > > >> > + /* > >> > > >> > + * Start by adding the reserved regions, if they overlap > >> > > >> > + * with /memory regions, insert_resource later on will take > >> > > >> > + * care of it. > >> > > >> > + */ > >> > > >> > + for_each_memblock(reserved, region) { > >> > > >> > + res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); > >> > > > > >> > > > Is there a specific reason to invoke memblock_alloc while iterating > >> > > > reserved regions ? > >> > > > memblock_alloc also adds calls memblock_reserve. So we are modifying > >> > > > the reserved region entries > >> > > > while iterating it. It resulted in below warning for rv32. > >> > > > > >> > > > [ 0.000000] ------------[ cut here ]------------ > >> > > > [ 0.000000] WARNING: CPU: 0 PID: 0 at kernel/resource.c:795 > >> > > > __insert_resource+0x8e/0xd0 > >> > > > [ 0.000000] Modules linked in: > >> > > > [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted > >> > > > 5.10.0-00022-ge20097fb37e2-dirty #549 > >> > > > [ 0.000000] epc: c00125c2 ra : c001262c sp : c1c01f50 > >> > > > [ 0.000000] gp : c1d456e0 tp : c1c0a980 t0 : ffffcf20 > >> > > > [ 0.000000] t1 : 00000000 t2 : 00000000 s0 : c1c01f60 > >> > > > [ 0.000000] s1 : ffffcf00 a0 : ffffff00 a1 : c1c0c0c4 > >> > > > [ 0.000000] a2 : 80c12b15 a3 : 80402000 a4 : 80402000 > >> > > > [ 0.000000] a5 : c1c0c0c4 a6 : 80c12b15 a7 : f5faf600 > >> > > > [ 0.000000] s2 : c1c0c0c4 s3 : c1c0e000 s4 : c1009a80 > >> > > > [ 0.000000] s5 : c1c0c000 s6 : c1d48000 s7 : c1613b4c > >> > > > [ 0.000000] s8 : 00000fff s9 : 80000200 s10: c1613b40 > >> > > > [ 0.000000] s11: 00000000 t3 : c1d4a000 t4 : ffffffff > >> > > > [ 0.000000] t5 : c1008e38 t6 : 00000001 > >> > > > [ 0.000000] status: 00000100 badaddr: 00000000 cause: 00000003 > >> > > > [ 0.000000] irq event stamp: 0 > >> > > > [ 0.000000] hardirqs last enabled at (0): [<00000000>] 0x0 > >> > > > [ 0.000000] hardirqs last disabled at (0): [<00000000>] 0x0 > >> > > > [ 0.000000] softirqs last enabled at (0): [<00000000>] 0x0 > >> > > > [ 0.000000] softirqs last disabled at (0): [<00000000>] 0x0 > >> > > > [ 0.000000] random: get_random_bytes called from __warn+0xd8/0x11e > >> > > > with crng_init=0 > >> > > > [ 0.000000] ---[ end trace 0000000000000000 ]--- > >> > > > [ 0.000000] Failed to add a Kernel code resource at 80402000 > >> > > > > >> > > > Here is the fix that works: > >> > > > --- a/arch/riscv/kernel/setup.c > >> > > > +++ b/arch/riscv/kernel/setup.c > >> > > > @@ -127,7 +127,9 @@ static void __init init_resources(void) > >> > > > { > >> > > > struct memblock_region *region = NULL; > >> > > > struct resource *res = NULL; > >> > > > - int ret = 0; > >> > > > + int ret = 0, i = 0; > >> > > > + int num_mem_res; > >> > > > + struct resource *mem_res; > >> > > > > >> > > > code_res.start = __pa_symbol(_text); > >> > > > code_res.end = __pa_symbol(_etext) - 1; > >> > > > @@ -145,16 +147,17 @@ static void __init init_resources(void) > >> > > > bss_res.end = __pa_symbol(__bss_stop) - 1; > >> > > > bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; > >> > > > > >> > > > + num_mem_res = memblock.memory.cnt + memblock.reserved.cnt; > >> > > > + mem_res = memblock_alloc(num_mem_res * sizeof(*mem_res), > >> > > > SMP_CACHE_BYTES); > >> > > > + if (!mem_res) > >> > > > + panic("%s: Failed to allocate %zu bytes\n", __func__, > >> > > > num_mem_res * sizeof(*mem_res)); > >> > > > /* > >> > > > * Start by adding the reserved regions, if they overlap > >> > > > * with /memory regions, insert_resource later on will take > >> > > > * care of it. > >> > > > */ > >> > > > for_each_reserved_mem_region(region) { > >> > > > - res = memblock_alloc(sizeof(struct resource), > >> > > > SMP_CACHE_BYTES); > >> > > > - if (!res) > >> > > > - panic("%s: Failed to allocate %zu bytes\n", > >> > > > __func__, > >> > > > - sizeof(struct resource)); > >> > > > + res = &mem_res[i++]; > >> > > > > >> > > > res->name = "Reserved"; > >> > > > res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; > >> > > > @@ -181,11 +184,8 @@ static void __init init_resources(void) > >> > > > > >> > > > /* Add /memory regions to the resource tree */ > >> > > > for_each_mem_region(region) { > >> > > > - res = memblock_alloc(sizeof(struct resource), > >> > > > SMP_CACHE_BYTES); > >> > > > - if (!res) > >> > > > - panic("%s: Failed to allocate %zu bytes\n", > >> > > > __func__, > >> > > > - sizeof(struct resource)); > >> > > > > >> > > > + res = &mem_res[i++]; > >> > > > > >> > > > If this looks okay to you, I will send the patch. > >> > > > > >> > > > >> > > The problem is that we don't want to include all reserved regions within > >> > > /memory such as the device tree or initramfs, since they'll get modified > >> > > and/or freed later on. So pre-allocating resources for all reserved > >> > > regions doesn't seem the right thing to do. My goal here was to allocate > >> > > a resource on each itteration and free it if not needed / on failure, I > >> > > free the failed resource on failure but not if it's not needed as you > >> > > noted. > >> > > > >> > > As for the issue of memblock_alloc() calling memblock_reserve(), we > >> > > don't add any new reserved regions at this point directly through > >> > > memblock_reserve(), and memblock_reserve() will run on neighboring > >> > > regions that memblock_merge_regions() will later on merge together, so > >> > > it'll work on a single growing region instead of creating new ones. > >> > >> Forgot to add this: > >> The contiguous region memory allocation and merging happens for RV64 > >> but doesn't happen for RV32. > >> Here is a boot log with additional printk messages which clearly > >> indicate that memory regions are not contiguous. > >> > >> [ 0.000000] memblock_reserve: [0xc03fff80-0xc03fffff] > >> memblock_alloc_range_nid+0xa8/0x112 > >> [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 > >> from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe > >> [ 0.000000] memblock_reserve: [0xc03fff40-0xc03fff5f] > >> memblock_alloc_range_nid+0xa8/0x112 > >> [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt > >> 5 > >> [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 > >> from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe > >> [ 0.000000] memblock_reserve: [0xc03fff00-0xc03fff1f] > >> memblock_alloc_range_nid+0xa8/0x112 > >> [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt > >> 6 > >> [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 > >> from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe > >> [ 0.000000] memblock_reserve: [0xc03ffec0-0xc03ffedf] > >> memblock_alloc_range_nid+0xa8/0x112 > >> [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt > >> 7 > >> [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 > >> from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe > >> [ 0.000000] memblock_reserve: [0xc03ffe80-0xc03ffe9f] > >> memblock_alloc_range_nid+0xa8/0x112 > >> [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt > >> 8 > >> [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 > >> from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe > >> [ 0.000000] memblock_reserve: [0xc03ffe40-0xc03ffe5f] > >> memblock_alloc_range_nid+0xa8/0x112 > >> [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt > >> 9 > >> [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 > >> from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe > >> [ 0.000000] memblock_reserve: [0xc03ffe00-0xc03ffe1f] > >> memblock_alloc_range_nid+0xa8/0x112 > >> [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt > >> 10 > >> [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 > >> from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe > >> [ 0.000000] memblock_reserve: [0xc03ffdc0-0xc03ffddf] > >> memblock_alloc_range_nid+0xa8/0x112 > >> [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt > >> 11 > >> [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 > >> from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe > >> [ 0.000000] memblock_reserve: [0xc03ffd80-0xc03ffd9f] > >> memblock_alloc_range_nid+0xa8/0x112 > >> [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt > >> 12 > >> [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 > >> from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe > >> [ 0.000000] memblock_reserve: [0xc03ffd40-0xc03ffd5f] > >> memblock_alloc_range_nid+0xa8/0x112 > >> [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt > >> 13 > >> [ 0.000000] memblock_alloc_try_nid: 32 bytes align=0x40 nid=-1 > >> from=0x00000000 max_addr=0x00000000 setup_arch+0x19c/0x3fe > >> [ 0.000000] memblock_reserve: [0xc03ffd00-0xc03ffd1f] > >> memblock_alloc_range_nid+0xa8/0x112 > >> [ 0.000000] init_resources: resvd region memory.cnt 1 reserved.cnt > >> 14 > >> > >> I am looking into the memblock allocation code to find out if this is > >> expected. > >> > > > > Ahh found it. memblock_align is called with SMP_CACHE_BYTES (64) bytes > > alignment. > > Using a 32byte alignment solves the memory resource warning I was > > seeing on RV32 boot. > > > > I still prefer one time allocation before the loop instead of > > memblock_alloc invocation > > in every iteration (both mem_region & reserved_mem_region). This is > > happening during boot time. > > The less work it does, it is better. Let me know if you think there > > are any downsides to this approach. > > > > Otherwise, I will revise my patch and send it. > > > > ACK, we'll loose some memory for the reserved regions we'll ignore but > it's safer / simpler that way. > Thanks for debugging this, I guess we'll need a patch for the > SMP_CACHE_BYTES issue, we'll need a cache.h. > yeah. I think defining L1_CACHE_SHIFT to 5 for RV32 is sufficient. I will send both patches soon. > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 2c6dd3293..450f0142f 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -4,6 +4,8 @@ * Chen Liqin <liqin.chen@sunplusct.com> * Lennox Wu <lennox.wu@sunplusct.com> * Copyright (C) 2012 Regents of the University of California + * Copyright (C) 2020 FORTH-ICS/CARV + * Nick Kossifidis <mick@ics.forth.gr> */ #include <linux/init.h> @@ -48,6 +50,163 @@ atomic_t hart_lottery __section(.sdata); unsigned long boot_cpu_hartid; static DEFINE_PER_CPU(struct cpu, cpu_devices); +/* + * Place kernel memory regions on the resource tree so that + * kexec-tools can retrieve them from /proc/iomem. While there + * also add "System RAM" regions for compatibility with other + * archs, and the rest of the known regions for completeness. + */ +static struct resource code_res = { .name = "Kernel code", }; +static struct resource data_res = { .name = "Kernel data", }; +static struct resource rodata_res = { .name = "Kernel rodata", }; +static struct resource bss_res = { .name = "Kernel bss", }; + +static int __init add_resource(struct resource *parent, + struct resource *res) +{ + int ret = 0; + + ret = insert_resource(parent, res); + if (ret < 0) { + pr_err("Failed to add a %s resource at %llx\n", + res->name, (unsigned long long) res->start); + return ret; + } + + return 1; +} + +static int __init add_kernel_resources(struct resource *res) +{ + int ret = 0; + + /* + * The memory region of the kernel image is continuous and + * was reserved on setup_bootmem, find it here and register + * it as a resource, then register the various segments of + * the image as child nodes + */ + if (!(res->start <= code_res.start && res->end >= data_res.end)) + return 0; + + res->name = "Kernel image"; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + + /* + * We removed a part of this region on setup_bootmem so + * we need to expand the resource for the bss to fit in. + */ + res->end = bss_res.end; + + ret = add_resource(&iomem_resource, res); + if (ret < 0) + return ret; + + ret = add_resource(res, &code_res); + if (ret < 0) + return ret; + + ret = add_resource(res, &rodata_res); + if (ret < 0) + return ret; + + ret = add_resource(res, &data_res); + if (ret < 0) + return ret; + + ret = add_resource(res, &bss_res); + + return ret; +} + +static void __init init_resources(void) +{ + struct memblock_region *region = NULL; + struct resource *res = NULL; + int ret = 0; + + code_res.start = __pa_symbol(_text); + code_res.end = __pa_symbol(_etext) - 1; + code_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + + rodata_res.start = __pa_symbol(__start_rodata); + rodata_res.end = __pa_symbol(__end_rodata) - 1; + rodata_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + + data_res.start = __pa_symbol(_data); + data_res.end = __pa_symbol(_edata) - 1; + data_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + + bss_res.start = __pa_symbol(__bss_start); + bss_res.end = __pa_symbol(__bss_stop) - 1; + bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + + /* + * Start by adding the reserved regions, if they overlap + * with /memory regions, insert_resource later on will take + * care of it. + */ + for_each_memblock(reserved, region) { + res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); + if (!res) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(struct resource)); + + res->name = "Reserved"; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->start = __pfn_to_phys(memblock_region_reserved_base_pfn(region)); + res->end = __pfn_to_phys(memblock_region_reserved_end_pfn(region)) - 1; + + ret = add_kernel_resources(res); + if (ret < 0) + goto error; + else if (ret) + continue; + + /* + * Ignore any other reserved regions within + * system memory. + */ + if (memblock_is_memory(res->start)) + continue; + + ret = add_resource(&iomem_resource, res); + if (ret < 0) + goto error; + } + + /* Add /memory regions to the resource tree */ + for_each_memblock(memory, region) { + res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); + if (!res) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(struct resource)); + + if (unlikely(memblock_is_nomap(region))) { + res->name = "Reserved"; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + } else { + res->name = "System RAM"; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + } + + res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); + res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; + + ret = add_resource(&iomem_resource, res); + if (ret < 0) + goto error; + } + + return; + + error: + memblock_free((phys_addr_t) res, sizeof(struct resource)); + /* Better an empty resource tree than an inconsistent one */ + release_child_resources(&iomem_resource); +} + + void __init parse_dtb(void) { if (early_init_dt_scan(dtb_early_va)) @@ -73,6 +232,7 @@ void __init setup_arch(char **cmdline_p) setup_bootmem(); paging_init(); + init_resources(); #if IS_ENABLED(CONFIG_BUILTIN_DTB) unflatten_and_copy_device_tree(); #else diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index f750e012d..05d6cf0c2 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -541,39 +541,12 @@ void mark_rodata_ro(void) } #endif -static void __init resource_init(void) -{ - struct memblock_region *region; - - for_each_memblock(memory, region) { - struct resource *res; - - res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); - if (!res) - panic("%s: Failed to allocate %zu bytes\n", __func__, - sizeof(struct resource)); - - if (memblock_is_nomap(region)) { - res->name = "reserved"; - res->flags = IORESOURCE_MEM; - } else { - res->name = "System RAM"; - res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; - } - res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); - res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; - - request_resource(&iomem_resource, res); - } -} - void __init paging_init(void) { setup_vm_final(); sparse_init(); setup_zero_page(); zone_sizes_init(); - resource_init(); } #ifdef CONFIG_SPARSEMEM_VMEMMAP
This patch (previously part of my kexec/kdump series) populates /proc/iomem with the various sections of the kernel image. We need this for kexec-tools to be able to prepare the crashkernel image for kdump to work. Since resource tree initialization is not related to memory initialization I added the code to kernel/setup.c and removed the original code (derived from the arm64 tree) from mm/init.c. Signed-off-by: Nick Kossifidis <mick@ics.forth.gr> --- arch/riscv/kernel/setup.c | 160 ++++++++++++++++++++++++++++++++++++++ arch/riscv/mm/init.c | 27 ------- 2 files changed, 160 insertions(+), 27 deletions(-)