diff mbox series

[v2] riscv: mm: add paging_check() before paging_init()

Message ID 20240725074155.20565-1-cuiyunhui@bytedance.com (mailing list archive)
State New
Headers show
Series [v2] riscv: mm: add paging_check() before paging_init() | expand

Checks

Context Check Description
conchuod/vmtest-for-next-PR fail PR summary
conchuod/vmtest-fixes-PR fail PR summary
conchuod/patch-1-test-1 fail .github/scripts/patches/tests/build_rv32_defconfig.sh
conchuod/patch-1-test-2 success .github/scripts/patches/tests/build_rv64_clang_allmodconfig.sh
conchuod/patch-1-test-3 success .github/scripts/patches/tests/build_rv64_gcc_allmodconfig.sh
conchuod/patch-1-test-4 success .github/scripts/patches/tests/build_rv64_nommu_k210_defconfig.sh
conchuod/patch-1-test-5 success .github/scripts/patches/tests/build_rv64_nommu_virt_defconfig.sh
conchuod/patch-1-test-6 success .github/scripts/patches/tests/checkpatch.sh
conchuod/patch-1-test-7 success .github/scripts/patches/tests/dtb_warn_rv64.sh
conchuod/patch-1-test-8 success .github/scripts/patches/tests/header_inline.sh
conchuod/patch-1-test-9 success .github/scripts/patches/tests/kdoc.sh
conchuod/patch-1-test-10 success .github/scripts/patches/tests/module_param.sh
conchuod/patch-1-test-11 success .github/scripts/patches/tests/verify_fixes.sh
conchuod/patch-1-test-12 success .github/scripts/patches/tests/verify_signedoff.sh

Commit Message

Yunhui Cui July 25, 2024, 7:41 a.m. UTC
When establishing a linear mapping, the virtual address is obtained
through __va(). If the physical address is too large, such as 1TB, then
the virtual address will overflow in the address space of sv39.
The log is as follows:
[    0.000000] Unable to handle kernel paging request at virtual address 000000d97fdf7ad8
[    0.000000] [000000d97fdf7ad8] pgd=000000407ff7e801, p4d=000000407ff7e801, pud=000000407ff7e801
[    0.000000] Unable to handle kernel paging request at virtual address 000000d97fdfaff0
[    0.000000] [000000d97fdfaff0] pgd=000000407ff7e801, p4d=000000407ff7e801, pud=000000407ff7e801
...
[    0.000000] Insufficient stack space to handle exception!
[    0.000000] Task stack:     [0xffffffff81400000..0xffffffff81404000]
[    0.000000] Overflow stack: [0xffffffff80c67370..0xffffffff80c68370]
[    0.000000] CPU: 0 PID: 0 Comm: swapper Tainted: G        W          6.6.3-00133-g60497fad461d-dirty #71
[    0.000000] epc : die_kernel_fault+0x158/0x1c8
[    0.000000]  ra : die_kernel_fault+0x12a/0x1c8
[    0.000000] epc : ffffffff808cde36 ra : ffffffff808cde08 sp : ffffffff813fff80
[    0.000000]  gp : ffffffff815a1678 tp : 0000000000000000 t0 : 0000003130386537
[    0.000000]  t1 : 0000000000000031 t2 : 6537666637303430 s0 : ffffffff813fffc0
[    0.000000]  s1 : ffffffff815b0b28 a0 : 0000000000000016 a1 : ffffffff81495298
[    0.000000]  a2 : 0000000000000010 a3 : ffffffff81495298 a4 : 00000000000001fe
[    0.000000]  a5 : 000000d97fdfa000 a6 : ffffffff814250d0 a7 : 0000000000000030
[    0.000000]  s2 : 000000d97fdfaff0 s3 : ffffffff81400040 s4 : 000000d97fdfaff0
[    0.000000]  s5 : ffffffff815a0ed0 s6 : 0000000000000000 s7 : 000000008f604390
[    0.000000]  s8 : 0000000000000000 s9 : ffffffffffffffff s10: 0000000000000000
[    0.000000]  s11: 0000000000000000 t3 : ffffffff815baa9b t4 : ffffffff815baa9b
[    0.000000]  t5 : ffffffff815baa88 t6 : ffffffff813ffda8
[    0.000000] status: 0000000200000100 badaddr: 000000d97fdfaff0 cause: 000000000000000d
[    0.000000] Kernel panic - not syncing: Kernel stack overflow
[    0.000000] CPU: 0 PID: 0 Comm: swapper Tainted: G        W          6.6.3-00133-g60497fad461d-dirty #71
[    0.000000] Call Trace:
[    0.000000] [<ffffffff800066bc>] dump_backtrace+0x28/0x30
[    0.000000] [<ffffffff808cdac8>] show_stack+0x38/0x44
[    0.000000] [<ffffffff808d9d40>] dump_stack_lvl+0x44/0x5c
[    0.000000] [<ffffffff808d9d70>] dump_stack+0x18/0x20
[    0.000000] [<ffffffff808cdfb6>] panic+0x110/0x2f2
[    0.000000] [<ffffffff80006532>] walk_stackframe+0x0/0x120
[    0.000000] [<ffffffff808cde08>] die_kernel_fault+0x12a/0x1c8
[    0.000000] ---[ end Kernel panic - not syncing: Kernel stack overflow ]---

In other words, the maximum value of the physical address needs to meet
Documentation/riscv/vm-layout.rst to ensure that there is no overflow.
For sv48/57, the actual virtual address space is huge, so this problem
is generally not triggered, but it is also checked in the code.

We give a warning for the overflowed physical address region and reverve it
so that the kernel can bringup successfully.

Signed-off-by: Yunhui Cui <cuiyunhui@bytedance.com>
---
 arch/riscv/include/asm/page.h    |  9 +++++++++
 arch/riscv/include/asm/pgtable.h |  1 +
 arch/riscv/kernel/setup.c        |  1 +
 arch/riscv/mm/init.c             | 30 ++++++++++++++++++++++++++++++
 4 files changed, 41 insertions(+)

Comments

Alexandre Ghiti Aug. 1, 2024, 8:01 a.m. UTC | #1
Hi Yunhui,

On Thu, Jul 25, 2024 at 9:42 AM Yunhui Cui <cuiyunhui@bytedance.com> wrote:
>
> When establishing a linear mapping, the virtual address is obtained
> through __va(). If the physical address is too large, such as 1TB, then
> the virtual address will overflow in the address space of sv39.
> The log is as follows:
> [    0.000000] Unable to handle kernel paging request at virtual address 000000d97fdf7ad8
> [    0.000000] [000000d97fdf7ad8] pgd=000000407ff7e801, p4d=000000407ff7e801, pud=000000407ff7e801
> [    0.000000] Unable to handle kernel paging request at virtual address 000000d97fdfaff0
> [    0.000000] [000000d97fdfaff0] pgd=000000407ff7e801, p4d=000000407ff7e801, pud=000000407ff7e801
> ...
> [    0.000000] Insufficient stack space to handle exception!
> [    0.000000] Task stack:     [0xffffffff81400000..0xffffffff81404000]
> [    0.000000] Overflow stack: [0xffffffff80c67370..0xffffffff80c68370]
> [    0.000000] CPU: 0 PID: 0 Comm: swapper Tainted: G        W          6.6.3-00133-g60497fad461d-dirty #71
> [    0.000000] epc : die_kernel_fault+0x158/0x1c8
> [    0.000000]  ra : die_kernel_fault+0x12a/0x1c8
> [    0.000000] epc : ffffffff808cde36 ra : ffffffff808cde08 sp : ffffffff813fff80
> [    0.000000]  gp : ffffffff815a1678 tp : 0000000000000000 t0 : 0000003130386537
> [    0.000000]  t1 : 0000000000000031 t2 : 6537666637303430 s0 : ffffffff813fffc0
> [    0.000000]  s1 : ffffffff815b0b28 a0 : 0000000000000016 a1 : ffffffff81495298
> [    0.000000]  a2 : 0000000000000010 a3 : ffffffff81495298 a4 : 00000000000001fe
> [    0.000000]  a5 : 000000d97fdfa000 a6 : ffffffff814250d0 a7 : 0000000000000030
> [    0.000000]  s2 : 000000d97fdfaff0 s3 : ffffffff81400040 s4 : 000000d97fdfaff0
> [    0.000000]  s5 : ffffffff815a0ed0 s6 : 0000000000000000 s7 : 000000008f604390
> [    0.000000]  s8 : 0000000000000000 s9 : ffffffffffffffff s10: 0000000000000000
> [    0.000000]  s11: 0000000000000000 t3 : ffffffff815baa9b t4 : ffffffff815baa9b
> [    0.000000]  t5 : ffffffff815baa88 t6 : ffffffff813ffda8
> [    0.000000] status: 0000000200000100 badaddr: 000000d97fdfaff0 cause: 000000000000000d
> [    0.000000] Kernel panic - not syncing: Kernel stack overflow
> [    0.000000] CPU: 0 PID: 0 Comm: swapper Tainted: G        W          6.6.3-00133-g60497fad461d-dirty #71
> [    0.000000] Call Trace:
> [    0.000000] [<ffffffff800066bc>] dump_backtrace+0x28/0x30
> [    0.000000] [<ffffffff808cdac8>] show_stack+0x38/0x44
> [    0.000000] [<ffffffff808d9d40>] dump_stack_lvl+0x44/0x5c
> [    0.000000] [<ffffffff808d9d70>] dump_stack+0x18/0x20
> [    0.000000] [<ffffffff808cdfb6>] panic+0x110/0x2f2
> [    0.000000] [<ffffffff80006532>] walk_stackframe+0x0/0x120
> [    0.000000] [<ffffffff808cde08>] die_kernel_fault+0x12a/0x1c8
> [    0.000000] ---[ end Kernel panic - not syncing: Kernel stack overflow ]---
>
> In other words, the maximum value of the physical address needs to meet
> Documentation/riscv/vm-layout.rst to ensure that there is no overflow.
> For sv48/57, the actual virtual address space is huge, so this problem
> is generally not triggered, but it is also checked in the code.
>
> We give a warning for the overflowed physical address region and reverve it
> so that the kernel can bringup successfully.
>
> Signed-off-by: Yunhui Cui <cuiyunhui@bytedance.com>
> ---
>  arch/riscv/include/asm/page.h    |  9 +++++++++
>  arch/riscv/include/asm/pgtable.h |  1 +
>  arch/riscv/kernel/setup.c        |  1 +
>  arch/riscv/mm/init.c             | 30 ++++++++++++++++++++++++++++++
>  4 files changed, 41 insertions(+)
>
> diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> index 57e887bfa34c..f6c0f6e14ecb 100644
> --- a/arch/riscv/include/asm/page.h
> +++ b/arch/riscv/include/asm/page.h
> @@ -38,6 +38,15 @@
>   */
>  #define PAGE_OFFSET_L4         _AC(0xffffaf8000000000, UL)
>  #define PAGE_OFFSET_L3         _AC(0xffffffd800000000, UL)
> +
> +/*
> + * See vm-layout.rst, the size of L3 direct mapping of all physical
> + * memory 124GB, L4 is 64TB, L5 is 32PB.
> + */
> +#define MAX_PFN_MEM_ADDR_L5    (0x80000000000000ULL)
> +#define MAX_PFN_MEM_ADDR_L4    (0x400000000000ULL)
> +#define MAX_PFN_MEM_ADDR_L3    (0x1F00000000ULL)
> +
>  #else
>  #define PAGE_OFFSET            _AC(CONFIG_PAGE_OFFSET, UL)
>  #endif /* CONFIG_64BIT */
> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> index c8e8867c42f6..e4835de5a743 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -915,6 +915,7 @@ extern uintptr_t _dtb_early_pa;
>  #endif /* CONFIG_XIP_KERNEL */
>  extern u64 satp_mode;
>
> +void paging_check(void);
>  void paging_init(void);
>  void misc_mem_init(void);
>
> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> index 2a79d4ed2660..366918578544 100644
> --- a/arch/riscv/kernel/setup.c
> +++ b/arch/riscv/kernel/setup.c
> @@ -273,6 +273,7 @@ void __init setup_arch(char **cmdline_p)
>         parse_early_param();
>
>         efi_init();
> +       paging_check();
>         paging_init();
>
>         /* Parse the ACPI tables for possible boot-time configuration */
> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> index b0cc28f7595f..aa25dcf8a0ff 100644
> --- a/arch/riscv/mm/init.c
> +++ b/arch/riscv/mm/init.c
> @@ -1482,6 +1482,36 @@ static void __init reserve_crashkernel(void)
>         crashk_res.end = crash_base + crash_size - 1;
>  }
>
> +static void __init phymem_addr_overflow(void)
> +{
> +       phys_addr_t end = memblock_end_of_DRAM();
> +
> +       if (pgtable_l5_enabled) {
> +               if (end > MAX_PFN_MEM_ADDR_L5) {
> +                       memblock_reserve(MAX_PFN_MEM_ADDR_L5, end - MAX_PFN_MEM_ADDR_L5);
> +                       WARN(true, "Phymem addr 0x%llx overflowed, reserve [0x%llx-0x%llx] directly.",
> +                            end, MAX_PFN_MEM_ADDR_L5, end);
> +               }
> +       }
> +       if (pgtable_l4_enabled) {
> +               if (end > MAX_PFN_MEM_ADDR_L4) {
> +                       memblock_reserve(MAX_PFN_MEM_ADDR_L4, end - MAX_PFN_MEM_ADDR_L4);
> +                       WARN(true, "Phymem addr 0x%llx overflowed, reserve [0x%llx-0x%llx] directly.",
> +                            end, MAX_PFN_MEM_ADDR_L4, end);
> +               }
> +       }
> +       if (end > MAX_PFN_MEM_ADDR_L3) {
> +               memblock_reserve(MAX_PFN_MEM_ADDR_L3, end - MAX_PFN_MEM_ADDR_L3);
> +               WARN(true, "Phymem addr 0x%llx overflowed, reserve [0x%llx-0x%llx] directly.",
> +                    end, MAX_PFN_MEM_ADDR_L3, end);
> +       }
> +}
> +
> +void __init paging_check(void)
> +{
> +       phymem_addr_overflow();
> +}
> +
>  void __init paging_init(void)
>  {
>         setup_bootmem();
> --
> 2.39.2
>

So the following patch should fix your issue and was posted some time
ago https://lore.kernel.org/linux-riscv/bdfbed9b-ea04-4415-8416-d6e9d0a643a3@ghiti.fr/T/#me26a82cf32f46cae12e2ea8892a3bc16d4fc37e3.
I prefer this solution as it does not introduce a bunch of hardcoded
defines and relies on the already existing memblock API.

Let me know if that's not the case!

Thanks,

Alex
Yunhui Cui Aug. 3, 2024, 6:28 a.m. UTC | #2
Hi Alex,

On Thu, Aug 1, 2024 at 4:02 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>
> Hi Yunhui,
>
> On Thu, Jul 25, 2024 at 9:42 AM Yunhui Cui <cuiyunhui@bytedance.com> wrote:
> >
> > When establishing a linear mapping, the virtual address is obtained
> > through __va(). If the physical address is too large, such as 1TB, then
> > the virtual address will overflow in the address space of sv39.
> > The log is as follows:
> > [    0.000000] Unable to handle kernel paging request at virtual address 000000d97fdf7ad8
> > [    0.000000] [000000d97fdf7ad8] pgd=000000407ff7e801, p4d=000000407ff7e801, pud=000000407ff7e801
> > [    0.000000] Unable to handle kernel paging request at virtual address 000000d97fdfaff0
> > [    0.000000] [000000d97fdfaff0] pgd=000000407ff7e801, p4d=000000407ff7e801, pud=000000407ff7e801
> > ...
> > [    0.000000] Insufficient stack space to handle exception!
> > [    0.000000] Task stack:     [0xffffffff81400000..0xffffffff81404000]
> > [    0.000000] Overflow stack: [0xffffffff80c67370..0xffffffff80c68370]
> > [    0.000000] CPU: 0 PID: 0 Comm: swapper Tainted: G        W          6.6.3-00133-g60497fad461d-dirty #71
> > [    0.000000] epc : die_kernel_fault+0x158/0x1c8
> > [    0.000000]  ra : die_kernel_fault+0x12a/0x1c8
> > [    0.000000] epc : ffffffff808cde36 ra : ffffffff808cde08 sp : ffffffff813fff80
> > [    0.000000]  gp : ffffffff815a1678 tp : 0000000000000000 t0 : 0000003130386537
> > [    0.000000]  t1 : 0000000000000031 t2 : 6537666637303430 s0 : ffffffff813fffc0
> > [    0.000000]  s1 : ffffffff815b0b28 a0 : 0000000000000016 a1 : ffffffff81495298
> > [    0.000000]  a2 : 0000000000000010 a3 : ffffffff81495298 a4 : 00000000000001fe
> > [    0.000000]  a5 : 000000d97fdfa000 a6 : ffffffff814250d0 a7 : 0000000000000030
> > [    0.000000]  s2 : 000000d97fdfaff0 s3 : ffffffff81400040 s4 : 000000d97fdfaff0
> > [    0.000000]  s5 : ffffffff815a0ed0 s6 : 0000000000000000 s7 : 000000008f604390
> > [    0.000000]  s8 : 0000000000000000 s9 : ffffffffffffffff s10: 0000000000000000
> > [    0.000000]  s11: 0000000000000000 t3 : ffffffff815baa9b t4 : ffffffff815baa9b
> > [    0.000000]  t5 : ffffffff815baa88 t6 : ffffffff813ffda8
> > [    0.000000] status: 0000000200000100 badaddr: 000000d97fdfaff0 cause: 000000000000000d
> > [    0.000000] Kernel panic - not syncing: Kernel stack overflow
> > [    0.000000] CPU: 0 PID: 0 Comm: swapper Tainted: G        W          6.6.3-00133-g60497fad461d-dirty #71
> > [    0.000000] Call Trace:
> > [    0.000000] [<ffffffff800066bc>] dump_backtrace+0x28/0x30
> > [    0.000000] [<ffffffff808cdac8>] show_stack+0x38/0x44
> > [    0.000000] [<ffffffff808d9d40>] dump_stack_lvl+0x44/0x5c
> > [    0.000000] [<ffffffff808d9d70>] dump_stack+0x18/0x20
> > [    0.000000] [<ffffffff808cdfb6>] panic+0x110/0x2f2
> > [    0.000000] [<ffffffff80006532>] walk_stackframe+0x0/0x120
> > [    0.000000] [<ffffffff808cde08>] die_kernel_fault+0x12a/0x1c8
> > [    0.000000] ---[ end Kernel panic - not syncing: Kernel stack overflow ]---
> >
> > In other words, the maximum value of the physical address needs to meet
> > Documentation/riscv/vm-layout.rst to ensure that there is no overflow.
> > For sv48/57, the actual virtual address space is huge, so this problem
> > is generally not triggered, but it is also checked in the code.
> >
> > We give a warning for the overflowed physical address region and reverve it
> > so that the kernel can bringup successfully.
> >
> > Signed-off-by: Yunhui Cui <cuiyunhui@bytedance.com>
> > ---
> >  arch/riscv/include/asm/page.h    |  9 +++++++++
> >  arch/riscv/include/asm/pgtable.h |  1 +
> >  arch/riscv/kernel/setup.c        |  1 +
> >  arch/riscv/mm/init.c             | 30 ++++++++++++++++++++++++++++++
> >  4 files changed, 41 insertions(+)
> >
> > diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> > index 57e887bfa34c..f6c0f6e14ecb 100644
> > --- a/arch/riscv/include/asm/page.h
> > +++ b/arch/riscv/include/asm/page.h
> > @@ -38,6 +38,15 @@
> >   */
> >  #define PAGE_OFFSET_L4         _AC(0xffffaf8000000000, UL)
> >  #define PAGE_OFFSET_L3         _AC(0xffffffd800000000, UL)
> > +
> > +/*
> > + * See vm-layout.rst, the size of L3 direct mapping of all physical
> > + * memory 124GB, L4 is 64TB, L5 is 32PB.
> > + */
> > +#define MAX_PFN_MEM_ADDR_L5    (0x80000000000000ULL)
> > +#define MAX_PFN_MEM_ADDR_L4    (0x400000000000ULL)
> > +#define MAX_PFN_MEM_ADDR_L3    (0x1F00000000ULL)
> > +
> >  #else
> >  #define PAGE_OFFSET            _AC(CONFIG_PAGE_OFFSET, UL)
> >  #endif /* CONFIG_64BIT */
> > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> > index c8e8867c42f6..e4835de5a743 100644
> > --- a/arch/riscv/include/asm/pgtable.h
> > +++ b/arch/riscv/include/asm/pgtable.h
> > @@ -915,6 +915,7 @@ extern uintptr_t _dtb_early_pa;
> >  #endif /* CONFIG_XIP_KERNEL */
> >  extern u64 satp_mode;
> >
> > +void paging_check(void);
> >  void paging_init(void);
> >  void misc_mem_init(void);
> >
> > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > index 2a79d4ed2660..366918578544 100644
> > --- a/arch/riscv/kernel/setup.c
> > +++ b/arch/riscv/kernel/setup.c
> > @@ -273,6 +273,7 @@ void __init setup_arch(char **cmdline_p)
> >         parse_early_param();
> >
> >         efi_init();
> > +       paging_check();
> >         paging_init();
> >
> >         /* Parse the ACPI tables for possible boot-time configuration */
> > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> > index b0cc28f7595f..aa25dcf8a0ff 100644
> > --- a/arch/riscv/mm/init.c
> > +++ b/arch/riscv/mm/init.c
> > @@ -1482,6 +1482,36 @@ static void __init reserve_crashkernel(void)
> >         crashk_res.end = crash_base + crash_size - 1;
> >  }
> >
> > +static void __init phymem_addr_overflow(void)
> > +{
> > +       phys_addr_t end = memblock_end_of_DRAM();
> > +
> > +       if (pgtable_l5_enabled) {
> > +               if (end > MAX_PFN_MEM_ADDR_L5) {
> > +                       memblock_reserve(MAX_PFN_MEM_ADDR_L5, end - MAX_PFN_MEM_ADDR_L5);
> > +                       WARN(true, "Phymem addr 0x%llx overflowed, reserve [0x%llx-0x%llx] directly.",
> > +                            end, MAX_PFN_MEM_ADDR_L5, end);
> > +               }
> > +       }
> > +       if (pgtable_l4_enabled) {
> > +               if (end > MAX_PFN_MEM_ADDR_L4) {
> > +                       memblock_reserve(MAX_PFN_MEM_ADDR_L4, end - MAX_PFN_MEM_ADDR_L4);
> > +                       WARN(true, "Phymem addr 0x%llx overflowed, reserve [0x%llx-0x%llx] directly.",
> > +                            end, MAX_PFN_MEM_ADDR_L4, end);
> > +               }
> > +       }
> > +       if (end > MAX_PFN_MEM_ADDR_L3) {
> > +               memblock_reserve(MAX_PFN_MEM_ADDR_L3, end - MAX_PFN_MEM_ADDR_L3);
> > +               WARN(true, "Phymem addr 0x%llx overflowed, reserve [0x%llx-0x%llx] directly.",
> > +                    end, MAX_PFN_MEM_ADDR_L3, end);
> > +       }
> > +}
> > +
> > +void __init paging_check(void)
> > +{
> > +       phymem_addr_overflow();
> > +}
> > +
> >  void __init paging_init(void)
> >  {
> >         setup_bootmem();
> > --
> > 2.39.2
> >
>
> So the following patch should fix your issue and was posted some time
> ago https://lore.kernel.org/linux-riscv/bdfbed9b-ea04-4415-8416-d6e9d0a643a3@ghiti.fr/T/#me26a82cf32f46cae12e2ea8892a3bc16d4fc37e3.
> I prefer this solution as it does not introduce a bunch of hardcoded
> defines and relies on the already existing memblock API.
>
> Let me know if that's not the case!
I understand that there is no problem with the functionality of this
patch, but since the actual physical memory is lost, we need a Warning
message to avoid confusion about memory disappearance for no reason.

>
> Thanks,
>
> Alex

Thanks,
Yunhui
Alexandre Ghiti Aug. 13, 2024, 2:21 p.m. UTC | #3
Hi Yunhui,

Sorry I was off last week.

On Sat, Aug 3, 2024 at 8:28 AM yunhui cui <cuiyunhui@bytedance.com> wrote:
>
> Hi Alex,
>
> On Thu, Aug 1, 2024 at 4:02 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> >
> > Hi Yunhui,
> >
> > On Thu, Jul 25, 2024 at 9:42 AM Yunhui Cui <cuiyunhui@bytedance.com> wrote:
> > >
> > > When establishing a linear mapping, the virtual address is obtained
> > > through __va(). If the physical address is too large, such as 1TB, then
> > > the virtual address will overflow in the address space of sv39.
> > > The log is as follows:
> > > [    0.000000] Unable to handle kernel paging request at virtual address 000000d97fdf7ad8
> > > [    0.000000] [000000d97fdf7ad8] pgd=000000407ff7e801, p4d=000000407ff7e801, pud=000000407ff7e801
> > > [    0.000000] Unable to handle kernel paging request at virtual address 000000d97fdfaff0
> > > [    0.000000] [000000d97fdfaff0] pgd=000000407ff7e801, p4d=000000407ff7e801, pud=000000407ff7e801
> > > ...
> > > [    0.000000] Insufficient stack space to handle exception!
> > > [    0.000000] Task stack:     [0xffffffff81400000..0xffffffff81404000]
> > > [    0.000000] Overflow stack: [0xffffffff80c67370..0xffffffff80c68370]
> > > [    0.000000] CPU: 0 PID: 0 Comm: swapper Tainted: G        W          6.6.3-00133-g60497fad461d-dirty #71
> > > [    0.000000] epc : die_kernel_fault+0x158/0x1c8
> > > [    0.000000]  ra : die_kernel_fault+0x12a/0x1c8
> > > [    0.000000] epc : ffffffff808cde36 ra : ffffffff808cde08 sp : ffffffff813fff80
> > > [    0.000000]  gp : ffffffff815a1678 tp : 0000000000000000 t0 : 0000003130386537
> > > [    0.000000]  t1 : 0000000000000031 t2 : 6537666637303430 s0 : ffffffff813fffc0
> > > [    0.000000]  s1 : ffffffff815b0b28 a0 : 0000000000000016 a1 : ffffffff81495298
> > > [    0.000000]  a2 : 0000000000000010 a3 : ffffffff81495298 a4 : 00000000000001fe
> > > [    0.000000]  a5 : 000000d97fdfa000 a6 : ffffffff814250d0 a7 : 0000000000000030
> > > [    0.000000]  s2 : 000000d97fdfaff0 s3 : ffffffff81400040 s4 : 000000d97fdfaff0
> > > [    0.000000]  s5 : ffffffff815a0ed0 s6 : 0000000000000000 s7 : 000000008f604390
> > > [    0.000000]  s8 : 0000000000000000 s9 : ffffffffffffffff s10: 0000000000000000
> > > [    0.000000]  s11: 0000000000000000 t3 : ffffffff815baa9b t4 : ffffffff815baa9b
> > > [    0.000000]  t5 : ffffffff815baa88 t6 : ffffffff813ffda8
> > > [    0.000000] status: 0000000200000100 badaddr: 000000d97fdfaff0 cause: 000000000000000d
> > > [    0.000000] Kernel panic - not syncing: Kernel stack overflow
> > > [    0.000000] CPU: 0 PID: 0 Comm: swapper Tainted: G        W          6.6.3-00133-g60497fad461d-dirty #71
> > > [    0.000000] Call Trace:
> > > [    0.000000] [<ffffffff800066bc>] dump_backtrace+0x28/0x30
> > > [    0.000000] [<ffffffff808cdac8>] show_stack+0x38/0x44
> > > [    0.000000] [<ffffffff808d9d40>] dump_stack_lvl+0x44/0x5c
> > > [    0.000000] [<ffffffff808d9d70>] dump_stack+0x18/0x20
> > > [    0.000000] [<ffffffff808cdfb6>] panic+0x110/0x2f2
> > > [    0.000000] [<ffffffff80006532>] walk_stackframe+0x0/0x120
> > > [    0.000000] [<ffffffff808cde08>] die_kernel_fault+0x12a/0x1c8
> > > [    0.000000] ---[ end Kernel panic - not syncing: Kernel stack overflow ]---
> > >
> > > In other words, the maximum value of the physical address needs to meet
> > > Documentation/riscv/vm-layout.rst to ensure that there is no overflow.
> > > For sv48/57, the actual virtual address space is huge, so this problem
> > > is generally not triggered, but it is also checked in the code.
> > >
> > > We give a warning for the overflowed physical address region and reverve it
> > > so that the kernel can bringup successfully.
> > >
> > > Signed-off-by: Yunhui Cui <cuiyunhui@bytedance.com>
> > > ---
> > >  arch/riscv/include/asm/page.h    |  9 +++++++++
> > >  arch/riscv/include/asm/pgtable.h |  1 +
> > >  arch/riscv/kernel/setup.c        |  1 +
> > >  arch/riscv/mm/init.c             | 30 ++++++++++++++++++++++++++++++
> > >  4 files changed, 41 insertions(+)
> > >
> > > diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> > > index 57e887bfa34c..f6c0f6e14ecb 100644
> > > --- a/arch/riscv/include/asm/page.h
> > > +++ b/arch/riscv/include/asm/page.h
> > > @@ -38,6 +38,15 @@
> > >   */
> > >  #define PAGE_OFFSET_L4         _AC(0xffffaf8000000000, UL)
> > >  #define PAGE_OFFSET_L3         _AC(0xffffffd800000000, UL)
> > > +
> > > +/*
> > > + * See vm-layout.rst, the size of L3 direct mapping of all physical
> > > + * memory 124GB, L4 is 64TB, L5 is 32PB.
> > > + */
> > > +#define MAX_PFN_MEM_ADDR_L5    (0x80000000000000ULL)
> > > +#define MAX_PFN_MEM_ADDR_L4    (0x400000000000ULL)
> > > +#define MAX_PFN_MEM_ADDR_L3    (0x1F00000000ULL)
> > > +
> > >  #else
> > >  #define PAGE_OFFSET            _AC(CONFIG_PAGE_OFFSET, UL)
> > >  #endif /* CONFIG_64BIT */
> > > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> > > index c8e8867c42f6..e4835de5a743 100644
> > > --- a/arch/riscv/include/asm/pgtable.h
> > > +++ b/arch/riscv/include/asm/pgtable.h
> > > @@ -915,6 +915,7 @@ extern uintptr_t _dtb_early_pa;
> > >  #endif /* CONFIG_XIP_KERNEL */
> > >  extern u64 satp_mode;
> > >
> > > +void paging_check(void);
> > >  void paging_init(void);
> > >  void misc_mem_init(void);
> > >
> > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > > index 2a79d4ed2660..366918578544 100644
> > > --- a/arch/riscv/kernel/setup.c
> > > +++ b/arch/riscv/kernel/setup.c
> > > @@ -273,6 +273,7 @@ void __init setup_arch(char **cmdline_p)
> > >         parse_early_param();
> > >
> > >         efi_init();
> > > +       paging_check();
> > >         paging_init();
> > >
> > >         /* Parse the ACPI tables for possible boot-time configuration */
> > > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> > > index b0cc28f7595f..aa25dcf8a0ff 100644
> > > --- a/arch/riscv/mm/init.c
> > > +++ b/arch/riscv/mm/init.c
> > > @@ -1482,6 +1482,36 @@ static void __init reserve_crashkernel(void)
> > >         crashk_res.end = crash_base + crash_size - 1;
> > >  }
> > >
> > > +static void __init phymem_addr_overflow(void)
> > > +{
> > > +       phys_addr_t end = memblock_end_of_DRAM();
> > > +
> > > +       if (pgtable_l5_enabled) {
> > > +               if (end > MAX_PFN_MEM_ADDR_L5) {
> > > +                       memblock_reserve(MAX_PFN_MEM_ADDR_L5, end - MAX_PFN_MEM_ADDR_L5);
> > > +                       WARN(true, "Phymem addr 0x%llx overflowed, reserve [0x%llx-0x%llx] directly.",
> > > +                            end, MAX_PFN_MEM_ADDR_L5, end);
> > > +               }
> > > +       }
> > > +       if (pgtable_l4_enabled) {
> > > +               if (end > MAX_PFN_MEM_ADDR_L4) {
> > > +                       memblock_reserve(MAX_PFN_MEM_ADDR_L4, end - MAX_PFN_MEM_ADDR_L4);
> > > +                       WARN(true, "Phymem addr 0x%llx overflowed, reserve [0x%llx-0x%llx] directly.",
> > > +                            end, MAX_PFN_MEM_ADDR_L4, end);
> > > +               }
> > > +       }
> > > +       if (end > MAX_PFN_MEM_ADDR_L3) {
> > > +               memblock_reserve(MAX_PFN_MEM_ADDR_L3, end - MAX_PFN_MEM_ADDR_L3);
> > > +               WARN(true, "Phymem addr 0x%llx overflowed, reserve [0x%llx-0x%llx] directly.",
> > > +                    end, MAX_PFN_MEM_ADDR_L3, end);
> > > +       }
> > > +}
> > > +
> > > +void __init paging_check(void)
> > > +{
> > > +       phymem_addr_overflow();
> > > +}
> > > +
> > >  void __init paging_init(void)
> > >  {
> > >         setup_bootmem();
> > > --
> > > 2.39.2
> > >
> >
> > So the following patch should fix your issue and was posted some time
> > ago https://lore.kernel.org/linux-riscv/bdfbed9b-ea04-4415-8416-d6e9d0a643a3@ghiti.fr/T/#me26a82cf32f46cae12e2ea8892a3bc16d4fc37e3.
> > I prefer this solution as it does not introduce a bunch of hardcoded
> > defines and relies on the already existing memblock API.
> >
> > Let me know if that's not the case!
> I understand that there is no problem with the functionality of this
> patch, but since the actual physical memory is lost, we need a Warning
> message to avoid confusion about memory disappearance for no reason.

Yes, you're right, we should advertise people when something like this
happens. What do you think of the following instead?

diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index de8a608eec1a..29c8e321eadc 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -254,8 +254,11 @@ static void __init setup_bootmem(void)
         */
        if (IS_ENABLED(CONFIG_64BIT)) {
                max_mapped_addr = __pa(PAGE_OFFSET) + KERN_VIRT_SIZE;
-               memblock_cap_memory_range(phys_ram_base,
-                                         max_mapped_addr - phys_ram_base);
+               if (memblock_end_of_DRAM() > max_mapped_addr) {
+                       memblock_cap_memory_range(phys_ram_base,
+                                                 max_mapped_addr -
phys_ram_base);
+                       pr_warn("Physical memory overflows the linear
mapping size: region above 0x%llx removed", max_mapped_addr);
+               }
        }

        /*

Thanks,

Alex

>
> >
> > Thanks,
> >
> > Alex
>
> Thanks,
> Yunhui
Yunhui Cui Aug. 14, 2024, 4:15 a.m. UTC | #4
Hi Alex,

On Tue, Aug 13, 2024 at 10:21 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>
> Hi Yunhui,
>
> Sorry I was off last week.
>
> On Sat, Aug 3, 2024 at 8:28 AM yunhui cui <cuiyunhui@bytedance.com> wrote:
> >
> > Hi Alex,
> >
> > On Thu, Aug 1, 2024 at 4:02 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > >
> > > Hi Yunhui,
> > >
> > > On Thu, Jul 25, 2024 at 9:42 AM Yunhui Cui <cuiyunhui@bytedance.com> wrote:
> > > >
> > > > When establishing a linear mapping, the virtual address is obtained
> > > > through __va(). If the physical address is too large, such as 1TB, then
> > > > the virtual address will overflow in the address space of sv39.
> > > > The log is as follows:
> > > > [    0.000000] Unable to handle kernel paging request at virtual address 000000d97fdf7ad8
> > > > [    0.000000] [000000d97fdf7ad8] pgd=000000407ff7e801, p4d=000000407ff7e801, pud=000000407ff7e801
> > > > [    0.000000] Unable to handle kernel paging request at virtual address 000000d97fdfaff0
> > > > [    0.000000] [000000d97fdfaff0] pgd=000000407ff7e801, p4d=000000407ff7e801, pud=000000407ff7e801
> > > > ...
> > > > [    0.000000] Insufficient stack space to handle exception!
> > > > [    0.000000] Task stack:     [0xffffffff81400000..0xffffffff81404000]
> > > > [    0.000000] Overflow stack: [0xffffffff80c67370..0xffffffff80c68370]
> > > > [    0.000000] CPU: 0 PID: 0 Comm: swapper Tainted: G        W          6.6.3-00133-g60497fad461d-dirty #71
> > > > [    0.000000] epc : die_kernel_fault+0x158/0x1c8
> > > > [    0.000000]  ra : die_kernel_fault+0x12a/0x1c8
> > > > [    0.000000] epc : ffffffff808cde36 ra : ffffffff808cde08 sp : ffffffff813fff80
> > > > [    0.000000]  gp : ffffffff815a1678 tp : 0000000000000000 t0 : 0000003130386537
> > > > [    0.000000]  t1 : 0000000000000031 t2 : 6537666637303430 s0 : ffffffff813fffc0
> > > > [    0.000000]  s1 : ffffffff815b0b28 a0 : 0000000000000016 a1 : ffffffff81495298
> > > > [    0.000000]  a2 : 0000000000000010 a3 : ffffffff81495298 a4 : 00000000000001fe
> > > > [    0.000000]  a5 : 000000d97fdfa000 a6 : ffffffff814250d0 a7 : 0000000000000030
> > > > [    0.000000]  s2 : 000000d97fdfaff0 s3 : ffffffff81400040 s4 : 000000d97fdfaff0
> > > > [    0.000000]  s5 : ffffffff815a0ed0 s6 : 0000000000000000 s7 : 000000008f604390
> > > > [    0.000000]  s8 : 0000000000000000 s9 : ffffffffffffffff s10: 0000000000000000
> > > > [    0.000000]  s11: 0000000000000000 t3 : ffffffff815baa9b t4 : ffffffff815baa9b
> > > > [    0.000000]  t5 : ffffffff815baa88 t6 : ffffffff813ffda8
> > > > [    0.000000] status: 0000000200000100 badaddr: 000000d97fdfaff0 cause: 000000000000000d
> > > > [    0.000000] Kernel panic - not syncing: Kernel stack overflow
> > > > [    0.000000] CPU: 0 PID: 0 Comm: swapper Tainted: G        W          6.6.3-00133-g60497fad461d-dirty #71
> > > > [    0.000000] Call Trace:
> > > > [    0.000000] [<ffffffff800066bc>] dump_backtrace+0x28/0x30
> > > > [    0.000000] [<ffffffff808cdac8>] show_stack+0x38/0x44
> > > > [    0.000000] [<ffffffff808d9d40>] dump_stack_lvl+0x44/0x5c
> > > > [    0.000000] [<ffffffff808d9d70>] dump_stack+0x18/0x20
> > > > [    0.000000] [<ffffffff808cdfb6>] panic+0x110/0x2f2
> > > > [    0.000000] [<ffffffff80006532>] walk_stackframe+0x0/0x120
> > > > [    0.000000] [<ffffffff808cde08>] die_kernel_fault+0x12a/0x1c8
> > > > [    0.000000] ---[ end Kernel panic - not syncing: Kernel stack overflow ]---
> > > >
> > > > In other words, the maximum value of the physical address needs to meet
> > > > Documentation/riscv/vm-layout.rst to ensure that there is no overflow.
> > > > For sv48/57, the actual virtual address space is huge, so this problem
> > > > is generally not triggered, but it is also checked in the code.
> > > >
> > > > We give a warning for the overflowed physical address region and reverve it
> > > > so that the kernel can bringup successfully.
> > > >
> > > > Signed-off-by: Yunhui Cui <cuiyunhui@bytedance.com>
> > > > ---
> > > >  arch/riscv/include/asm/page.h    |  9 +++++++++
> > > >  arch/riscv/include/asm/pgtable.h |  1 +
> > > >  arch/riscv/kernel/setup.c        |  1 +
> > > >  arch/riscv/mm/init.c             | 30 ++++++++++++++++++++++++++++++
> > > >  4 files changed, 41 insertions(+)
> > > >
> > > > diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> > > > index 57e887bfa34c..f6c0f6e14ecb 100644
> > > > --- a/arch/riscv/include/asm/page.h
> > > > +++ b/arch/riscv/include/asm/page.h
> > > > @@ -38,6 +38,15 @@
> > > >   */
> > > >  #define PAGE_OFFSET_L4         _AC(0xffffaf8000000000, UL)
> > > >  #define PAGE_OFFSET_L3         _AC(0xffffffd800000000, UL)
> > > > +
> > > > +/*
> > > > + * See vm-layout.rst, the size of L3 direct mapping of all physical
> > > > + * memory 124GB, L4 is 64TB, L5 is 32PB.
> > > > + */
> > > > +#define MAX_PFN_MEM_ADDR_L5    (0x80000000000000ULL)
> > > > +#define MAX_PFN_MEM_ADDR_L4    (0x400000000000ULL)
> > > > +#define MAX_PFN_MEM_ADDR_L3    (0x1F00000000ULL)
> > > > +
> > > >  #else
> > > >  #define PAGE_OFFSET            _AC(CONFIG_PAGE_OFFSET, UL)
> > > >  #endif /* CONFIG_64BIT */
> > > > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> > > > index c8e8867c42f6..e4835de5a743 100644
> > > > --- a/arch/riscv/include/asm/pgtable.h
> > > > +++ b/arch/riscv/include/asm/pgtable.h
> > > > @@ -915,6 +915,7 @@ extern uintptr_t _dtb_early_pa;
> > > >  #endif /* CONFIG_XIP_KERNEL */
> > > >  extern u64 satp_mode;
> > > >
> > > > +void paging_check(void);
> > > >  void paging_init(void);
> > > >  void misc_mem_init(void);
> > > >
> > > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > > > index 2a79d4ed2660..366918578544 100644
> > > > --- a/arch/riscv/kernel/setup.c
> > > > +++ b/arch/riscv/kernel/setup.c
> > > > @@ -273,6 +273,7 @@ void __init setup_arch(char **cmdline_p)
> > > >         parse_early_param();
> > > >
> > > >         efi_init();
> > > > +       paging_check();
> > > >         paging_init();
> > > >
> > > >         /* Parse the ACPI tables for possible boot-time configuration */
> > > > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> > > > index b0cc28f7595f..aa25dcf8a0ff 100644
> > > > --- a/arch/riscv/mm/init.c
> > > > +++ b/arch/riscv/mm/init.c
> > > > @@ -1482,6 +1482,36 @@ static void __init reserve_crashkernel(void)
> > > >         crashk_res.end = crash_base + crash_size - 1;
> > > >  }
> > > >
> > > > +static void __init phymem_addr_overflow(void)
> > > > +{
> > > > +       phys_addr_t end = memblock_end_of_DRAM();
> > > > +
> > > > +       if (pgtable_l5_enabled) {
> > > > +               if (end > MAX_PFN_MEM_ADDR_L5) {
> > > > +                       memblock_reserve(MAX_PFN_MEM_ADDR_L5, end - MAX_PFN_MEM_ADDR_L5);
> > > > +                       WARN(true, "Phymem addr 0x%llx overflowed, reserve [0x%llx-0x%llx] directly.",
> > > > +                            end, MAX_PFN_MEM_ADDR_L5, end);
> > > > +               }
> > > > +       }
> > > > +       if (pgtable_l4_enabled) {
> > > > +               if (end > MAX_PFN_MEM_ADDR_L4) {
> > > > +                       memblock_reserve(MAX_PFN_MEM_ADDR_L4, end - MAX_PFN_MEM_ADDR_L4);
> > > > +                       WARN(true, "Phymem addr 0x%llx overflowed, reserve [0x%llx-0x%llx] directly.",
> > > > +                            end, MAX_PFN_MEM_ADDR_L4, end);
> > > > +               }
> > > > +       }
> > > > +       if (end > MAX_PFN_MEM_ADDR_L3) {
> > > > +               memblock_reserve(MAX_PFN_MEM_ADDR_L3, end - MAX_PFN_MEM_ADDR_L3);
> > > > +               WARN(true, "Phymem addr 0x%llx overflowed, reserve [0x%llx-0x%llx] directly.",
> > > > +                    end, MAX_PFN_MEM_ADDR_L3, end);
> > > > +       }
> > > > +}
> > > > +
> > > > +void __init paging_check(void)
> > > > +{
> > > > +       phymem_addr_overflow();
> > > > +}
> > > > +
> > > >  void __init paging_init(void)
> > > >  {
> > > >         setup_bootmem();
> > > > --
> > > > 2.39.2
> > > >
> > >
> > > So the following patch should fix your issue and was posted some time
> > > ago https://lore.kernel.org/linux-riscv/bdfbed9b-ea04-4415-8416-d6e9d0a643a3@ghiti.fr/T/#me26a82cf32f46cae12e2ea8892a3bc16d4fc37e3.
> > > I prefer this solution as it does not introduce a bunch of hardcoded
> > > defines and relies on the already existing memblock API.
> > >
> > > Let me know if that's not the case!
> > I understand that there is no problem with the functionality of this
> > patch, but since the actual physical memory is lost, we need a Warning
> > message to avoid confusion about memory disappearance for no reason.
>
> Yes, you're right, we should advertise people when something like this
> happens. What do you think of the following instead?
>
> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> index de8a608eec1a..29c8e321eadc 100644
> --- a/arch/riscv/mm/init.c
> +++ b/arch/riscv/mm/init.c
> @@ -254,8 +254,11 @@ static void __init setup_bootmem(void)
>          */
>         if (IS_ENABLED(CONFIG_64BIT)) {
>                 max_mapped_addr = __pa(PAGE_OFFSET) + KERN_VIRT_SIZE;
> -               memblock_cap_memory_range(phys_ram_base,
> -                                         max_mapped_addr - phys_ram_base);
> +               if (memblock_end_of_DRAM() > max_mapped_addr) {
> +                       memblock_cap_memory_range(phys_ram_base,
> +                                                 max_mapped_addr -
> phys_ram_base);
> +                       pr_warn("Physical memory overflows the linear
> mapping size: region above 0x%llx removed", max_mapped_addr);
> +               }
Yeah, but it's better to be more eye-catching, All right, I'll update
this patch.

>         }
>
>         /*
>
> Thanks,
>
> Alex
>
> >
> > >
> > > Thanks,
> > >
> > > Alex
> >
> > Thanks,
> > Yunhui

Thanks,
Yunhui
diff mbox series

Patch

diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 57e887bfa34c..f6c0f6e14ecb 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -38,6 +38,15 @@ 
  */
 #define PAGE_OFFSET_L4		_AC(0xffffaf8000000000, UL)
 #define PAGE_OFFSET_L3		_AC(0xffffffd800000000, UL)
+
+/*
+ * See vm-layout.rst, the size of L3 direct mapping of all physical
+ * memory 124GB, L4 is 64TB, L5 is 32PB.
+ */
+#define MAX_PFN_MEM_ADDR_L5	(0x80000000000000ULL)
+#define MAX_PFN_MEM_ADDR_L4	(0x400000000000ULL)
+#define MAX_PFN_MEM_ADDR_L3	(0x1F00000000ULL)
+
 #else
 #define PAGE_OFFSET		_AC(CONFIG_PAGE_OFFSET, UL)
 #endif /* CONFIG_64BIT */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index c8e8867c42f6..e4835de5a743 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -915,6 +915,7 @@  extern uintptr_t _dtb_early_pa;
 #endif /* CONFIG_XIP_KERNEL */
 extern u64 satp_mode;
 
+void paging_check(void);
 void paging_init(void);
 void misc_mem_init(void);
 
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 2a79d4ed2660..366918578544 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -273,6 +273,7 @@  void __init setup_arch(char **cmdline_p)
 	parse_early_param();
 
 	efi_init();
+	paging_check();
 	paging_init();
 
 	/* Parse the ACPI tables for possible boot-time configuration */
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index b0cc28f7595f..aa25dcf8a0ff 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -1482,6 +1482,36 @@  static void __init reserve_crashkernel(void)
 	crashk_res.end = crash_base + crash_size - 1;
 }
 
+static void __init phymem_addr_overflow(void)
+{
+	phys_addr_t end = memblock_end_of_DRAM();
+
+	if (pgtable_l5_enabled) {
+		if (end > MAX_PFN_MEM_ADDR_L5) {
+			memblock_reserve(MAX_PFN_MEM_ADDR_L5, end - MAX_PFN_MEM_ADDR_L5);
+			WARN(true, "Phymem addr 0x%llx overflowed, reserve [0x%llx-0x%llx] directly.",
+			     end, MAX_PFN_MEM_ADDR_L5, end);
+		}
+	}
+	if (pgtable_l4_enabled) {
+		if (end > MAX_PFN_MEM_ADDR_L4) {
+			memblock_reserve(MAX_PFN_MEM_ADDR_L4, end - MAX_PFN_MEM_ADDR_L4);
+			WARN(true, "Phymem addr 0x%llx overflowed, reserve [0x%llx-0x%llx] directly.",
+			     end, MAX_PFN_MEM_ADDR_L4, end);
+		}
+	}
+	if (end > MAX_PFN_MEM_ADDR_L3) {
+		memblock_reserve(MAX_PFN_MEM_ADDR_L3, end - MAX_PFN_MEM_ADDR_L3);
+		WARN(true, "Phymem addr 0x%llx overflowed, reserve [0x%llx-0x%llx] directly.",
+		     end, MAX_PFN_MEM_ADDR_L3, end);
+	}
+}
+
+void __init paging_check(void)
+{
+	phymem_addr_overflow();
+}
+
 void __init paging_init(void)
 {
 	setup_bootmem();