[v4] riscv: mm: Fix the out-of-bounds issue of vmemmap address

Message ID 20241209122617.53341-1-luxu.kernel@bytedance.com (mailing list archive)
State New
Series: [v4] riscv: mm: Fix the out-of-bounds issue of vmemmap address

Checks

Context Check Description
conchuod/vmtest-for-next-PR success PR summary
conchuod/patch-1-test-1 success .github/scripts/patches/tests/build_rv32_defconfig.sh took 169.27s
conchuod/patch-1-test-2 success .github/scripts/patches/tests/build_rv64_clang_allmodconfig.sh took 2050.85s
conchuod/patch-1-test-3 success .github/scripts/patches/tests/build_rv64_gcc_allmodconfig.sh took 2388.40s
conchuod/patch-1-test-4 success .github/scripts/patches/tests/build_rv64_nommu_k210_defconfig.sh took 66.02s
conchuod/patch-1-test-5 success .github/scripts/patches/tests/build_rv64_nommu_virt_defconfig.sh took 67.44s
conchuod/patch-1-test-6 warning .github/scripts/patches/tests/checkpatch.sh took 0.78s
conchuod/patch-1-test-7 success .github/scripts/patches/tests/dtb_warn_rv64.sh took 40.16s
conchuod/patch-1-test-8 success .github/scripts/patches/tests/header_inline.sh took 0.00s
conchuod/patch-1-test-9 success .github/scripts/patches/tests/kdoc.sh took 0.49s
conchuod/patch-1-test-10 success .github/scripts/patches/tests/module_param.sh took 0.01s
conchuod/patch-1-test-11 success .github/scripts/patches/tests/verify_fixes.sh took 0.02s
conchuod/patch-1-test-12 success .github/scripts/patches/tests/verify_signedoff.sh took 0.02s

Commit Message

Xu Lu Dec. 9, 2024, 12:26 p.m. UTC
In the sparse vmemmap model, the virtual address of the vmemmap base is
calculated as:
((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT)).
A page's struct page virtual address is then calculated with its pfn as
the offset:
(vmemmap + (pfn)).
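
A minimal user-space sketch of this arithmetic (the VMEMMAP_START value
and the struct page size below are assumptions purely for illustration,
not the kernel's actual constants):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT    12
    #define VMEMMAP_START 0xffffffc600000000ULL /* assumed for illustration */

    struct page { char pad[64]; };  /* stand-in for the real struct page */

    int main(void)
    {
        uint64_t phys_ram_base = 0x82000000ULL;
        struct page *vmemmap =
            (struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT);

        /* vmemmap + pfn lands exactly at VMEMMAP_START for the base pfn */
        uint64_t pfn = phys_ram_base >> PAGE_SHIFT;    /* 0x82000 */
        printf("struct page for pfn 0x%llx: %p\n",
               (unsigned long long)pfn, (void *)(vmemmap + pfn));
        return 0;
    }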

However, when initializing struct pages, the kernel actually starts from
the first page of the section that phys_ram_base belongs to. If that
page's pfn is lower than (phys_ram_base >> PAGE_SHIFT), we get a virtual
address below VMEMMAP_START when calculating the address of its struct
page.

For example, if phys_ram_base starts at 0x82000000, i.e. pfn 0x82000,
the first page in the same section actually has pfn 0x80000. During
init_unavailable_range(), we will initialize the struct page for pfn
0x80000 at virtual address ((struct page *)VMEMMAP_START - 0x2000),
which is below VMEMMAP_START as well as PCI_IO_END.
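
Working through the example numbers (riscv uses SECTION_SIZE_BITS == 27,
i.e. 128 MiB sections; this fragment continues the sketch above):

    /* round phys_ram_base down to its 128 MiB section boundary */
    uint64_t section_base = phys_ram_base & ~((1ULL << 27) - 1); /* 0x80000000 */
    uint64_t section_pfn  = section_base >> PAGE_SHIFT;          /* 0x80000 */

    /* 0x80000 - 0x82000 == -0x2000: the struct page for the section's
     * first pfn sits 0x2000 struct pages below VMEMMAP_START */
    struct page *first = vmemmap + section_pfn;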

This commit fixes the bug by introducing a new variable
'vmemmap_start_pfn', aligned down to the memory section size, and using
it instead of phys_ram_base to calculate the vmemmap address.
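
The net effect of the change, in short (round_down() and
SECTION_SIZE_BITS are the kernel helpers the patch itself uses; the
full hunks appear below):

    /* before: base pfn taken directly from phys_ram_base */
    #define vmemmap ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT))

    /* after: base pfn first aligned down to a section boundary */
    vmemmap_start_pfn = round_down(phys_ram_base, 1ULL << SECTION_SIZE_BITS) >> PAGE_SHIFT;
    #define vmemmap ((struct page *)VMEMMAP_START - vmemmap_start_pfn)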

Fixes: a11dd49dcb93 ("riscv: Sparse-Memory/vmemmap out-of-bounds fix")
Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
---
 arch/riscv/include/asm/page.h    |  1 +
 arch/riscv/include/asm/pgtable.h |  2 +-
 arch/riscv/mm/init.c             | 17 ++++++++++++++++-
 3 files changed, 18 insertions(+), 2 deletions(-)

Comments

Alexandre Ghiti Dec. 9, 2024, 1:06 p.m. UTC | #1
Hi Xu,

On Mon, Dec 9, 2024 at 1:26 PM Xu Lu <luxu.kernel@bytedance.com> wrote:
> [...]

Thanks for the multiple revisions!

It looks good to me, so:

Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>

Thanks again,

Alex
Björn Töpel Dec. 9, 2024, 1:10 p.m. UTC | #2
Xu Lu <luxu.kernel@bytedance.com> writes:

> [...]

Verified that the kernel is not trying to access vmemmap/struct pages
below VMEMMAP_START.
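
One hypothetical way to catch such an underflow at runtime (a debugging
sketch only; the actual test setup used here isn't described):

    /* hypothetical sanity check: a struct page address must never fall
     * below VMEMMAP_START into the PCI I/O window */
    VM_WARN_ON((unsigned long)pfn_to_page(pfn) < VMEMMAP_START);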

Tested-by: Björn Töpel <bjorn@rivosinc.com>
Reviewed-by: Björn Töpel <bjorn@rivosinc.com>

Patch

diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 71aabc5c6713..125f5ecd9565 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -122,6 +122,7 @@  struct kernel_mapping {
 
 extern struct kernel_mapping kernel_map;
 extern phys_addr_t phys_ram_base;
+extern unsigned long vmemmap_start_pfn;
 
 #define is_kernel_mapping(x)	\
 	((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size))
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index d4e99eef90ac..050fdc49b5ad 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -87,7 +87,7 @@ 
  * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel
  * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled.
  */
-#define vmemmap		((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT))
+#define vmemmap		((struct page *)VMEMMAP_START - vmemmap_start_pfn)
 
 #define PCI_IO_SIZE      SZ_16M
 #define PCI_IO_END       VMEMMAP_START
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 0e8c20adcd98..d93271cb97b1 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -33,6 +33,7 @@ 
 #include <asm/pgtable.h>
 #include <asm/sections.h>
 #include <asm/soc.h>
+#include <asm/sparsemem.h>
 #include <asm/tlbflush.h>
 
 #include "../kernel/head.h"
@@ -62,6 +63,13 @@  EXPORT_SYMBOL(pgtable_l5_enabled);
 phys_addr_t phys_ram_base __ro_after_init;
 EXPORT_SYMBOL(phys_ram_base);
 
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+#define VMEMMAP_ADDR_ALIGN	(1ULL << SECTION_SIZE_BITS)
+
+unsigned long vmemmap_start_pfn __ro_after_init;
+EXPORT_SYMBOL(vmemmap_start_pfn);
+#endif
+
 unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
 							__page_aligned_bss;
 EXPORT_SYMBOL(empty_zero_page);
@@ -240,8 +248,12 @@  static void __init setup_bootmem(void)
 	 * Make sure we align the start of the memory on a PMD boundary so that
 	 * at worst, we map the linear mapping with PMD mappings.
 	 */
-	if (!IS_ENABLED(CONFIG_XIP_KERNEL))
+	if (!IS_ENABLED(CONFIG_XIP_KERNEL)) {
 		phys_ram_base = memblock_start_of_DRAM() & PMD_MASK;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+		vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT;
+#endif
+	}
 
 	/*
 	 * In 64-bit, any use of __va/__pa before this point is wrong as we
@@ -1101,6 +1113,9 @@  asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 	kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom);
 
 	phys_ram_base = CONFIG_PHYS_RAM_BASE;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT;
+#endif
 	kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE;
 	kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_start);