
[v2,16/19] arm64: kasan: Reduce minimum shadow alignment and enable 5 level paging

Message ID 20221124123932.2648991-17-ardb@kernel.org (mailing list archive)
State New, archived
Series arm64: Enable LPA2 support for 4k and 16k pages

Commit Message

Ard Biesheuvel Nov. 24, 2022, 12:39 p.m. UTC
Allow the KASAN init code to deal with 5 levels of paging, and relax the
requirement that the shadow region is aligned to the top-level pgd_t
size. This is necessary for LPA2-based 52-bit virtual addressing, where
the KASAN shadow will never be aligned to the pgd_t size. Allowing this
also enables the 16k/48-bit case for KASAN, which is a nice bonus.

This involves some hackery to manipulate the root and next-level page
tables without having to distinguish all the various configurations,
including 16k/48-bits (which has a two-entry pgd_t level) and LPA2
configurations running with one translation level fewer on non-LPA2
hardware.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/Kconfig         |   2 +-
 arch/arm64/mm/kasan_init.c | 124 ++++++++++++++++++--
 2 files changed, 112 insertions(+), 14 deletions(-)
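
As an illustrative aside (not part of the patch itself): the relaxed
alignment requirement comes down to how much virtual address space a
single top-level descriptor covers, which the patch derives from the
number of translation levels. The stand-alone sketch below mocks the
kernel's PAGE_SHIFT and level-count macros for a 4k granule (the
VA_LEVELS stand-in is an assumption made purely for illustration) and
prints that coverage for the 48-bit and 52-bit configurations.

/* Illustrative user-space sketch; kernel macros are mocked here. */
#include <stdio.h>

#define PAGE_SHIFT	12	/* 4k granule assumed for this sketch */
/* translation levels needed to resolve va_bits of VA (stand-in macro) */
#define VA_LEVELS(va_bits) \
	(((va_bits) - PAGE_SHIFT + (PAGE_SHIFT - 3) - 1) / (PAGE_SHIFT - 3))

int main(void)
{
	int va_bits_tab[] = { 48, 52 };

	for (int i = 0; i < 2; i++) {
		int va_bits = va_bits_tab[i];
		int levels = VA_LEVELS(va_bits);
		/* one top-level entry maps PAGE_SIZE << shift bytes */
		int shift = (levels - 1) * (PAGE_SHIFT - 3);

		printf("VA_BITS=%d: %d levels, top-level entry spans 2^%d bytes\n",
		       va_bits, levels, PAGE_SHIFT + shift);
	}
	return 0;	/* prints 2^39 (512 GiB) and 2^48 (256 TiB) */
}

With five levels (LPA2), a single top-level entry spans 2^48 bytes, so
the shadow region will in general start somewhere inside such a block;
this is why the PGDIR_SIZE BUILD_BUG_ON() checks are relaxed to
SHADOW_ALIGN in the patch.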

Comments

Ard Biesheuvel Nov. 24, 2022, 5:44 p.m. UTC | #1
On Thu, 24 Nov 2022 at 13:40, Ard Biesheuvel <ardb@kernel.org> wrote:
>
> Allow the KASAN init code to deal with 5 levels of paging, and relax the
> requirement that the shadow region is aligned to the top-level pgd_t
> size. This is necessary for LPA2-based 52-bit virtual addressing, where
> the KASAN shadow will never be aligned to the pgd_t size. Allowing this
> also enables the 16k/48-bit case for KASAN, which is a nice bonus.
>
> This involves some hackery to manipulate the root and next-level page
> tables without having to distinguish all the various configurations,
> including 16k/48-bits (which has a two-entry pgd_t level) and LPA2
> configurations running with one translation level fewer on non-LPA2
> hardware.
>

This patch is not entirely correct: to safely allow the start of the
KASAN shadow region to be misaligned with respect to the top-level block
size, we need to install a next-level table that covers it before we map
the early shadow; otherwise, we may end up mapping parts of the linear
map into the zero shadow page tables.

I have a fix that I will incorporate the next time around.


Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 6d299c6c0a56..901f4d73476d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -153,7 +153,7 @@  config ARM64
 	select HAVE_ARCH_HUGE_VMAP
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_JUMP_LABEL_RELATIVE
-	select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
+	select HAVE_ARCH_KASAN
 	select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
 	select HAVE_ARCH_KASAN_SW_TAGS if HAVE_ARCH_KASAN
 	select HAVE_ARCH_KASAN_HW_TAGS if (HAVE_ARCH_KASAN && ARM64_MTE)
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 7e32f21fb8e1..c422952e439b 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -23,7 +23,7 @@ 
 
 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 
-static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
+static pgd_t tmp_pg_dir[PTRS_PER_PTE] __initdata __aligned(PAGE_SIZE);
 
 /*
  * The p*d_populate functions call virt_to_phys implicitly so they can't be used
@@ -99,6 +99,19 @@  static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node,
 	return early ? pud_offset_kimg(p4dp, addr) : pud_offset(p4dp, addr);
 }
 
+static p4d_t *__init kasan_p4d_offset(pgd_t *pgdp, unsigned long addr, int node,
+				      bool early)
+{
+	if (pgd_none(READ_ONCE(*pgdp))) {
+		phys_addr_t p4d_phys = early ?
+				__pa_symbol(kasan_early_shadow_p4d)
+					: kasan_alloc_zeroed_page(node);
+		__pgd_populate(pgdp, p4d_phys, PGD_TYPE_TABLE);
+	}
+
+	return early ? p4d_offset_kimg(pgdp, addr) : p4d_offset(pgdp, addr);
+}
+
 static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
 				      unsigned long end, int node, bool early)
 {
@@ -144,7 +157,7 @@  static void __init kasan_p4d_populate(pgd_t *pgdp, unsigned long addr,
 				      unsigned long end, int node, bool early)
 {
 	unsigned long next;
-	p4d_t *p4dp = p4d_offset(pgdp, addr);
+	p4d_t *p4dp = kasan_p4d_offset(pgdp, addr, node, early);
 
 	do {
 		next = p4d_addr_end(addr, end);
@@ -165,14 +178,20 @@  static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
 	} while (pgdp++, addr = next, addr != end);
 }
 
+#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS > 4
+#define SHADOW_ALIGN	P4D_SIZE
+#else
+#define SHADOW_ALIGN	PUD_SIZE
+#endif
+
 /* The early shadow maps everything to a single page of zeroes */
 asmlinkage void __init kasan_early_init(void)
 {
 	BUILD_BUG_ON(KASAN_SHADOW_OFFSET !=
 		KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT)));
-	BUILD_BUG_ON(!IS_ALIGNED(_KASAN_SHADOW_START(VA_BITS), PGDIR_SIZE));
-	BUILD_BUG_ON(!IS_ALIGNED(_KASAN_SHADOW_START(VA_BITS_MIN), PGDIR_SIZE));
-	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
+	BUILD_BUG_ON(!IS_ALIGNED(_KASAN_SHADOW_START(VA_BITS), SHADOW_ALIGN));
+	BUILD_BUG_ON(!IS_ALIGNED(_KASAN_SHADOW_START(VA_BITS_MIN), SHADOW_ALIGN));
+	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, SHADOW_ALIGN));
 	kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE,
 			   true);
 }
@@ -184,20 +203,86 @@  static void __init kasan_map_populate(unsigned long start, unsigned long end,
 	kasan_pgd_populate(start & PAGE_MASK, PAGE_ALIGN(end), node, false);
 }
 
-static void __init clear_pgds(unsigned long start,
-			unsigned long end)
+/*
+ * Return whether 'addr' is aligned to the size covered by a top level
+ * descriptor.
+ */
+static bool __init top_level_aligned(u64 addr)
+{
+	int shift = (VA_LEVELS(vabits_actual) - 1) * (PAGE_SHIFT - 3);
+
+	return (addr % (PAGE_SIZE << shift)) == 0;
+}
+
+/*
+ * Return the descriptor index of 'addr' in the top level table
+ */
+static int __init top_level_idx(u64 addr)
 {
 	/*
-	 * Remove references to kasan page tables from
-	 * swapper_pg_dir. pgd_clear() can't be used
-	 * here because it's nop on 2,3-level pagetable setups
+	 * On 64k pages, the TTBR1 range root tables are extended for 52-bit
+	 * virtual addressing, and TTBR1 will simply point to the pgd_t entry
+	 * that covers the start of the 48-bit addressable VA space if LVA is
+	 * not implemented. This means we need to index the table as usual,
+	 * instead of masking off bits based on vabits_actual.
 	 */
-	for (; start < end; start += PGDIR_SIZE)
-		set_pgd(pgd_offset_k(start), __pgd(0));
+	u64 vabits = IS_ENABLED(CONFIG_ARM64_64K_PAGES) ? VA_BITS
+							: vabits_actual;
+	int shift = (VA_LEVELS(vabits) - 1) * (PAGE_SHIFT - 3);
+
+	return (addr & ~_PAGE_OFFSET(vabits)) >> (shift + PAGE_SHIFT);
+}
+
+/*
+ * Clone a next level table from swapper_pg_dir into tmp_pg_dir
+ */
+static void __init clone_next_level(u64 addr, pgd_t *tmp_pg_dir, pud_t *pud)
+{
+	int idx = top_level_idx(addr);
+	pgd_t pgd = READ_ONCE(swapper_pg_dir[idx]);
+	pud_t *pudp = (pud_t *)__phys_to_kimg(__pgd_to_phys(pgd));
+
+	memcpy(pud, pudp, PAGE_SIZE);
+	tmp_pg_dir[idx] = __pgd(__phys_to_pgd_val(__pa_symbol(pud)) |
+				PUD_TYPE_TABLE);
+}
+
+/*
+ * Return the descriptor index of 'addr' in the next level table
+ */
+static int __init next_level_idx(u64 addr)
+{
+	int shift = (VA_LEVELS(vabits_actual) - 2) * (PAGE_SHIFT - 3);
+
+	return (addr >> (shift + PAGE_SHIFT)) % PTRS_PER_PTE;
+}
+
+/*
+ * Dereference the table descriptor at 'pgd_idx' and clear the entries from
+ * 'start' to 'end' from the table.
+ */
+static void __init clear_next_level(int pgd_idx, int start, int end)
+{
+	pgd_t pgd = READ_ONCE(swapper_pg_dir[pgd_idx]);
+	pud_t *pudp = (pud_t *)__phys_to_kimg(__pgd_to_phys(pgd));
+
+	memset(&pudp[start], 0, (end - start) * sizeof(pud_t));
+}
+
+static void __init clear_shadow(u64 start, u64 end)
+{
+	int l = top_level_idx(start), m = top_level_idx(end);
+
+	if (!top_level_aligned(start))
+		clear_next_level(l++, next_level_idx(start), PTRS_PER_PTE - 1);
+	if (!top_level_aligned(end))
+		clear_next_level(m, 0, next_level_idx(end));
+	memset(&swapper_pg_dir[l], 0, (m - l) * sizeof(pgd_t));
 }
 
 static void __init kasan_init_shadow(void)
 {
+	static pud_t pud[2][PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
 	u64 kimg_shadow_start, kimg_shadow_end;
 	u64 mod_shadow_start, mod_shadow_end;
 	u64 vmalloc_shadow_end;
@@ -220,10 +305,23 @@  static void __init kasan_init_shadow(void)
 	 * setup will be finished.
 	 */
 	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
+
+	/*
+	 * If the start or end address of the shadow region is not aligned to
+	 * the top level size, we have to allocate a temporary next-level table
+	 * in each case, clone the next level of descriptors, and install the
+	 * table into tmp_pg_dir. Note that with 5 levels of paging, the next
+	 * level will in fact be p4d_t, but that makes no difference in this
+	 * case.
+	 */
+	if (!top_level_aligned(KASAN_SHADOW_START))
+		clone_next_level(KASAN_SHADOW_START, tmp_pg_dir, pud[0]);
+	if (!top_level_aligned(KASAN_SHADOW_END))
+		clone_next_level(KASAN_SHADOW_END, tmp_pg_dir, pud[1]);
 	dsb(ishst);
 	cpu_replace_ttbr1(lm_alias(tmp_pg_dir));
 
-	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+	clear_shadow(KASAN_SHADOW_START, KASAN_SHADOW_END);
 
 	kasan_map_populate(kimg_shadow_start, kimg_shadow_end,
 			   early_pfn_to_nid(virt_to_pfn(lm_alias(KERNEL_START))));
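
For readers following the clear_shadow() logic, here is a stand-alone
sketch (illustrative only, using mocked constants and a hypothetical
address range rather than the actual arm64 KASAN layout, and assuming a
4k granule with 48-bit VAs) of how top_level_idx() and next_level_idx()
split a misaligned shadow region into a partial leading next-level
table, a run of whole top-level entries, and a partial trailing
next-level table.

/* Illustrative mock of the index arithmetic in clear_shadow(); not
 * kernel code. Assumes a 4k granule with 48-bit VAs (4 levels). */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define PTRS_PER_PTE	(1 << (PAGE_SHIFT - 3))		/* 512 */
#define VA_BITS		48
#define LEVELS		4
#define _PAGE_OFFSET(va)	(-(1UL << (va)))	/* base of the TTBR1 range */

/* index of 'addr' in the top-level (pgd) table */
static int top_level_idx(uint64_t addr)
{
	int shift = (LEVELS - 1) * (PAGE_SHIFT - 3);

	return (addr & ~_PAGE_OFFSET(VA_BITS)) >> (shift + PAGE_SHIFT);
}

/* index of 'addr' in the next-level (pud) table */
static int next_level_idx(uint64_t addr)
{
	int shift = (LEVELS - 2) * (PAGE_SHIFT - 3);

	return (addr >> (shift + PAGE_SHIFT)) % PTRS_PER_PTE;
}

int main(void)
{
	/* hypothetical range: PUD-aligned but not PGD-aligned at the start */
	uint64_t start = 0xffff600000000000UL + (3UL << 30);
	uint64_t end   = 0xffff800000000000UL;

	/* start lands 3 GiB into pgd entry 192, so clear_shadow() would
	 * clear pud entries from index 3 upwards in the next-level table
	 * referenced by that pgd entry, and then wipe the whole pgd
	 * entries between it and pgd entry 256, where the aligned end
	 * falls. */
	printf("start: pgd idx %d, pud idx %d\n",
	       top_level_idx(start), next_level_idx(start));
	printf("end:   pgd idx %d, pud idx %d\n",
	       top_level_idx(end), next_level_idx(end));
	return 0;
}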