| Message ID | 20221124123932.2648991-17-ardb@kernel.org (mailing list archive) |
|---|---|
| State | New, archived |
| Series | arm64: Enable LPA2 support for 4k and 16k pages |
On Thu, 24 Nov 2022 at 13:40, Ard Biesheuvel <ardb@kernel.org> wrote:
>
> Allow the KASAN init code to deal with 5 levels of paging, and relax the
> requirement that the shadow region is aligned to the top level pgd_t
> size. This is necessary for LPA2 based 52-bit virtual addressing, where
> the KASAN shadow will never be aligned to the pgd_t size. Allowing this
> also enables the 16k/48-bit case for KASAN, which is a nice bonus.
>
> This involves some hackery to manipulate the root and next level page
> tables without having to distinguish all the various configurations,
> including 16k/48-bits (which has a two entry pgd_t level), and LPA2
> configurations running with one translation level less on non-LPA2
> hardware.
>

This patch is not entirely correct: to safely allow the start of the
kasan shadow region to be misaligned wrt the top level block size, we
need to install a next level table that covers it before we map the
early shadow; otherwise we may end up mapping parts of the linear map
into the zero shadow page tables.

I have a fix that I will incorporate the next time around.

> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> ---
>  arch/arm64/Kconfig         |   2 +-
>  arch/arm64/mm/kasan_init.c | 124 ++++++++++++++++++--
>  2 files changed, 112 insertions(+), 14 deletions(-)
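To make the failure mode easier to picture, below is a minimal, purely illustrative sketch of the kind of adjustment being described. It reuses top_level_aligned() and __pgd_populate() from the patch, while kasan_cover_unaligned_start() and kasan_shadow_start_p4d are hypothetical names invented for this example; it is not the fix referred to above.

/*
 * Illustrative sketch only (not the actual fix): give a misaligned
 * KASAN_SHADOW_START its own statically allocated next level table
 * before the early shadow is mapped, so that the shared zero-shadow
 * tables never end up covering addresses outside the shadow region,
 * such as parts of the linear map.
 */
static p4d_t kasan_shadow_start_p4d[PTRS_PER_PTE] __initdata __aligned(PAGE_SIZE);

static void __init kasan_cover_unaligned_start(void)
{
	pgd_t *pgdp = pgd_offset_k(KASAN_SHADOW_START);

	if (!top_level_aligned(KASAN_SHADOW_START) && pgd_none(READ_ONCE(*pgdp)))
		__pgd_populate(pgdp, __pa_symbol(kasan_shadow_start_p4d),
			       PGD_TYPE_TABLE);
}

Something along these lines would have to run from kasan_early_init(), before kasan_pgd_populate() installs the early shadow.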
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 6d299c6c0a56..901f4d73476d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -153,7 +153,7 @@ config ARM64
 	select HAVE_ARCH_HUGE_VMAP
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_JUMP_LABEL_RELATIVE
-	select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
+	select HAVE_ARCH_KASAN
 	select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
 	select HAVE_ARCH_KASAN_SW_TAGS if HAVE_ARCH_KASAN
 	select HAVE_ARCH_KASAN_HW_TAGS if (HAVE_ARCH_KASAN && ARM64_MTE)
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 7e32f21fb8e1..c422952e439b 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -23,7 +23,7 @@
 
 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 
-static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
+static pgd_t tmp_pg_dir[PTRS_PER_PTE] __initdata __aligned(PAGE_SIZE);
 
 /*
  * The p*d_populate functions call virt_to_phys implicitly so they can't be used
@@ -99,6 +99,19 @@ static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node,
 	return early ? pud_offset_kimg(p4dp, addr) : pud_offset(p4dp, addr);
 }
 
+static p4d_t *__init kasan_p4d_offset(pgd_t *pgdp, unsigned long addr, int node,
+				      bool early)
+{
+	if (pgd_none(READ_ONCE(*pgdp))) {
+		phys_addr_t p4d_phys = early ?
+				__pa_symbol(kasan_early_shadow_p4d)
+					: kasan_alloc_zeroed_page(node);
+		__pgd_populate(pgdp, p4d_phys, PGD_TYPE_TABLE);
+	}
+
+	return early ? p4d_offset_kimg(pgdp, addr) : p4d_offset(pgdp, addr);
+}
+
 static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
 				      unsigned long end, int node, bool early)
 {
@@ -144,7 +157,7 @@ static void __init kasan_p4d_populate(pgd_t *pgdp, unsigned long addr,
 			      unsigned long end, int node, bool early)
 {
 	unsigned long next;
-	p4d_t *p4dp = p4d_offset(pgdp, addr);
+	p4d_t *p4dp = kasan_p4d_offset(pgdp, addr, node, early);
 
 	do {
 		next = p4d_addr_end(addr, end);
@@ -165,14 +178,20 @@ static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
 	} while (pgdp++, addr = next, addr != end);
 }
 
+#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS > 4
+#define SHADOW_ALIGN	P4D_SIZE
+#else
+#define SHADOW_ALIGN	PUD_SIZE
+#endif
+
 /* The early shadow maps everything to a single page of zeroes */
 asmlinkage void __init kasan_early_init(void)
 {
 	BUILD_BUG_ON(KASAN_SHADOW_OFFSET !=
 		KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT)));
-	BUILD_BUG_ON(!IS_ALIGNED(_KASAN_SHADOW_START(VA_BITS), PGDIR_SIZE));
-	BUILD_BUG_ON(!IS_ALIGNED(_KASAN_SHADOW_START(VA_BITS_MIN), PGDIR_SIZE));
-	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
+	BUILD_BUG_ON(!IS_ALIGNED(_KASAN_SHADOW_START(VA_BITS), SHADOW_ALIGN));
+	BUILD_BUG_ON(!IS_ALIGNED(_KASAN_SHADOW_START(VA_BITS_MIN), SHADOW_ALIGN));
+	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, SHADOW_ALIGN));
 	kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE,
 			   true);
 }
@@ -184,20 +203,86 @@ static void __init kasan_map_populate(unsigned long start, unsigned long end,
 	kasan_pgd_populate(start & PAGE_MASK, PAGE_ALIGN(end), node, false);
 }
 
-static void __init clear_pgds(unsigned long start,
-			      unsigned long end)
+/*
+ * Return whether 'addr' is aligned to the size covered by a top level
+ * descriptor.
+ */
+static bool __init top_level_aligned(u64 addr)
+{
+	int shift = (VA_LEVELS(vabits_actual) - 1) * (PAGE_SHIFT - 3);
+
+	return (addr % (PAGE_SIZE << shift)) == 0;
+}
+
+/*
+ * Return the descriptor index of 'addr' in the top level table
+ */
+static int __init top_level_idx(u64 addr)
 {
 	/*
-	 * Remove references to kasan page tables from
-	 * swapper_pg_dir. pgd_clear() can't be used
-	 * here because it's nop on 2,3-level pagetable setups
+	 * On 64k pages, the TTBR1 range root tables are extended for 52-bit
+	 * virtual addressing, and TTBR1 will simply point to the pgd_t entry
+	 * that covers the start of the 48-bit addressable VA space if LVA is
+	 * not implemented. This means we need to index the table as usual,
+	 * instead of masking off bits based on vabits_actual.
 	 */
-	for (; start < end; start += PGDIR_SIZE)
-		set_pgd(pgd_offset_k(start), __pgd(0));
+	u64 vabits = IS_ENABLED(CONFIG_ARM64_64K_PAGES) ? VA_BITS
+							: vabits_actual;
+	int shift = (VA_LEVELS(vabits) - 1) * (PAGE_SHIFT - 3);
+
+	return (addr & ~_PAGE_OFFSET(vabits)) >> (shift + PAGE_SHIFT);
+}
+
+/*
+ * Clone a next level table from swapper_pg_dir into tmp_pg_dir
+ */
+static void __init clone_next_level(u64 addr, pgd_t *tmp_pg_dir, pud_t *pud)
+{
+	int idx = top_level_idx(addr);
+	pgd_t pgd = READ_ONCE(swapper_pg_dir[idx]);
+	pud_t *pudp = (pud_t *)__phys_to_kimg(__pgd_to_phys(pgd));
+
+	memcpy(pud, pudp, PAGE_SIZE);
+	tmp_pg_dir[idx] = __pgd(__phys_to_pgd_val(__pa_symbol(pud)) |
+				PUD_TYPE_TABLE);
+}
+
+/*
+ * Return the descriptor index of 'addr' in the next level table
+ */
+static int __init next_level_idx(u64 addr)
+{
+	int shift = (VA_LEVELS(vabits_actual) - 2) * (PAGE_SHIFT - 3);
+
+	return (addr >> (shift + PAGE_SHIFT)) % PTRS_PER_PTE;
+}
+
+/*
+ * Dereference the table descriptor at 'pgd_idx' and clear the entries from
+ * 'start' to 'end' from the table.
+ */
+static void __init clear_next_level(int pgd_idx, int start, int end)
+{
+	pgd_t pgd = READ_ONCE(swapper_pg_dir[pgd_idx]);
+	pud_t *pudp = (pud_t *)__phys_to_kimg(__pgd_to_phys(pgd));
+
+	memset(&pudp[start], 0, (end - start) * sizeof(pud_t));
+}
+
+static void __init clear_shadow(u64 start, u64 end)
+{
+	int l = top_level_idx(start), m = top_level_idx(end);
+
+	if (!top_level_aligned(start))
+		clear_next_level(l++, next_level_idx(start), PTRS_PER_PTE - 1);
+	if (!top_level_aligned(end))
+		clear_next_level(m, 0, next_level_idx(end));
+	memset(&swapper_pg_dir[l], 0, (m - l) * sizeof(pgd_t));
 }
 
 static void __init kasan_init_shadow(void)
 {
+	static pud_t pud[2][PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
 	u64 kimg_shadow_start, kimg_shadow_end;
 	u64 mod_shadow_start, mod_shadow_end;
 	u64 vmalloc_shadow_end;
@@ -220,10 +305,23 @@ static void __init kasan_init_shadow(void)
 	 * setup will be finished.
 	 */
 	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
+
+	/*
+	 * If the start or end address of the shadow region is not aligned to
+	 * the top level size, we have to allocate a temporary next-level table
+	 * in each case, clone the next level of descriptors, and install the
+	 * table into tmp_pg_dir. Note that with 5 levels of paging, the next
+	 * level will in fact be p4d_t, but that makes no difference in this
+	 * case.
+	 */
+	if (!top_level_aligned(KASAN_SHADOW_START))
+		clone_next_level(KASAN_SHADOW_START, tmp_pg_dir, pud[0]);
+	if (!top_level_aligned(KASAN_SHADOW_END))
+		clone_next_level(KASAN_SHADOW_END, tmp_pg_dir, pud[1]);
 	dsb(ishst);
 	cpu_replace_ttbr1(lm_alias(tmp_pg_dir));
 
-	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+	clear_shadow(KASAN_SHADOW_START, KASAN_SHADOW_END);
 
 	kasan_map_populate(kimg_shadow_start, kimg_shadow_end,
 			   early_pfn_to_nid(virt_to_pfn(lm_alias(KERNEL_START))));
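For readers checking the shift arithmetic in top_level_aligned(), top_level_idx() and next_level_idx(), here is a rough worked example for the newly enabled 16k/48-bit configuration; the numbers are computed here for illustration and are not part of the patch.

/*
 * Worked example: 16k pages, 48-bit VA.
 *
 *   PAGE_SHIFT           = 14, so each level resolves 14 - 3 = 11 bits
 *   VA_LEVELS(48)        = 4  (the top level table holds just two entries)
 *   shift                = (4 - 1) * 11 = 33
 *   top level block size = PAGE_SIZE << 33 = 2^47 bytes per pgd_t entry
 *
 * With KASAN_SHADOW_SCALE_SHIFT == 3, the whole shadow region is only
 * 2^48 / 8 = 2^45 bytes -- smaller than a single top level block -- so it
 * can never be PGDIR aligned. That is why this configuration used to be
 * excluded from HAVE_ARCH_KASAN, and why SHADOW_ALIGN (PUD_SIZE here) is
 * the strongest alignment kasan_early_init() can assert.
 */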
Allow the KASAN init code to deal with 5 levels of paging, and relax the
requirement that the shadow region is aligned to the top level pgd_t
size. This is necessary for LPA2 based 52-bit virtual addressing, where
the KASAN shadow will never be aligned to the pgd_t size. Allowing this
also enables the 16k/48-bit case for KASAN, which is a nice bonus.

This involves some hackery to manipulate the root and next level page
tables without having to distinguish all the various configurations,
including 16k/48-bits (which has a two entry pgd_t level), and LPA2
configurations running with one translation level less on non-LPA2
hardware.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/Kconfig         |   2 +-
 arch/arm64/mm/kasan_init.c | 124 ++++++++++++++++++--
 2 files changed, 112 insertions(+), 14 deletions(-)
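As a concrete illustration of the hackery described above, here is how kasan_init_shadow() and clear_shadow() from the patch handle a shadow region whose start and end both fall in the middle of a top level entry; the index values below are invented for the example and do not correspond to any real configuration.

/*
 * Hypothetical layout: the shadow starts partway through top level
 * entry 3 and ends partway through top level entry 7, so
 *
 *   l = top_level_idx(start) = 3,  top_level_aligned(start) == false
 *   m = top_level_idx(end)   = 7,  top_level_aligned(end)   == false
 *
 * kasan_init_shadow() clones the next level tables under entries 3 and
 * 7 into the static pud[2] buffers and installs them in tmp_pg_dir, so
 * that once TTBR1 points at tmp_pg_dir, swapper_pg_dir and the next
 * level tables it references can be rewritten safely. clear_shadow()
 * then:
 *
 *   1. clears next level entries [next_level_idx(start), PTRS_PER_PTE - 1)
 *      in the table under entry 3, and advances l to 4;
 *   2. clears next level entries [0, next_level_idx(end)) in the table
 *      under entry 7;
 *   3. memsets top level entries 4..6 (i.e. [l, m)) of swapper_pg_dir.
 *
 * Only afterwards are actual shadow mappings (kernel image, modules,
 * vmalloc) populated over the cleared range.
 */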