Message ID | 20221124123932.2648991-13-ardb@kernel.org |
---|---|
State | New, archived |
Series | arm64: Enable LPA2 support for 4k and 16k pages |
On 24/11/2022 12:39, Ard Biesheuvel wrote:
> Add the required types and descriptor accessors to support 5 levels of
> paging in the common code. This is one of the prerequisites for
> supporting 52-bit virtual addressing with 4k pages.
>
> Note that this does not cover the code that handles kernel mappings or
> the fixmap.
>
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> [...]
>
> diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
> index b91fe4781b06..b364b02e696b 100644
> --- a/arch/arm64/include/asm/pgtable-hwdef.h
> +++ b/arch/arm64/include/asm/pgtable-hwdef.h
> @@ -26,10 +26,10 @@
>  #define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3))
>
>  /*
> - * Size mapped by an entry at level n ( 0 <= n <= 3)
> + * Size mapped by an entry at level n ( -1 <= n <= 3)
>   * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits
>   * in the final page. The maximum number of translation levels supported by
> - * the architecture is 4. Hence, starting at level n, we have further
> + * the architecture is 5. Hence, starting at level n, we have further
>   * ((4 - n) - 1) levels of translation excluding the offset within the page.
>   * So, the total number of bits mapped by an entry at level n is :
>   *

Is it necessary to represent the levels as (-1 - 3) in the kernel or are you
open to switching to (0 - 4)?

There are a couple of other places where translation level is used, which I
found and fixed up for the KVM LPA2 support work. It got a bit messy to
represent the levels using the architectural range (-1 - 3) so I ended up
representing them as (0 - 4). The main issue was that KVM represents level
as unsigned so that change would have looked quite big.

Most of this is confined to KVM and the only place it really crosses over
with the kernel is at __tlbi_level(). Which makes me think you might be
missing some required changes (I didn't notice these in your other patches):

Looking at the TLB management stuff, I think there are some places you will
need to fix up to correctly handle the extra level in the kernel (e.g.
tlb_get_level(), flush_tlb_range()).

There are some new encodings for level in the FSC field in the ESR. You
might need to update the fault_info array in fault.c to represent these and
correctly handle user space faults for the new level?

> [...]

Thanks,
Ryan
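For context on the __tlbi_level() crossover Ryan mentions: the ARMv8.4-TTL
hint packs the granule and the leaf level into a small field whose 2-bit level
part covers levels 0-3 only, so level -1 has no encoding there. Below is a
minimal sketch of that constraint; the function name and field layout are
illustrative assumptions, not the kernel's actual macros, and a TTL value of 0
simply means "no level information provided":

	/*
	 * Illustrative sketch only -- not the kernel's __tlbi_level().
	 * A 2-bit level field can express levels 0-3 but not the new
	 * level -1, so the conservative fallback is TTL = 0, telling
	 * the TLBI to assume nothing about the level of the leaf entry.
	 */
	static inline unsigned long ttl_hint(unsigned int granule, int level)
	{
		if (level < 0 || level > 3)
			return 0;	/* no hint: hardware considers all levels */
		return (granule << 2) | level;
	}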
On Mon, 28 Nov 2022 at 17:17, Ryan Roberts <ryan.roberts@arm.com> wrote:
>
> On 24/11/2022 12:39, Ard Biesheuvel wrote:
> > [...]
>
> Is it necessary to represent the levels as (-1 - 3) in the kernel or are you
> open to switching to (0 - 4)?
>
> [...]
>
> Looking at the TLB management stuff, I think there are some places you will
> need to fix up to correctly handle the extra level in the kernel (e.g.
> tlb_get_level(), flush_tlb_range()).
>
> There are some new encodings for level in the FSC field in the ESR. You
> might need to update the fault_info array in fault.c to represent these and
> correctly handle user space faults for the new level?
>

Hi Ryan,

Thanks for pointing this out. Once I have educated myself a bit more
about all of this, I should be able to answer your questions :-)

I did not do any user space testing in anger on this series, on the
assumption that we already support 52-bit VAs, but I completely missed
the fact that the additional level of paging requires additional
attention.

As for the level indexing: I have a slight preference for sticking
with the architectural range, but I don't deeply care either way.
On 2022-11-28 16:22, Ard Biesheuvel wrote:
> On Mon, 28 Nov 2022 at 17:17, Ryan Roberts <ryan.roberts@arm.com> wrote:
>> [...]
>>
>> Is it necessary to represent the levels as (-1 - 3) in the kernel or
>> are you open to switching to (0 - 4)?
>>
>> [...]
>
> As for the level indexing: I have a slight preference for sticking
> with the architectural range, but I don't deeply care either way.

I'd really like to stick to the architectural representation, as
there is an ingrained knowledge of the relation between a base
granule size, a level, and a block mapping size.

The nice thing about level '-1' is that it preserves this behaviour,
and doesn't force everyone to adjust. It also makes it extremely
easy to compare the code and the spec.

So let's please stick to the [-1;3] range. It will save everyone
a lot of trouble.

Thanks,

M.
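Marc's "ingrained knowledge" point can be checked directly against the formula
in the pgtable-hwdef.h comment quoted earlier: an entry at level n maps
(PAGE_SHIFT - 3) * (4 - n) + 3 bits. A small standalone sketch (plain
userspace C, assuming a 4k granule) that tabulates the familiar block sizes,
including the new level -1:

	/*
	 * Tabulate the size mapped by an entry at each level, using the
	 * formula from the pgtable-hwdef.h comment above. For 4k pages
	 * (PAGE_SHIFT = 12) this prints 4 KiB, 2 MiB, 1 GiB, 512 GiB and
	 * 256 TiB for levels 3, 2, 1, 0 and -1 respectively.
	 */
	#include <stdio.h>

	int main(void)
	{
		const int page_shift = 12;	/* 4k granule */

		for (int n = -1; n <= 3; n++) {
			int bits = (page_shift - 3) * (4 - n) + 3;
			printf("level %2d maps %2d bits (%llu bytes)\n",
			       n, bits, 1ULL << bits);
		}
		return 0;
	}

The output matches the architectural level numbering exactly, which is the
behaviour Marc argues the [-1;3] range preserves.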
On 28/11/2022 18:00, Marc Zyngier wrote:
> On 2022-11-28 16:22, Ard Biesheuvel wrote:
>> [...]
>>
>> As for the level indexing: I have a slight preference for sticking
>> with the architectural range, but I don't deeply care either way.
>
> I'd really like to stick to the architectural representation, as
> there is an ingrained knowledge of the relation between a base
> granule size, a level, and a block mapping size.
>
> The nice thing about level '-1' is that it preserves this behaviour,
> and doesn't force everyone to adjust. It also makes it extremely
> easy to compare the code and the spec.
>
> So let's please stick to the [-1;3] range. It will save everyone
> a lot of trouble.

Fair point. It will mean a bigger patch, but I'll rework my stuff to
make it all work with [-1;3] before I post it.
On 28/11/2022 16:22, Ard Biesheuvel wrote:
> On Mon, 28 Nov 2022 at 17:17, Ryan Roberts <ryan.roberts@arm.com> wrote:
>> [...]
>
> Hi Ryan,
>
> Thanks for pointing this out. Once I have educated myself a bit more
> about all of this, I should be able to answer your questions :-)

I've just noticed one more thing: get_user_mapping_size() in
arch/arm64/kvm/mmu.c uses CONFIG_PGTABLE_LEVELS to calculate the start
level of a user space page table. I guess that will need some attention now
that the runtime value might be smaller than this macro on systems that
don't support LPA2?

> [...]
On Tue, 29 Nov 2022 at 16:46, Ryan Roberts <ryan.roberts@arm.com> wrote:
>
> [...]
>
> I've just noticed one more thing: get_user_mapping_size() in
> arch/arm64/kvm/mmu.c uses CONFIG_PGTABLE_LEVELS to calculate the start
> level of a user space page table. I guess that will need some attention now
> that the runtime value might be smaller than this macro on systems that
> don't support LPA2?

Indeed. In general, every reference to that quantity should now take
pgtable_l4_enabled() and pgtable_l5_enabled() into account as well.
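One way to read "every reference to that quantity": a hypothetical sketch of
how a caller such as get_user_mapping_size() could derive the start level at
runtime instead of from CONFIG_PGTABLE_LEVELS alone. The helper name and shape
are assumptions for illustration, relying only on the pgtable_l4_enabled()/
pgtable_l5_enabled() predicates discussed in this thread:

	/*
	 * Hypothetical sketch: derive the runtime depth of a user page
	 * table, folding away levels that the compile-time configuration
	 * provides but the hardware (or kernel VA configuration) does not
	 * enable. Returns the architectural start level in the [-1;3] range.
	 */
	static int user_pgtable_start_level(void)
	{
		int levels = CONFIG_PGTABLE_LEVELS;

		if (levels == 5 && !pgtable_l5_enabled())
			levels = 4;	/* level -1 folded away at runtime */
		if (levels == 4 && !pgtable_l4_enabled())
			levels = 3;	/* level 0 folded away at runtime */

		return 4 - levels;	/* 5 levels -> -1, 4 -> 0, 3 -> 1 */
	}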
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 237224484d0f..cae8c648f462 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -60,6 +60,47 @@ static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
 }
 #endif /* CONFIG_PGTABLE_LEVELS > 3 */
 
+#if CONFIG_PGTABLE_LEVELS > 4
+
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t p4dp, pgdval_t prot)
+{
+	if (pgtable_l5_enabled())
+		set_pgd(pgdp, __pgd(__phys_to_pgd_val(p4dp) | prot));
+}
+
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, p4d_t *p4dp)
+{
+	pgdval_t pgdval = PGD_TYPE_TABLE;
+
+	pgdval |= (mm == &init_mm) ? PGD_TABLE_UXN : PGD_TABLE_PXN;
+	__pgd_populate(pgdp, __pa(p4dp), pgdval);
+}
+
+static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	gfp_t gfp = GFP_PGTABLE_USER;
+
+	if (mm == &init_mm)
+		gfp = GFP_PGTABLE_KERNEL;
+	return (p4d_t *)get_zeroed_page(gfp);
+}
+
+static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+	if (!pgtable_l5_enabled())
+		return;
+	BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
+	free_page((unsigned long)p4d);
+}
+
+#define __p4d_free_tlb(tlb, p4d, addr)	p4d_free((tlb)->mm, p4d)
+#else
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t p4dp, pgdval_t prot)
+{
+	BUILD_BUG();
+}
+#endif /* CONFIG_PGTABLE_LEVELS > 4 */
+
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
 
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index b91fe4781b06..b364b02e696b 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -26,10 +26,10 @@
 #define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3))
 
 /*
- * Size mapped by an entry at level n ( 0 <= n <= 3)
+ * Size mapped by an entry at level n ( -1 <= n <= 3)
  * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits
  * in the final page. The maximum number of translation levels supported by
- * the architecture is 4. Hence, starting at level n, we have further
+ * the architecture is 5. Hence, starting at level n, we have further
  * ((4 - n) - 1) levels of translation excluding the offset within the page.
  * So, the total number of bits mapped by an entry at level n is :
  *
@@ -62,9 +62,16 @@
 #define PTRS_PER_PUD		(1 << (PAGE_SHIFT - 3))
 #endif
 
+#if CONFIG_PGTABLE_LEVELS > 4
+#define P4D_SHIFT		ARM64_HW_PGTABLE_LEVEL_SHIFT(0)
+#define P4D_SIZE		(_AC(1, UL) << P4D_SHIFT)
+#define P4D_MASK		(~(P4D_SIZE-1))
+#define PTRS_PER_P4D		(1 << (PAGE_SHIFT - 3))
+#endif
+
 /*
  * PGDIR_SHIFT determines the size a top-level page table entry can map
- * (depending on the configuration, this level can be 0, 1 or 2).
+ * (depending on the configuration, this level can be -1, 0, 1 or 2).
  */
 #define PGDIR_SHIFT		ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS)
 #define PGDIR_SIZE		(_AC(1, UL) << PGDIR_SHIFT)
@@ -87,6 +94,15 @@
 /*
  * Hardware page table definitions.
  *
+ * Level -1 descriptor (PGD).
+ */
+#define PGD_TYPE_TABLE		(_AT(pgdval_t, 3) << 0)
+#define PGD_TABLE_BIT		(_AT(pgdval_t, 1) << 1)
+#define PGD_TYPE_MASK		(_AT(pgdval_t, 3) << 0)
+#define PGD_TABLE_PXN		(_AT(pgdval_t, 1) << 59)
+#define PGD_TABLE_UXN		(_AT(pgdval_t, 1) << 60)
+
+/*
  * Level 0 descriptor (P4D).
  */
 #define P4D_TYPE_TABLE		(_AT(p4dval_t, 3) << 0)
diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h
index b8f158ae2527..6d6d4065b0cb 100644
--- a/arch/arm64/include/asm/pgtable-types.h
+++ b/arch/arm64/include/asm/pgtable-types.h
@@ -36,6 +36,12 @@ typedef struct { pudval_t pud; } pud_t;
 #define __pud(x)	((pud_t) { (x) } )
 #endif
 
+#if CONFIG_PGTABLE_LEVELS > 4
+typedef struct { p4dval_t p4d; } p4d_t;
+#define p4d_val(x)	((x).p4d)
+#define __p4d(x)	((p4d_t) { (x) } )
+#endif
+
 typedef struct { pgdval_t pgd; } pgd_t;
 #define pgd_val(x)	((x).pgd)
 #define __pgd(x)	((pgd_t) { (x) } )
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 666db7173d0f..2f7202d03d98 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -793,7 +793,6 @@ static inline pud_t *p4d_pgtable(p4d_t p4d)
 #else
 
 #define p4d_page_paddr(p4d)	({ BUILD_BUG(); 0;})
-#define pgd_page_paddr(pgd)	({ BUILD_BUG(); 0;})
 
 /* Match pud_offset folding in <asm/generic/pgtable-nopud.h> */
 #define pud_set_fixmap(addr)		NULL
@@ -804,6 +803,80 @@ static inline pud_t *p4d_pgtable(p4d_t p4d)
 
 #endif /* CONFIG_PGTABLE_LEVELS > 3 */
 
+#if CONFIG_PGTABLE_LEVELS > 4
+
+static __always_inline bool pgtable_l5_enabled(void)
+{
+	if (!alternative_has_feature_likely(ARM64_ALWAYS_BOOT))
+		return vabits_actual == VA_BITS;
+	return alternative_has_feature_unlikely(ARM64_HAS_LVA);
+}
+
+static inline bool mm_p4d_folded(struct mm_struct *mm)
+{
+	return !pgtable_l5_enabled();
+}
+#define mm_p4d_folded mm_p4d_folded
+
+#define p4d_ERROR(e)	\
+	pr_err("%s:%d: bad p4d %016llx.\n", __FILE__, __LINE__, p4d_val(e))
+
+#define pgd_none(pgd)		(pgtable_l5_enabled() && !pgd_val(pgd))
+#define pgd_bad(pgd)		(pgtable_l5_enabled() && !(pgd_val(pgd) & 2))
+#define pgd_present(pgd)	(!pgd_none(pgd))
+
+static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	if (in_swapper_pgdir(pgdp)) {
+		set_swapper_pgd(pgdp, __pgd(pgd_val(pgd)));
+		return;
+	}
+
+	WRITE_ONCE(*pgdp, pgd);
+	dsb(ishst);
+	isb();
+}
+
+static inline void pgd_clear(pgd_t *pgdp)
+{
+	if (pgtable_l5_enabled())
+		set_pgd(pgdp, __pgd(0));
+}
+
+static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
+{
+	return __pgd_to_phys(pgd);
+}
+
+#define p4d_index(addr)		(((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
+
+static inline p4d_t *pgd_to_folded_p4d(pgd_t *pgdp, unsigned long addr)
+{
+	return (p4d_t *)PTR_ALIGN_DOWN(pgdp, PAGE_SIZE) + p4d_index(addr);
+}
+
+static inline phys_addr_t p4d_offset_phys(pgd_t *pgdp, unsigned long addr)
+{
+	BUG_ON(!pgtable_l5_enabled());
+
+	return pgd_page_paddr(READ_ONCE(*pgdp)) + p4d_index(addr) * sizeof(p4d_t);
+}
+
+static inline p4d_t *p4d_offset(pgd_t *pgdp, unsigned long addr)
+{
+	if (!pgtable_l5_enabled())
+		return pgd_to_folded_p4d(pgdp, addr);
+	return (p4d_t *)__va(p4d_offset_phys(pgdp, addr));
+}
+
+#define pgd_page(pgd)		pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd)))
+
+#else
+
+static inline bool pgtable_l5_enabled(void) { return false; }
+
+#endif /* CONFIG_PGTABLE_LEVELS > 4 */
+
 #define pgd_ERROR(e)	\
 	pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e))
 
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index bcf617f956cb..d089bc78e592 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1049,7 +1049,7 @@ static void free_empty_pud_table(p4d_t *p4dp, unsigned long addr,
 	if (CONFIG_PGTABLE_LEVELS <= 3)
 		return;
 
-	if (!pgtable_range_aligned(start, end, floor, ceiling, PGDIR_MASK))
+	if (!pgtable_range_aligned(start, end, floor, ceiling, P4D_MASK))
 		return;
 
 	/*
@@ -1072,8 +1072,8 @@ static void free_empty_p4d_table(pgd_t *pgdp, unsigned long addr,
 			       unsigned long end, unsigned long floor,
 			       unsigned long ceiling)
 {
-	unsigned long next;
 	p4d_t *p4dp, p4d;
+	unsigned long i, next, start = addr;
 
 	do {
 		next = p4d_addr_end(addr, end);
@@ -1085,6 +1085,27 @@ static void free_empty_p4d_table(pgd_t *pgdp, unsigned long addr,
 		WARN_ON(!p4d_present(p4d));
 		free_empty_pud_table(p4dp, addr, next, floor, ceiling);
 	} while (addr = next, addr < end);
+
+	if (!pgtable_l5_enabled())
+		return;
+
+	if (!pgtable_range_aligned(start, end, floor, ceiling, PGDIR_MASK))
+		return;
+
+	/*
+	 * Check whether we can free the p4d page if the rest of the
+	 * entries are empty. Overlap with other regions have been
+	 * handled by the floor/ceiling check.
+	 */
+	p4dp = p4d_offset(pgdp, 0UL);
+	for (i = 0; i < PTRS_PER_P4D; i++) {
+		if (!p4d_none(READ_ONCE(p4dp[i])))
+			return;
+	}
+
+	pgd_clear(pgdp);
+	__flush_tlb_kernel_pgtable(start);
+	free_hotplug_pgtable_page(virt_to_page(p4dp));
 }
 
 static void free_empty_tables(unsigned long addr, unsigned long end,
@@ -1351,6 +1372,12 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
 	return 1;
 }
 
+#ifndef __PAGETABLE_P4D_FOLDED
+void p4d_clear_huge(p4d_t *p4dp)
+{
+}
+#endif
+
 int pud_clear_huge(pud_t *pudp)
 {
 	if (!pud_sect(READ_ONCE(*pudp)))
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index 4a64089e5771..3c4f8a279d2b 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -17,11 +17,20 @@
 
 static struct kmem_cache *pgd_cache __ro_after_init;
 
+static bool pgdir_is_page_size(void)
+{
+	if (PGD_SIZE == PAGE_SIZE)
+		return true;
+	if (CONFIG_PGTABLE_LEVELS == 5)
+		return !pgtable_l5_enabled();
+	return false;
+}
+
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	gfp_t gfp = GFP_PGTABLE_USER;
 
-	if (PGD_SIZE == PAGE_SIZE)
+	if (pgdir_is_page_size())
 		return (pgd_t *)__get_free_page(gfp);
 	else
 		return kmem_cache_alloc(pgd_cache, gfp);
@@ -29,7 +38,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 
 void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
-	if (PGD_SIZE == PAGE_SIZE)
+	if (pgdir_is_page_size())
 		free_page((unsigned long)pgd);
 	else
 		kmem_cache_free(pgd_cache, pgd);
@@ -37,7 +46,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 void __init pgtable_cache_init(void)
 {
-	if (PGD_SIZE == PAGE_SIZE)
+	if (pgdir_is_page_size())
 		return;
 
 #ifdef CONFIG_ARM64_PA_BITS_52
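Taken together, the accessors in the diff above keep the generic page-table
walk unchanged: when pgtable_l5_enabled() is false, p4d_offset() folds the new
level back onto the pgd entry, so callers never see it. A sketch of the
standard walk using the generic kernel accessors (the p4d_none()/pud_none()
etc. checks a real walker needs are omitted for brevity):

	/* Sketch: walking to a PTE with the runtime-folded level -1 in place. */
	static pte_t *walk_to_pte(struct mm_struct *mm, unsigned long addr)
	{
		pgd_t *pgdp = pgd_offset(mm, addr);	/* level -1 when 5 levels are live */
		p4d_t *p4dp = p4d_offset(pgdp, addr);	/* folds onto the pgd slot if l5 is off */
		pud_t *pudp = pud_offset(p4dp, addr);
		pmd_t *pmdp = pmd_offset(pudp, addr);

		return pte_offset_kernel(pmdp, addr);	/* level 3 entry */
	}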
Add the required types and descriptor accessors to support 5 levels of
paging in the common code. This is one of the prerequisites for
supporting 52-bit virtual addressing with 4k pages.

Note that this does not cover the code that handles kernel mappings or
the fixmap.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/include/asm/pgalloc.h       | 41 +++++++++++
 arch/arm64/include/asm/pgtable-hwdef.h | 22 +++++-
 arch/arm64/include/asm/pgtable-types.h |  6 ++
 arch/arm64/include/asm/pgtable.h       | 75 +++++++++++++++++++-
 arch/arm64/mm/mmu.c                    | 31 +++++++-
 arch/arm64/mm/pgd.c                    | 15 +++-
 6 files changed, 181 insertions(+), 9 deletions(-)