
[v2,2/3] arm64: mm: use XN table mapping attributes for the linear region

Message ID 20210308181535.16230-3-ardb@kernel.org (mailing list archive)
State New, archived
Series arm64: use hierarchical XN permissions for all page tables

Commit Message

Ard Biesheuvel March 8, 2021, 6:15 p.m. UTC
The way the arm64 kernel virtual address space is constructed guarantees
that swapper PGD entries are never shared between the linear region on
the one hand, and the vmalloc region on the other, which is where all
kernel text, module text and BPF text mappings reside.

This means that mappings in the linear region (which never require
executable permissions) never share any table entries at any level with
mappings that do require executable permissions, and so we can set the
table-level PXN attributes for all table entries that are created while
setting up mappings in the linear region. Since swapper's PGD level page
table is mapped r/o itself, this adds another layer of robustness to the
way the kernel manages its own page tables. While at it, set the UXN
attribute as well for all kernel mappings created at boot.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
---
 arch/arm64/include/asm/pgtable-hwdef.h |  6 +++++
 arch/arm64/mm/mmu.c                    | 27 +++++++++++++++-----
 2 files changed, 26 insertions(+), 7 deletions(-)
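
For background on the mechanism the patch builds on: in the arm64
translation table format, the PXNTable/UXNTable bits of a table
descriptor apply hierarchically, i.e. setting PXNTable at any level
forces privileged-execute-never on every mapping reachable through that
table, regardless of the leaf attributes. A toy model of that rule
(illustration only, not kernel code):

#include <stdbool.h>

/* A walk yields a privileged-executable mapping only if no table
 * level on the path sets PXNTable and the leaf itself clears PXN. */
static bool priv_executable(const bool *pxn_table, int levels, bool leaf_pxn)
{
	int i;

	for (i = 0; i < levels; i++)
		if (pxn_table[i])	/* table-level override wins */
			return false;
	return !leaf_pxn;
}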

Comments

Anshuman Khandual March 9, 2021, 5:09 a.m. UTC | #1
On 3/8/21 11:45 PM, Ard Biesheuvel wrote:
> The way the arm64 kernel virtual address space is constructed guarantees
> that swapper PGD entries are never shared between the linear region on
> the one hand, and the vmalloc region on the other, which is where all
> kernel text, module text and BPF text mappings reside.

While here, I wonder whether it would be better to validate this
assumption via a BUILD_BUG_ON() or something similar?

> 
> This means that mappings in the linear region (which never require
> executable permissions) never share any table entries at any level with
> mappings that do require executable permissions, and so we can set the
> table-level PXN attributes for all table entries that are created while
> setting up mappings in the linear region. Since swapper's PGD level page
> table is mapped r/o itself, this adds another layer of robustness to the
> way the kernel manages its own page tables. While at it, set the UXN
> attribute as well for all kernel mappings created at boot.
> [...]
>  #define NO_BLOCK_MAPPINGS	BIT(0)
>  #define NO_CONT_MAPPINGS	BIT(1)
> +#define NO_EXEC_MAPPINGS	BIT(2)

The NO_EXEC_MAPPINGS flag basically selects only PXX_TABLE_PXN, because
PXX_TABLE_UXN is now applied unconditionally as a default protection
flag. Should this not be renamed NO_KERNEL_EXEC_MAPPINGS, to match what
PXX_TABLE_PXN is going to achieve?

> [...]
Anshuman Khandual March 9, 2021, 5:52 a.m. UTC | #2
On 3/8/21 11:45 PM, Ard Biesheuvel wrote:
> The way the arm64 kernel virtual address space is constructed guarantees
> that swapper PGD entries are never shared between the linear region on
> the one hand, and the vmalloc region on the other, which is where all
> kernel text, module text and BPF text mappings reside.
> 
> This means that mappings in the linear region (which never require
> executable permissions) never share any table entries at any level with
> mappings that do require executable permissions, and so we can set the
> table-level PXN attributes for all table entries that are created while
> setting up mappings in the linear region. Since swapper's PGD level page
> table is mapped r/o itself, this adds another layer of robustness to the
> way the kernel manages its own page tables. While at it, set the UXN
> attribute as well for all kernel mappings created at boot.

What happens when FEAT_HPDS is implemented and also enabled? Would
there be any adverse effect here, or would it at least break the
assumption that the linear mapping page table entries are safer than
before? Hence, it might still be better to check for FEAT_HPDS
enablement here, even if it is not being used right now.
Ard Biesheuvel March 9, 2021, 12:36 p.m. UTC | #3
On Tue, 9 Mar 2021 at 06:08, Anshuman Khandual
<anshuman.khandual@arm.com> wrote:
>
>
>
> On 3/8/21 11:45 PM, Ard Biesheuvel wrote:
> > The way the arm64 kernel virtual address space is constructed guarantees
> > that swapper PGD entries are never shared between the linear region on
> > the one hand, and the vmalloc region on the other, which is where all
> > kernel text, module text and BPF text mappings reside.
>
> While here, I wonder whether it would be better to validate this
> assumption via a BUILD_BUG_ON() or something similar?
>

Sure. Something like

BUILD_BUG_ON(pgd_index(_PAGE_END(VA_BITS_MIN) - 1) ==
	     pgd_index(_PAGE_END(VA_BITS_MIN)));

perhaps?

> >
> > This means that mappings in the linear region (which never require
> > executable permissions) never share any table entries at any level with
> > mappings that do require executable permissions, and so we can set the
> > table-level PXN attributes for all table entries that are created while
> > setting up mappings in the linear region. Since swapper's PGD level page
> > table is mapped r/o itself, this adds another layer of robustness to the
> > way the kernel manages its own page tables. While at it, set the UXN
> > attribute as well for all kernel mappings created at boot.
> >
> What happens when FEAT_HPDS is implemented and also enabled? Would
> there be any adverse effect here, or would it at least break the
> assumption that the linear mapping page table entries are safer than
> before? Hence, it might still be better to check for FEAT_HPDS
> enablement here, even if it is not being used right now.

I would assume that enabling HPDS is only done to free up those bits
for some other purpose, and until that time, I don't think we need to
worry about this, given that the values are just going to be ignored
otherwise.

> > [...]
> >
> > @@ -39,6 +39,7 @@
> >
> >  #define NO_BLOCK_MAPPINGS	BIT(0)
> >  #define NO_CONT_MAPPINGS	BIT(1)
> > +#define NO_EXEC_MAPPINGS	BIT(2)
>
> The NO_EXEC_MAPPINGS flag basically selects only PXX_TABLE_PXN, because
> PXX_TABLE_UXN is now applied unconditionally as a default protection
> flag. Should this not be renamed NO_KERNEL_EXEC_MAPPINGS, to match what
> PXX_TABLE_PXN is going to achieve?
>

I don't think renaming this flag is going to make things more clear tbh.
> > [...]
Anshuman Khandual March 10, 2021, 6:48 a.m. UTC | #4
On 3/9/21 6:06 PM, Ard Biesheuvel wrote:
> On Tue, 9 Mar 2021 at 06:08, Anshuman Khandual
> <anshuman.khandual@arm.com> wrote:
>>
>>
>>
>> On 3/8/21 11:45 PM, Ard Biesheuvel wrote:
>>> The way the arm64 kernel virtual address space is constructed guarantees
>>> that swapper PGD entries are never shared between the linear region on
>>> the one hand, and the vmalloc region on the other, which is where all
>>> kernel text, module text and BPF text mappings reside.
>>
>> While here, I wonder whether it would be better to validate this
>> assumption via a BUILD_BUG_ON() or something similar?
>>
> 
> Sure. Something like
> 
> BUILD_BUG_ON(pgd_index(_PAGE_END(VA_BITS_MIN) - 1) ==
>              pgd_index(_PAGE_END(VA_BITS_MIN)));
> 
> perhaps?

Yes, looks good. But probably with a comment explaining how this clear
separation in the kernel page tables enables the use of the
hierarchical permission structure.
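
Put together, the check plus a comment might look like the following in
map_mem() (a sketch only; the comment wording here is hypothetical and
the form eventually merged may differ):

	/*
	 * The linear region never shares a swapper_pg_dir entry with the
	 * vmalloc region, which carries all executable kernel mappings,
	 * so the table-level PXN attribute applied below can never
	 * affect a mapping that needs to be executable.
	 */
	BUILD_BUG_ON(pgd_index(_PAGE_END(VA_BITS_MIN) - 1) ==
		     pgd_index(_PAGE_END(VA_BITS_MIN)));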

> 
>>>
>>> This means that mappings in the linear region (which never require
>>> executable permissions) never share any table entries at any level with
>>> mappings that do require executable permissions, and so we can set the
>>> table-level PXN attributes for all table entries that are created while
>>> setting up mappings in the linear region. Since swapper's PGD level page
>>> table is mapped r/o itself, this adds another layer of robustness to the
>>> way the kernel manages its own page tables. While at it, set the UXN
>>> attribute as well for all kernel mappings created at boot.
>>>
>> What happens when FEAT_HPDS is implemented and also enabled? Would
>> there be any adverse effect here, or would it at least break the
>> assumption that the linear mapping page table entries are safer than
>> before? Hence, it might still be better to check for FEAT_HPDS
>> enablement here, even if it is not being used right now.
> 
> I would assume that enabling HPDS is only done to free up those bits
> for some other purpose, and until that time, I don't think we need to
> worry about this, given that the values are just going to be ignored
> otherwise.

Right, it is definitely safe, because the values are going to be
ignored otherwise. But would it not be better to have a small comment
explaining this possible interaction with HPDS?
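
For illustration, such a comment next to the new PXX_TABLE_* definitions
might read as follows (hypothetical wording, not text from the patch):

/*
 * PXNTable (bit 59) and UXNTable (bit 60) of a table descriptor
 * hierarchically deny execute permission to everything mapped below
 * that table. When hierarchical permissions are disabled through
 * TCR_ELx.HPDn (FEAT_HPDS), the hardware simply ignores these bits,
 * so setting them unconditionally is harmless: the affected mappings
 * merely lose this extra layer of protection.
 */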

> 
>>> [...]
>>>
>>> @@ -39,6 +39,7 @@
>>>
>>>  #define NO_BLOCK_MAPPINGS	BIT(0)
>>>  #define NO_CONT_MAPPINGS	BIT(1)
>>> +#define NO_EXEC_MAPPINGS	BIT(2)
>>
>> The NO_EXEC_MAPPINGS flag basically selects only PXX_TABLE_PXN, because
>> PXX_TABLE_UXN is now applied unconditionally as a default protection
>> flag. Should this not be renamed NO_KERNEL_EXEC_MAPPINGS, to match what
>> PXX_TABLE_PXN is going to achieve?
>>
> 
> I don't think renaming this flag is going to make things more clear tbh.

Alright, no problem.
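
To make the new attributes concrete, here is a stand-alone snippet
(illustration only; the bit positions mirror the definitions the patch
adds, and the physical address is made up) showing the descriptor value
that __pmd_populate() now writes for a linear-region table when
NO_EXEC_MAPPINGS is set:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define TABLE_PXN	(UINT64_C(1) << 59)	/* PMD_TABLE_PXN */
#define TABLE_UXN	(UINT64_C(1) << 60)	/* PMD_TABLE_UXN */
#define TYPE_TABLE	UINT64_C(3)		/* PMD_TYPE_TABLE */

int main(void)
{
	uint64_t pte_phys = UINT64_C(0x40210000);	/* hypothetical */
	uint64_t pmdval = pte_phys | TYPE_TABLE | TABLE_UXN | TABLE_PXN;

	printf("pmd: 0x%016" PRIx64 "\n", pmdval);
	return 0;
}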

Patch

diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index e64e77a345b2..b82575a33f8b 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -101,6 +101,8 @@ 
 #define P4D_TYPE_MASK		(_AT(p4dval_t, 3) << 0)
 #define P4D_TYPE_SECT		(_AT(p4dval_t, 1) << 0)
 #define P4D_SECT_RDONLY		(_AT(p4dval_t, 1) << 7)		/* AP[2] */
+#define P4D_TABLE_PXN		(_AT(p4dval_t, 1) << 59)
+#define P4D_TABLE_UXN		(_AT(p4dval_t, 1) << 60)
 
 /*
  * Level 1 descriptor (PUD).
@@ -110,6 +112,8 @@ 
 #define PUD_TYPE_MASK		(_AT(pudval_t, 3) << 0)
 #define PUD_TYPE_SECT		(_AT(pudval_t, 1) << 0)
 #define PUD_SECT_RDONLY		(_AT(pudval_t, 1) << 7)		/* AP[2] */
+#define PUD_TABLE_PXN		(_AT(pudval_t, 1) << 59)
+#define PUD_TABLE_UXN		(_AT(pudval_t, 1) << 60)
 
 /*
  * Level 2 descriptor (PMD).
@@ -131,6 +135,8 @@ 
 #define PMD_SECT_CONT		(_AT(pmdval_t, 1) << 52)
 #define PMD_SECT_PXN		(_AT(pmdval_t, 1) << 53)
 #define PMD_SECT_UXN		(_AT(pmdval_t, 1) << 54)
+#define PMD_TABLE_PXN		(_AT(pmdval_t, 1) << 59)
+#define PMD_TABLE_UXN		(_AT(pmdval_t, 1) << 60)
 
 /*
  * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 029091474042..9de59fce0450 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -39,6 +39,7 @@ 
 
 #define NO_BLOCK_MAPPINGS	BIT(0)
 #define NO_CONT_MAPPINGS	BIT(1)
+#define NO_EXEC_MAPPINGS	BIT(2)
 
 u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
 u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;
@@ -185,10 +186,14 @@  static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
 
 	BUG_ON(pmd_sect(pmd));
 	if (pmd_none(pmd)) {
+		pmdval_t pmdval = PMD_TYPE_TABLE | PMD_TABLE_UXN;
 		phys_addr_t pte_phys;
+
+		if (flags & NO_EXEC_MAPPINGS)
+			pmdval |= PMD_TABLE_PXN;
 		BUG_ON(!pgtable_alloc);
 		pte_phys = pgtable_alloc(PAGE_SHIFT);
-		__pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
+		__pmd_populate(pmdp, pte_phys, pmdval);
 		pmd = READ_ONCE(*pmdp);
 	}
 	BUG_ON(pmd_bad(pmd));
@@ -259,10 +264,14 @@  static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
 	 */
 	BUG_ON(pud_sect(pud));
 	if (pud_none(pud)) {
+		pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_UXN;
 		phys_addr_t pmd_phys;
+
+		if (flags & NO_EXEC_MAPPINGS)
+			pudval |= PUD_TABLE_PXN;
 		BUG_ON(!pgtable_alloc);
 		pmd_phys = pgtable_alloc(PMD_SHIFT);
-		__pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE);
+		__pud_populate(pudp, pmd_phys, pudval);
 		pud = READ_ONCE(*pudp);
 	}
 	BUG_ON(pud_bad(pud));
@@ -306,10 +315,14 @@  static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
 	p4d_t p4d = READ_ONCE(*p4dp);
 
 	if (p4d_none(p4d)) {
+		p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_UXN;
 		phys_addr_t pud_phys;
+
+		if (flags & NO_EXEC_MAPPINGS)
+			p4dval |= P4D_TABLE_PXN;
 		BUG_ON(!pgtable_alloc);
 		pud_phys = pgtable_alloc(PUD_SHIFT);
-		__p4d_populate(p4dp, pud_phys, P4D_TYPE_TABLE);
+		__p4d_populate(p4dp, pud_phys, p4dval);
 		p4d = READ_ONCE(*p4dp);
 	}
 	BUG_ON(p4d_bad(p4d));
@@ -489,11 +502,11 @@  static void __init map_mem(pgd_t *pgdp)
 	phys_addr_t kernel_start = __pa_symbol(_stext);
 	phys_addr_t kernel_end = __pa_symbol(__init_begin);
 	phys_addr_t start, end;
-	int flags = 0;
+	int flags = NO_EXEC_MAPPINGS;
 	u64 i;
 
 	if (rodata_full || crash_mem_map || debug_pagealloc_enabled())
-		flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
+		flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 
 	/*
 	 * Take care not to create a writable alias for the
@@ -1462,7 +1475,7 @@  struct range arch_get_mappable_range(void)
 int arch_add_memory(int nid, u64 start, u64 size,
 		    struct mhp_params *params)
 {
-	int ret, flags = 0;
+	int ret, flags = NO_EXEC_MAPPINGS;
 
 	VM_BUG_ON(!mhp_range_allowed(start, size, true));
 
@@ -1472,7 +1485,7 @@  int arch_add_memory(int nid, u64 start, u64 size,
 	 */
 	if (rodata_full || debug_pagealloc_enabled() ||
 	    IS_ENABLED(CONFIG_KFENCE))
-		flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
+		flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 
 	__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
 			     size, params->pgprot, __pgd_pgtable_alloc,