Message ID | 1407949593-16121-8-git-send-email-keescook@chromium.org (mailing list archive)
---|---
State | New, archived
On Wed, Aug 13, 2014 at 06:06:32PM +0100, Kees Cook wrote:
> Adds CONFIG_ARM_KERNMEM_PERMS to separate the kernel memory regions
> into section-sized areas that can have different permissions. Performs
> the NX permission changes during free_initmem, so that init memory can be
> reclaimed.
>
> This uses section size instead of PMD size to reduce memory lost to
> padding on non-LPAE systems.
>
> Based on work by Brad Spengler, Larry Bassel, and Laura Abbott.

[...]

> diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
> index 6f57cb94367f..a3d07ca2bbb4 100644
> --- a/arch/arm/kernel/vmlinux.lds.S
> +++ b/arch/arm/kernel/vmlinux.lds.S
> @@ -8,6 +8,9 @@
>  #include <asm/thread_info.h>
>  #include <asm/memory.h>
>  #include <asm/page.h>
> +#ifdef CONFIG_ARM_KERNMEM_PERMS
> +#include <asm/pgtable.h>
> +#endif
>
>  #define PROC_INFO						\
>  	. = ALIGN(4);						\
> @@ -90,6 +93,11 @@ SECTIONS
>  		_text = .;
>  		HEAD_TEXT
>  	}
> +
> +#ifdef CONFIG_ARM_KERNMEM_PERMS
> +	. = ALIGN(1<<SECTION_SHIFT);
> +#endif
> +
>  	.text : {			/* Real text segment */
>  		_stext = .;		/* Text and read-only data */
>  		__exception_text_start = .;
> @@ -145,7 +153,11 @@ SECTIONS
>  	_etext = .;			/* End of text and rodata section */
>
>  #ifndef CONFIG_XIP_KERNEL
> +# ifdef CONFIG_ARM_KERNMEM_PERMS
> +	. = ALIGN(1<<SECTION_SHIFT);
> +# else
>  	. = ALIGN(PAGE_SIZE);
> +# endif

This might be cleaner if we had a single macro (ALIGN_MIN?) that expanded to
ALIGN(1 << SECTION_SHIFT) #ifdef CONFIG_ARM_KERNMEM_PERMS.

> diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
> index ad82c05bfc3a..ccf392ef40d4 100644
> --- a/arch/arm/mm/init.c
> +++ b/arch/arm/mm/init.c
> @@ -32,6 +32,11 @@
>  #include <asm/tlb.h>
>  #include <asm/fixmap.h>
>
> +#ifdef CONFIG_ARM_KERNMEM_PERMS
> +#include <asm/system_info.h>
> +#include <asm/cp15.h>
> +#endif

We already #include cp15.h in this file. If you need system_info.h, I don't
think you should bother making the include conditional.

> +/*
> + * Updates section permissions only for the current mm (sections are
> + * copied into each mm). During startup, this is the init_mm.
> + */
> +static inline void section_update(unsigned long addr, pmdval_t mask,
> +				  pmdval_t prot)
> +{
> +	struct mm_struct *mm;
> +	pmd_t *pmd;
> +
> +	mm = current->active_mm;
> +	pmd = pmd_offset(pud_offset(pgd_offset(mm, addr), addr), addr);
> +
> +#ifdef CONFIG_ARM_LPAE
> +	pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot);
> +#else
> +	if (addr & SECTION_SIZE)
> +		pmd[1] = __pmd((pmd_val(pmd[1]) & mask) | prot);
> +	else
> +		pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot);
> +#endif
> +	flush_pmd_entry(pmd);
> +	local_flush_tlb_kernel_range(addr, addr + SECTION_SIZE);

Why only a local flush? You're changing global mappings here, right?

> +}
> +
> +/* Make sure extended page tables are in use. */
> +static inline bool arch_has_strict_perms(void)
> +{
> +	unsigned int cr;
> +
> +	if (cpu_architecture() < CPU_ARCH_ARMv6)
> +		return false;
> +
> +	cr = get_cr();
> +	if (!(cr & CR_XP))
> +		return false;
> +
> +	return true;

return !!(get_cr() & CR_XP)?

> +}
> +
> +#define set_section_perms(perms, field)	{			\
> +	size_t i;						\
> +	unsigned long addr;					\
> +								\
> +	if (!arch_has_strict_perms())				\
> +		return;						\
> +								\
> +	for (i = 0; i < ARRAY_SIZE(perms); i++) {		\
> +		if (!IS_ALIGNED(perms[i].start, SECTION_SIZE) ||	\
> +		    !IS_ALIGNED(perms[i].end, SECTION_SIZE)) {	\
> +			pr_err("BUG: section %lx-%lx not aligned to %lx\n", \
> +				perms[i].start, perms[i].end,	\
> +				SECTION_SIZE);			\
> +			continue;				\
> +		}						\
> +								\
> +		for (addr = perms[i].start;			\
> +		     addr < perms[i].end;			\
> +		     addr += SECTION_SIZE)			\
> +			section_update(addr, perms[i].mask,	\
> +				       perms[i].field);		\
> +	}							\
> +}
> +
> +static inline void fix_kernmem_perms(void)
> +{
> +	set_section_perms(nx_perms, prot);
> +}
> +#else
> +static inline void fix_kernmem_perms(void) { }
> +#endif /* CONFIG_ARM_KERNMEM_PERMS */
> +
>  void free_initmem(void)
>  {
>  #ifdef CONFIG_HAVE_TCM
>  	extern char __tcm_start, __tcm_end;
> +#endif
> +
> +	fix_kernmem_perms();
>
> +#ifdef CONFIG_HAVE_TCM

You could avoid the double #ifdef by moving the tcm stuff into another
function (free_tcmmem?)

Will
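For illustration, the ALIGN_MIN idea could be spelled roughly like this in
vmlinux.lds.S (a hypothetical sketch, not part of the posted patch; the macro
name and the parameterized fallback are invented here, and Kees explains
below why he did not go this route):

/*
 * Hypothetical sketch of Will's ALIGN_MIN suggestion: round up to a
 * section boundary when CONFIG_ARM_KERNMEM_PERMS needs distinct section
 * permissions, otherwise fall back to the alignment the caller asked for.
 */
#ifdef CONFIG_ARM_KERNMEM_PERMS
#define ALIGN_MIN(min)	. = ALIGN(1 << SECTION_SHIFT);
#else
#define ALIGN_MIN(min)	. = ALIGN(min);
#endif

/* Usage, replacing one of the open-coded #ifdef pairs: */
#ifndef CONFIG_XIP_KERNEL
	ALIGN_MIN(PAGE_SIZE)
	__init_begin = .;
#endif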
On Tue, Aug 19, 2014 at 7:33 AM, Will Deacon <will.deacon@arm.com> wrote:
> On Wed, Aug 13, 2014 at 06:06:32PM +0100, Kees Cook wrote:
>> Adds CONFIG_ARM_KERNMEM_PERMS to separate the kernel memory regions
>> into section-sized areas that can have different permissions. Performs
>> the NX permission changes during free_initmem, so that init memory can be
>> reclaimed.
>>
>> This uses section size instead of PMD size to reduce memory lost to
>> padding on non-LPAE systems.
>>
>> Based on work by Brad Spengler, Larry Bassel, and Laura Abbott.
>
> [...]
>
>> diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
>> index 6f57cb94367f..a3d07ca2bbb4 100644
>> --- a/arch/arm/kernel/vmlinux.lds.S
>> +++ b/arch/arm/kernel/vmlinux.lds.S
>> @@ -8,6 +8,9 @@
>>  #include <asm/thread_info.h>
>>  #include <asm/memory.h>
>>  #include <asm/page.h>
>> +#ifdef CONFIG_ARM_KERNMEM_PERMS
>> +#include <asm/pgtable.h>
>> +#endif
>>
>>  #define PROC_INFO						\
>>  	. = ALIGN(4);						\
>> @@ -90,6 +93,11 @@ SECTIONS
>>  		_text = .;
>>  		HEAD_TEXT
>>  	}
>> +
>> +#ifdef CONFIG_ARM_KERNMEM_PERMS
>> +	. = ALIGN(1<<SECTION_SHIFT);
>> +#endif
>> +
>>  	.text : {			/* Real text segment */
>>  		_stext = .;		/* Text and read-only data */
>>  		__exception_text_start = .;
>> @@ -145,7 +153,11 @@ SECTIONS
>>  	_etext = .;			/* End of text and rodata section */
>>
>>  #ifndef CONFIG_XIP_KERNEL
>> +# ifdef CONFIG_ARM_KERNMEM_PERMS
>> +	. = ALIGN(1<<SECTION_SHIFT);
>> +# else
>>  	. = ALIGN(PAGE_SIZE);
>> +# endif
>
> This might be cleaner if we had a single macro (ALIGN_MIN?) that expanded to
> ALIGN(1 << SECTION_SHIFT) #ifdef CONFIG_ARM_KERNMEM_PERMS.

I didn't see a cleaner way to do this, since sometimes it's optional (no
alignment instead of different alignment), and sometimes it's not controlled
by _KERNMEM_PERMS (i.e. sometimes it's _DEBUG_RODATA).

>
>> diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
>> index ad82c05bfc3a..ccf392ef40d4 100644
>> --- a/arch/arm/mm/init.c
>> +++ b/arch/arm/mm/init.c
>> @@ -32,6 +32,11 @@
>>  #include <asm/tlb.h>
>>  #include <asm/fixmap.h>
>>
>> +#ifdef CONFIG_ARM_KERNMEM_PERMS
>> +#include <asm/system_info.h>
>> +#include <asm/cp15.h>
>> +#endif
>
> We already #include cp15.h in this file. If you need system_info.h, I don't
> think you should bother making the include conditional.

Ah, yes. I will fix this.

>
>> +/*
>> + * Updates section permissions only for the current mm (sections are
>> + * copied into each mm). During startup, this is the init_mm.
>> + */
>> +static inline void section_update(unsigned long addr, pmdval_t mask,
>> +				  pmdval_t prot)
>> +{
>> +	struct mm_struct *mm;
>> +	pmd_t *pmd;
>> +
>> +	mm = current->active_mm;
>> +	pmd = pmd_offset(pud_offset(pgd_offset(mm, addr), addr), addr);
>> +
>> +#ifdef CONFIG_ARM_LPAE
>> +	pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot);
>> +#else
>> +	if (addr & SECTION_SIZE)
>> +		pmd[1] = __pmd((pmd_val(pmd[1]) & mask) | prot);
>> +	else
>> +		pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot);
>> +#endif
>> +	flush_pmd_entry(pmd);
>> +	local_flush_tlb_kernel_range(addr, addr + SECTION_SIZE);
>
> Why only a local flush? You're changing global mappings here, right?

Yes, but with the A15 errata it cannot use a global flush. As a result,
section_update can only be used by a single CPU, which is how the usage
is managed. Perhaps I should add some comments to that effect? (There was
a thread a few months ago on this problem and this shook out as a
solution.)

>
>> +}
>> +
>> +/* Make sure extended page tables are in use. */
>> +static inline bool arch_has_strict_perms(void)
>> +{
>> +	unsigned int cr;
>> +
>> +	if (cpu_architecture() < CPU_ARCH_ARMv6)
>> +		return false;
>> +
>> +	cr = get_cr();
>> +	if (!(cr & CR_XP))
>> +		return false;
>> +
>> +	return true;
>
> return !!(get_cr() & CR_XP)?

Sure, I can reduce this.

>
>> +}
>> +
>> +#define set_section_perms(perms, field)	{			\
>> +	size_t i;						\
>> +	unsigned long addr;					\
>> +								\
>> +	if (!arch_has_strict_perms())				\
>> +		return;						\
>> +								\
>> +	for (i = 0; i < ARRAY_SIZE(perms); i++) {		\
>> +		if (!IS_ALIGNED(perms[i].start, SECTION_SIZE) ||	\
>> +		    !IS_ALIGNED(perms[i].end, SECTION_SIZE)) {	\
>> +			pr_err("BUG: section %lx-%lx not aligned to %lx\n", \
>> +				perms[i].start, perms[i].end,	\
>> +				SECTION_SIZE);			\
>> +			continue;				\
>> +		}						\
>> +								\
>> +		for (addr = perms[i].start;			\
>> +		     addr < perms[i].end;			\
>> +		     addr += SECTION_SIZE)			\
>> +			section_update(addr, perms[i].mask,	\
>> +				       perms[i].field);		\
>> +	}							\
>> +}
>> +
>> +static inline void fix_kernmem_perms(void)
>> +{
>> +	set_section_perms(nx_perms, prot);
>> +}
>> +#else
>> +static inline void fix_kernmem_perms(void) { }
>> +#endif /* CONFIG_ARM_KERNMEM_PERMS */
>> +
>>  void free_initmem(void)
>>  {
>>  #ifdef CONFIG_HAVE_TCM
>>  	extern char __tcm_start, __tcm_end;
>> +#endif
>> +
>> +	fix_kernmem_perms();
>>
>> +#ifdef CONFIG_HAVE_TCM
>
> You could avoid the double #ifdef by moving the tcm stuff into another
> function (free_tcmmem?)

Sure, I can do that.

Thanks!

-Kees
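Taken together, the two cleanups Kees agrees to above would look roughly
like this (a sketch against the patch below, not the posted code;
free_tcmmem is the hypothetical name Will floats, and the unchanged
remainder of free_initmem() is elided):

/* Make sure extended page tables are in use. */
static inline bool arch_has_strict_perms(void)
{
	if (cpu_architecture() < CPU_ARCH_ARMv6)
		return false;

	/* CR_XP set means the extended (ARMv6+) page table format is live. */
	return !!(get_cr() & CR_XP);
}

#ifdef CONFIG_HAVE_TCM
static void free_tcmmem(void)
{
	extern char __tcm_start, __tcm_end;

	poison_init_mem(&__tcm_start, &__tcm_end - &__tcm_start);
	free_reserved_area(&__tcm_start, &__tcm_end, -1, "TCM link");
}
#else
static inline void free_tcmmem(void) { }
#endif

void free_initmem(void)
{
	fix_kernmem_perms();
	free_tcmmem();
	/* ... rest of free_initmem() unchanged ... */
}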
On Wed, Aug 20, 2014 at 01:37:14PM +0100, Kees Cook wrote:
> On Tue, Aug 19, 2014 at 7:33 AM, Will Deacon <will.deacon@arm.com> wrote:
> > On Wed, Aug 13, 2014 at 06:06:32PM +0100, Kees Cook wrote:
> >> +/*
> >> + * Updates section permissions only for the current mm (sections are
> >> + * copied into each mm). During startup, this is the init_mm.
> >> + */
> >> +static inline void section_update(unsigned long addr, pmdval_t mask,
> >> +				  pmdval_t prot)
> >> +{
> >> +	struct mm_struct *mm;
> >> +	pmd_t *pmd;
> >> +
> >> +	mm = current->active_mm;
> >> +	pmd = pmd_offset(pud_offset(pgd_offset(mm, addr), addr), addr);
> >> +
> >> +#ifdef CONFIG_ARM_LPAE
> >> +	pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot);
> >> +#else
> >> +	if (addr & SECTION_SIZE)
> >> +		pmd[1] = __pmd((pmd_val(pmd[1]) & mask) | prot);
> >> +	else
> >> +		pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot);
> >> +#endif
> >> +	flush_pmd_entry(pmd);
> >> +	local_flush_tlb_kernel_range(addr, addr + SECTION_SIZE);
> >
> > Why only a local flush? You're changing global mappings here, right?
>
> Yes, but with the A15 errata it cannot use a global flush. As a result,
> section_update can only be used by a single CPU, which is how the usage
> is managed. Perhaps I should add some comments to that effect? (There was
> a thread a few months ago on this problem and this shook out as a
> solution.)

Hmm, so do you mandate that preemption is disabled during sections of code
where the permissions must be changed after boot? (e.g. ftrace patching)

Will
On Tue, Aug 26, 2014 at 7:43 AM, Will Deacon <will.deacon@arm.com> wrote:
> On Wed, Aug 20, 2014 at 01:37:14PM +0100, Kees Cook wrote:
>> On Tue, Aug 19, 2014 at 7:33 AM, Will Deacon <will.deacon@arm.com> wrote:
>> > On Wed, Aug 13, 2014 at 06:06:32PM +0100, Kees Cook wrote:
>> >> +/*
>> >> + * Updates section permissions only for the current mm (sections are
>> >> + * copied into each mm). During startup, this is the init_mm.
>> >> + */
>> >> +static inline void section_update(unsigned long addr, pmdval_t mask,
>> >> +				  pmdval_t prot)
>> >> +{
>> >> +	struct mm_struct *mm;
>> >> +	pmd_t *pmd;
>> >> +
>> >> +	mm = current->active_mm;
>> >> +	pmd = pmd_offset(pud_offset(pgd_offset(mm, addr), addr), addr);
>> >> +
>> >> +#ifdef CONFIG_ARM_LPAE
>> >> +	pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot);
>> >> +#else
>> >> +	if (addr & SECTION_SIZE)
>> >> +		pmd[1] = __pmd((pmd_val(pmd[1]) & mask) | prot);
>> >> +	else
>> >> +		pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot);
>> >> +#endif
>> >> +	flush_pmd_entry(pmd);
>> >> +	local_flush_tlb_kernel_range(addr, addr + SECTION_SIZE);
>> >
>> > Why only a local flush? You're changing global mappings here, right?
>>
>> Yes, but with the A15 errata it cannot use a global flush. As a result,
>> section_update can only be used by a single CPU, which is how the usage
>> is managed. Perhaps I should add some comments to that effect? (There was
>> a thread a few months ago on this problem and this shook out as a
>> solution.)
>
> Hmm, so do you mandate that preemption is disabled during sections of code
> where the permissions must be changed after boot? (e.g. ftrace patching)

I guess it's not enforced (how do I make sure preempt is off?), but it's
only used during the ftrace patching. I'm open to suggestions! (I had
avoided going the fixmap route since I would have expected that to be
much slower.)

-Kees
On Fri, Aug 29, 2014 at 09:04:41AM -0700, Kees Cook wrote:
> On Tue, Aug 26, 2014 at 7:43 AM, Will Deacon <will.deacon@arm.com> wrote:
> > Hmm, so do you mandate that preemption is disabled during sections of code
> > where the permissions must be changed after boot? (e.g. ftrace patching)
>
> I guess it's not enforced (how do I make sure preempt is off?), but it's
> only used during the ftrace patching. I'm open to suggestions! (I had
> avoided going the fixmap route since I would have expected that to be
> much slower.)

For ftrace patching this is called under stop_machine(), so preemption is
already off there.
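A minimal sketch of the calling convention being described, assuming the
patch below is applied (the callback and wrapper names here are hypothetical,
not from the series): stop_machine() runs its callback on one CPU with every
other CPU parked and preemption disabled, which is what makes the local-only
TLB flush in section_update() safe.

#include <linux/stop_machine.h>

/* Hypothetical example: clear XN on one section from a stop_machine callback. */
static int clear_section_xn(void *data)
{
	unsigned long addr = (unsigned long)data;

	/* Runs on a single CPU; all others are spinning in stop_machine(). */
	section_update(addr, ~PMD_SECT_XN, 0);
	return 0;
}

static void make_section_executable(unsigned long addr)
{
	stop_machine(clear_section_xn, (void *)addr, NULL);
}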
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 6f57cb94367f..a3d07ca2bbb4 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -8,6 +8,9 @@
 #include <asm/thread_info.h>
 #include <asm/memory.h>
 #include <asm/page.h>
+#ifdef CONFIG_ARM_KERNMEM_PERMS
+#include <asm/pgtable.h>
+#endif
 
 #define PROC_INFO						\
 	. = ALIGN(4);						\
@@ -90,6 +93,11 @@ SECTIONS
 		_text = .;
 		HEAD_TEXT
 	}
+
+#ifdef CONFIG_ARM_KERNMEM_PERMS
+	. = ALIGN(1<<SECTION_SHIFT);
+#endif
+
 	.text : {			/* Real text segment */
 		_stext = .;		/* Text and read-only data */
 		__exception_text_start = .;
@@ -145,7 +153,11 @@ SECTIONS
 	_etext = .;			/* End of text and rodata section */
 
 #ifndef CONFIG_XIP_KERNEL
+# ifdef CONFIG_ARM_KERNMEM_PERMS
+	. = ALIGN(1<<SECTION_SHIFT);
+# else
 	. = ALIGN(PAGE_SIZE);
+# endif
 	__init_begin = .;
 #endif
 /*
@@ -220,7 +232,12 @@ SECTIONS
 	. = PAGE_OFFSET + TEXT_OFFSET;
 #else
 	__init_end = .;
+
+#ifdef CONFIG_ARM_KERNMEM_PERMS
+	. = ALIGN(1<<SECTION_SHIFT);
+#else
 	. = ALIGN(THREAD_SIZE);
+#endif
 	__data_loc = .;
 #endif
 
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index ae69809a9e47..7a0756df91a2 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -1008,3 +1008,12 @@ config ARCH_SUPPORTS_BIG_ENDIAN
 	help
 	  This option specifies the architecture can support big endian
 	  operation.
+
+config ARM_KERNMEM_PERMS
+	bool "Restrict kernel memory permissions"
+	help
+	  If this is set, kernel memory other than kernel text (and rodata)
+	  will be made non-executable. The tradeoff is that each region is
+	  padded to section-size (1MiB) boundaries (because their permissions
+	  are different and splitting the 1M pages into 4K ones causes TLB
+	  performance problems), wasting memory.
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index ad82c05bfc3a..ccf392ef40d4 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -32,6 +32,11 @@
 #include <asm/tlb.h>
 #include <asm/fixmap.h>
 
+#ifdef CONFIG_ARM_KERNMEM_PERMS
+#include <asm/system_info.h>
+#include <asm/cp15.h>
+#endif
+
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 
@@ -615,11 +620,112 @@ void __init mem_init(void)
 	}
 }
 
+#ifdef CONFIG_ARM_KERNMEM_PERMS
+struct section_perm {
+	unsigned long start;
+	unsigned long end;
+	pmdval_t mask;
+	pmdval_t prot;
+};
+
+struct section_perm nx_perms[] = {
+	/* Make page tables, etc. before _stext RW (set NX). */
+	{
+		.start	= PAGE_OFFSET,
+		.end	= (unsigned long)_stext,
+		.mask	= ~PMD_SECT_XN,
+		.prot	= PMD_SECT_XN,
+	},
+	/* Make init RW (set NX). */
+	{
+		.start	= (unsigned long)__init_begin,
+		.end	= (unsigned long)_sdata,
+		.mask	= ~PMD_SECT_XN,
+		.prot	= PMD_SECT_XN,
+	},
+};
+
+/*
+ * Updates section permissions only for the current mm (sections are
+ * copied into each mm). During startup, this is the init_mm.
+ */
+static inline void section_update(unsigned long addr, pmdval_t mask,
+				  pmdval_t prot)
+{
+	struct mm_struct *mm;
+	pmd_t *pmd;
+
+	mm = current->active_mm;
+	pmd = pmd_offset(pud_offset(pgd_offset(mm, addr), addr), addr);
+
+#ifdef CONFIG_ARM_LPAE
+	pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot);
+#else
+	if (addr & SECTION_SIZE)
+		pmd[1] = __pmd((pmd_val(pmd[1]) & mask) | prot);
+	else
+		pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot);
+#endif
+	flush_pmd_entry(pmd);
+	local_flush_tlb_kernel_range(addr, addr + SECTION_SIZE);
+}
+
+/* Make sure extended page tables are in use. */
+static inline bool arch_has_strict_perms(void)
+{
+	unsigned int cr;
+
+	if (cpu_architecture() < CPU_ARCH_ARMv6)
+		return false;
+
+	cr = get_cr();
+	if (!(cr & CR_XP))
+		return false;
+
+	return true;
+}
+
+#define set_section_perms(perms, field)	{			\
+	size_t i;						\
+	unsigned long addr;					\
+								\
+	if (!arch_has_strict_perms())				\
+		return;						\
+								\
+	for (i = 0; i < ARRAY_SIZE(perms); i++) {		\
+		if (!IS_ALIGNED(perms[i].start, SECTION_SIZE) ||	\
+		    !IS_ALIGNED(perms[i].end, SECTION_SIZE)) {	\
+			pr_err("BUG: section %lx-%lx not aligned to %lx\n", \
+				perms[i].start, perms[i].end,	\
+				SECTION_SIZE);			\
+			continue;				\
+		}						\
+								\
+		for (addr = perms[i].start;			\
+		     addr < perms[i].end;			\
+		     addr += SECTION_SIZE)			\
+			section_update(addr, perms[i].mask,	\
+				       perms[i].field);		\
+	}							\
+}
+
+static inline void fix_kernmem_perms(void)
+{
+	set_section_perms(nx_perms, prot);
+}
+#else
+static inline void fix_kernmem_perms(void) { }
+#endif /* CONFIG_ARM_KERNMEM_PERMS */
+
 void free_initmem(void)
 {
 #ifdef CONFIG_HAVE_TCM
 	extern char __tcm_start, __tcm_end;
+#endif
+
+	fix_kernmem_perms();
 
+#ifdef CONFIG_HAVE_TCM
 	poison_init_mem(&__tcm_start, &__tcm_end - &__tcm_start);
 	free_reserved_area(&__tcm_start, &__tcm_end, -1, "TCM link");
 #endif
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 94eecb913f15..c9227bf59b45 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1368,13 +1368,24 @@ static void __init map_lowmem(void)
 		if (start >= end)
 			break;
 
-		if (end < kernel_x_start || start >= kernel_x_end) {
+		if (end < kernel_x_start) {
 			map.pfn = __phys_to_pfn(start);
 			map.virtual = __phys_to_virt(start);
 			map.length = end - start;
 			map.type = MT_MEMORY_RWX;
 
 			create_mapping(&map);
+		} else if (start >= kernel_x_end) {
+			map.pfn = __phys_to_pfn(start);
+			map.virtual = __phys_to_virt(start);
+			map.length = end - start;
+#ifdef CONFIG_ARM_KERNMEM_PERMS
+			map.type = MT_MEMORY_RW;
+#else
+			map.type = MT_MEMORY_RWX;
+#endif
+
+			create_mapping(&map);
 		} else {
 			/* This better cover the entire kernel */
 			if (start < kernel_x_start) {
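Two details in the patch are easy to trip over. First, set_section_perms()
has to be a macro rather than a function: it returns from its caller when
strict permissions are unavailable, and it selects a struct member ("field")
by name. Second, the pmd[0]/pmd[1] indexing in section_update() comes from
classic (non-LPAE) ARM folding two 1MiB hardware sections into each 2MiB
Linux pmd slot, so one address bit picks which hardware entry to rewrite.
A sketch of that selection, assuming non-LPAE and a 1MiB SECTION_SIZE (the
helper name is illustrative, not from the series):

/* Pick the hardware section entry within the 2MiB Linux pmd pair. */
static pmd_t *hw_section_entry(pmd_t *pmd, unsigned long addr)
{
	/* SECTION_SIZE == 1 << 20, so this tests bit 20 of the address. */
	return (addr & SECTION_SIZE) ? &pmd[1] : &pmd[0];
}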