Message ID | 1490884982-5066-1-git-send-email-hoeun.ryu@gmail.com (mailing list archive) |
---|---|
State | New, archived |
On Thu, Mar 30, 2017 at 7:39 AM, Hoeun Ryu <hoeun.ryu@gmail.com> wrote: > This patch might be a part of Kees Cook's rare_write infrastructure series > for [1] for arm64 architecture. > > This implementation is based on Mark Rutland's suggestions [2], which is > that a special userspace mm that maps only __start/end_rodata as RW permi- > ssion is prepared during early boot time (paging_init) and __arch_rare_- > write_begin() switches to the mm [2]. > > rare_write_mm address space is added for the special purpose and a page > global directory is also prepared for it. The mm remaps __start_rodata ~ > __end_rodata to rodata_rw_alias_start which starts from TASK_SIZE_64 / 4 > + kaslr_offset(). > > It passes LKDTM's rare write test. Great work! I think this will need some further changes, though, since it doesn't look to me like this would pass LKDTM's tests if it was built as a module. (This is missing from my ARM attempt too... I haven't figured out how to set the domain on the kernel modules...) > > [1] : http://www.openwall.com/lists/kernel-hardening/2017/02/27/5 > [2] : https://lkml.org/lkml/2017/3/29/704 > > Signed-off-by: Hoeun Ryu <hoeun.ryu@gmail.com> > --- > arch/arm64/Kconfig | 2 + > arch/arm64/include/asm/pgtable.h | 4 ++ > arch/arm64/mm/mmu.c | 101 +++++++++++++++++++++++++++++++++++++++ > 3 files changed, 107 insertions(+) > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > index f2b0b52..6e2c592 100644 > --- a/arch/arm64/Kconfig > +++ b/arch/arm64/Kconfig > @@ -102,6 +102,8 @@ config ARM64 > select HAVE_SYSCALL_TRACEPOINTS > select HAVE_KPROBES > select HAVE_KRETPROBES > + select HAVE_ARCH_RARE_WRITE > + select HAVE_ARCH_RARE_WRITE_MEMCPY > select IOMMU_DMA if IOMMU_SUPPORT > select IRQ_DOMAIN > select IRQ_FORCED_THREADING > diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h > index c213fdbd0..1514933 100644 > --- a/arch/arm64/include/asm/pgtable.h > +++ b/arch/arm64/include/asm/pgtable.h > @@ -741,6 +741,10 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, > #define kc_vaddr_to_offset(v) ((v) & ~VA_START) > #define kc_offset_to_vaddr(o) ((o) | VA_START) > > +unsigned long __arch_rare_write_begin(void); > +unsigned long __arch_rare_write_end(void); > +void __arch_rare_write_memcpy(void *dst, const void *src, __kernel_size_t len); > + > #endif /* !__ASSEMBLY__ */ > > #endif /* __ASM_PGTABLE_H */ > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c > index 91502e3..86b25c9 100644 > --- a/arch/arm64/mm/mmu.c > +++ b/arch/arm64/mm/mmu.c > @@ -570,6 +570,105 @@ static void __init map_kernel(pgd_t *pgd) > kasan_copy_shadow(pgd); > } > > +struct mm_struct rare_write_mm = { > + .mm_rb = RB_ROOT, > + .mm_users = ATOMIC_INIT(2), > + .mm_count = ATOMIC_INIT(1), > + .mmap_sem = __RWSEM_INITIALIZER(rare_write_mm.mmap_sem), > + .page_table_lock= __SPIN_LOCK_UNLOCKED(rare_write_mm.page_table_lock), > + .mmlist = LIST_HEAD_INIT(rare_write_mm.mmlist), > +}; > + > +#ifdef CONFIG_ARM64_PTDUMP_DEBUGFS > +#include <asm/ptdump.h> > + > +static struct ptdump_info rare_write_ptdump_info = { > + .mm = &rare_write_mm, > + .markers = (struct addr_marker[]){ > + { 0, "rare-write start" }, > + { TASK_SIZE_64, "rare-write end" } > + }, > + .base_addr = 0, > +}; > + > +static int __init ptdump_init(void) > +{ > + return ptdump_debugfs_register(&rare_write_ptdump_info, > + "rare_write_page_tables"); > +} > +device_initcall(ptdump_init); > + > +#endif > + > +__always_inline unsigned long __arch_rare_write_begin(void) > +{ > + struct mm_struct *mm = 
&rare_write_mm; > + > + preempt_disable(); > + > + __switch_mm(mm); Can you include a BUG_ON check here that rare_write_mm is not already active? > + > + if (system_uses_ttbr0_pan()) { > + update_saved_ttbr0(current, mm); > + cpu_switch_mm(mm->pgd, mm); > + } > + > + return 0; > +} > + > +__always_inline unsigned long __arch_rare_write_end(void) > +{ > + struct mm_struct *mm = current->active_mm; > + > + __switch_mm(mm); And same here (though the reverse test)? > + > + if (system_uses_ttbr0_pan()) { > + cpu_set_reserved_ttbr0(); > + if (mm != &init_mm) > + update_saved_ttbr0(current, mm); > + } > + > + preempt_enable_no_resched(); > + > + return 0; > +} > + > +static unsigned long rodata_rw_alias_start __ro_after_init = TASK_SIZE_64 / 4; > + > +__always_inline void __arch_rare_write_memcpy(void *dst, const void *src, > + __kernel_size_t len) > +{ > + unsigned long __dst = (unsigned long)dst; > + > + __dst -= (unsigned long)__start_rodata; > + __dst += rodata_rw_alias_start; > + > + memcpy((void *)__dst, src, len); > +} > + > +void __init rare_write_init(void) > +{ > + phys_addr_t pgd_phys = early_pgtable_alloc(); > + pgd_t *pgd = (pgd_t *)__phys_to_virt(pgd_phys); > + phys_addr_t pa_start = __pa_symbol(__start_rodata); > + unsigned long size = __end_rodata - __start_rodata; > + > + BUG_ON(!PAGE_ALIGNED(pa_start)); > + BUG_ON(!PAGE_ALIGNED(size)); > + > + rodata_rw_alias_start += kaslr_offset(); > + > + BUG_ON(rodata_rw_alias_start + size > TASK_SIZE_64); > + > + rare_write_mm.pgd = pgd; > + init_new_context(NULL, &rare_write_mm); > + > + __create_pgd_mapping(pgd, > + pa_start, rodata_rw_alias_start, size, > + __pgprot(pgprot_val(PAGE_KERNEL) | PTE_NG), > + early_pgtable_alloc, debug_pagealloc_enabled()); > +} > + > /* > * paging_init() sets up the page tables, initialises the zone memory > * maps and sets up the zero page. > @@ -603,6 +702,8 @@ void __init paging_init(void) > */ > memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE, > SWAPPER_DIR_SIZE - PAGE_SIZE); > + > + rare_write_init(); > } > > /* > -- > 2.7.4 > Thanks! -Kees
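One possible shape for the begin/end checks Kees asks for above, sketched against the helpers from the patch. The per-CPU flag and its name are illustrative assumptions, not part of the posted patch (the check could equally be derived from the live TTBR0 value); inlining attributes are omitted for brevity:

```c
#include <linux/percpu.h>

/* Illustrative only: catch nested or unbalanced begin/end pairs. */
static DEFINE_PER_CPU(bool, rare_write_active);

unsigned long __arch_rare_write_begin(void)
{
	struct mm_struct *mm = &rare_write_mm;

	preempt_disable();

	/* The rare-write mm must not already be live on this CPU. */
	BUG_ON(this_cpu_read(rare_write_active));
	this_cpu_write(rare_write_active, true);

	__switch_mm(mm);

	if (system_uses_ttbr0_pan()) {
		update_saved_ttbr0(current, mm);
		cpu_switch_mm(mm->pgd, mm);
	}

	return 0;
}

unsigned long __arch_rare_write_end(void)
{
	struct mm_struct *mm = current->active_mm;

	/* The reverse test: we must currently be inside a rare-write window. */
	BUG_ON(!this_cpu_read(rare_write_active));
	this_cpu_write(rare_write_active, false);

	__switch_mm(mm);

	if (system_uses_ttbr0_pan()) {
		cpu_set_reserved_ttbr0();
		if (mm != &init_mm)
			update_saved_ttbr0(current, mm);
	}

	preempt_enable_no_resched();

	return 0;
}
```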
On Thu, Mar 30, 2017 at 12:38:15PM -0700, Kees Cook wrote:
> Great work! I think this will need some further changes, though, since
> it doesn't look to me like this would pass LKDTM's tests if it was
> built as a module. (This is missing from my ARM attempt too... I
> haven't figured out how to set the domain on the kernel modules...)

You're not going to be able to do it very easily. The only way I can
think of achieving it would be to split the module area into one chunk
for text, one chunk for write-rare and one chunk for data.

I still think that using domains is a mistake for this - it's a good
solution for things that are contiguous and big (like userspace), but
not for small amounts of data (like module sections.)
On Thu, Mar 30, 2017 at 12:45 PM, Russell King - ARM Linux
<linux@armlinux.org.uk> wrote:
> On Thu, Mar 30, 2017 at 12:38:15PM -0700, Kees Cook wrote:
>> Great work! I think this will need some further changes, though, since
>> it doesn't look to me like this would pass LKDTM's tests if it was
>> built as a module. (This is missing from my ARM attempt too... I
>> haven't figured out how to set the domain on the kernel modules...)
>
> You're not going to be able to do it very easily. The only way I can
> think of achieving it would be to split the module area into one
> chunk for text, one chunk for write-rare and one chunk for data.

Well, my intention was to just make the entire module area
DOMAIN_WR_RARE. It's overly permissive in the sense that non-data
changes could be made, but this is already an improvement over either
not having this feature at all or x86's version which makes all of RAM
writable.

-Kees
On 30 March 2017 at 15:39, Hoeun Ryu <hoeun.ryu@gmail.com> wrote: > This patch might be a part of Kees Cook's rare_write infrastructure series > for [1] for arm64 architecture. > > This implementation is based on Mark Rutland's suggestions [2], which is > that a special userspace mm that maps only __start/end_rodata as RW permi- > ssion is prepared during early boot time (paging_init) and __arch_rare_- > write_begin() switches to the mm [2]. > > rare_write_mm address space is added for the special purpose and a page > global directory is also prepared for it. The mm remaps __start_rodata ~ > __end_rodata to rodata_rw_alias_start which starts from TASK_SIZE_64 / 4 > + kaslr_offset(). > > It passes LKDTM's rare write test. > > [1] : http://www.openwall.com/lists/kernel-hardening/2017/02/27/5 > [2] : https://lkml.org/lkml/2017/3/29/704 > > Signed-off-by: Hoeun Ryu <hoeun.ryu@gmail.com> > --- > arch/arm64/Kconfig | 2 + > arch/arm64/include/asm/pgtable.h | 4 ++ > arch/arm64/mm/mmu.c | 101 +++++++++++++++++++++++++++++++++++++++ > 3 files changed, 107 insertions(+) > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > index f2b0b52..6e2c592 100644 > --- a/arch/arm64/Kconfig > +++ b/arch/arm64/Kconfig > @@ -102,6 +102,8 @@ config ARM64 > select HAVE_SYSCALL_TRACEPOINTS > select HAVE_KPROBES > select HAVE_KRETPROBES > + select HAVE_ARCH_RARE_WRITE > + select HAVE_ARCH_RARE_WRITE_MEMCPY > select IOMMU_DMA if IOMMU_SUPPORT > select IRQ_DOMAIN > select IRQ_FORCED_THREADING > diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h > index c213fdbd0..1514933 100644 > --- a/arch/arm64/include/asm/pgtable.h > +++ b/arch/arm64/include/asm/pgtable.h > @@ -741,6 +741,10 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, > #define kc_vaddr_to_offset(v) ((v) & ~VA_START) > #define kc_offset_to_vaddr(o) ((o) | VA_START) > > +unsigned long __arch_rare_write_begin(void); > +unsigned long __arch_rare_write_end(void); > +void __arch_rare_write_memcpy(void *dst, const void *src, __kernel_size_t len); > + If these hook into a generic framework, shouldn't these already be declared in a generic header file? > #endif /* !__ASSEMBLY__ */ > > #endif /* __ASM_PGTABLE_H */ > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c > index 91502e3..86b25c9 100644 > --- a/arch/arm64/mm/mmu.c > +++ b/arch/arm64/mm/mmu.c > @@ -570,6 +570,105 @@ static void __init map_kernel(pgd_t *pgd) > kasan_copy_shadow(pgd); > } > > +struct mm_struct rare_write_mm = { static please > + .mm_rb = RB_ROOT, > + .mm_users = ATOMIC_INIT(2), > + .mm_count = ATOMIC_INIT(1), > + .mmap_sem = __RWSEM_INITIALIZER(rare_write_mm.mmap_sem), > + .page_table_lock= __SPIN_LOCK_UNLOCKED(rare_write_mm.page_table_lock), > + .mmlist = LIST_HEAD_INIT(rare_write_mm.mmlist), > +}; > + > +#ifdef CONFIG_ARM64_PTDUMP_DEBUGFS > +#include <asm/ptdump.h> > + > +static struct ptdump_info rare_write_ptdump_info = { > + .mm = &rare_write_mm, > + .markers = (struct addr_marker[]){ > + { 0, "rare-write start" }, > + { TASK_SIZE_64, "rare-write end" } > + }, > + .base_addr = 0, > +}; > + > +static int __init ptdump_init(void) > +{ > + return ptdump_debugfs_register(&rare_write_ptdump_info, > + "rare_write_page_tables"); > +} > +device_initcall(ptdump_init); > + > +#endif > + > +__always_inline unsigned long __arch_rare_write_begin(void) These functions are not called from the same compilation unit, so what do you think __always_inline buys you here? 
> +{ > + struct mm_struct *mm = &rare_write_mm; > + > + preempt_disable(); > + > + __switch_mm(mm); > + > + if (system_uses_ttbr0_pan()) { > + update_saved_ttbr0(current, mm); > + cpu_switch_mm(mm->pgd, mm); > + } > + > + return 0; > +} > + > +__always_inline unsigned long __arch_rare_write_end(void) > +{ > + struct mm_struct *mm = current->active_mm; > + > + __switch_mm(mm); > + > + if (system_uses_ttbr0_pan()) { > + cpu_set_reserved_ttbr0(); > + if (mm != &init_mm) > + update_saved_ttbr0(current, mm); > + } > + > + preempt_enable_no_resched(); > + > + return 0; > +} > + > +static unsigned long rodata_rw_alias_start __ro_after_init = TASK_SIZE_64 / 4; > + > +__always_inline void __arch_rare_write_memcpy(void *dst, const void *src, > + __kernel_size_t len) > +{ > + unsigned long __dst = (unsigned long)dst; > + > + __dst -= (unsigned long)__start_rodata; > + __dst += rodata_rw_alias_start; > + > + memcpy((void *)__dst, src, len); > +} > + > +void __init rare_write_init(void) static please > +{ > + phys_addr_t pgd_phys = early_pgtable_alloc(); > + pgd_t *pgd = (pgd_t *)__phys_to_virt(pgd_phys); > + phys_addr_t pa_start = __pa_symbol(__start_rodata); > + unsigned long size = __end_rodata - __start_rodata; > + > + BUG_ON(!PAGE_ALIGNED(pa_start)); > + BUG_ON(!PAGE_ALIGNED(size)); > + > + rodata_rw_alias_start += kaslr_offset(); > + This places the rodata alias at a fixed offset from the original, right, even under KASLR? Given that they are completely independent, I think you can use a random offset here rather than the same offset we use for vmlinux. > + BUG_ON(rodata_rw_alias_start + size > TASK_SIZE_64); > + > + rare_write_mm.pgd = pgd; For correctness, you should add a call to mm_init_cpumask() here, although it doesn't matter in practice on arm64. > + init_new_context(NULL, &rare_write_mm); > + > + __create_pgd_mapping(pgd, > + pa_start, rodata_rw_alias_start, size, > + __pgprot(pgprot_val(PAGE_KERNEL) | PTE_NG), > + early_pgtable_alloc, debug_pagealloc_enabled()); This needs to be updated after the changes that are now queued in the arm64 tree for v4.12 BTW you can allow page/cont mappings in all cases, no need for the debug_pagealloc_enabled() check > +} > + > /* > * paging_init() sets up the page tables, initialises the zone memory > * maps and sets up the zero page. > @@ -603,6 +702,8 @@ void __init paging_init(void) > */ > memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE, > SWAPPER_DIR_SIZE - PAGE_SIZE); > + > + rare_write_init(); > } > > /* > -- > 2.7.4 >
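On the last two review points above (the missing mm_init_cpumask() call and the queued __create_pgd_mapping() rework), a sketch of how rare_write_init() might look after the rebase. The trailing flags argument reflects my understanding of the interface queued for v4.12 (0 meaning no NO_BLOCK_MAPPINGS/NO_CONT_MAPPINGS restriction), so treat the exact signature as an assumption:

```c
static void __init rare_write_init(void)
{
	phys_addr_t pgd_phys = early_pgtable_alloc();
	pgd_t *pgd = (pgd_t *)__phys_to_virt(pgd_phys);
	phys_addr_t pa_start = __pa_symbol(__start_rodata);
	unsigned long size = __end_rodata - __start_rodata;

	BUG_ON(!PAGE_ALIGNED(pa_start));
	BUG_ON(!PAGE_ALIGNED(size));

	rodata_rw_alias_start += kaslr_offset();
	BUG_ON(rodata_rw_alias_start + size > TASK_SIZE_64);

	rare_write_mm.pgd = pgd;
	mm_init_cpumask(&rare_write_mm);	/* per the review above */
	init_new_context(NULL, &rare_write_mm);

	/* No restriction flags: block/cont mappings are fine for the alias. */
	__create_pgd_mapping(pgd, pa_start, rodata_rw_alias_start, size,
			     __pgprot(pgprot_val(PAGE_KERNEL) | PTE_NG),
			     early_pgtable_alloc, 0);
}
```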
> On Mar 31, 2017, at 4:38 AM, Kees Cook <keescook@chromium.org> wrote: > >> On Thu, Mar 30, 2017 at 7:39 AM, Hoeun Ryu <hoeun.ryu@gmail.com> wrote: >> This patch might be a part of Kees Cook's rare_write infrastructure series >> for [1] for arm64 architecture. >> >> This implementation is based on Mark Rutland's suggestions [2], which is >> that a special userspace mm that maps only __start/end_rodata as RW permi- >> ssion is prepared during early boot time (paging_init) and __arch_rare_- >> write_begin() switches to the mm [2]. >> >> rare_write_mm address space is added for the special purpose and a page >> global directory is also prepared for it. The mm remaps __start_rodata ~ >> __end_rodata to rodata_rw_alias_start which starts from TASK_SIZE_64 / 4 >> + kaslr_offset(). >> >> It passes LKDTM's rare write test. > > Great work! I think this will need some further changes, though, since > it doesn't look to me like this would pass LKDTM's tests if it was > built as a module. (This is missing from my ARM attempt too... I > haven't figured out how to set the domain on the kernel modules...) > ah.. I understand.. so we need to additional rw mappings for modules, right ? >> >> [1] : http://www.openwall.com/lists/kernel-hardening/2017/02/27/5 >> [2] : https://lkml.org/lkml/2017/3/29/704 >> >> Signed-off-by: Hoeun Ryu <hoeun.ryu@gmail.com> >> --- >> arch/arm64/Kconfig | 2 + >> arch/arm64/include/asm/pgtable.h | 4 ++ >> arch/arm64/mm/mmu.c | 101 +++++++++++++++++++++++++++++++++++++++ >> 3 files changed, 107 insertions(+) >> >> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig >> index f2b0b52..6e2c592 100644 >> --- a/arch/arm64/Kconfig >> +++ b/arch/arm64/Kconfig >> @@ -102,6 +102,8 @@ config ARM64 >> select HAVE_SYSCALL_TRACEPOINTS >> select HAVE_KPROBES >> select HAVE_KRETPROBES >> + select HAVE_ARCH_RARE_WRITE >> + select HAVE_ARCH_RARE_WRITE_MEMCPY >> select IOMMU_DMA if IOMMU_SUPPORT >> select IRQ_DOMAIN >> select IRQ_FORCED_THREADING >> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h >> index c213fdbd0..1514933 100644 >> --- a/arch/arm64/include/asm/pgtable.h >> +++ b/arch/arm64/include/asm/pgtable.h >> @@ -741,6 +741,10 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, >> #define kc_vaddr_to_offset(v) ((v) & ~VA_START) >> #define kc_offset_to_vaddr(o) ((o) | VA_START) >> >> +unsigned long __arch_rare_write_begin(void); >> +unsigned long __arch_rare_write_end(void); >> +void __arch_rare_write_memcpy(void *dst, const void *src, __kernel_size_t len); >> + >> #endif /* !__ASSEMBLY__ */ >> >> #endif /* __ASM_PGTABLE_H */ >> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c >> index 91502e3..86b25c9 100644 >> --- a/arch/arm64/mm/mmu.c >> +++ b/arch/arm64/mm/mmu.c >> @@ -570,6 +570,105 @@ static void __init map_kernel(pgd_t *pgd) >> kasan_copy_shadow(pgd); >> } >> >> +struct mm_struct rare_write_mm = { >> + .mm_rb = RB_ROOT, >> + .mm_users = ATOMIC_INIT(2), >> + .mm_count = ATOMIC_INIT(1), >> + .mmap_sem = __RWSEM_INITIALIZER(rare_write_mm.mmap_sem), >> + .page_table_lock= __SPIN_LOCK_UNLOCKED(rare_write_mm.page_table_lock), >> + .mmlist = LIST_HEAD_INIT(rare_write_mm.mmlist), >> +}; >> + >> +#ifdef CONFIG_ARM64_PTDUMP_DEBUGFS >> +#include <asm/ptdump.h> >> + >> +static struct ptdump_info rare_write_ptdump_info = { >> + .mm = &rare_write_mm, >> + .markers = (struct addr_marker[]){ >> + { 0, "rare-write start" }, >> + { TASK_SIZE_64, "rare-write end" } >> + }, >> + .base_addr = 0, >> +}; >> + >> +static int __init ptdump_init(void) >> 
+{ >> + return ptdump_debugfs_register(&rare_write_ptdump_info, >> + "rare_write_page_tables"); >> +} >> +device_initcall(ptdump_init); >> + >> +#endif >> + >> +__always_inline unsigned long __arch_rare_write_begin(void) >> +{ >> + struct mm_struct *mm = &rare_write_mm; >> + >> + preempt_disable(); >> + >> + __switch_mm(mm); > > Can you include a BUG_ON check here that rare_write_mm is not already active? > OK, I will find the way to do so. >> + >> + if (system_uses_ttbr0_pan()) { >> + update_saved_ttbr0(current, mm); >> + cpu_switch_mm(mm->pgd, mm); >> + } >> + >> + return 0; >> +} >> + >> +__always_inline unsigned long __arch_rare_write_end(void) >> +{ >> + struct mm_struct *mm = current->active_mm; >> + >> + __switch_mm(mm); > > And same here (though the reverse test)? > >> + >> + if (system_uses_ttbr0_pan()) { >> + cpu_set_reserved_ttbr0(); >> + if (mm != &init_mm) >> + update_saved_ttbr0(current, mm); >> + } >> + >> + preempt_enable_no_resched(); >> + >> + return 0; >> +} >> + >> +static unsigned long rodata_rw_alias_start __ro_after_init = TASK_SIZE_64 / 4; >> + >> +__always_inline void __arch_rare_write_memcpy(void *dst, const void *src, >> + __kernel_size_t len) >> +{ >> + unsigned long __dst = (unsigned long)dst; >> + >> + __dst -= (unsigned long)__start_rodata; >> + __dst += rodata_rw_alias_start; >> + >> + memcpy((void *)__dst, src, len); >> +} >> + >> +void __init rare_write_init(void) >> +{ >> + phys_addr_t pgd_phys = early_pgtable_alloc(); >> + pgd_t *pgd = (pgd_t *)__phys_to_virt(pgd_phys); >> + phys_addr_t pa_start = __pa_symbol(__start_rodata); >> + unsigned long size = __end_rodata - __start_rodata; >> + >> + BUG_ON(!PAGE_ALIGNED(pa_start)); >> + BUG_ON(!PAGE_ALIGNED(size)); >> + >> + rodata_rw_alias_start += kaslr_offset(); >> + >> + BUG_ON(rodata_rw_alias_start + size > TASK_SIZE_64); >> + >> + rare_write_mm.pgd = pgd; >> + init_new_context(NULL, &rare_write_mm); >> + >> + __create_pgd_mapping(pgd, >> + pa_start, rodata_rw_alias_start, size, >> + __pgprot(pgprot_val(PAGE_KERNEL) | PTE_NG), >> + early_pgtable_alloc, debug_pagealloc_enabled()); >> +} >> + >> /* >> * paging_init() sets up the page tables, initialises the zone memory >> * maps and sets up the zero page. >> @@ -603,6 +702,8 @@ void __init paging_init(void) >> */ >> memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE, >> SWAPPER_DIR_SIZE - PAGE_SIZE); >> + >> + rare_write_init(); >> } >> >> /* >> -- >> 2.7.4 >> > Thank you for the review ! > Thanks! > > -Kees > > -- > Kees Cook > Pixel Security
>> On Mar 31, 2017, at 6:25 PM, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote: >> >> On 30 March 2017 at 15:39, Hoeun Ryu <hoeun.ryu@gmail.com> wrote: >> This patch might be a part of Kees Cook's rare_write infrastructure series >> for [1] for arm64 architecture. >> >> This implementation is based on Mark Rutland's suggestions [2], which is >> that a special userspace mm that maps only __start/end_rodata as RW permi- >> ssion is prepared during early boot time (paging_init) and __arch_rare_- >> write_begin() switches to the mm [2]. >> >> rare_write_mm address space is added for the special purpose and a page >> global directory is also prepared for it. The mm remaps __start_rodata ~ >> __end_rodata to rodata_rw_alias_start which starts from TASK_SIZE_64 / 4 >> + kaslr_offset(). >> >> It passes LKDTM's rare write test. >> >> [1] : http://www.openwall.com/lists/kernel-hardening/2017/02/27/5 >> [2] : https://lkml.org/lkml/2017/3/29/704 >> >> Signed-off-by: Hoeun Ryu <hoeun.ryu@gmail.com> >> --- >> arch/arm64/Kconfig | 2 + >> arch/arm64/include/asm/pgtable.h | 4 ++ >> arch/arm64/mm/mmu.c | 101 +++++++++++++++++++++++++++++++++++++++ >> 3 files changed, 107 insertions(+) >> >> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig >> index f2b0b52..6e2c592 100644 >> --- a/arch/arm64/Kconfig >> +++ b/arch/arm64/Kconfig >> @@ -102,6 +102,8 @@ config ARM64 >> select HAVE_SYSCALL_TRACEPOINTS >> select HAVE_KPROBES >> select HAVE_KRETPROBES >> + select HAVE_ARCH_RARE_WRITE >> + select HAVE_ARCH_RARE_WRITE_MEMCPY >> select IOMMU_DMA if IOMMU_SUPPORT >> select IRQ_DOMAIN >> select IRQ_FORCED_THREADING >> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h >> index c213fdbd0..1514933 100644 >> --- a/arch/arm64/include/asm/pgtable.h >> +++ b/arch/arm64/include/asm/pgtable.h >> @@ -741,6 +741,10 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, >> #define kc_vaddr_to_offset(v) ((v) & ~VA_START) >> #define kc_offset_to_vaddr(o) ((o) | VA_START) >> >> +unsigned long __arch_rare_write_begin(void); >> +unsigned long __arch_rare_write_end(void); >> +void __arch_rare_write_memcpy(void *dst, const void *src, __kernel_size_t len); >> + > > If these hook into a generic framework, shouldn't these already be > declared in a generic header file? the generic header file doesn't declare arch-specific version of api. >> #endif /* !__ASSEMBLY__ */ >> >> #endif /* __ASM_PGTABLE_H */ >> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c >> index 91502e3..86b25c9 100644 >> --- a/arch/arm64/mm/mmu.c >> +++ b/arch/arm64/mm/mmu.c >> @@ -570,6 +570,105 @@ static void __init map_kernel(pgd_t *pgd) >> kasan_copy_shadow(pgd); >> } >> >> +struct mm_struct rare_write_mm = { > > static please OK, add static. 
> >> + .mm_rb = RB_ROOT, >> + .mm_users = ATOMIC_INIT(2), >> + .mm_count = ATOMIC_INIT(1), >> + .mmap_sem = __RWSEM_INITIALIZER(rare_write_mm.mmap_sem), >> + .page_table_lock= __SPIN_LOCK_UNLOCKED(rare_write_mm.page_table_lock), >> + .mmlist = LIST_HEAD_INIT(rare_write_mm.mmlist), >> +}; >> + >> +#ifdef CONFIG_ARM64_PTDUMP_DEBUGFS >> +#include <asm/ptdump.h> >> + >> +static struct ptdump_info rare_write_ptdump_info = { >> + .mm = &rare_write_mm, >> + .markers = (struct addr_marker[]){ >> + { 0, "rare-write start" }, >> + { TASK_SIZE_64, "rare-write end" } >> + }, >> + .base_addr = 0, >> +}; >> + >> +static int __init ptdump_init(void) >> +{ >> + return ptdump_debugfs_register(&rare_write_ptdump_info, >> + "rare_write_page_tables"); >> +} >> +device_initcall(ptdump_init); >> + >> +#endif >> + >> +__always_inline unsigned long __arch_rare_write_begin(void) > > These functions are not called from the same compilation unit, so what > do you think __always_inline buys you here? The first patch of Kee's series ([PATCH 01/11] Introduce rare_write() infrastructure) [1] says some requirements and one of these is the arch-specific helpers should be inline to avoid making them ROP targets. So I'd make sure the helpers are inline. [1] : http://www.openwall.com/lists/kernel-hardening/2017/03/29/16 > >> +{ >> + struct mm_struct *mm = &rare_write_mm; >> + >> + preempt_disable(); >> + >> + __switch_mm(mm); >> + >> + if (system_uses_ttbr0_pan()) { >> + update_saved_ttbr0(current, mm); >> + cpu_switch_mm(mm->pgd, mm); >> + } >> + >> + return 0; >> +} >> + >> +__always_inline unsigned long __arch_rare_write_end(void) >> +{ >> + struct mm_struct *mm = current->active_mm; >> + >> + __switch_mm(mm); >> + >> + if (system_uses_ttbr0_pan()) { >> + cpu_set_reserved_ttbr0(); >> + if (mm != &init_mm) >> + update_saved_ttbr0(current, mm); >> + } >> + >> + preempt_enable_no_resched(); >> + >> + return 0; >> +} >> + >> +static unsigned long rodata_rw_alias_start __ro_after_init = TASK_SIZE_64 / 4; >> + >> +__always_inline void __arch_rare_write_memcpy(void *dst, const void *src, >> + __kernel_size_t len) >> +{ >> + unsigned long __dst = (unsigned long)dst; >> + >> + __dst -= (unsigned long)__start_rodata; >> + __dst += rodata_rw_alias_start; >> + >> + memcpy((void *)__dst, src, len); >> +} >> + >> +void __init rare_write_init(void) > > static please OK, add static. > >> +{ >> + phys_addr_t pgd_phys = early_pgtable_alloc(); >> + pgd_t *pgd = (pgd_t *)__phys_to_virt(pgd_phys); >> + phys_addr_t pa_start = __pa_symbol(__start_rodata); >> + unsigned long size = __end_rodata - __start_rodata; >> + >> + BUG_ON(!PAGE_ALIGNED(pa_start)); >> + BUG_ON(!PAGE_ALIGNED(size)); >> + >> + rodata_rw_alias_start += kaslr_offset(); >> + > > This places the rodata alias at a fixed offset from the original, > right, even under KASLR? Given that they are completely independent, I > think you can use a random offset here rather than the same offset we > use for vmlinux. What I did here is just to randomize the base of rw alias mapping when kaslr is enabled. You're right it's completely independent though from kaslr, I would like to add randomization for rw alias base address by using simple solution in early boot stage. I' ll think again to achieve randomization or just remove it. > > >> + BUG_ON(rodata_rw_alias_start + size > TASK_SIZE_64); >> + >> + rare_write_mm.pgd = pgd; > > For correctness, you should add a call to mm_init_cpumask() here, > although it doesn't matter in practice on arm64. OK, I'll fix this. 
> >> + init_new_context(NULL, &rare_write_mm); >> + >> + __create_pgd_mapping(pgd, >> + pa_start, rodata_rw_alias_start, size, >> + __pgprot(pgprot_val(PAGE_KERNEL) | PTE_NG), >> + early_pgtable_alloc, debug_pagealloc_enabled()); > > This needs to be updated after the changes that are now queued in the > arm64 tree for v4.12 > > BTW you can allow page/cont mappings in all cases, no need for the > debug_pagealloc_enabled() check I will rebase this onto the version you mentioned later. >> +} >> + >> /* >> * paging_init() sets up the page tables, initialises the zone memory >> * maps and sets up the zero page. >> @@ -603,6 +702,8 @@ void __init paging_init(void) >> */ >> memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE, >> SWAPPER_DIR_SIZE - PAGE_SIZE); >> + >> + rare_write_init(); >> } >> >> /* >> -- >> 2.7.4 Thank you very much for the review
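On the randomization point discussed above, a minimal sketch of decoupling the alias base from kaslr_offset(). The early_alias_seed() helper is hypothetical, standing in for whatever early entropy source ends up being used (for example the bootloader-provided KASLR seed); it is not an existing kernel API:

```c
/*
 * Hypothetical sketch only: choose a page-aligned RW-alias base that is
 * independent of the vmlinux KASLR offset. early_alias_seed() is a
 * placeholder, not an existing interface.
 */
static unsigned long __init randomize_rw_alias_base(unsigned long size)
{
	unsigned long base = TASK_SIZE_64 / 4;
	unsigned long range = TASK_SIZE_64 / 2;	/* stay well below TASK_SIZE_64 */

	base += (early_alias_seed() % (range - size)) & PAGE_MASK;

	return base;
}
```

rare_write_init() would then assign rodata_rw_alias_start from such a helper instead of adding kaslr_offset() to the fixed TASK_SIZE_64 / 4 base.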
On 3 April 2017 at 05:17, Ho-Eun Ryu <hoeun.ryu@gmail.com> wrote: > >> On 31 Mar 2017, at 6:25 PM, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote: >> >> On 30 March 2017 at 15:39, Hoeun Ryu <hoeun.ryu@gmail.com> wrote: >>> This patch might be a part of Kees Cook's rare_write infrastructure series >>> for [1] for arm64 architecture. >>> >>> This implementation is based on Mark Rutland's suggestions [2], which is >>> that a special userspace mm that maps only __start/end_rodata as RW permi- >>> ssion is prepared during early boot time (paging_init) and __arch_rare_- >>> write_begin() switches to the mm [2]. >>> >>> rare_write_mm address space is added for the special purpose and a page >>> global directory is also prepared for it. The mm remaps __start_rodata ~ >>> __end_rodata to rodata_rw_alias_start which starts from TASK_SIZE_64 / 4 >>> + kaslr_offset(). >>> >>> It passes LKDTM's rare write test. >>> >>> [1] : http://www.openwall.com/lists/kernel-hardening/2017/02/27/5 >>> [2] : https://lkml.org/lkml/2017/3/29/704 >>> >>> Signed-off-by: Hoeun Ryu <hoeun.ryu@gmail.com> >>> --- >>> arch/arm64/Kconfig | 2 + >>> arch/arm64/include/asm/pgtable.h | 4 ++ >>> arch/arm64/mm/mmu.c | 101 +++++++++++++++++++++++++++++++++++++++ >>> 3 files changed, 107 insertions(+) >>> >>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig >>> index f2b0b52..6e2c592 100644 >>> --- a/arch/arm64/Kconfig >>> +++ b/arch/arm64/Kconfig >>> @@ -102,6 +102,8 @@ config ARM64 >>> select HAVE_SYSCALL_TRACEPOINTS >>> select HAVE_KPROBES >>> select HAVE_KRETPROBES >>> + select HAVE_ARCH_RARE_WRITE >>> + select HAVE_ARCH_RARE_WRITE_MEMCPY >>> select IOMMU_DMA if IOMMU_SUPPORT >>> select IRQ_DOMAIN >>> select IRQ_FORCED_THREADING >>> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h >>> index c213fdbd0..1514933 100644 >>> --- a/arch/arm64/include/asm/pgtable.h >>> +++ b/arch/arm64/include/asm/pgtable.h >>> @@ -741,6 +741,10 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, >>> #define kc_vaddr_to_offset(v) ((v) & ~VA_START) >>> #define kc_offset_to_vaddr(o) ((o) | VA_START) >>> >>> +unsigned long __arch_rare_write_begin(void); >>> +unsigned long __arch_rare_write_end(void); >>> +void __arch_rare_write_memcpy(void *dst, const void *src, __kernel_size_t len); >>> + >> >> If these hook into a generic framework, shouldn't these already be >> declared in a generic header file? > > the generic header file doesn't declare arch-specific version of api. > >> >>> #endif /* !__ASSEMBLY__ */ >>> >>> #endif /* __ASM_PGTABLE_H */ >>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c >>> index 91502e3..86b25c9 100644 >>> --- a/arch/arm64/mm/mmu.c >>> +++ b/arch/arm64/mm/mmu.c >>> @@ -570,6 +570,105 @@ static void __init map_kernel(pgd_t *pgd) >>> kasan_copy_shadow(pgd); >>> } >>> >>> +struct mm_struct rare_write_mm = { >> >> static please > > OK, add static. 
> >> >>> + .mm_rb = RB_ROOT, >>> + .mm_users = ATOMIC_INIT(2), >>> + .mm_count = ATOMIC_INIT(1), >>> + .mmap_sem = __RWSEM_INITIALIZER(rare_write_mm.mmap_sem), >>> + .page_table_lock= __SPIN_LOCK_UNLOCKED(rare_write_mm.page_table_lock), >>> + .mmlist = LIST_HEAD_INIT(rare_write_mm.mmlist), >>> +}; >>> + >>> +#ifdef CONFIG_ARM64_PTDUMP_DEBUGFS >>> +#include <asm/ptdump.h> >>> + >>> +static struct ptdump_info rare_write_ptdump_info = { >>> + .mm = &rare_write_mm, >>> + .markers = (struct addr_marker[]){ >>> + { 0, "rare-write start" }, >>> + { TASK_SIZE_64, "rare-write end" } >>> + }, >>> + .base_addr = 0, >>> +}; >>> + >>> +static int __init ptdump_init(void) >>> +{ >>> + return ptdump_debugfs_register(&rare_write_ptdump_info, >>> + "rare_write_page_tables"); >>> +} >>> +device_initcall(ptdump_init); >>> + >>> +#endif >>> + >>> +__always_inline unsigned long __arch_rare_write_begin(void) >> >> These functions are not called from the same compilation unit, so what >> do you think __always_inline buys you here? > > The first patch of Kee's series ([PATCH 01/11] Introduce rare_write() infrastructure) [1] says some requirements and one of these is the arch-specific helpers should be inline to avoid making them ROP targets. > > So I'd make sure the helpers are inline. > > [1] : http://www.openwall.com/lists/kernel-hardening/2017/03/29/16 > OK, that explains a number of issues with this patch. Please understand that marking a function inline has *no* effect whatsoever if the C definition is not visible to the caller. This is why the C declarations in the header file are not made generic: these functions are supposed to be *defined* as static inline in header files. So please move these implementations into the header files, and mark them static inline. This means the rare_write_mm structure should *not* be made static either. >> >>> +{ >>> + struct mm_struct *mm = &rare_write_mm; >>> + >>> + preempt_disable(); >>> + >>> + __switch_mm(mm); >>> + >>> + if (system_uses_ttbr0_pan()) { >>> + update_saved_ttbr0(current, mm); >>> + cpu_switch_mm(mm->pgd, mm); >>> + } >>> + >>> + return 0; >>> +} >>> + >>> +__always_inline unsigned long __arch_rare_write_end(void) >>> +{ >>> + struct mm_struct *mm = current->active_mm; >>> + >>> + __switch_mm(mm); >>> + >>> + if (system_uses_ttbr0_pan()) { >>> + cpu_set_reserved_ttbr0(); >>> + if (mm != &init_mm) >>> + update_saved_ttbr0(current, mm); >>> + } >>> + >>> + preempt_enable_no_resched(); >>> + >>> + return 0; >>> +} >>> + >>> +static unsigned long rodata_rw_alias_start __ro_after_init = TASK_SIZE_64 / 4; >>> + >>> +__always_inline void __arch_rare_write_memcpy(void *dst, const void *src, >>> + __kernel_size_t len) >>> +{ >>> + unsigned long __dst = (unsigned long)dst; >>> + >>> + __dst -= (unsigned long)__start_rodata; >>> + __dst += rodata_rw_alias_start; >>> + >>> + memcpy((void *)__dst, src, len); >>> +} >>> + >>> +void __init rare_write_init(void) >> >> static please > > OK, add static. > >> >>> +{ >>> + phys_addr_t pgd_phys = early_pgtable_alloc(); >>> + pgd_t *pgd = (pgd_t *)__phys_to_virt(pgd_phys); >>> + phys_addr_t pa_start = __pa_symbol(__start_rodata); >>> + unsigned long size = __end_rodata - __start_rodata; >>> + >>> + BUG_ON(!PAGE_ALIGNED(pa_start)); >>> + BUG_ON(!PAGE_ALIGNED(size)); >>> + >>> + rodata_rw_alias_start += kaslr_offset(); >>> + >> >> This places the rodata alias at a fixed offset from the original, >> right, even under KASLR? 
Given that they are completely independent, I >> think you can use a random offset here rather than the same offset we >> use for vmlinux. > > What I did here is just to randomize the base of rw alias mapping when kaslr is enabled. > You're right it's completely independent from kaslr though, I would like to add randomization for rw alias base address by using simple solution in early boot stage. > > I' ll think again to achieve randomization or just remove it. > >> >> >>> + BUG_ON(rodata_rw_alias_start + size > TASK_SIZE_64); >>> + >>> + rare_write_mm.pgd = pgd; >> >> For correctness, you should add a call to mm_init_cpumask() here, >> although it doesn't matter in practice on arm64. > > OK, I'll fix this. > >> >>> + init_new_context(NULL, &rare_write_mm); >>> + >>> + __create_pgd_mapping(pgd, >>> + pa_start, rodata_rw_alias_start, size, >>> + __pgprot(pgprot_val(PAGE_KERNEL) | PTE_NG), >>> + early_pgtable_alloc, debug_pagealloc_enabled()); >> >> This needs to be updated after the changes that are now queued in the >> arm64 tree for v4.12 >> >> BTW you can allow page/cont mappings in all cases, no need for the >> debug_pagealloc_enabled() check > > I will rebase this onto the version you mentioned later. > >> >>> +} >>> + >>> /* >>> * paging_init() sets up the page tables, initialises the zone memory >>> * maps and sets up the zero page. >>> @@ -603,6 +702,8 @@ void __init paging_init(void) >>> */ >>> memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE, >>> SWAPPER_DIR_SIZE - PAGE_SIZE); >>> + >>> + rare_write_init(); >>> } >>> >>> /* >>> -- >>> 2.7.4 >>> > > Thank you very much for the review >
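Concretely, the rework Ard describes might end up looking roughly like the sketch below: the mm object stays in mmu.c and is not made static, while the helpers become static __always_inline definitions in a header visible to every caller. The choice of header is only carried over from the declarations in the patch; in practice it has to be somewhere that can see __switch_mm() and the TTBR0-PAN helpers:

```c
/* arch/arm64/mm/mmu.c: the definition stays out of line and non-static,
 * so the header-inlined helpers can reference it from any caller. */
struct mm_struct rare_write_mm = {
	.mm_rb		= RB_ROOT,
	.mm_users	= ATOMIC_INIT(2),
	.mm_count	= ATOMIC_INIT(1),
	.mmap_sem	= __RWSEM_INITIALIZER(rare_write_mm.mmap_sem),
	.page_table_lock = __SPIN_LOCK_UNLOCKED(rare_write_mm.page_table_lock),
	.mmlist		= LIST_HEAD_INIT(rare_write_mm.mmlist),
};

/* header (e.g. asm/pgtable.h, or a more suitable one): the definitions
 * are visible to callers, so the inlining actually takes effect. */
extern struct mm_struct rare_write_mm;

static __always_inline unsigned long __arch_rare_write_begin(void)
{
	struct mm_struct *mm = &rare_write_mm;

	preempt_disable();
	__switch_mm(mm);

	if (system_uses_ttbr0_pan()) {
		update_saved_ttbr0(current, mm);
		cpu_switch_mm(mm->pgd, mm);
	}

	return 0;
}

static __always_inline unsigned long __arch_rare_write_end(void)
{
	struct mm_struct *mm = current->active_mm;

	__switch_mm(mm);

	if (system_uses_ttbr0_pan()) {
		cpu_set_reserved_ttbr0();
		if (mm != &init_mm)
			update_saved_ttbr0(current, mm);
	}

	preempt_enable_no_resched();

	return 0;
}
```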
> On 3 Apr 2017, at 4:11 PM, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote: > > On 3 April 2017 at 05:17, Ho-Eun Ryu <hoeun.ryu@gmail.com> wrote: >> >>> On 31 Mar 2017, at 6:25 PM, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote: >>> >>> On 30 March 2017 at 15:39, Hoeun Ryu <hoeun.ryu@gmail.com> wrote: >>>> This patch might be a part of Kees Cook's rare_write infrastructure series >>>> for [1] for arm64 architecture. >>>> >>>> This implementation is based on Mark Rutland's suggestions [2], which is >>>> that a special userspace mm that maps only __start/end_rodata as RW permi- >>>> ssion is prepared during early boot time (paging_init) and __arch_rare_- >>>> write_begin() switches to the mm [2]. >>>> >>>> rare_write_mm address space is added for the special purpose and a page >>>> global directory is also prepared for it. The mm remaps __start_rodata ~ >>>> __end_rodata to rodata_rw_alias_start which starts from TASK_SIZE_64 / 4 >>>> + kaslr_offset(). >>>> >>>> It passes LKDTM's rare write test. >>>> >>>> [1] : http://www.openwall.com/lists/kernel-hardening/2017/02/27/5 >>>> [2] : https://lkml.org/lkml/2017/3/29/704 >>>> >>>> Signed-off-by: Hoeun Ryu <hoeun.ryu@gmail.com> >>>> --- >>>> arch/arm64/Kconfig | 2 + >>>> arch/arm64/include/asm/pgtable.h | 4 ++ >>>> arch/arm64/mm/mmu.c | 101 +++++++++++++++++++++++++++++++++++++++ >>>> 3 files changed, 107 insertions(+) >>>> >>>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig >>>> index f2b0b52..6e2c592 100644 >>>> --- a/arch/arm64/Kconfig >>>> +++ b/arch/arm64/Kconfig >>>> @@ -102,6 +102,8 @@ config ARM64 >>>> select HAVE_SYSCALL_TRACEPOINTS >>>> select HAVE_KPROBES >>>> select HAVE_KRETPROBES >>>> + select HAVE_ARCH_RARE_WRITE >>>> + select HAVE_ARCH_RARE_WRITE_MEMCPY >>>> select IOMMU_DMA if IOMMU_SUPPORT >>>> select IRQ_DOMAIN >>>> select IRQ_FORCED_THREADING >>>> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h >>>> index c213fdbd0..1514933 100644 >>>> --- a/arch/arm64/include/asm/pgtable.h >>>> +++ b/arch/arm64/include/asm/pgtable.h >>>> @@ -741,6 +741,10 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, >>>> #define kc_vaddr_to_offset(v) ((v) & ~VA_START) >>>> #define kc_offset_to_vaddr(o) ((o) | VA_START) >>>> >>>> +unsigned long __arch_rare_write_begin(void); >>>> +unsigned long __arch_rare_write_end(void); >>>> +void __arch_rare_write_memcpy(void *dst, const void *src, __kernel_size_t len); >>>> + >>> >>> If these hook into a generic framework, shouldn't these already be >>> declared in a generic header file? >> >> the generic header file doesn't declare arch-specific version of api. >> >>> >>>> #endif /* !__ASSEMBLY__ */ >>>> >>>> #endif /* __ASM_PGTABLE_H */ >>>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c >>>> index 91502e3..86b25c9 100644 >>>> --- a/arch/arm64/mm/mmu.c >>>> +++ b/arch/arm64/mm/mmu.c >>>> @@ -570,6 +570,105 @@ static void __init map_kernel(pgd_t *pgd) >>>> kasan_copy_shadow(pgd); >>>> } >>>> >>>> +struct mm_struct rare_write_mm = { >>> >>> static please >> >> OK, add static. 
>> >>> >>>> + .mm_rb = RB_ROOT, >>>> + .mm_users = ATOMIC_INIT(2), >>>> + .mm_count = ATOMIC_INIT(1), >>>> + .mmap_sem = __RWSEM_INITIALIZER(rare_write_mm.mmap_sem), >>>> + .page_table_lock= __SPIN_LOCK_UNLOCKED(rare_write_mm.page_table_lock), >>>> + .mmlist = LIST_HEAD_INIT(rare_write_mm.mmlist), >>>> +}; >>>> + >>>> +#ifdef CONFIG_ARM64_PTDUMP_DEBUGFS >>>> +#include <asm/ptdump.h> >>>> + >>>> +static struct ptdump_info rare_write_ptdump_info = { >>>> + .mm = &rare_write_mm, >>>> + .markers = (struct addr_marker[]){ >>>> + { 0, "rare-write start" }, >>>> + { TASK_SIZE_64, "rare-write end" } >>>> + }, >>>> + .base_addr = 0, >>>> +}; >>>> + >>>> +static int __init ptdump_init(void) >>>> +{ >>>> + return ptdump_debugfs_register(&rare_write_ptdump_info, >>>> + "rare_write_page_tables"); >>>> +} >>>> +device_initcall(ptdump_init); >>>> + >>>> +#endif >>>> + >>>> +__always_inline unsigned long __arch_rare_write_begin(void) >>> >>> These functions are not called from the same compilation unit, so what >>> do you think __always_inline buys you here? >> >> The first patch of Kee's series ([PATCH 01/11] Introduce rare_write() infrastructure) [1] says some requirements and one of these is the arch-specific helpers should be inline to avoid making them ROP targets. >> >> So I'd make sure the helpers are inline. >> >> [1] : http://www.openwall.com/lists/kernel-hardening/2017/03/29/16 >> > > OK, that explains a number of issues with this patch. > > Please understand that marking a function inline has *no* effect > whatsoever if the C definition is not visible to the caller. This is > why the C declarations in the header file are not made generic: these > functions are supposed to be *defined* as static inline in header > files. Thank you for the guidance. > > So please move these implementations into the header files, and mark > them static inline. This means the rare_write_mm structure should > *not* be made static either. > OK, I’ll fix this in the next version. > >>> >>>> +{ >>>> + struct mm_struct *mm = &rare_write_mm; >>>> + >>>> + preempt_disable(); >>>> + >>>> + __switch_mm(mm); >>>> + >>>> + if (system_uses_ttbr0_pan()) { >>>> + update_saved_ttbr0(current, mm); >>>> + cpu_switch_mm(mm->pgd, mm); >>>> + } >>>> + >>>> + return 0; >>>> +} >>>> + >>>> +__always_inline unsigned long __arch_rare_write_end(void) >>>> +{ >>>> + struct mm_struct *mm = current->active_mm; >>>> + >>>> + __switch_mm(mm); >>>> + >>>> + if (system_uses_ttbr0_pan()) { >>>> + cpu_set_reserved_ttbr0(); >>>> + if (mm != &init_mm) >>>> + update_saved_ttbr0(current, mm); >>>> + } >>>> + >>>> + preempt_enable_no_resched(); >>>> + >>>> + return 0; >>>> +} >>>> + >>>> +static unsigned long rodata_rw_alias_start __ro_after_init = TASK_SIZE_64 / 4; >>>> + >>>> +__always_inline void __arch_rare_write_memcpy(void *dst, const void *src, >>>> + __kernel_size_t len) >>>> +{ >>>> + unsigned long __dst = (unsigned long)dst; >>>> + >>>> + __dst -= (unsigned long)__start_rodata; >>>> + __dst += rodata_rw_alias_start; >>>> + >>>> + memcpy((void *)__dst, src, len); >>>> +} >>>> + >>>> +void __init rare_write_init(void) >>> >>> static please >> >> OK, add static. 
>> >>> >>>> +{ >>>> + phys_addr_t pgd_phys = early_pgtable_alloc(); >>>> + pgd_t *pgd = (pgd_t *)__phys_to_virt(pgd_phys); >>>> + phys_addr_t pa_start = __pa_symbol(__start_rodata); >>>> + unsigned long size = __end_rodata - __start_rodata; >>>> + >>>> + BUG_ON(!PAGE_ALIGNED(pa_start)); >>>> + BUG_ON(!PAGE_ALIGNED(size)); >>>> + >>>> + rodata_rw_alias_start += kaslr_offset(); >>>> + >>> >>> This places the rodata alias at a fixed offset from the original, >>> right, even under KASLR? Given that they are completely independent, I >>> think you can use a random offset here rather than the same offset we >>> use for vmlinux. >> >> What I did here is just to randomize the base of rw alias mapping when kaslr is enabled. >> You're right it's completely independent from kaslr though, I would like to add randomization for rw alias base address by using simple solution in early boot stage. >> >> I' ll think again to achieve randomization or just remove it. >> >>> >>> >>>> + BUG_ON(rodata_rw_alias_start + size > TASK_SIZE_64); >>>> + >>>> + rare_write_mm.pgd = pgd; >>> >>> For correctness, you should add a call to mm_init_cpumask() here, >>> although it doesn't matter in practice on arm64. >> >> OK, I'll fix this. >> >>> >>>> + init_new_context(NULL, &rare_write_mm); >>>> + >>>> + __create_pgd_mapping(pgd, >>>> + pa_start, rodata_rw_alias_start, size, >>>> + __pgprot(pgprot_val(PAGE_KERNEL) | PTE_NG), >>>> + early_pgtable_alloc, debug_pagealloc_enabled()); >>> >>> This needs to be updated after the changes that are now queued in the >>> arm64 tree for v4.12 >>> >>> BTW you can allow page/cont mappings in all cases, no need for the >>> debug_pagealloc_enabled() check >> >> I will rebase this onto the version you mentioned later. >> >>> >>>> +} >>>> + >>>> /* >>>> * paging_init() sets up the page tables, initialises the zone memory >>>> * maps and sets up the zero page. >>>> @@ -603,6 +702,8 @@ void __init paging_init(void) >>>> */ >>>> memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE, >>>> SWAPPER_DIR_SIZE - PAGE_SIZE); >>>> + >>>> + rare_write_init(); >>>> } >>>> >>>> /* >>>> -- >>>> 2.7.4 >>>> >> >> Thank you very much for the review
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f2b0b52..6e2c592 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -102,6 +102,8 @@ config ARM64
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
+	select HAVE_ARCH_RARE_WRITE
+	select HAVE_ARCH_RARE_WRITE_MEMCPY
 	select IOMMU_DMA if IOMMU_SUPPORT
 	select IRQ_DOMAIN
 	select IRQ_FORCED_THREADING
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index c213fdbd0..1514933 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -741,6 +741,10 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 #define kc_vaddr_to_offset(v)	((v) & ~VA_START)
 #define kc_offset_to_vaddr(o)	((o) | VA_START)
 
+unsigned long __arch_rare_write_begin(void);
+unsigned long __arch_rare_write_end(void);
+void __arch_rare_write_memcpy(void *dst, const void *src, __kernel_size_t len);
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_PGTABLE_H */
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 91502e3..86b25c9 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -570,6 +570,105 @@ static void __init map_kernel(pgd_t *pgd)
 	kasan_copy_shadow(pgd);
 }
 
+struct mm_struct rare_write_mm = {
+	.mm_rb		= RB_ROOT,
+	.mm_users	= ATOMIC_INIT(2),
+	.mm_count	= ATOMIC_INIT(1),
+	.mmap_sem	= __RWSEM_INITIALIZER(rare_write_mm.mmap_sem),
+	.page_table_lock= __SPIN_LOCK_UNLOCKED(rare_write_mm.page_table_lock),
+	.mmlist		= LIST_HEAD_INIT(rare_write_mm.mmlist),
+};
+
+#ifdef CONFIG_ARM64_PTDUMP_DEBUGFS
+#include <asm/ptdump.h>
+
+static struct ptdump_info rare_write_ptdump_info = {
+	.mm		= &rare_write_mm,
+	.markers	= (struct addr_marker[]){
+		{ 0,		"rare-write start" },
+		{ TASK_SIZE_64,	"rare-write end" }
+	},
+	.base_addr	= 0,
+};
+
+static int __init ptdump_init(void)
+{
+	return ptdump_debugfs_register(&rare_write_ptdump_info,
+				       "rare_write_page_tables");
+}
+device_initcall(ptdump_init);
+
+#endif
+
+__always_inline unsigned long __arch_rare_write_begin(void)
+{
+	struct mm_struct *mm = &rare_write_mm;
+
+	preempt_disable();
+
+	__switch_mm(mm);
+
+	if (system_uses_ttbr0_pan()) {
+		update_saved_ttbr0(current, mm);
+		cpu_switch_mm(mm->pgd, mm);
+	}
+
+	return 0;
+}
+
+__always_inline unsigned long __arch_rare_write_end(void)
+{
+	struct mm_struct *mm = current->active_mm;
+
+	__switch_mm(mm);
+
+	if (system_uses_ttbr0_pan()) {
+		cpu_set_reserved_ttbr0();
+		if (mm != &init_mm)
+			update_saved_ttbr0(current, mm);
+	}
+
+	preempt_enable_no_resched();
+
+	return 0;
+}
+
+static unsigned long rodata_rw_alias_start __ro_after_init = TASK_SIZE_64 / 4;
+
+__always_inline void __arch_rare_write_memcpy(void *dst, const void *src,
+					      __kernel_size_t len)
+{
+	unsigned long __dst = (unsigned long)dst;
+
+	__dst -= (unsigned long)__start_rodata;
+	__dst += rodata_rw_alias_start;
+
+	memcpy((void *)__dst, src, len);
+}
+
+void __init rare_write_init(void)
+{
+	phys_addr_t pgd_phys = early_pgtable_alloc();
+	pgd_t *pgd = (pgd_t *)__phys_to_virt(pgd_phys);
+	phys_addr_t pa_start = __pa_symbol(__start_rodata);
+	unsigned long size = __end_rodata - __start_rodata;
+
+	BUG_ON(!PAGE_ALIGNED(pa_start));
+	BUG_ON(!PAGE_ALIGNED(size));
+
+	rodata_rw_alias_start += kaslr_offset();
+
+	BUG_ON(rodata_rw_alias_start + size > TASK_SIZE_64);
+
+	rare_write_mm.pgd = pgd;
+	init_new_context(NULL, &rare_write_mm);
+
+	__create_pgd_mapping(pgd,
+			     pa_start, rodata_rw_alias_start, size,
+			     __pgprot(pgprot_val(PAGE_KERNEL) | PTE_NG),
+			     early_pgtable_alloc, debug_pagealloc_enabled());
+}
+
 /*
  * paging_init() sets up the page tables, initialises the zone memory
  * maps and sets up the zero page.
@@ -603,6 +702,8 @@ void __init paging_init(void)
 	 */
 	memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE,
 		      SWAPPER_DIR_SIZE - PAGE_SIZE);
+
+	rare_write_init();
 }
 
 /*
This patch might be a part of Kees Cook's rare_write infrastructure series [1]
for the arm64 architecture.

This implementation is based on Mark Rutland's suggestion [2]: a special
userspace mm that maps only __start_rodata ~ __end_rodata with RW permission
is prepared during early boot (paging_init), and __arch_rare_write_begin()
switches to that mm.

The rare_write_mm address space is added for this special purpose, and a page
global directory is also prepared for it. The mm remaps __start_rodata ~
__end_rodata to rodata_rw_alias_start, which starts at TASK_SIZE_64 / 4 +
kaslr_offset().

It passes LKDTM's rare-write test.

[1] : http://www.openwall.com/lists/kernel-hardening/2017/02/27/5
[2] : https://lkml.org/lkml/2017/3/29/704

Signed-off-by: Hoeun Ryu <hoeun.ryu@gmail.com>
---
 arch/arm64/Kconfig               |   2 +
 arch/arm64/include/asm/pgtable.h |   4 ++
 arch/arm64/mm/mmu.c              | 101 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 107 insertions(+)
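For context, a rough sketch of how the generic side of [1] is expected to drive these arch hooks when HAVE_ARCH_RARE_WRITE_MEMCPY is selected. The wrapper below is illustrative; the real macro names and exact flow are defined by Kees' series, not by this patch:

```c
/*
 * Illustrative wrapper only (names approximate those in [1]): open a
 * rare-write window, copy the new value through the RW alias, close it.
 */
#define rare_write_sketch(var, val)					\
({									\
	typeof(var) __val = (val);					\
	__arch_rare_write_begin();					\
	__arch_rare_write_memcpy((void *)&(var), &__val, sizeof(var));	\
	__arch_rare_write_end();					\
})
```

This is the path that the LKDTM rare-write test mentioned in the commit message exercises, writing to a variable that lives between __start_rodata and __end_rodata via the RW alias mapping.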