Message ID | 20191126224446.15145-2-consult-mg@gstardust.com
---|---
State | New, archived
Series | riscv: Align shared mappings to avoid cache aliasing
On Tue, 26 Nov 2019 14:44:45 PST (-0800), consult-mg@gstardust.com wrote:
> Align shared mappings according to SHMLBA for VIPT cache performance.
>
> arch_get_unmapped_area() and arch_get_unmapped_area_topdown() are
> essentially copies of their default implementations in mm/mmap.c,
> modified to align the address to SHMLBA for shared mappings, i.e.
> where MAP_SHARED is specified or a file pointer is provided.
>
> Allow MAP_FIXED to request unaligned shared mappings. Although this
> may potentially reduce performance, very little software does this, as
> it is not portable across architectures that enforce alignment.
>
> Signed-off-by: Marc Gauthier <consult-mg@gstardust.com>
> ---
>  arch/riscv/include/asm/pgtable.h |   4 ++
>  arch/riscv/kernel/sys_riscv.c    | 113 +++++++++++++++++++++++++++++++
>  2 files changed, 117 insertions(+)
>
> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> index d3221017194d..7d1cc47ac5f9 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -436,6 +436,10 @@ extern void *dtb_early_va;
>  extern void setup_bootmem(void);
>  extern void paging_init(void);
>
> +/* We provide arch_get_unmapped_area to handle VIPT caches efficiently. */
> +#define HAVE_ARCH_UNMAPPED_AREA
> +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
> +
>  /*
>   * Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32.
>   * Note that PGDIR_SIZE must evenly divide TASK_SIZE.
> diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
> index f3619f59d85c..3e739b30b1f7 100644
> --- a/arch/riscv/kernel/sys_riscv.c
> +++ b/arch/riscv/kernel/sys_riscv.c
> @@ -3,11 +3,15 @@
>   * Copyright (C) 2012 Regents of the University of California
>   * Copyright (C) 2014 Darius Rad <darius@bluespec.com>
>   * Copyright (C) 2017 SiFive
> + * Copyright (C) 2019 Aril Inc
>   */
>
>  #include <linux/syscalls.h>
>  #include <asm/unistd.h>
>  #include <asm/cacheflush.h>
> +#include <linux/shm.h>
> +#include <linux/mman.h>
> +#include <linux/security.h>
>
>  static long riscv_sys_mmap(unsigned long addr, unsigned long len,
>  			   unsigned long prot, unsigned long flags,
> @@ -65,3 +69,112 @@ SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end,
>
>  	return 0;
>  }
> +
> +/*
> + * RISC-V requires implementations to function correctly in the presence
> + * of cache aliasing, regardless of page alignment. It says nothing about
> + * performance. To ensure healthy performance with commonly implemented
> + * VIPT caches, the following code avoids most cases of cache aliasing by
> + * aligning shared mappings such that all mappings of a given physical
> + * page of an object are at a multiple of SHMLBA bytes from each other.
> + *
> + * It does not enforce alignment. Using MAP_FIXED to request unaligned
> + * shared mappings is not common, and may perform poorly with VIPT caches.
> + */
> +unsigned long
> +arch_get_unmapped_area(struct file *filp, unsigned long addr,
> +		       unsigned long len, unsigned long pgoff,
> +		       unsigned long flags)
> +{
> +	struct mm_struct *mm = current->mm;
> +	struct vm_area_struct *vma, *prev;
> +	struct vm_unmapped_area_info info;
> +	const unsigned long pgoffset = pgoff << PAGE_SHIFT;
> +	int do_align = (filp || (flags & MAP_SHARED));
> +
> +	if (len > TASK_SIZE - mmap_min_addr)
> +		return -ENOMEM;
> +
> +	if (flags & MAP_FIXED)
> +		return addr;
> +
> +	if (addr) {
> +		if (do_align)
> +			addr = ALIGN(addr, SHMLBA) + (pgoffset & (SHMLBA - 1));
> +		else
> +			addr = PAGE_ALIGN(addr);
> +		vma = find_vma_prev(mm, addr, &prev);
> +		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
> +		    (!vma || addr + len <= vm_start_gap(vma)) &&
> +		    (!prev || addr >= vm_end_gap(prev)))
> +			return addr;
> +	}
> +
> +	info.flags = 0;
> +	info.length = len;
> +	info.low_limit = mm->mmap_base;
> +	info.high_limit = TASK_SIZE;
> +	info.align_mask = do_align ? SHMLBA - 1 : 0;
> +	info.align_offset = pgoffset;
> +	return vm_unmapped_area(&info);
> +}
> +
> +/*
> + * Similar to arch_get_unmapped_area(), but allocating top-down from below the
> + * stack's low limit (the base).
> + */
> +unsigned long
> +arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
> +			       unsigned long len, unsigned long pgoff,
> +			       unsigned long flags)
> +{
> +	struct vm_area_struct *vma, *prev;
> +	struct mm_struct *mm = current->mm;
> +	struct vm_unmapped_area_info info;
> +	const unsigned long pgoffset = pgoff << PAGE_SHIFT;
> +	int do_align = (filp || (flags & MAP_SHARED));
> +
> +	/* requested length too big for entire address space */
> +	if (len > TASK_SIZE - mmap_min_addr)
> +		return -ENOMEM;
> +
> +	if (flags & MAP_FIXED)
> +		return addr;
> +
> +	/* requesting a specific address */
> +	if (addr) {
> +		if (do_align)
> +			addr = ALIGN(addr, SHMLBA) + (pgoffset & (SHMLBA - 1));
> +		else
> +			addr = PAGE_ALIGN(addr);
> +		vma = find_vma_prev(mm, addr, &prev);
> +		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
> +		    (!vma || addr + len <= vm_start_gap(vma)) &&
> +		    (!prev || addr >= vm_end_gap(prev)))
> +			return addr;
> +	}
> +
> +	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
> +	info.length = len;
> +	info.low_limit = max(PAGE_SIZE, mmap_min_addr);
> +	info.high_limit = mm->mmap_base;
> +	info.align_mask = do_align ? SHMLBA - 1 : 0;
> +	info.align_offset = pgoffset;
> +	addr = vm_unmapped_area(&info);
> +
> +	/*
> +	 * A failed mmap() very likely causes application failure,
> +	 * so fall back to the bottom-up function here. This scenario
> +	 * can happen with large stack limits and large mmap()
> +	 * allocations.
> +	 */
> +	if (offset_in_page(addr)) {
> +		VM_BUG_ON(addr != -ENOMEM);
> +		info.flags = 0;
> +		info.low_limit = TASK_UNMAPPED_BASE;
> +		info.high_limit = TASK_SIZE;
> +		addr = vm_unmapped_area(&info);
> +	}
> +
> +	return addr;
> +}

It really feels like this should be generic code to me. It looks like the
only major difference between this and the routines in arch/arm/mm/mmap.c
is whether or not MAP_FIXED alignment is enforced, so we could probably
just make the arch-specific code be arch_cache_is_vipt_aliasing(), which
on RISC-V would always be false and on ARM would be the current
cache_is_vipt_aliasing().

ARM is also using a different alignment expression, but I think they may
be equivalent. They have

#define COLOUR_ALIGN(addr,pgoff)		\
	((((addr)+SHMLBA-1)&~(SHMLBA-1)) +	\
	 (((pgoff)<<PAGE_SHIFT) & (SHMLBA-1)))

LMK if you're OK doing this, or if you want me to take over the patch set.
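For reference, the equivalence Palmer raises here can be checked mechanically. The program below is not part of the patch or of this thread; it is a small userspace sketch in which the SHMLBA and PAGE_SHIFT values are assumptions chosen only for illustration. It compares ARM's COLOUR_ALIGN() with the ALIGN()-based expression used in the patch across a range of addresses and page offsets.

/* Standalone sketch; SHMLBA and PAGE_SHIFT values are assumed for the demo. */
#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define SHMLBA     (4UL << PAGE_SHIFT)	/* assumed value, illustration only */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

#define COLOUR_ALIGN(addr, pgoff)		\
	((((addr) + SHMLBA - 1) & ~(SHMLBA - 1)) +	\
	 (((pgoff) << PAGE_SHIFT) & (SHMLBA - 1)))

int main(void)
{
	for (unsigned long addr = 0; addr < (1UL << 20); addr += 1234)
		for (unsigned long pgoff = 0; pgoff < 16; pgoff++) {
			unsigned long pgoffset = pgoff << PAGE_SHIFT;

			/* Both round addr up to SHMLBA, then add the offset's color bits. */
			assert(COLOUR_ALIGN(addr, pgoff) ==
			       ALIGN(addr, SHMLBA) + (pgoffset & (SHMLBA - 1)));
		}
	puts("COLOUR_ALIGN() and the ALIGN()-based expression agree");
	return 0;
}

Both forms reduce to "round addr up to SHMLBA, then add the file offset modulo SHMLBA", so they select the same cache color; the only difference is where pgoff gets shifted.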
Palmer Dabbelt wrote on 2019-12-05 18:03:
> It really feels like this should be generic code to me.

Indeed. The generic mm/mmap.c versions might be generalized a bit.

> It looks like the only major difference between this and the routines
> in arch/arm/mm/mmap.c is whether or not MAP_FIXED alignment is
> enforced, so we could probably just make the arch-specific code be
> arch_cache_is_vipt_aliasing(), which on RISC-V would always be false
> and on ARM would be the current cache_is_vipt_aliasing().

Probably right. ARM uses find_vma() instead of find_vma_prev() as in the
generic code, I haven't examined why exactly. And their use of PAGE_MASK
here is redundant (SHMLBA needs to be page-aligned):

	info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;

"cache_is_vipt_aliasing" might be a bit misleading. RISC-V caches can be
VIPT-aliasing. They simply can't let that show up functionally to
software. Maybe strict_vipt_aliasing, or ?

> ARM is also using a different alignment expression, but I think they
> may be equivalent. They have
>
> #define COLOUR_ALIGN(addr,pgoff)		\
>	((((addr)+SHMLBA-1)&~(SHMLBA-1)) +	\
>	 (((pgoff)<<PAGE_SHIFT) & (SHMLBA-1)))

Yes. After looking at this and others carefully, it was clearer to use
the existing ALIGN() macro, and now pgoff is shifted once instead of
twice (optimization habit :).

> LMK if you're OK doing this, or if you want me to take over the patch
> set.

You're certainly welcome to take this on, if so willing.

M
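To make the direction discussed above concrete, here is a rough sketch of what the arch hook could look like in shared code. It is only an illustration of the proposal in this thread: arch_cache_is_vipt_aliasing() is the name Palmer suggested, not an existing generic kernel interface, and the helper below is hypothetical. Per the proposal, the generic code would always color shared mappings, and the hook would only decide whether an unaligned MAP_FIXED request must be rejected (ARM: cache_is_vipt_aliasing(); RISC-V: always false).

/* Hypothetical sketch of the proposed generic hook, not merged code. */
#include <linux/errno.h>
#include <linux/mman.h>
#include <linux/shm.h>

#ifndef arch_cache_is_vipt_aliasing
static inline bool arch_cache_is_vipt_aliasing(void)
{
	return false;	/* RISC-V: never enforce MAP_FIXED alignment */
}
#endif

/* Returns 0 if a MAP_FIXED request is acceptable, -EINVAL otherwise. */
static int check_fixed_mapping(unsigned long addr, unsigned long pgoff,
			       unsigned long flags)
{
	if (!(flags & MAP_FIXED) || !(flags & MAP_SHARED))
		return 0;
	if (!arch_cache_is_vipt_aliasing())
		return 0;

	/* ARM-style check: address and file offset must share a cache color. */
	if ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1))
		return -EINVAL;

	return 0;
}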
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index d3221017194d..7d1cc47ac5f9 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -436,6 +436,10 @@ extern void *dtb_early_va;
 extern void setup_bootmem(void);
 extern void paging_init(void);
 
+/* We provide arch_get_unmapped_area to handle VIPT caches efficiently. */
+#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+
 /*
  * Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32.
  * Note that PGDIR_SIZE must evenly divide TASK_SIZE.
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index f3619f59d85c..3e739b30b1f7 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -3,11 +3,15 @@
  * Copyright (C) 2012 Regents of the University of California
  * Copyright (C) 2014 Darius Rad <darius@bluespec.com>
  * Copyright (C) 2017 SiFive
+ * Copyright (C) 2019 Aril Inc
  */
 
 #include <linux/syscalls.h>
 #include <asm/unistd.h>
 #include <asm/cacheflush.h>
+#include <linux/shm.h>
+#include <linux/mman.h>
+#include <linux/security.h>
 
 static long riscv_sys_mmap(unsigned long addr, unsigned long len,
 			   unsigned long prot, unsigned long flags,
@@ -65,3 +69,112 @@ SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end,
 
 	return 0;
 }
+
+/*
+ * RISC-V requires implementations to function correctly in the presence
+ * of cache aliasing, regardless of page alignment. It says nothing about
+ * performance. To ensure healthy performance with commonly implemented
+ * VIPT caches, the following code avoids most cases of cache aliasing by
+ * aligning shared mappings such that all mappings of a given physical
+ * page of an object are at a multiple of SHMLBA bytes from each other.
+ *
+ * It does not enforce alignment. Using MAP_FIXED to request unaligned
+ * shared mappings is not common, and may perform poorly with VIPT caches.
+ */
+unsigned long
+arch_get_unmapped_area(struct file *filp, unsigned long addr,
+		       unsigned long len, unsigned long pgoff,
+		       unsigned long flags)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma, *prev;
+	struct vm_unmapped_area_info info;
+	const unsigned long pgoffset = pgoff << PAGE_SHIFT;
+	int do_align = (filp || (flags & MAP_SHARED));
+
+	if (len > TASK_SIZE - mmap_min_addr)
+		return -ENOMEM;
+
+	if (flags & MAP_FIXED)
+		return addr;
+
+	if (addr) {
+		if (do_align)
+			addr = ALIGN(addr, SHMLBA) + (pgoffset & (SHMLBA - 1));
+		else
+			addr = PAGE_ALIGN(addr);
+		vma = find_vma_prev(mm, addr, &prev);
+		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+		    (!vma || addr + len <= vm_start_gap(vma)) &&
+		    (!prev || addr >= vm_end_gap(prev)))
+			return addr;
+	}
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = mm->mmap_base;
+	info.high_limit = TASK_SIZE;
+	info.align_mask = do_align ? SHMLBA - 1 : 0;
+	info.align_offset = pgoffset;
+	return vm_unmapped_area(&info);
+}
+
+/*
+ * Similar to arch_get_unmapped_area(), but allocating top-down from below the
+ * stack's low limit (the base).
+ */
+unsigned long
+arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
+			       unsigned long len, unsigned long pgoff,
+			       unsigned long flags)
+{
+	struct vm_area_struct *vma, *prev;
+	struct mm_struct *mm = current->mm;
+	struct vm_unmapped_area_info info;
+	const unsigned long pgoffset = pgoff << PAGE_SHIFT;
+	int do_align = (filp || (flags & MAP_SHARED));
+
+	/* requested length too big for entire address space */
+	if (len > TASK_SIZE - mmap_min_addr)
+		return -ENOMEM;
+
+	if (flags & MAP_FIXED)
+		return addr;
+
+	/* requesting a specific address */
+	if (addr) {
+		if (do_align)
+			addr = ALIGN(addr, SHMLBA) + (pgoffset & (SHMLBA - 1));
+		else
+			addr = PAGE_ALIGN(addr);
+		vma = find_vma_prev(mm, addr, &prev);
+		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+		    (!vma || addr + len <= vm_start_gap(vma)) &&
+		    (!prev || addr >= vm_end_gap(prev)))
+			return addr;
+	}
+
+	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+	info.length = len;
+	info.low_limit = max(PAGE_SIZE, mmap_min_addr);
+	info.high_limit = mm->mmap_base;
+	info.align_mask = do_align ? SHMLBA - 1 : 0;
+	info.align_offset = pgoffset;
+	addr = vm_unmapped_area(&info);
+
+	/*
+	 * A failed mmap() very likely causes application failure,
+	 * so fall back to the bottom-up function here. This scenario
+	 * can happen with large stack limits and large mmap()
+	 * allocations.
+	 */
+	if (offset_in_page(addr)) {
+		VM_BUG_ON(addr != -ENOMEM);
+		info.flags = 0;
+		info.low_limit = TASK_UNMAPPED_BASE;
+		info.high_limit = TASK_SIZE;
+		addr = vm_unmapped_area(&info);
+	}
+
+	return addr;
+}
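The key mechanism in both functions is the align_mask/align_offset pair handed to vm_unmapped_area(): every address returned for a given file page ends up congruent to that page's offset modulo SHMLBA, so any two mappings of the same page differ by a multiple of SHMLBA. The standalone sketch below is not kernel code; the SHMLBA and PAGE_SHIFT values are assumptions for illustration only. It mimics the coloring adjustment for two arbitrary free gaps that both map the same file page.

/* Standalone illustration, not kernel code; values below are assumptions. */
#include <stdio.h>

#define PAGE_SHIFT 12
#define SHMLBA     (4UL << PAGE_SHIFT)	/* assumed 16 KiB for the demo */

/*
 * Mimics how a bottom-up vm_unmapped_area() search bumps a candidate gap
 * so that (addr - align_offset) becomes a multiple of align_mask + 1.
 */
static unsigned long color(unsigned long gap_start, unsigned long pgoff)
{
	unsigned long align_mask = SHMLBA - 1;
	unsigned long align_offset = pgoff << PAGE_SHIFT;

	return gap_start + ((align_offset - gap_start) & align_mask);
}

int main(void)
{
	/* Two unrelated free gaps, both mapping pgoff 3 of the same object. */
	unsigned long a = color(0x7f123000UL, 3);
	unsigned long b = color(0x2a456000UL, 3);

	printf("a = %#lx, b = %#lx, (a - b) %% SHMLBA = %lu\n",
	       a, b, (a - b) % SHMLBA);	/* prints 0: same cache color */
	return 0;
}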
Align shared mappings according to SHMLBA for VIPT cache performance.

arch_get_unmapped_area() and arch_get_unmapped_area_topdown() are
essentially copies of their default implementations in mm/mmap.c,
modified to align the address to SHMLBA for shared mappings, i.e.
where MAP_SHARED is specified or a file pointer is provided.

Allow MAP_FIXED to request unaligned shared mappings. Although this
may potentially reduce performance, very little software does this, as
it is not portable across architectures that enforce alignment.

Signed-off-by: Marc Gauthier <consult-mg@gstardust.com>
---
 arch/riscv/include/asm/pgtable.h |   4 ++
 arch/riscv/kernel/sys_riscv.c    | 113 +++++++++++++++++++++++++++++++
 2 files changed, 117 insertions(+)
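As a user-visible sanity check of the behavior described above, one can map the same offset of a file MAP_SHARED twice and look at the distance between the two mappings: with SHMLBA-colored placement the distance is a multiple of SHMLBA. The program below is an illustration only, not part of this series, and ASSUMED_SHMLBA is a guess, since userspace has no portable way to query SHMLBA.

/* Illustration only; ASSUMED_SHMLBA is an assumption, not queried from the kernel. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#define ASSUMED_SHMLBA (4UL * 4096)

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	int fd = open("/tmp/shmlba-demo", O_RDWR | O_CREAT, 0600);

	if (fd < 0 || ftruncate(fd, page) < 0) {
		perror("setup");
		return 1;
	}

	/* Two shared mappings of the same file page. */
	char *a = mmap(NULL, page, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	char *b = mmap(NULL, page, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	if (a == MAP_FAILED || b == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	unsigned long delta = (unsigned long)(a > b ? a - b : b - a);

	/* With SHMLBA-colored placement the remainder below is 0. */
	printf("a = %p, b = %p, delta %% SHMLBA = %lu\n",
	       a, b, delta % ASSUMED_SHMLBA);
	return 0;
}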