Message ID | 1347035226-18649-10-git-send-email-catalin.marinas@arm.com (mailing list archive) |
---|---|
State | New, archived |
On Friday 07 September 2012, Catalin Marinas wrote: > + > +/* > + * dmac_inv_range(start,end) all of these appear to be unused now. Can you remove them? > + * Invalidate the data cache within the specified region; we will be > + * performing a DMA operation in this region and we want to purge old > + * data in the cache. > + * > + * - start - virtual start address of region > + * - end - virtual end address of region > + */ > +ENTRY(dmac_inv_range) > + dcache_line_size x2, x3 > + sub x3, x2, #1 > + bic x0, x0, x3 > + bic x1, x1, x3 > +1: dc ivac, x0 // invalidate D / U line > + add x0, x0, x2 > + cmp x0, x1 > + b.lo 1b > + dsb sy > + ret > +ENDPROC(dmac_inv_range) > + > +/* > + * dmac_clean_range(start,end) > + * - start - virtual start address of region > + * - end - virtual end address of region > + */ > +ENTRY(dmac_clean_range) > + dcache_line_size x2, x3 > + sub x3, x2, #1 > + bic x0, x0, x3 > +1: dc cvac, x0 // clean D / U line > + add x0, x0, x2 > + cmp x0, x1 > + b.lo 1b > + dsb sy > + ret > +ENDPROC(dmac_clean_range) > + > +/* > + * dmac_flush_range(start,end) > + * - start - virtual start address of region > + * - end - virtual end address of region > + */ > +ENTRY(dmac_flush_range) > + dcache_line_size x2, x3 > + sub x3, x2, #1 > + bic x0, x0, x3 > +1: dc civac, x0 // clean & invalidate D / U line > + add x0, x0, x2 > + cmp x0, x1 > + b.lo 1b > + dsb sy > + ret > +ENDPROC(dmac_flush_range) > + > +/* > + * dmac_map_area(start, size, dir) > + * - start - kernel virtual start address > + * - size - size of region > + * - dir - DMA direction > + */ > +ENTRY(dmac_map_area) > + add x1, x1, x0 > + cmp x2, #DMA_FROM_DEVICE > + b.eq dmac_inv_range > + b dmac_clean_range > +ENDPROC(dmac_map_area) > + > +/* > + * dmac_unmap_area(start, size, dir) > + * - start - kernel virtual start address > + * - size - size of region > + * - dir - DMA direction > + */ > +ENTRY(dmac_unmap_area) > + add x1, x1, x0 > + cmp x2, #DMA_TO_DEVICE > + b.ne dmac_inv_range > + ret > +ENDPROC(dmac_unmap_area) Aside from this: Acked-by: Arnd Bergmann <arnd@arndb.de>
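A C-level sketch may help here: dmac_map_area()/dmac_unmap_area() simply dispatch on the DMA direction, and the dma-mapping glue would be expected to drive them roughly as below (a sketch only — the wrapper names are hypothetical, not part of the patch):

```c
#include <linux/dma-direction.h>
#include <linux/types.h>

/* Prototypes as declared in the patch's asm/cacheflush.h. */
extern void dmac_map_area(const void *start, size_t size, int dir);
extern void dmac_unmap_area(const void *start, size_t size, int dir);

/* Hypothetical dma-mapping glue, shown only to illustrate the
 * direction-based dispatch performed by the assembly above. */
static void sync_buffer_for_device(const void *kaddr, size_t size,
				   enum dma_data_direction dir)
{
	/* DMA_FROM_DEVICE: invalidate, so the CPU will not read stale
	 * lines once the device has written the buffer. Any other
	 * direction: clean, so the device sees the CPU's data. */
	dmac_map_area(kaddr, size, dir);
}

static void sync_buffer_for_cpu(const void *kaddr, size_t size,
				enum dma_data_direction dir)
{
	/* Everything except DMA_TO_DEVICE gets invalidated on unmap. */
	dmac_unmap_area(kaddr, size, dir);
}
```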
Hi Catalin, On Fri, Sep 07, 2012 at 05:26:44PM +0100, Catalin Marinas wrote: > The patch adds functionality required for cache maintenance. The AArch64 > architecture mandates non-aliasing VIPT or PIPT D-cache and VIPT (may > have aliases) or ASID-tagged VIVT I-cache. Cache maintenance operations > are automatically broadcast in hardware between CPUs. > > Signed-off-by: Will Deacon <will.deacon@arm.com> > Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> > Acked-by: Tony Lindgren <tony@atomide.com> > --- > arch/arm64/include/asm/cache.h | 32 +++++ > arch/arm64/include/asm/cacheflush.h | 168 +++++++++++++++++++++++ > arch/arm64/include/asm/cachetype.h | 48 +++++++ > arch/arm64/mm/cache.S | 251 +++++++++++++++++++++++++++++++++++ > arch/arm64/mm/flush.c | 138 +++++++++++++++++++ > 5 files changed, 637 insertions(+), 0 deletions(-) > create mode 100644 arch/arm64/include/asm/cache.h > create mode 100644 arch/arm64/include/asm/cacheflush.h > create mode 100644 arch/arm64/include/asm/cachetype.h > create mode 100644 arch/arm64/mm/cache.S > create mode 100644 arch/arm64/mm/flush.c > > diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h > new file mode 100644 > index 0000000..390308a > --- /dev/null > +++ b/arch/arm64/include/asm/cache.h > @@ -0,0 +1,32 @@ > +/* > + * Copyright (C) 2012 ARM Ltd. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 as > + * published by the Free Software Foundation. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program. If not, see <http://www.gnu.org/licenses/>. > + */ > +#ifndef __ASM_CACHE_H > +#define __ASM_CACHE_H > + > +#define L1_CACHE_SHIFT 6 > +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) > + > +/* > + * Memory returned by kmalloc() may be used for DMA, so we must make > + * sure that all such allocations are cache aligned. Otherwise, > + * unrelated code may cause parts of the buffer to be read into the > + * cache before the transfer is done, causing old data to be seen by > + * the CPU. > + */ > +#define ARCH_DMA_MINALIGN L1_CACHE_BYTES > +#define ARCH_SLAB_MINALIGN 8 > + > +#endif > diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h > new file mode 100644 > index 0000000..0bb9853 > --- /dev/null > +++ b/arch/arm64/include/asm/cacheflush.h > @@ -0,0 +1,168 @@ > +/* > + * Based on arch/arm/include/asm/cacheflush.h > + * > + * Copyright (C) 1999-2002 Russell King. > + * Copyright (C) 2012 ARM Ltd. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 as > + * published by the Free Software Foundation. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
> + */ > +#ifndef __ASM_CACHEFLUSH_H > +#define __ASM_CACHEFLUSH_H > + > +#include <linux/mm.h> > + > +/* > + * This flag is used to indicate that the page pointed to by a pte is clean > + * and does not require cleaning before returning it to the user. > + */ > +#define PG_dcache_clean PG_arch_1 > + > +/* > + * MM Cache Management > + * =================== > + * > + * The arch/arm64/mm/cache.S implements these methods. > + * > + * Start addresses are inclusive and end addresses are exclusive; start > + * addresses should be rounded down, end addresses up. > + * > + * See Documentation/cachetlb.txt for more information. Please note that > + * the implementation assumes non-aliasing VIPT D-cache and (aliasing) > + * VIPT or ASID-tagged VIVT I-cache. > + * > + * flush_cache_all() > + * > + * Unconditionally clean and invalidate the entire cache. > + * > + * flush_cache_mm(mm) > + * > + * Clean and invalidate all user space cache entries > + * before a change of page tables. > + * > + * flush_icache_range(start, end) > + * > + * Ensure coherency between the I-cache and the D-cache in the > + * region described by start, end. > + * - start - virtual start address > + * - end - virtual end address > + * > + * __flush_cache_user_range(start, end) > + * > + * Ensure coherency between the I-cache and the D-cache in the > + * region described by start, end. > + * - start - virtual start address > + * - end - virtual end address > + * > + * __flush_dcache_area(kaddr, size) > + * > + * Ensure that the data held in page is written back. > + * - kaddr - page address > + * - size - region size > + */ > +extern void flush_cache_all(void); > +extern void flush_cache_mm(struct mm_struct *mm); > +extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); > +extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn); > +extern void flush_icache_range(unsigned long start, unsigned long end); > +extern void __flush_dcache_area(void *addr, size_t len); > +extern void __flush_cache_user_range(unsigned long start, unsigned long end); > + > +/* > + * These are private to the dma-mapping API. Do not use directly. > + * Their sole purpose is to ensure that data held in the cache > + * is visible to DMA, or data written by DMA to system memory is > + * visible to the CPU. > + */ > +extern void dmac_map_area(const void *, size_t, int); > +extern void dmac_unmap_area(const void *, size_t, int); > +extern void dmac_flush_range(const void *, const void *); > + > +/* > + * Copy user data from/to a page which is mapped into a different > + * processes address space. Really, we want to allow our "user > + * space" model to handle this. > + */ > +extern void copy_to_user_page(struct vm_area_struct *, struct page *, > + unsigned long, void *, const void *, unsigned long); > +#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ > + do { \ > + memcpy(dst, src, len); \ > + } while (0) > + > +#define flush_cache_dup_mm(mm) flush_cache_mm(mm) > + > +/* > + * flush_dcache_page is used when the kernel has written to the page > + * cache page at virtual address page->virtual. > + * > + * If this page isn't mapped (ie, page_mapping == NULL), or it might > + * have userspace mappings, then we _must_ always clean + invalidate > + * the dcache entries associated with the kernel mapping. > + * > + * Otherwise we can defer the operation, and clean the cache when we are > + * about to change to user space. This is the same method as used on SPARC64. 
> + * See update_mmu_cache for the user space part. > + */ > +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 > +extern void flush_dcache_page(struct page *); > + > +static inline void __flush_icache_all(void) > +{ > + asm("ic ialluis"); > +} > + > +#define ARCH_HAS_FLUSH_ANON_PAGE > +static inline void flush_anon_page(struct vm_area_struct *vma, > + struct page *page, unsigned long vmaddr) > +{ > + extern void __flush_anon_page(struct vm_area_struct *vma, > + struct page *, unsigned long); > + if (PageAnon(page)) > + __flush_anon_page(vma, page, vmaddr); __flush_anon_page() does nothing. Shouldn't this be removed as well? > +} > + > +#define flush_dcache_mmap_lock(mapping) \ > + spin_lock_irq(&(mapping)->tree_lock) > +#define flush_dcache_mmap_unlock(mapping) \ > + spin_unlock_irq(&(mapping)->tree_lock) > + > +#define flush_icache_user_range(vma,page,addr,len) \ > + flush_dcache_page(page) > + > +/* > + * We don't appear to need to do anything here. In fact, if we did, we'd > + * duplicate cache flushing elsewhere performed by flush_dcache_page(). > + */ > +#define flush_icache_page(vma,page) do { } while (0) > + > +/* > + * flush_cache_vmap() is used when creating mappings (eg, via vmap, > + * vmalloc, ioremap etc) in kernel space for pages. On non-VIPT > + * caches, since the direct-mappings of these pages may contain cached > + * data, we need to do a full cache flush to ensure that writebacks > + * don't corrupt data placed into these pages via the new mappings. > + */ > +static inline void flush_cache_vmap(unsigned long start, unsigned long end) > +{ > + /* > + * set_pte_at() called from vmap_pte_range() does not > + * have a DSB after cleaning the cache line. > + */ > + dsb(); > +} > + > +static inline void flush_cache_vunmap(unsigned long start, unsigned long end) > +{ > +} > + > +#endif > diff --git a/arch/arm64/include/asm/cachetype.h b/arch/arm64/include/asm/cachetype.h > new file mode 100644 > index 0000000..85f5f51 > --- /dev/null > +++ b/arch/arm64/include/asm/cachetype.h > @@ -0,0 +1,48 @@ > +/* > + * Copyright (C) 2012 ARM Ltd. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 as > + * published by the Free Software Foundation. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program. If not, see <http://www.gnu.org/licenses/>. > + */ > +#ifndef __ASM_CACHETYPE_H > +#define __ASM_CACHETYPE_H > + > +#include <asm/cputype.h> > + > +#define CTR_L1IP_SHIFT 14 > +#define CTR_L1IP_MASK 3 > + > +#define ICACHE_POLICY_RESERVED 0 > +#define ICACHE_POLICY_AIVIVT 1 > +#define ICACHE_POLICY_VIPT 2 > +#define ICACHE_POLICY_PIPT 3 > + > +static inline u32 icache_policy(void) > +{ > + return (read_cpuid_cachetype() >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK; > +} > + > +/* > + * Whilst the D-side always behaves as PIPT on AArch64, aliasing is > + * permitted in the I-cache. 
> + */ > +static inline int icache_is_aliasing(void) > +{ > + return icache_policy() != ICACHE_POLICY_PIPT; > +} > + > +static inline int icache_is_aivivt(void) > +{ > + return icache_policy() == ICACHE_POLICY_AIVIVT; > +} > + > +#endif /* __ASM_CACHETYPE_H */ > diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S > new file mode 100644 > index 0000000..3df0aa7 > --- /dev/null > +++ b/arch/arm64/mm/cache.S > @@ -0,0 +1,251 @@ > +/* > + * Cache maintenance > + * > + * Copyright (C) 2001 Deep Blue Solutions Ltd. > + * Copyright (C) 2012 ARM Ltd. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 as > + * published by the Free Software Foundation. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program. If not, see <http://www.gnu.org/licenses/>. > + */ > + > +#include <linux/linkage.h> > +#include <linux/init.h> > +#include <asm/assembler.h> > + > +#include "proc-macros.S" > + > +/* > + * __flush_dcache_all() > + * > + * Flush the whole D-cache. > + * > + * Corrupted registers: x0-x7, x9-x11 > + */ > +ENTRY(__flush_dcache_all) > + dsb sy // ensure ordering with previous memory accesses > + mrs x0, clidr_el1 // read clidr > + and x3, x0, #0x7000000 // extract loc from clidr > + lsr x3, x3, #23 // left align loc bit field > + cbz x3, finished // if loc is 0, then no need to clean > + mov x10, #0 // start clean at cache level 0 > +loop1: > + add x2, x10, x10, lsr #1 // work out 3x current cache level > + lsr x1, x0, x2 // extract cache type bits from clidr > + and x1, x1, #7 // mask of the bits for current cache only > + cmp x1, #2 // see what cache we have at this level > + b.lt skip // skip if no cache, or just i-cache > + save_and_disable_irqs x9 // make CSSELR and CCSIDR access atomic > + msr csselr_el1, x10 // select current cache level in csselr > + isb // isb to sych the new cssr&csidr > + mrs x1, ccsidr_el1 // read the new ccsidr > + restore_irqs x9 > + and x2, x1, #7 // extract the length of the cache lines > + add x2, x2, #4 // add 4 (line length offset) > + mov x4, #0x3ff > + and x4, x4, x1, lsr #3 // find maximum number on the way size > + clz x5, x4 // find bit position of way size increment > + mov x7, #0x7fff > + and x7, x7, x1, lsr #13 // extract max number of the index size > +loop2: > + mov x9, x4 // create working copy of max way size > +loop3: > + lsl x6, x9, x5 > + orr x11, x10, x6 // factor way and cache number into x11 > + lsl x6, x7, x2 > + orr x11, x11, x6 // factor index number into x11 > + dc cisw, x11 // clean & invalidate by set/way > + subs x9, x9, #1 // decrement the way > + b.ge loop3 > + subs x7, x7, #1 // decrement the index > + b.ge loop2 > +skip: > + add x10, x10, #2 // increment cache number > + cmp x3, x10 > + b.gt loop1 > +finished: > + mov x10, #0 // swith back to cache level 0 > + msr csselr_el1, x10 // select current cache level in csselr > + dsb sy > + isb > + ret > +ENDPROC(__flush_dcache_all) > + > +/* > + * flush_cache_all() > + * > + * Flush the entire cache system. The data cache flush is now achieved > + * using atomic clean / invalidates working outwards from L1 cache. 
This > + * is done using Set/Way based cache maintainance instructions. The > + * instruction cache can still be invalidated back to the point of > + * unification in a single instruction. > + */ > +ENTRY(flush_cache_all) > + mov x12, lr > + bl __flush_dcache_all > + mov x0, #0 > + ic ialluis // I+BTB cache invalidate > + ret x12 > +ENDPROC(flush_cache_all) > + > +/* > + * flush_icache_range(start,end) > + * > + * Ensure that the I and D caches are coherent within specified region. > + * This is typically used when code has been written to a memory region, > + * and will be executed. > + * > + * - start - virtual start address of region > + * - end - virtual end address of region > + */ > +ENTRY(flush_icache_range) > + /* FALLTHROUGH */ > + > +/* > + * __flush_cache_user_range(start,end) > + * > + * Ensure that the I and D caches are coherent within specified region. > + * This is typically used when code has been written to a memory region, > + * and will be executed. > + * > + * - start - virtual start address of region > + * - end - virtual end address of region > + */ > +ENTRY(__flush_cache_user_range) > + dcache_line_size x2, x3 > + sub x3, x2, #1 > + bic x4, x0, x3 > +1: > +USER(9f, dc cvau, x4 ) // clean D line to PoU > + add x4, x4, x2 > + cmp x4, x1 > + b.lo 1b > + dsb sy > + > + icache_line_size x2, x3 > + sub x3, x2, #1 > + bic x4, x0, x3 > +1: > +USER(9f, ic ivau, x4 ) // invalidate I line PoU > + add x4, x4, x2 > + cmp x4, x1 > + b.lo 1b > +9: // ignore any faulting cache operation > + dsb sy > + isb > + ret > +ENDPROC(flush_icache_range) > +ENDPROC(__flush_cache_user_range) > + > +/* > + * __flush_kern_dcache_page(kaddr) Should be: __flush_dcache_area(kaddr,size) > + * > + * Ensure that the data held in the page kaddr is written back to the > + * page in question. s/page/area > + * > + * - kaddr - kernel address > + * - size - size in question > + */ > +ENTRY(__flush_dcache_area) > + dcache_line_size x2, x3 > + add x1, x0, x1 > + sub x3, x2, #1 > + bic x0, x0, x3 > +1: dc civac, x0 // clean & invalidate D line / unified line > + add x0, x0, x2 > + cmp x0, x1 > + b.lo 1b > + dsb sy > + ret > +ENDPROC(__flush_dcache_area) > + > +/* > + * dmac_inv_range(start,end) > + * > + * Invalidate the data cache within the specified region; we will be > + * performing a DMA operation in this region and we want to purge old > + * data in the cache. 
> + * > + * - start - virtual start address of region > + * - end - virtual end address of region > + */ > +ENTRY(dmac_inv_range) > + dcache_line_size x2, x3 > + sub x3, x2, #1 > + bic x0, x0, x3 > + bic x1, x1, x3 > +1: dc ivac, x0 // invalidate D / U line > + add x0, x0, x2 > + cmp x0, x1 > + b.lo 1b > + dsb sy > + ret > +ENDPROC(dmac_inv_range) > + > +/* > + * dmac_clean_range(start,end) > + * - start - virtual start address of region > + * - end - virtual end address of region > + */ > +ENTRY(dmac_clean_range) > + dcache_line_size x2, x3 > + sub x3, x2, #1 > + bic x0, x0, x3 > +1: dc cvac, x0 // clean D / U line > + add x0, x0, x2 > + cmp x0, x1 > + b.lo 1b > + dsb sy > + ret > +ENDPROC(dmac_clean_range) > + > +/* > + * dmac_flush_range(start,end) > + * - start - virtual start address of region > + * - end - virtual end address of region > + */ > +ENTRY(dmac_flush_range) > + dcache_line_size x2, x3 > + sub x3, x2, #1 > + bic x0, x0, x3 > +1: dc civac, x0 // clean & invalidate D / U line > + add x0, x0, x2 > + cmp x0, x1 > + b.lo 1b > + dsb sy > + ret > +ENDPROC(dmac_flush_range) > + > +/* > + * dmac_map_area(start, size, dir) > + * - start - kernel virtual start address > + * - size - size of region > + * - dir - DMA direction > + */ > +ENTRY(dmac_map_area) > + add x1, x1, x0 > + cmp x2, #DMA_FROM_DEVICE > + b.eq dmac_inv_range > + b dmac_clean_range > +ENDPROC(dmac_map_area) > + > +/* > + * dmac_unmap_area(start, size, dir) > + * - start - kernel virtual start address > + * - size - size of region > + * - dir - DMA direction > + */ > +ENTRY(dmac_unmap_area) > + add x1, x1, x0 > + cmp x2, #DMA_TO_DEVICE > + b.ne dmac_inv_range > + ret > +ENDPROC(dmac_unmap_area) > diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c > new file mode 100644 > index 0000000..6138d86 > --- /dev/null > +++ b/arch/arm64/mm/flush.c > @@ -0,0 +1,138 @@ > +/* > + * Based on arch/arm/mm/flush.c > + * > + * Copyright (C) 1995-2002 Russell King > + * Copyright (C) 2012 ARM Ltd. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 as > + * published by the Free Software Foundation. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
> + */ > + > +#include <linux/export.h> > +#include <linux/mm.h> > +#include <linux/pagemap.h> > + > +#include <asm/cacheflush.h> > +#include <asm/cachetype.h> > +#include <asm/tlbflush.h> > + > +#include "mm.h" > + > +void flush_cache_mm(struct mm_struct *mm) > +{ > +} > + > +void flush_cache_range(struct vm_area_struct *vma, unsigned long start, > + unsigned long end) > +{ > + if (vma->vm_flags & VM_EXEC) > + __flush_icache_all(); > +} > + > +void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, > + unsigned long pfn) > +{ > +} > + > +static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, > + unsigned long uaddr, void *kaddr, > + unsigned long len) > +{ > + if (vma->vm_flags & VM_EXEC) { > + unsigned long addr = (unsigned long)kaddr; > + if (icache_is_aliasing()) { > + __flush_dcache_area(kaddr, len); > + __flush_icache_all(); > + } else { > + flush_icache_range(addr, addr + len); > + } > + } > +} > + > +/* > + * Copy user data from/to a page which is mapped into a different processes > + * address space. Really, we want to allow our "user space" model to handle > + * this. > + * > + * Note that this code needs to run on the current CPU. > + */ > +void copy_to_user_page(struct vm_area_struct *vma, struct page *page, > + unsigned long uaddr, void *dst, const void *src, > + unsigned long len) > +{ > +#ifdef CONFIG_SMP > + preempt_disable(); > +#endif > + memcpy(dst, src, len); > + flush_ptrace_access(vma, page, uaddr, dst, len); > +#ifdef CONFIG_SMP > + preempt_enable(); > +#endif > +} > + > +void __flush_dcache_page(struct address_space *mapping, struct page *page) > +{ > + __flush_dcache_area(page_address(page), PAGE_SIZE); > +} > + > +void __sync_icache_dcache(pte_t pte) > +{ > + unsigned long pfn; > + struct page *page; > + > + pfn = pte_pfn(pte); > + if (!pfn_valid(pfn)) > + return; > + > + page = pfn_to_page(pfn); > + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) > + __flush_dcache_page(NULL, page); > + __flush_icache_all(); > +} > + > +/* > + * Ensure cache coherency between kernel mapping and userspace mapping of this > + * page. > + */ > +void flush_dcache_page(struct page *page) > +{ > + struct address_space *mapping; > + > + /* > + * The zero page is never written to, so never has any dirty cache > + * lines, and therefore never needs to be flushed. > + */ > + if (page == ZERO_PAGE(0)) > + return; > + > + mapping = page_mapping(page); > + > + if (mapping && !mapping_mapped(mapping)) > + clear_bit(PG_dcache_clean, &page->flags); > + else { > + __flush_dcache_page(mapping, page); > + if (mapping) > + __flush_icache_all(); Is this necessary to ensure I/D coherency? Then, I would have expected if (mapping) { __flush_dcache_page(mapping, page); __flush_icache_all(); } similar to __sync_icache_dcache() above. What is the reason why the D-cache flush is done in different cases than the following I-cache flush? > + set_bit(PG_dcache_clean, &page->flags); > + } > +} > +EXPORT_SYMBOL(flush_dcache_page); > + > +void __flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) > +{ > +} Note that the __flush_dcache_page(mapping, page) in flush_dcache_page() above is called when page is an anonymous page (since mapping == NULL in this case). If the call to __flush_dcache_page() is right above, it should be needed here as well? > + > +/* > + * Additional functions defined in assembly. > + */ > +EXPORT_SYMBOL(flush_cache_all); > +EXPORT_SYMBOL(flush_icache_range); > > - Simon
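For readers less fluent in A64 assembly: every by-range routine in cache.S follows the same pattern — round the start address down to a cache-line boundary, issue one `dc` operation per line, then `dsb sy`. A rough C equivalent of __flush_dcache_area() (a sketch only; cache_line_size_bytes() is a stand-in for the dcache_line_size macro, which reads CTR_EL0):

```c
#include <stddef.h>

/* Stand-in for the dcache_line_size macro (reads CTR_EL0 in the patch). */
extern unsigned long cache_line_size_bytes(void);

static void flush_dcache_area_sketch(void *kaddr, size_t size)
{
	unsigned long line = cache_line_size_bytes();
	unsigned long end  = (unsigned long)kaddr + size;
	unsigned long addr = (unsigned long)kaddr & ~(line - 1); /* round down */

	for (; addr < end; addr += line)
		/* clean & invalidate each line to the point of coherency */
		asm volatile("dc civac, %0" : : "r" (addr) : "memory");
	asm volatile("dsb sy" : : : "memory");	/* wait for completion */
}
```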
On Fri, Sep 07, 2012 at 08:28:09PM +0100, Arnd Bergmann wrote: > On Friday 07 September 2012, Catalin Marinas wrote: > > + > > +/* > > + * dmac_inv_range(start,end) > > all of these appear to be unused now. Can you remove them? They aren't currently used but I expect some partners to make use of them in their private development. I could drop them from the upstream branch temporarily.
On Mon, 10 Sep 2012, Catalin Marinas wrote: > On Fri, Sep 07, 2012 at 08:28:09PM +0100, Arnd Bergmann wrote: > > On Friday 07 September 2012, Catalin Marinas wrote: > > > + > > > +/* > > > + * dmac_inv_range(start,end) > > > > all of these appear to be unused now. Can you remove them? > > They aren't currently used but I expect some partners to make use of > them on their private development. I could drop them from the upstream > branch temporarily. Depends. If your expectation from those partners is a real probability then it is best to keep a skeleton in place for them to use, otherwise they might get ... creative. If this is just wild speculation then you should drop it. Nicolas
Hi Simon, On Fri, Sep 07, 2012 at 08:35:42PM +0100, Simon Baatz wrote: > On Fri, Sep 07, 2012 at 05:26:44PM +0100, Catalin Marinas wrote: > > +#define ARCH_HAS_FLUSH_ANON_PAGE > > +static inline void flush_anon_page(struct vm_area_struct *vma, > > + struct page *page, unsigned long vmaddr) > > +{ > > + extern void __flush_anon_page(struct vm_area_struct *vma, > > + struct page *, unsigned long); > > + if (PageAnon(page)) > > + __flush_anon_page(vma, page, vmaddr); > > > __flush_anon_page() does nothing. Shouldn't this be removed as well? Yes, good point. > > +void __flush_dcache_page(struct address_space *mapping, struct page *page) > > +{ > > + __flush_dcache_area(page_address(page), PAGE_SIZE); > > +} > > + > > +void __sync_icache_dcache(pte_t pte) > > +{ > > + unsigned long pfn; > > + struct page *page; > > + > > + pfn = pte_pfn(pte); > > + if (!pfn_valid(pfn)) > > + return; > > + > > + page = pfn_to_page(pfn); > > + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) > > + __flush_dcache_page(NULL, page); > > + __flush_icache_all(); > > +} > > + > > +/* > > + * Ensure cache coherency between kernel mapping and userspace mapping of this > > + * page. > > + */ > > +void flush_dcache_page(struct page *page) > > +{ > > + struct address_space *mapping; > > + > > + /* > > + * The zero page is never written to, so never has any dirty cache > > + * lines, and therefore never needs to be flushed. > > + */ > > + if (page == ZERO_PAGE(0)) > > + return; > > + > > + mapping = page_mapping(page); > > + > > + if (mapping && !mapping_mapped(mapping)) > > + clear_bit(PG_dcache_clean, &page->flags); > > + else { > > + __flush_dcache_page(mapping, page); > > + if (mapping) > > + __flush_icache_all(); > > > Is this necessary to ensure I/D coherency? Then, I would have > expected > > if (mapping) { > __flush_dcache_page(mapping, page); > __flush_icache_all(); > } > > similar to __sync_icache_dcache() above. We don't want to do additional flushing if !mapping_mapped() as the page isn't mapped in user space. In this case we defer the flushing until __sync_icache_dcache(). The other case is for anonymous pages where mapping == NULL. Here we don't defer the D-cache flush and do it directly. The I-cache, if needed, is handled later in __sync_icache_dcache(). This was based on the idea that this case is mainly for the args/env page which is mapped shortly after anyway, so not worth deferring. On AArch64, I don't think it makes any difference. Maybe a slight improvement (at least in clarity) in flush_dcache_page(): if (mapping && mapping_mapped(mapping)) { __flush_dcache_page(page); __flush_icache_all(); set_bit(PG_dcache_clean, &page->flags); } else { clear_bit(PG_dcache_clean, &page->flags); } In this case the anonymous page flushing is deferred to __sync_icache_dcache(). > What is the reason why the D-cache flush is done in different > cases than the following I-cache flush? For __sync_icache_dcache(), we need to handle the situation where the page mapped into user space has been cleaned (D-cache) but there may be stale data in the I-cache. I think this can only happen with an ASID-tagged VIVT I-cache configuration (which is allowed on AArch64) if an existing page has been unmapped and the same virtual address remapped (withing the same mm context) to a different page that had been cleaned previously. 
We could optimise the __sync_icache_dcache() as below: if (!test_and_set_bit(PG_dcache_clean, &page->flags)) { __flush_dcache_page(page); __flush_icache_all(); } else if (icache_is_aivivt()) { __flush_icache_all(); } > > + set_bit(PG_dcache_clean, &page->flags); > > + } > > +} > > +EXPORT_SYMBOL(flush_dcache_page); > > + > > +void __flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) > > +{ > > +} > > Note that the __flush_dcache_page(mapping, page) in > flush_dcache_page() above is called when page is an anonymous page > (since mapping == NULL in this case). If the call to > __flush_dcache_page() is right above, it should be needed > here as well? flush_anon_page() is called when the kernel needs to access an anonymous page. Given that the D-cache behaves like a PIPT, there is no need for additional flushing here. The flush_dcache_page() call was based on the assumption that such a page needs flushing anyway and it's not worth deferring. But the code may be easier to understand as I suggested above (and slightly more optimal for the VIPT I-cache case). It looks like every other architecture does something different here.
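Putting the two snippets from this exchange together, the proposed restructuring would look roughly like this (an untested sketch assembled from the thread, keeping the patch's two-argument __flush_dcache_page()):

```c
void flush_dcache_page(struct page *page)
{
	struct address_space *mapping;

	/* The zero page is never written to, so never needs flushing. */
	if (page == ZERO_PAGE(0))
		return;

	mapping = page_mapping(page);
	if (mapping && mapping_mapped(mapping)) {
		/* Already user-visible: flush now, both D and I side. */
		__flush_dcache_page(mapping, page);
		__flush_icache_all();
		set_bit(PG_dcache_clean, &page->flags);
	} else {
		/* Anonymous or not yet mapped: defer to __sync_icache_dcache(). */
		clear_bit(PG_dcache_clean, &page->flags);
	}
}

void __sync_icache_dcache(pte_t pte)
{
	unsigned long pfn = pte_pfn(pte);
	struct page *page;

	if (!pfn_valid(pfn))
		return;

	page = pfn_to_page(pfn);
	if (!test_and_set_bit(PG_dcache_clean, &page->flags)) {
		__flush_dcache_page(NULL, page);
		__flush_icache_all();
	} else if (icache_is_aivivt()) {
		/* ASID-tagged VIVT I-caches may still hold stale lines. */
		__flush_icache_all();
	}
}
```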
Hi Catalin, On Wed, Sep 12, 2012 at 10:29:54AM +0100, Catalin Marinas wrote: > > > > +void __flush_dcache_page(struct address_space *mapping, struct page *page) > > > +{ > > > + __flush_dcache_area(page_address(page), PAGE_SIZE); > > > +} > > > + > > > +void __sync_icache_dcache(pte_t pte) > > > +{ > > > + unsigned long pfn; > > > + struct page *page; > > > + > > > + pfn = pte_pfn(pte); > > > + if (!pfn_valid(pfn)) > > > + return; > > > + > > > + page = pfn_to_page(pfn); > > > + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) > > > + __flush_dcache_page(NULL, page); > > > + __flush_icache_all(); > > > +} > > > + > > > +/* > > > + * Ensure cache coherency between kernel mapping and userspace mapping of this > > > + * page. > > > + */ > > > +void flush_dcache_page(struct page *page) > > > +{ > > > + struct address_space *mapping; > > > + > > > + /* > > > + * The zero page is never written to, so never has any dirty cache > > > + * lines, and therefore never needs to be flushed. > > > + */ > > > + if (page == ZERO_PAGE(0)) > > > + return; > > > + > > > + mapping = page_mapping(page); > > > + > > > + if (mapping && !mapping_mapped(mapping)) > > > + clear_bit(PG_dcache_clean, &page->flags); > > > + else { > > > + __flush_dcache_page(mapping, page); > > > + if (mapping) > > > + __flush_icache_all(); > > > > > > Is this necessary to ensure I/D coherency? Then, I would have > > expected > > > > if (mapping) { > > __flush_dcache_page(mapping, page); > > __flush_icache_all(); > > } > > > > similar to __sync_icache_dcache() above. > > We don't want to do additional flushing if !mapping_mapped() as the page > isn't mapped in user space. In this case we defer the flushing until > __sync_icache_dcache(). > > The other case is for anonymous pages where mapping == NULL. Here we > don't defer the D-cache flush and do it directly. The I-cache, if > needed, is handled later in __sync_icache_dcache(). This was based on > the idea that this case is mainly for the args/env page which is mapped > shortly after anyway, so not worth deferring. On AArch64, I don't think > it makes any difference. Maybe a slight improvement (at least in > clarity) in flush_dcache_page(): > > if (mapping && mapping_mapped(mapping)) { > __flush_dcache_page(page); > __flush_icache_all(); > set_bit(PG_dcache_clean, &page->flags); > } else { > clear_bit(PG_dcache_clean, &page->flags); > } > > In this case the anonymous page flushing is deferred to > __sync_icache_dcache(). Yes, I think this is much clearer. It makes clear that the D-cache flush is done to avoid I/D incoherencies. Previously, the obvious question was "Why do we flush only the D-cache in some situations given that it essentially behaves like a PIPT cache?" (the motivation for this became only clear to me after your explanation.) However, the reason why this was an obvious question for me is interesting: I think the main use case you had in mind does not happen since about five years (since commit b6a2fe, "mm: variable length argument support"). And I had a completely different main use case in mind. I reckon that the use case you refer to is the use in fs/exec.c? Copying arg/env was changed in the commit mentioned above. This also changed the use of flush_dcache_page() (which is not supposed to handle anon pages) into flush_kernel_dcache_page() (which is supposed to handle kernel modified user pages, i.e. also anon pages). 
Nevertheless, the __flush_dcache_page(mapping, page) in the mapping==NULL case is absolutely necessary for aliasing D-caches on arm for the use case I had in mind (which does not apply to arm64 and thus my question). In case of direct I/O (and probably also in other cases like SG_IO) the block layer will see pages from get_user_pages() directly, i.e. also anonymous pages. Many drivers (especially emulated storage drivers like dm-crypt) use flush_dcache_page() after modifying a page. Although flush_dcache_page() is not even supposed to handle anonymous pages, it flushes the kernel mapping of the page because of this code line and everything is well on aliasing D-caches. Ironically, flush_kernel_dcache_page(), which is specifically designed to handle this case, does not on arm. Thus, those few parts of the kernel which use flush_kernel_dcache_page() may fail horribly (for example the scatterlist memory iterator API, see [1]). Back to arm64 (and possibly to arm with non-aliasing D-caches?), this also means that the saved D-cache flush in the anonymous page case is not only a slight improvement on clarity, but may avoid a considerable number of D-cache flushes in some I/O situations. (If it is still correct that there are no problems with the I-cache for this use case.) If now we could additionally avoid flushing the entire I-cache for every page in direct I/O operations with user mapped page cache pages (e.g. direct I/O read into an mmap region)... > > What is the reason why the D-cache flush is done in different > > cases than the following I-cache flush? > > For __sync_icache_dcache(), we need to handle the situation where the > page mapped into user space has been cleaned (D-cache) but there may be > stale data in the I-cache. I think this can only happen with an > ASID-tagged VIVT I-cache configuration (which is allowed on AArch64) if > an existing page has been unmapped and the same virtual address remapped > (withing the same mm context) to a different page that had been cleaned > previously. We could optimise the __sync_icache_dcache() as below: > > if (!test_and_set_bit(PG_dcache_clean, &page->flags)) { > __flush_dcache_page(page); > __flush_icache_all(); > } else if (icache_is_aivivt()) { > __flush_icache_all(); > } Sorry, this is out of my depth. I think I don't really understand the cases leading to I/D incoherency. - Simon [1] http://lists.infradead.org/pipermail/linux-arm-kernel/2012-July/111393.html PS: You did not mention the following comment from my mail. It was easy to overlook. Just to make sure you did not miss it: > diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S > new file mode 100644 > index 0000000..3df0aa7 > --- /dev/null > +++ b/arch/arm64/mm/cache.S ... > +/* > + * __flush_kern_dcache_page(kaddr) Should be: __flush_dcache_area(kaddr,size) > + * > + * Ensure that the data held in the page kaddr is written back > to the > + * page in question. s/page/area > + * > + * - kaddr - kernel address > + * - size - size in question > + */ > +ENTRY(__flush_dcache_area) > + dcache_line_size x2, x3 > + add x1, x0, x1 > +
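The direct-I/O case Simon describes, reduced to its essentials (illustrative only; the helper below is made up):

```c
#include <linux/highmem.h>
#include <linux/mm.h>

/*
 * A driver (dm-crypt, SG_IO, ...) writing into a page handed to it by
 * get_user_pages(). The page may be anonymous, yet the hook most
 * drivers actually call after modifying it is flush_dcache_page().
 */
static void fill_user_page(struct page *page, const void *src, size_t len)
{
	void *kaddr = kmap(page);	/* kernel alias of the user page */

	memcpy(kaddr, src, len);	/* e.g. decrypted or emulated-device data */
	flush_dcache_page(page);	/* what drivers call in practice ... */
	/* ... although flush_kernel_dcache_page() is the documented hook */
	kunmap(page);
}
```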
On Wed, Sep 12, 2012 at 10:55:54PM +0100, Simon Baatz wrote: > On Wed, Sep 12, 2012 at 10:29:54AM +0100, Catalin Marinas wrote: > > > > +void __flush_dcache_page(struct address_space *mapping, struct page *page) > > > > +{ > > > > + __flush_dcache_area(page_address(page), PAGE_SIZE); > > > > +} > > > > + > > > > +void __sync_icache_dcache(pte_t pte) > > > > +{ > > > > + unsigned long pfn; > > > > + struct page *page; > > > > + > > > > + pfn = pte_pfn(pte); > > > > + if (!pfn_valid(pfn)) > > > > + return; > > > > + > > > > + page = pfn_to_page(pfn); > > > > + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) > > > > + __flush_dcache_page(NULL, page); > > > > + __flush_icache_all(); > > > > +} > > > > + > > > > +/* > > > > + * Ensure cache coherency between kernel mapping and userspace mapping of this > > > > + * page. > > > > + */ > > > > +void flush_dcache_page(struct page *page) > > > > +{ > > > > + struct address_space *mapping; > > > > + > > > > + /* > > > > + * The zero page is never written to, so never has any dirty cache > > > > + * lines, and therefore never needs to be flushed. > > > > + */ > > > > + if (page == ZERO_PAGE(0)) > > > > + return; > > > > + > > > > + mapping = page_mapping(page); > > > > + > > > > + if (mapping && !mapping_mapped(mapping)) > > > > + clear_bit(PG_dcache_clean, &page->flags); > > > > + else { > > > > + __flush_dcache_page(mapping, page); > > > > + if (mapping) > > > > + __flush_icache_all(); > > > > > > > > > Is this necessary to ensure I/D coherency? Then, I would have > > > expected > > > > > > if (mapping) { > > > __flush_dcache_page(mapping, page); > > > __flush_icache_all(); > > > } > > > > > > similar to __sync_icache_dcache() above. > > > > We don't want to do additional flushing if !mapping_mapped() as the page > > isn't mapped in user space. In this case we defer the flushing until > > __sync_icache_dcache(). > > > > The other case is for anonymous pages where mapping == NULL. Here we > > don't defer the D-cache flush and do it directly. The I-cache, if > > needed, is handled later in __sync_icache_dcache(). This was based on > > the idea that this case is mainly for the args/env page which is mapped > > shortly after anyway, so not worth deferring. On AArch64, I don't think > > it makes any difference. Maybe a slight improvement (at least in > > clarity) in flush_dcache_page(): > > > > if (mapping && mapping_mapped(mapping)) { > > __flush_dcache_page(page); > > __flush_icache_all(); > > set_bit(PG_dcache_clean, &page->flags); > > } else { > > clear_bit(PG_dcache_clean, &page->flags); > > } > > > > In this case the anonymous page flushing is deferred to > > __sync_icache_dcache(). > > Yes, I think this is much clearer. It makes clear that the D-cache > flush is done to avoid I/D incoherencies. Previously, the obvious > question was "Why do we flush only the D-cache in some situations > given that it essentially behaves like a PIPT cache?" (the motivation > for this became only clear to me after your explanation.) > > However, the reason why this was an obvious question for me is > interesting: I think the main use case you had in mind does not > happen since about five years (since commit b6a2fe, "mm: variable > length argument support"). And I had a completely different main use > case in mind. > > I reckon that the use case you refer to is the use in fs/exec.c? > Copying arg/env was changed in the commit mentioned above. 
This also > changed the use of flush_dcache_page() (which is not supposed to > handle anon pages) into flush_kernel_dcache_page() (which is supposed > to handle kernel modified user pages, i.e. also anon pages). That's what I was thinking about, thanks for the pointer. I still get a flush_dcache_page() call (on the source page) from copy_strings() via get_user_page() but with my changes above it doesn't trigger any cache flushing (which is correct). > In case of direct I/O (and probably also in other cases like SG_IO) > the block layer will see pages from get_user_pages() directly, i.e. > also anonymous pages. Many drivers (especially emulated storage > drivers like dm-crypt) use flush_dcache_page() after modifying a > page. Although flush_dcache_page() is not even supposed to handle > anonymous pages, it flushes the kernel mapping of the page because of > this code line and everything is well on aliasing D-caches. According to the cachetlb.txt document (though not sure architecture ports follow it entirely), flush_dcache_page() deliberately shouldn't follow anonymous pages. But it seems that we do it on ARM (maybe as an alternative to flush_kernel_dcache_page()). > Back to arm64 (and possibly to arm with non-aliasing D-caches?), this > also means that the saved D-cache flush in the anonymous page case is > not only a slight improvement on clarity, but may avoid a > considerable number of D-cache flushes in some I/O situations. (If > it is still correct that there are no problems with the I-cache for > this use case.) The I-cache would be needed if the kernel modifies an executable user page. But I don't see a case for this yet. So with non-aliasing D-cache the flush_kernel_dcache_page() can be a nop. > If now we could additionally avoid to flush the entire I-cache for > every page in direct I/O operations with user mapped page cache > pages (e.g. direct I/O read into an mmap region)... If the page is already mapped, we don't have a later hook to be able to flush the caches, so we do it here. We can avoid the I-cache operation only if we are sure that the user would not execute code from such page. IOW the direct I/O wouldn't write any instructions. The powerpc implementation of flush_dcache_page() doesn't even check for the existence of a mapping, it always marks the page as dirty. We can do the same on arm64 (only leave the clear_bit part of the condition) as long as we know that the kernel wouldn't write new code into a page that is already mapped. > > > What is the reason why the D-cache flush is done in different > > > cases than the following I-cache flush? > > > > For __sync_icache_dcache(), we need to handle the situation where the > > page mapped into user space has been cleaned (D-cache) but there may be > > stale data in the I-cache. I think this can only happen with an > > ASID-tagged VIVT I-cache configuration (which is allowed on AArch64) if > > an existing page has been unmapped and the same virtual address remapped > > (withing the same mm context) to a different page that had been cleaned > > previously. We could optimise the __sync_icache_dcache() as below: > > > > if (!test_and_set_bit(PG_dcache_clean, &page->flags)) { > > __flush_dcache_page(page); > > __flush_icache_all(); > > } else if (icache_is_aivivt()) { > > __flush_icache_all(); > > } > > Sorry, this is out of my depth. I think I don't really understand the > cases leading to I/D incoherency. 
That's with a VIVT I-cache where a process mapped some file at a virtual address, unmapped it and remapped a different file at the same virtual address. For the latter file, the page may have already been cleaned by a different process, but our process still has stale I-cache entries from the previous mapping. With a physically tagged I-cache, this extra flushing wouldn't be necessary.
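Spelled out as a user-space sequence (pseudocode; the addresses, flags and fds are illustrative):

```c
/* One process throughout, hence one ASID. */
void *p = mmap(fixed_addr, len, PROT_READ | PROT_EXEC,
	       MAP_PRIVATE | MAP_FIXED, fd1, 0);
/* Execute from p: the VIVT I-cache now holds lines tagged (ASID, fixed_addr). */
munmap(p, len);
p = mmap(fixed_addr, len, PROT_READ | PROT_EXEC,
	 MAP_PRIVATE | MAP_FIXED, fd2, 0);
/*
 * Same VA and ASID, different physical page. If fd2's page was already
 * cleaned elsewhere (PG_dcache_clean set), only the unconditional
 * __flush_icache_all() in __sync_icache_dcache() would evict the stale
 * (ASID, fixed_addr) I-cache lines.
 */
```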
On Thu, Sep 13, 2012 at 01:38:50PM +0100, Catalin Marinas wrote: > On Wed, Sep 12, 2012 at 10:55:54PM +0100, Simon Baatz wrote: > > On Wed, Sep 12, 2012 at 10:29:54AM +0100, Catalin Marinas wrote: > ... > > > In case of direct I/O (and probably also in other cases like SG_IO) > > the block layer will see pages from get_user_pages() directly, i.e. > > also anonymous pages. Many drivers (especially emulated storage > > drivers like dm-crypt) use flush_dcache_page() after modifying a > > page. Although flush_dcache_page() is not even supposed to handle > > anonymous pages, it flushes the kernel mapping of the page because of > > this code line and everything is well on aliasing D-caches. > > According to the cachetlb.txt document (though not sure architecture > ports follow it entirely), flush_dcache_page() deliberately shouldn't > follow anonymous pages. But it seems that we do it on ARM (maybe as an > alternative to flush_kernel_dcache_page()). > > > Back to arm64 (and possibly to arm with non-aliasing D-caches?), this > > also means that the saved D-cache flush in the anonymous page case is > > not only a slight improvement on clarity, but may avoid a > > considerable number of D-cache flushes in some I/O situations. (If > > it is still correct that there are no problems with the I-cache for > > this use case.) > > The I-cache would be needed if the kernel modifies an executable user > page. But I don't see a case for this yet. So with non-aliasing D-cache > the flush_kernel_dcache_page() can be a nop. Ok, this is true for anon pages. But, if we really need to do the D/I flush for user mapped page cache pages in flush_dcache_page() then it should also be done by flush_kernel_dcache_page(). In general, both flush_dcache_page() and flush_kernel_dcache_page() need to handle the case in which the kernel modifies such a page. (This means that in effect, both functions should be the same in the arm64 case.) > > If now we could additionally avoid to flush the entire I-cache for > > every page in direct I/O operations with user mapped page cache > > pages (e.g. direct I/O read into an mmap region)... > > If the page is already mapped, we don't have a later hook to be able to > flush the caches, so we do it here. We can avoid the I-cache operation > only if we are sure that the user would not execute code from such page. > IOW the direct I/O wouldn't write any instructions. > > The powerpc implementation of flush_dcache_page() doesn't even check for > the existence of a mapping, it always marks the page as dirty. We can do > the same on arm64 (only leave the clear_bit part of the condition) as > long as we know that the kernel wouldn't write new code into a page that > is already mapped. Yes, but how do we know? - Simon
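Simon's conclusion, expressed as code: if flush_dcache_page() needs the D/I flush for user-mapped page cache pages, then on arm64's non-aliasing D-cache the two hooks would collapse into one (hypothetical; the patch under review defines neither this way):

```c
/* If both hooks must handle kernel writes to user-mapped pages, a
 * non-aliasing D-cache leaves no difference between them: */
static inline void flush_kernel_dcache_page(struct page *page)
{
	flush_dcache_page(page);
}
```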
On Mon, Sep 10, 2012 at 06:29:21PM +0100, Nicolas Pitre wrote: > On Mon, 10 Sep 2012, Catalin Marinas wrote: > > > On Fri, Sep 07, 2012 at 08:28:09PM +0100, Arnd Bergmann wrote: > > > On Friday 07 September 2012, Catalin Marinas wrote: > > > > + > > > > +/* > > > > + * dmac_inv_range(start,end) > > > > > > all of these appear to be unused now. Can you remove them? > > > > They aren't currently used but I expect some partners to make use of > > them on their private development. I could drop them from the upstream > > branch temporarily. > > Depends. If your expectation from those partners is a real probability > then it is best to keep a skeleton in place for them to use, otherwise > they might get ... creative. If this is just wild speculation then you > should drop it. I dropped them for now since there is no API using those functions. But I have a feeling they will be needed at some point (so I'll keep the patches around).
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h new file mode 100644 index 0000000..390308a --- /dev/null +++ b/arch/arm64/include/asm/cache.h @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_CACHE_H +#define __ASM_CACHE_H + +#define L1_CACHE_SHIFT 6 +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +/* + * Memory returned by kmalloc() may be used for DMA, so we must make + * sure that all such allocations are cache aligned. Otherwise, + * unrelated code may cause parts of the buffer to be read into the + * cache before the transfer is done, causing old data to be seen by + * the CPU. + */ +#define ARCH_DMA_MINALIGN L1_CACHE_BYTES +#define ARCH_SLAB_MINALIGN 8 + +#endif diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h new file mode 100644 index 0000000..0bb9853 --- /dev/null +++ b/arch/arm64/include/asm/cacheflush.h @@ -0,0 +1,168 @@ +/* + * Based on arch/arm/include/asm/cacheflush.h + * + * Copyright (C) 1999-2002 Russell King. + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_CACHEFLUSH_H +#define __ASM_CACHEFLUSH_H + +#include <linux/mm.h> + +/* + * This flag is used to indicate that the page pointed to by a pte is clean + * and does not require cleaning before returning it to the user. + */ +#define PG_dcache_clean PG_arch_1 + +/* + * MM Cache Management + * =================== + * + * The arch/arm64/mm/cache.S implements these methods. + * + * Start addresses are inclusive and end addresses are exclusive; start + * addresses should be rounded down, end addresses up. + * + * See Documentation/cachetlb.txt for more information. Please note that + * the implementation assumes non-aliasing VIPT D-cache and (aliasing) + * VIPT or ASID-tagged VIVT I-cache. + * + * flush_cache_all() + * + * Unconditionally clean and invalidate the entire cache. + * + * flush_cache_mm(mm) + * + * Clean and invalidate all user space cache entries + * before a change of page tables. + * + * flush_icache_range(start, end) + * + * Ensure coherency between the I-cache and the D-cache in the + * region described by start, end. + * - start - virtual start address + * - end - virtual end address + * + * __flush_cache_user_range(start, end) + * + * Ensure coherency between the I-cache and the D-cache in the + * region described by start, end. 
+ * - start - virtual start address + * - end - virtual end address + * + * __flush_dcache_area(kaddr, size) + * + * Ensure that the data held in page is written back. + * - kaddr - page address + * - size - region size + */ +extern void flush_cache_all(void); +extern void flush_cache_mm(struct mm_struct *mm); +extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn); +extern void flush_icache_range(unsigned long start, unsigned long end); +extern void __flush_dcache_area(void *addr, size_t len); +extern void __flush_cache_user_range(unsigned long start, unsigned long end); + +/* + * These are private to the dma-mapping API. Do not use directly. + * Their sole purpose is to ensure that data held in the cache + * is visible to DMA, or data written by DMA to system memory is + * visible to the CPU. + */ +extern void dmac_map_area(const void *, size_t, int); +extern void dmac_unmap_area(const void *, size_t, int); +extern void dmac_flush_range(const void *, const void *); + +/* + * Copy user data from/to a page which is mapped into a different + * processes address space. Really, we want to allow our "user + * space" model to handle this. + */ +extern void copy_to_user_page(struct vm_area_struct *, struct page *, + unsigned long, void *, const void *, unsigned long); +#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ + do { \ + memcpy(dst, src, len); \ + } while (0) + +#define flush_cache_dup_mm(mm) flush_cache_mm(mm) + +/* + * flush_dcache_page is used when the kernel has written to the page + * cache page at virtual address page->virtual. + * + * If this page isn't mapped (ie, page_mapping == NULL), or it might + * have userspace mappings, then we _must_ always clean + invalidate + * the dcache entries associated with the kernel mapping. + * + * Otherwise we can defer the operation, and clean the cache when we are + * about to change to user space. This is the same method as used on SPARC64. + * See update_mmu_cache for the user space part. + */ +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 +extern void flush_dcache_page(struct page *); + +static inline void __flush_icache_all(void) +{ + asm("ic ialluis"); +} + +#define ARCH_HAS_FLUSH_ANON_PAGE +static inline void flush_anon_page(struct vm_area_struct *vma, + struct page *page, unsigned long vmaddr) +{ + extern void __flush_anon_page(struct vm_area_struct *vma, + struct page *, unsigned long); + if (PageAnon(page)) + __flush_anon_page(vma, page, vmaddr); +} + +#define flush_dcache_mmap_lock(mapping) \ + spin_lock_irq(&(mapping)->tree_lock) +#define flush_dcache_mmap_unlock(mapping) \ + spin_unlock_irq(&(mapping)->tree_lock) + +#define flush_icache_user_range(vma,page,addr,len) \ + flush_dcache_page(page) + +/* + * We don't appear to need to do anything here. In fact, if we did, we'd + * duplicate cache flushing elsewhere performed by flush_dcache_page(). + */ +#define flush_icache_page(vma,page) do { } while (0) + +/* + * flush_cache_vmap() is used when creating mappings (eg, via vmap, + * vmalloc, ioremap etc) in kernel space for pages. On non-VIPT + * caches, since the direct-mappings of these pages may contain cached + * data, we need to do a full cache flush to ensure that writebacks + * don't corrupt data placed into these pages via the new mappings. 
+ */ +static inline void flush_cache_vmap(unsigned long start, unsigned long end) +{ + /* + * set_pte_at() called from vmap_pte_range() does not + * have a DSB after cleaning the cache line. + */ + dsb(); +} + +static inline void flush_cache_vunmap(unsigned long start, unsigned long end) +{ +} + +#endif diff --git a/arch/arm64/include/asm/cachetype.h b/arch/arm64/include/asm/cachetype.h new file mode 100644 index 0000000..85f5f51 --- /dev/null +++ b/arch/arm64/include/asm/cachetype.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_CACHETYPE_H +#define __ASM_CACHETYPE_H + +#include <asm/cputype.h> + +#define CTR_L1IP_SHIFT 14 +#define CTR_L1IP_MASK 3 + +#define ICACHE_POLICY_RESERVED 0 +#define ICACHE_POLICY_AIVIVT 1 +#define ICACHE_POLICY_VIPT 2 +#define ICACHE_POLICY_PIPT 3 + +static inline u32 icache_policy(void) +{ + return (read_cpuid_cachetype() >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK; +} + +/* + * Whilst the D-side always behaves as PIPT on AArch64, aliasing is + * permitted in the I-cache. + */ +static inline int icache_is_aliasing(void) +{ + return icache_policy() != ICACHE_POLICY_PIPT; +} + +static inline int icache_is_aivivt(void) +{ + return icache_policy() == ICACHE_POLICY_AIVIVT; +} + +#endif /* __ASM_CACHETYPE_H */ diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S new file mode 100644 index 0000000..3df0aa7 --- /dev/null +++ b/arch/arm64/mm/cache.S @@ -0,0 +1,251 @@ +/* + * Cache maintenance + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> + +#include "proc-macros.S" + +/* + * __flush_dcache_all() + * + * Flush the whole D-cache. 
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
new file mode 100644
index 0000000..3df0aa7
--- /dev/null
+++ b/arch/arm64/mm/cache.S
@@ -0,0 +1,251 @@
+/*
+ * Cache maintenance
+ *
+ * Copyright (C) 2001 Deep Blue Solutions Ltd.
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+
+#include "proc-macros.S"
+
+/*
+ *	__flush_dcache_all()
+ *
+ *	Flush the whole D-cache.
+ *
+ *	Corrupted registers: x0-x7, x9-x11
+ */
+ENTRY(__flush_dcache_all)
+	dsb	sy				// ensure ordering with previous memory accesses
+	mrs	x0, clidr_el1			// read clidr
+	and	x3, x0, #0x7000000		// extract loc from clidr
+	lsr	x3, x3, #23			// left align loc bit field
+	cbz	x3, finished			// if loc is 0, then no need to clean
+	mov	x10, #0				// start clean at cache level 0
+loop1:
+	add	x2, x10, x10, lsr #1		// work out 3x current cache level
+	lsr	x1, x0, x2			// extract cache type bits from clidr
+	and	x1, x1, #7			// mask the bits for current cache only
+	cmp	x1, #2				// see what cache we have at this level
+	b.lt	skip				// skip if no cache, or just i-cache
+	save_and_disable_irqs x9		// make CSSELR and CCSIDR access atomic
+	msr	csselr_el1, x10			// select current cache level in csselr
+	isb					// isb to sync the new csselr & ccsidr
+	mrs	x1, ccsidr_el1			// read the new ccsidr
+	restore_irqs x9
+	and	x2, x1, #7			// extract the length of the cache lines
+	add	x2, x2, #4			// add 4 (line length offset)
+	mov	x4, #0x3ff
+	and	x4, x4, x1, lsr #3		// find maximum number of the way size
+	clz	x5, x4				// find bit position of way size increment
+	mov	x7, #0x7fff
+	and	x7, x7, x1, lsr #13		// extract max number of the index size
+loop2:
+	mov	x9, x4				// create working copy of max way size
+loop3:
+	lsl	x6, x9, x5
+	orr	x11, x10, x6			// factor way and cache number into x11
+	lsl	x6, x7, x2
+	orr	x11, x11, x6			// factor index number into x11
+	dc	cisw, x11			// clean & invalidate by set/way
+	subs	x9, x9, #1			// decrement the way
+	b.ge	loop3
+	subs	x7, x7, #1			// decrement the index
+	b.ge	loop2
+skip:
+	add	x10, x10, #2			// increment cache number
+	cmp	x3, x10
+	b.gt	loop1
+finished:
+	mov	x10, #0				// switch back to cache level 0
+	msr	csselr_el1, x10			// select current cache level in csselr
+	dsb	sy
+	isb
+	ret
+ENDPROC(__flush_dcache_all)
+
+/*
+ *	flush_cache_all()
+ *
+ *	Flush the entire cache system.  The data cache flush is now achieved
+ *	using atomic clean / invalidates working outwards from L1 cache.  This
+ *	is done using set/way based cache maintenance instructions.  The
+ *	instruction cache can still be invalidated back to the point of
+ *	unification in a single instruction.
+ */
+ENTRY(flush_cache_all)
+	mov	x12, lr
+	bl	__flush_dcache_all
+	mov	x0, #0
+	ic	ialluis				// I+BTB cache invalidate
+	ret	x12
+ENDPROC(flush_cache_all)
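The loop1/loop2/loop3 nest above packs the DC CISW operand from CCSIDR_EL1
fields; a rough C rendering of the per-iteration computation may help when
reading it. This is a sketch only, using the architectural 32-bit field
positions from the ARMv8 ARM; dc_cisw_operand is a name invented here:

#include <stdint.h>

/*
 * Sketch of the DC CISW operand built in loop3 above, for one
 * (level, set, way) triple.  ccsidr is CCSIDR_EL1 for that level:
 * bits [2:0] log2(line words)-2, [12:3] ways-1, [27:13] sets-1.
 * Assumes an associative cache (ways > 1) so clz is well defined.
 */
static uint64_t dc_cisw_operand(uint32_t ccsidr, unsigned int level,
				unsigned int set, unsigned int way)
{
	unsigned int line_shift = (ccsidr & 7) + 4;	  /* "add x2, x2, #4" */
	uint32_t max_way = (ccsidr >> 3) & 0x3ff;	  /* ways - 1 */
	unsigned int way_shift = __builtin_clz(max_way);  /* "clz x5, x4" */

	return ((uint64_t)way << way_shift) |	/* way in the top bits */
	       ((uint64_t)set << line_shift) |	/* set above the line offset */
	       (level << 1);			/* level in bits [3:1] */
}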
+/*
+ *	flush_icache_range(start,end)
+ *
+ *	Ensure that the I and D caches are coherent within the specified
+ *	region.  This is typically used when code has been written to a
+ *	memory region, and will be executed.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+ENTRY(flush_icache_range)
+	/* FALLTHROUGH */
+
+/*
+ *	__flush_cache_user_range(start,end)
+ *
+ *	Ensure that the I and D caches are coherent within the specified
+ *	region.  This is typically used when code has been written to a
+ *	memory region, and will be executed.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+ENTRY(__flush_cache_user_range)
+	dcache_line_size x2, x3
+	sub	x3, x2, #1
+	bic	x4, x0, x3
+1:
+USER(9f, dc	cvau, x4 )			// clean D line to PoU
+	add	x4, x4, x2
+	cmp	x4, x1
+	b.lo	1b
+	dsb	sy
+
+	icache_line_size x2, x3
+	sub	x3, x2, #1
+	bic	x4, x0, x3
+1:
+USER(9f, ic	ivau, x4 )			// invalidate I line to PoU
+	add	x4, x4, x2
+	cmp	x4, x1
+	b.lo	1b
+9:						// ignore any faulting cache operation
+	dsb	sy
+	isb
+	ret
+ENDPROC(flush_icache_range)
+ENDPROC(__flush_cache_user_range)
+
+/*
+ *	__flush_dcache_area(kaddr, size)
+ *
+ *	Ensure that the data held in the page kaddr is written back to the
+ *	page in question.
+ *
+ *	- kaddr   - kernel address
+ *	- size    - region size
+ */
+ENTRY(__flush_dcache_area)
+	dcache_line_size x2, x3
+	add	x1, x0, x1
+	sub	x3, x2, #1
+	bic	x0, x0, x3
+1:	dc	civac, x0			// clean & invalidate D line / unified line
+	add	x0, x0, x2
+	cmp	x0, x1
+	b.lo	1b
+	dsb	sy
+	ret
+ENDPROC(__flush_dcache_area)
+
+/*
+ *	dmac_inv_range(start,end)
+ *
+ *	Invalidate the data cache within the specified region; we will be
+ *	performing a DMA operation in this region and we want to purge old
+ *	data in the cache.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+ENTRY(dmac_inv_range)
+	dcache_line_size x2, x3
+	sub	x3, x2, #1
+	bic	x0, x0, x3
+	bic	x1, x1, x3
+1:	dc	ivac, x0			// invalidate D / U line
+	add	x0, x0, x2
+	cmp	x0, x1
+	b.lo	1b
+	dsb	sy
+	ret
+ENDPROC(dmac_inv_range)
+
+/*
+ *	dmac_clean_range(start,end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+ENTRY(dmac_clean_range)
+	dcache_line_size x2, x3
+	sub	x3, x2, #1
+	bic	x0, x0, x3
+1:	dc	cvac, x0			// clean D / U line
+	add	x0, x0, x2
+	cmp	x0, x1
+	b.lo	1b
+	dsb	sy
+	ret
+ENDPROC(dmac_clean_range)
+
+/*
+ *	dmac_flush_range(start,end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+ENTRY(dmac_flush_range)
+	dcache_line_size x2, x3
+	sub	x3, x2, #1
+	bic	x0, x0, x3
+1:	dc	civac, x0			// clean & invalidate D / U line
+	add	x0, x0, x2
+	cmp	x0, x1
+	b.lo	1b
+	dsb	sy
+	ret
+ENDPROC(dmac_flush_range)
+
+/*
+ *	dmac_map_area(start, size, dir)
+ *	- start   - kernel virtual start address
+ *	- size    - size of region
+ *	- dir     - DMA direction
+ */
+ENTRY(dmac_map_area)
+	add	x1, x1, x0
+	cmp	x2, #DMA_FROM_DEVICE
+	b.eq	dmac_inv_range
+	b	dmac_clean_range
+ENDPROC(dmac_map_area)
+
+/*
+ *	dmac_unmap_area(start, size, dir)
+ *	- start   - kernel virtual start address
+ *	- size    - size of region
+ *	- dir     - DMA direction
+ */
+ENTRY(dmac_unmap_area)
+	add	x1, x1, x0
+	cmp	x2, #DMA_TO_DEVICE
+	b.ne	dmac_inv_range
+	ret
+ENDPROC(dmac_unmap_area)
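For readers cross-checking dmac_map_area/dmac_unmap_area against the ARM
port: the dispatch reduces to the usual non-coherent DMA rules. A C sketch
of the same logic — dmac_inv_range/dmac_clean_range are the asm routines
above, while the *_c names and prototypes are illustrative only:

#include <stddef.h>
#include <linux/dma-direction.h>

extern void dmac_inv_range(const void *start, const void *end);
extern void dmac_clean_range(const void *start, const void *end);

/* Illustrative C equivalents of the asm entry points above. */
static void dmac_map_area_c(const void *start, size_t size, int dir)
{
	const void *end = (const char *)start + size;

	if (dir == DMA_FROM_DEVICE)
		dmac_inv_range(start, end);	/* device will write: drop stale lines */
	else
		dmac_clean_range(start, end);	/* device will read: write back dirty lines */
}

static void dmac_unmap_area_c(const void *start, size_t size, int dir)
{
	if (dir != DMA_TO_DEVICE)		/* FROM_DEVICE or BIDIRECTIONAL */
		dmac_inv_range(start, (const char *)start + size);
	/* DMA_TO_DEVICE: nothing to do, the device only read the buffer */
}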
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
new file mode 100644
index 0000000..6138d86
--- /dev/null
+++ b/arch/arm64/mm/flush.c
@@ -0,0 +1,138 @@
+/*
+ * Based on arch/arm/mm/flush.c
+ *
+ * Copyright (C) 1995-2002 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cachetype.h>
+#include <asm/tlbflush.h>
+
+#include "mm.h"
+
+void flush_cache_mm(struct mm_struct *mm)
+{
+}
+
+void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
+		       unsigned long end)
+{
+	if (vma->vm_flags & VM_EXEC)
+		__flush_icache_all();
+}
+
+void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr,
+		      unsigned long pfn)
+{
+}
+
+static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
+				unsigned long uaddr, void *kaddr,
+				unsigned long len)
+{
+	if (vma->vm_flags & VM_EXEC) {
+		unsigned long addr = (unsigned long)kaddr;
+		if (icache_is_aliasing()) {
+			__flush_dcache_area(kaddr, len);
+			__flush_icache_all();
+		} else {
+			flush_icache_range(addr, addr + len);
+		}
+	}
+}
+
+/*
+ * Copy user data from/to a page which is mapped into a different process's
+ * address space.  Really, we want to allow our "user space" model to handle
+ * this.
+ *
+ * Note that this code needs to run on the current CPU.
+ */
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+		       unsigned long uaddr, void *dst, const void *src,
+		       unsigned long len)
+{
+#ifdef CONFIG_SMP
+	preempt_disable();
+#endif
+	memcpy(dst, src, len);
+	flush_ptrace_access(vma, page, uaddr, dst, len);
+#ifdef CONFIG_SMP
+	preempt_enable();
+#endif
+}
+
+void __flush_dcache_page(struct address_space *mapping, struct page *page)
+{
+	__flush_dcache_area(page_address(page), PAGE_SIZE);
+}
+
+void __sync_icache_dcache(pte_t pte)
+{
+	unsigned long pfn;
+	struct page *page;
+
+	pfn = pte_pfn(pte);
+	if (!pfn_valid(pfn))
+		return;
+
+	page = pfn_to_page(pfn);
+	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
+		__flush_dcache_page(NULL, page);
+	__flush_icache_all();
+}
+
+/*
+ * Ensure cache coherency between the kernel mapping and any userspace
+ * mapping of this page.
+ */
+void flush_dcache_page(struct page *page)
+{
+	struct address_space *mapping;
+
+	/*
+	 * The zero page is never written to, so never has any dirty cache
+	 * lines, and therefore never needs to be flushed.
+	 */
+	if (page == ZERO_PAGE(0))
+		return;
+
+	mapping = page_mapping(page);
+
+	if (mapping && !mapping_mapped(mapping))
+		clear_bit(PG_dcache_clean, &page->flags);
+	else {
+		__flush_dcache_page(mapping, page);
+		if (mapping)
+			__flush_icache_all();
+		set_bit(PG_dcache_clean, &page->flags);
+	}
+}
+EXPORT_SYMBOL(flush_dcache_page);
+
+void __flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
+{
+}
+
+/*
+ * Additional functions defined in assembly.
+ */
+EXPORT_SYMBOL(flush_cache_all);
+EXPORT_SYMBOL(flush_icache_range);
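Finally, the main consumer of copy_to_user_page()/flush_ptrace_access() is
the ptrace write path via access_process_vm(); a simplified sketch of that
call shape follows (generic-kernel outline based on the usual mm/memory.c
pattern; example_poke_text is a hypothetical name, details elided):

#include <linux/highmem.h>
#include <linux/mm.h>

/* Hypothetical outline of a ptrace-style text poke into another task. */
static int example_poke_text(struct vm_area_struct *vma, struct page *page,
			     unsigned long addr, const void *insns,
			     unsigned long len)
{
	void *kaddr = kmap(page);

	/*
	 * memcpy via the kernel mapping, then flush_ptrace_access() keeps
	 * the I-cache coherent with the new instructions for VM_EXEC
	 * mappings, whether or not the I-cache may alias.
	 */
	copy_to_user_page(vma, page, addr,
			  kaddr + (addr & ~PAGE_MASK), insns, len);
	kunmap(page);
	set_page_dirty_lock(page);
	return 0;
}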