| Submitter | Valentin R Sitsikov |
|---|---|
| Date | 2009-10-14 12:51:52 |
| Message ID | <4AD5C968.1030005@siemens.com> |
| Download | mbox | patch |
| Permalink | /patch/53661/ |
| State | Under Review |
| Headers | show |
Comments
Hello Paul! If you don`t mind could you please comment on this patch? Best regards, Valentin Valentin R Sitsikov wrote: > Signed-off-by: Valentin Sitdikov <valentin.sitdikov@siemens.com> > --- > arch/sh/include/asm/system_32.h | 2 +- > arch/sh/mm/Makefile | 1 + > arch/sh/mm/cache-sh4a.c | 169 > +++++++++++++++++++++++++++++++++++++++ > arch/sh/mm/cache.c | 6 ++ > 4 files changed, 177 insertions(+), 1 deletions(-) > create mode 100644 arch/sh/mm/cache-sh4a.c > > diff --git a/arch/sh/include/asm/system_32.h > b/arch/sh/include/asm/system_32.h > index 607d413..7fe8011 100644 > --- a/arch/sh/include/asm/system_32.h > +++ b/arch/sh/include/asm/system_32.h > @@ -72,7 +72,7 @@ do { \ > #define __ocbp(addr) __asm__ __volatile__ ( "ocbp @%0\n\t" : : "r" > (addr)) > #define __ocbi(addr) __asm__ __volatile__ ( "ocbi @%0\n\t" : : "r" > (addr)) > #define __ocbwb(addr) __asm__ __volatile__ ( "ocbwb @%0\n\t" : : "r" > (addr)) > - > +#define __icbi(addr) __asm__ __volatile__ ( "icbi @%0\n\t" : : "r" > (addr)) > struct task_struct *__switch_to(struct task_struct *prev, > struct task_struct *next); > > diff --git a/arch/sh/mm/Makefile b/arch/sh/mm/Makefile > index b70024d..3a2de1d 100644 > --- a/arch/sh/mm/Makefile > +++ b/arch/sh/mm/Makefile > @@ -10,6 +10,7 @@ cacheops-$(CONFIG_CPU_SH3) := cache-sh3.o > cacheops-$(CONFIG_CPU_SH4) := cache-sh4.o flush-sh4.o > cacheops-$(CONFIG_CPU_SH5) := cache-sh5.o flush-sh4.o > cacheops-$(CONFIG_SH7705_CACHE_32KB) += cache-sh7705.o > +cacheops-$(CONFIG_CPU_SH4A) += cache-sh4a.o > > obj-y += $(cacheops-y) > > diff --git a/arch/sh/mm/cache-sh4a.c b/arch/sh/mm/cache-sh4a.c > new file mode 100644 > index 0000000..147f0e3 > --- /dev/null > +++ b/arch/sh/mm/cache-sh4a.c > @@ -0,0 +1,169 @@ > +/* > + * arch/sh/mm/cache-sh4a.c > + * > + * Copyright (C) 1999, 2000, 2002 Niibe Yutaka > + * Copyright (C) 2001 - 2009 Paul Mundt > + * Copyright (C) 2003 Richard Curnow > + * Copyright (c) 2007 STMicroelectronics (R&D) Ltd. 
> + * Copyright (c) 2009 Valentin Sitdikov > + * > + * This file is subject to the terms and conditions of the GNU General > Public > + * License. See the file "COPYING" in the main directory of this archive > + * for more details. > + */ > +#include <linux/init.h> > +#include <linux/mm.h> > +#include <linux/io.h> > +#include <linux/mutex.h> > +#include <linux/fs.h> > +#include <linux/highmem.h> > +#include <linux/pagemap.h> > +#include <asm/pgtable.h> > +#include <asm/mmu_context.h> > +#include <asm/cacheflush.h> > + > +/* > + * The maximum number of pages we support up to when doing ranged dcache > + * flushing. Anything exceeding this will simply flush the dcache in its > + * entirety. > + */ > +#define MAX_ICACHE_PAGES 32 > + > + > +static void sh4a_invalidate_icache(void *start, int size) > +{ > + reg_size_t aligned_start, v, cnt, end; > + > + aligned_start = register_align(start); > + v = aligned_start & ~(L1_CACHE_BYTES-1); > + end = (aligned_start + size + L1_CACHE_BYTES-1) > + & ~(L1_CACHE_BYTES-1); > + cnt = (end - v) / L1_CACHE_BYTES; > + > + while (cnt >= 8) { > + __icbi(v); v += L1_CACHE_BYTES; > + __icbi(v); v += L1_CACHE_BYTES; > + __icbi(v); v += L1_CACHE_BYTES; > + __icbi(v); v += L1_CACHE_BYTES; > + __icbi(v); v += L1_CACHE_BYTES; > + __icbi(v); v += L1_CACHE_BYTES; > + __icbi(v); v += L1_CACHE_BYTES; > + __icbi(v); v += L1_CACHE_BYTES; > + cnt -= 8; > + } > + > + while (cnt) { > + __icbi(v); v += L1_CACHE_BYTES; > + cnt--; > + } > +} > + > +/* > + * Write back the dirty D-caches and invalidate them. > + * > + * START: Virtual Address (U0, P1, or P3) > + * SIZE: Size of the region. 
> + */ > +static void sh4a_purge_dcache(void *start, int size) > +{ > + reg_size_t aligned_start, v, cnt, end; > + > + aligned_start = register_align(start); > + v = aligned_start & ~(L1_CACHE_BYTES-1); > + end = (aligned_start + size + L1_CACHE_BYTES-1) > + & ~(L1_CACHE_BYTES-1); > + cnt = (end - v) / L1_CACHE_BYTES; > + > + while (cnt >= 8) { > + __ocbp(v); v += L1_CACHE_BYTES; > + __ocbp(v); v += L1_CACHE_BYTES; > + __ocbp(v); v += L1_CACHE_BYTES; > + __ocbp(v); v += L1_CACHE_BYTES; > + __ocbp(v); v += L1_CACHE_BYTES; > + __ocbp(v); v += L1_CACHE_BYTES; > + __ocbp(v); v += L1_CACHE_BYTES; > + __ocbp(v); v += L1_CACHE_BYTES; > + cnt -= 8; > + } > + while (cnt) { > + __ocbp(v); v += L1_CACHE_BYTES; > + cnt--; > + } > +} > + > +/* > + * Write back the range of D-cache, and purge the I-cache. > + * > + * Called from kernel/module.c:sys_init_module and routine for a.out > format, > + * signal handler code and kprobes code > + */ > +static void __uses_jump_to_uncached sh4a_flush_icache_range(void *args) > +{ > + struct flusher_data *data = args; > + unsigned long start, end; > + unsigned long flags, v; > + > + start = data->addr1; > + end = data->addr2; > + > + /* If there are too many pages then just blow away the caches */ > + if (((end - start) >> PAGE_SHIFT) >= MAX_ICACHE_PAGES) { > + local_flush_cache_all(NULL); > + return; > + } > + > + /* > + * Selectively flush d-cache then invalidate the i-cache. > + * This is inefficient, so only use this for small ranges. > + */ > + start &= ~(L1_CACHE_BYTES-1); > + end += L1_CACHE_BYTES-1; > + end &= ~(L1_CACHE_BYTES-1); > + > + local_irq_save(flags); > + jump_to_uncached(); > + > + for (v = start; v < end; v += L1_CACHE_BYTES) { > + __ocbwb(v); > + __icbi(v); > + } > + > + back_to_cached(); > + local_irq_restore(flags); > +} > + > +/* > + * Write back & invalidate the D-cache of the page. 
> + * (To avoid "alias" issues) > + */ > +static void sh4a_flush_dcache_page(void *arg) > +{ > + struct page *page = arg; > + struct address_space *mapping = page_mapping(page); > + > +#ifndef CONFIG_SMP > + if (mapping && !mapping_mapped(mapping)) > + set_bit(PG_dcache_dirty, &page->flags); > + else > +#endif > + { > + sh4a_purge_dcache(page_address(page), PAGE_SIZE); > + sh4a_invalidate_icache(page_address(page), PAGE_SIZE); > + } > +} > + > + > +/* > + * SH-4 has virtually indexed and physically tagged cache. > + */ > +void __init sh4a_cache_init(void) > +{ > + printk("SH4A cache optimization\n"); > + > + local_flush_icache_range = sh4a_flush_icache_range; > + /* Not sure about alias cases - not checked yet */ > + if (boot_cpu_data.dcache.n_aliases == 0) { > + local_flush_dcache_page = sh4a_flush_dcache_page; > + } > + > +} > diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c > index 4aa9260..72904d9 100644 > --- a/arch/sh/mm/cache.c > +++ b/arch/sh/mm/cache.c > @@ -310,6 +310,12 @@ void __init cpu_cache_init(void) > extern void __weak sh4_cache_init(void); > > sh4_cache_init(); > + > + if(boot_cpu_data.family == CPU_FAMILY_SH4A) { > + extern void __weak sh4a_cache_init(void); > + > + sh4a_cache_init(); > + } > } > > if (boot_cpu_data.family == CPU_FAMILY_SH5) { -- To unsubscribe from this list: send the line "unsubscribe linux-sh" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Oct 28, 2009 at 11:11:11AM +0300, Valentin R Sitsikov wrote: > Hello Paul! > If you don`t mind could you please comment on this patch? > Sorry I haven't gotten around to this yet; it's certainly on my to-review list. Last week was quite busy with kernel summit and the Japan linux symposium, so I'm still catching up. I'll try to get to it before the end of the week. -- To unsubscribe from this list: send the line "unsubscribe linux-sh" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Oct 14, 2009 at 04:51:52PM +0400, Valentin R Sitsikov wrote: > Signed-off-by: Valentin Sitdikov <valentin.sitdikov@siemens.com> > --- > arch/sh/include/asm/system_32.h | 2 +- > arch/sh/mm/Makefile | 1 + > arch/sh/mm/cache-sh4a.c | 169 > +++++++++++++++++++++++++++++++++++++++ Your email client appears to have word-wrapped this patch, which means that it does not apply cleanly. > arch/sh/mm/cache.c | 6 ++ > 4 files changed, 177 insertions(+), 1 deletions(-) > create mode 100644 arch/sh/mm/cache-sh4a.c > > diff --git a/arch/sh/include/asm/system_32.h > b/arch/sh/include/asm/system_32.h > index 607d413..7fe8011 100644 > --- a/arch/sh/include/asm/system_32.h > +++ b/arch/sh/include/asm/system_32.h > @@ -72,7 +72,7 @@ do { \ > #define __ocbp(addr) __asm__ __volatile__ ( "ocbp @%0\n\t" : : "r" > (addr)) > #define __ocbi(addr) __asm__ __volatile__ ( "ocbi @%0\n\t" : : "r" > (addr)) > #define __ocbwb(addr) __asm__ __volatile__ ( "ocbwb @%0\n\t" : : "r" > (addr)) > - > +#define __icbi(addr) __asm__ __volatile__ ( "icbi @%0\n\t" : : "r" > (addr)) I don't think this change is correct. The icbi instruction is only available on SH4A, and there is already an __icbi() placeholder for non-SH4A CPUs. This will break all non-SH4A builds: arch/sh/include/asm/system_32.h:75: error: "__icbi" redefined arch/sh/include/asm/system_32.h:69: note: this is the location of the previous definition [...] > +/* > + * Write back & invalidate the D-cache of the page. > + * (To avoid "alias" issues) > + */ > +static void sh4a_flush_dcache_page(void *arg) > +{ > + struct page *page = arg; > + struct address_space *mapping = page_mapping(page); > + > +#ifndef CONFIG_SMP > + if (mapping && !mapping_mapped(mapping)) > + set_bit(PG_dcache_dirty, &page->flags); > + else > +#endif > + { > + sh4a_purge_dcache(page_address(page), PAGE_SIZE); > + sh4a_invalidate_icache(page_address(page), PAGE_SIZE); > + } Is there a reason why you are also invalidating the icache here? 
I think that only the dcache needs to be written-back and invalidated. > +/* > + * SH-4 has virtually indexed and physically tagged cache. > + */ > +void __init sh4a_cache_init(void) > +{ > + printk("SH4A cache optimization\n"); > + > + local_flush_icache_range = sh4a_flush_icache_range; > + /* Not sure about alias cases - not checked yet */ > + if (boot_cpu_data.dcache.n_aliases == 0) { > + local_flush_dcache_page = sh4a_flush_dcache_page; > + } > + > +} It is possible for the icache to have aliases too, so you should probably only use the sh4a optimized versions if there are no icache aliases. Also, it would be a good idea to move the printk() so that the "SH4A cache optimization" string is only printed if there are no aliases in the cache and we're actually using the optimized versions. -- To unsubscribe from this list: send the line "unsubscribe linux-sh" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Patch
diff --git a/arch/sh/include/asm/system_32.h b/arch/sh/include/asm/system_32.h index 607d413..7fe8011 100644 --- a/arch/sh/include/asm/system_32.h +++ b/arch/sh/include/asm/system_32.h @@ -72,7 +72,7 @@ do { \ #define __ocbp(addr) __asm__ __volatile__ ( "ocbp @%0\n\t" : : "r" (addr)) #define __ocbi(addr) __asm__ __volatile__ ( "ocbi @%0\n\t" : : "r" (addr)) #define __ocbwb(addr) __asm__ __volatile__ ( "ocbwb @%0\n\t" : : "r" (addr)) - +#define __icbi(addr) __asm__ __volatile__ ( "icbi @%0\n\t" : : "r" (addr)) struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next); diff --git a/arch/sh/mm/Makefile b/arch/sh/mm/Makefile index b70024d..3a2de1d 100644 --- a/arch/sh/mm/Makefile +++ b/arch/sh/mm/Makefile @@ -10,6 +10,7 @@ cacheops-$(CONFIG_CPU_SH3) := cache-sh3.o cacheops-$(CONFIG_CPU_SH4) := cache-sh4.o flush-sh4.o cacheops-$(CONFIG_CPU_SH5) := cache-sh5.o flush-sh4.o cacheops-$(CONFIG_SH7705_CACHE_32KB) += cache-sh7705.o +cacheops-$(CONFIG_CPU_SH4A) += cache-sh4a.o obj-y += $(cacheops-y) diff --git a/arch/sh/mm/cache-sh4a.c b/arch/sh/mm/cache-sh4a.c new file mode 100644 index 0000000..147f0e3 --- /dev/null +++ b/arch/sh/mm/cache-sh4a.c @@ -0,0 +1,169 @@ +/* + * arch/sh/mm/cache-sh4a.c + * + * Copyright (C) 1999, 2000, 2002 Niibe Yutaka + * Copyright (C) 2001 - 2009 Paul Mundt + * Copyright (C) 2003 Richard Curnow + * Copyright (c) 2007 STMicroelectronics (R&D) Ltd. + * Copyright (c) 2009 Valentin Sitdikov + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/io.h> +#include <linux/mutex.h> +#include <linux/fs.h> +#include <linux/highmem.h> +#include <linux/pagemap.h> +#include <asm/pgtable.h> +#include <asm/mmu_context.h> +#include <asm/cacheflush.h> + +/* + * The maximum number of pages we support up to when doing ranged dcache + * flushing. 
Anything exceeding this will simply flush the dcache in its + * entirety. + */ +#define MAX_ICACHE_PAGES 32 + + +static void sh4a_invalidate_icache(void *start, int size) +{ + reg_size_t aligned_start, v, cnt, end; + + aligned_start = register_align(start); + v = aligned_start & ~(L1_CACHE_BYTES-1); + end = (aligned_start + size + L1_CACHE_BYTES-1) + & ~(L1_CACHE_BYTES-1); + cnt = (end - v) / L1_CACHE_BYTES; + + while (cnt >= 8) { + __icbi(v); v += L1_CACHE_BYTES; + __icbi(v); v += L1_CACHE_BYTES; + __icbi(v); v += L1_CACHE_BYTES; + __icbi(v); v += L1_CACHE_BYTES; + __icbi(v); v += L1_CACHE_BYTES; + __icbi(v); v += L1_CACHE_BYTES; + __icbi(v); v += L1_CACHE_BYTES; + __icbi(v); v += L1_CACHE_BYTES; + cnt -= 8; + } + + while (cnt) { + __icbi(v); v += L1_CACHE_BYTES; + cnt--; + } +} + +/* + * Write back the dirty D-caches and invalidate them. + * + * START: Virtual Address (U0, P1, or P3) + * SIZE: Size of the region. + */ +static void sh4a_purge_dcache(void *start, int size) +{ + reg_size_t aligned_start, v, cnt, end; + + aligned_start = register_align(start); + v = aligned_start & ~(L1_CACHE_BYTES-1); + end = (aligned_start + size + L1_CACHE_BYTES-1) + & ~(L1_CACHE_BYTES-1); + cnt = (end - v) / L1_CACHE_BYTES; + + while (cnt >= 8) { + __ocbp(v); v += L1_CACHE_BYTES; + __ocbp(v); v += L1_CACHE_BYTES; + __ocbp(v); v += L1_CACHE_BYTES; + __ocbp(v); v += L1_CACHE_BYTES; + __ocbp(v); v += L1_CACHE_BYTES; + __ocbp(v); v += L1_CACHE_BYTES; + __ocbp(v); v += L1_CACHE_BYTES; + __ocbp(v); v += L1_CACHE_BYTES; + cnt -= 8; + } + while (cnt) { + __ocbp(v); v += L1_CACHE_BYTES; + cnt--; + } +} + +/* + * Write back the range of D-cache, and purge the I-cache. 
+ * + * Called from kernel/module.c:sys_init_module and routine for a.out format, + * signal handler code and kprobes code + */ +static void __uses_jump_to_uncached sh4a_flush_icache_range(void *args) +{ + struct flusher_data *data = args; + unsigned long start, end; + unsigned long flags, v; + + start = data->addr1; + end = data->addr2; + + /* If there are too many pages then just blow away the caches */ + if (((end - start) >> PAGE_SHIFT) >= MAX_ICACHE_PAGES) { + local_flush_cache_all(NULL); + return; + } + + /* + * Selectively flush d-cache then invalidate the i-cache. + * This is inefficient, so only use this for small ranges. + */ + start &= ~(L1_CACHE_BYTES-1); + end += L1_CACHE_BYTES-1; + end &= ~(L1_CACHE_BYTES-1); + + local_irq_save(flags); + jump_to_uncached(); + + for (v = start; v < end; v += L1_CACHE_BYTES) { + __ocbwb(v); + __icbi(v); + } + + back_to_cached(); + local_irq_restore(flags); +} + +/* + * Write back & invalidate the D-cache of the page. + * (To avoid "alias" issues) + */ +static void sh4a_flush_dcache_page(void *arg) +{ + struct page *page = arg; + struct address_space *mapping = page_mapping(page); + +#ifndef CONFIG_SMP + if (mapping && !mapping_mapped(mapping)) + set_bit(PG_dcache_dirty, &page->flags); + else +#endif + { + sh4a_purge_dcache(page_address(page), PAGE_SIZE); + sh4a_invalidate_icache(page_address(page), PAGE_SIZE); + } +} + + +/* + * SH-4 has virtually indexed and physically tagged cache. 
+ */ +void __init sh4a_cache_init(void) +{ + printk("SH4A cache optimization\n"); + + local_flush_icache_range = sh4a_flush_icache_range; + /* Not sure about alias cases - not checked yet */ + if (boot_cpu_data.dcache.n_aliases == 0) { + local_flush_dcache_page = sh4a_flush_dcache_page; + } + +} diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c index 4aa9260..72904d9 100644 --- a/arch/sh/mm/cache.c +++ b/arch/sh/mm/cache.c @@ -310,6 +310,12 @@ void __init cpu_cache_init(void) extern void __weak sh4_cache_init(void); sh4_cache_init(); + + if(boot_cpu_data.family == CPU_FAMILY_SH4A) { + extern void __weak sh4a_cache_init(void); + + sh4a_cache_init(); + } } if (boot_cpu_data.family == CPU_FAMILY_SH5) {
Signed-off-by: Valentin Sitdikov <valentin.sitdikov@siemens.com> --- arch/sh/include/asm/system_32.h | 2 +- arch/sh/mm/Makefile | 1 + arch/sh/mm/cache-sh4a.c | 169 +++++++++++++++++++++++++++++++++++++++ arch/sh/mm/cache.c | 6 ++ 4 files changed, 177 insertions(+), 1 deletions(-) create mode 100644 arch/sh/mm/cache-sh4a.c