From patchwork Wed Oct 14 12:51:52 2009
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Valentin R Sitsikov
X-Patchwork-Id: 53661
Message-ID: <4AD5C968.1030005@siemens.com>
Date: Wed, 14 Oct 2009 16:51:52 +0400
From: Valentin R Sitsikov
User-Agent: Icedove 1.5.0.10 (X11/20070329)
To: SH-Linux
CC: Paul Mundt
Subject: [PATCH] sh: sh4a: Cache optimization if no cache alias
Sender: linux-sh-owner@vger.kernel.org
Precedence: bulk
List-ID:
X-Mailing-List: linux-sh@vger.kernel.org

diff --git a/arch/sh/include/asm/system_32.h b/arch/sh/include/asm/system_32.h
index 607d413..7fe8011 100644
--- a/arch/sh/include/asm/system_32.h
+++ b/arch/sh/include/asm/system_32.h
@@ -72,7 +72,7 @@ do {							\
 #define __ocbp(addr)	__asm__ __volatile__ ( "ocbp @%0\n\t" : : "r" (addr))
 #define __ocbi(addr)	__asm__ __volatile__ ( "ocbi @%0\n\t" : : "r" (addr))
 #define __ocbwb(addr)	__asm__ __volatile__ ( "ocbwb @%0\n\t" : : "r" (addr))
-
+#define __icbi(addr)	__asm__ __volatile__ ( "icbi @%0\n\t" : : "r" (addr))
 
 struct task_struct *__switch_to(struct task_struct *prev,
 				struct task_struct *next);
diff --git a/arch/sh/mm/Makefile b/arch/sh/mm/Makefile
index b70024d..3a2de1d 100644
--- a/arch/sh/mm/Makefile
+++ b/arch/sh/mm/Makefile
@@ -10,6 +10,7 @@ cacheops-$(CONFIG_CPU_SH3)	:= cache-sh3.o
 cacheops-$(CONFIG_CPU_SH4)	:= cache-sh4.o flush-sh4.o
 cacheops-$(CONFIG_CPU_SH5)	:= cache-sh5.o flush-sh4.o
 cacheops-$(CONFIG_SH7705_CACHE_32KB)	+= cache-sh7705.o
+cacheops-$(CONFIG_CPU_SH4A)	+= cache-sh4a.o
 
 obj-y			+= $(cacheops-y)
 
diff --git a/arch/sh/mm/cache-sh4a.c b/arch/sh/mm/cache-sh4a.c
new file mode 100644
index 0000000..147f0e3
--- /dev/null
+++ b/arch/sh/mm/cache-sh4a.c
@@ -0,0 +1,169 @@
+/*
+ * arch/sh/mm/cache-sh4a.c
+ *
+ * Copyright (C) 1999, 2000, 2002 Niibe Yutaka
+ * Copyright (C) 2001 - 2009 Paul Mundt
+ * Copyright (C) 2003 Richard Curnow
+ * Copyright (c) 2007 STMicroelectronics (R&D) Ltd.
+ * Copyright (c) 2009 Valentin Sitdikov
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * The maximum number of pages we support up to when doing ranged dcache
+ * flushing. Anything exceeding this will simply flush the dcache in its
+ * entirety.
+ */
+#define MAX_ICACHE_PAGES	32
+
+
+static void sh4a_invalidate_icache(void *start, int size)
+{
+	reg_size_t aligned_start, v, cnt, end;
+
+	aligned_start = register_align(start);
+	v = aligned_start & ~(L1_CACHE_BYTES-1);
+	end = (aligned_start + size + L1_CACHE_BYTES-1)
+		& ~(L1_CACHE_BYTES-1);
+	cnt = (end - v) / L1_CACHE_BYTES;
+
+	while (cnt >= 8) {
+		__icbi(v); v += L1_CACHE_BYTES;
+		__icbi(v); v += L1_CACHE_BYTES;
+		__icbi(v); v += L1_CACHE_BYTES;
+		__icbi(v); v += L1_CACHE_BYTES;
+		__icbi(v); v += L1_CACHE_BYTES;
+		__icbi(v); v += L1_CACHE_BYTES;
+		__icbi(v); v += L1_CACHE_BYTES;
+		__icbi(v); v += L1_CACHE_BYTES;
+		cnt -= 8;
+	}
+
+	while (cnt) {
+		__icbi(v); v += L1_CACHE_BYTES;
+		cnt--;
+	}
+}
+
+/*
+ * Write back the dirty D-caches and invalidate them.
+ *
+ * START: Virtual Address (U0, P1, or P3)
+ * SIZE: Size of the region.
+ */
+static void sh4a_purge_dcache(void *start, int size)
+{
+	reg_size_t aligned_start, v, cnt, end;
+
+	aligned_start = register_align(start);
+	v = aligned_start & ~(L1_CACHE_BYTES-1);
+	end = (aligned_start + size + L1_CACHE_BYTES-1)
+		& ~(L1_CACHE_BYTES-1);
+	cnt = (end - v) / L1_CACHE_BYTES;
+
+	while (cnt >= 8) {
+		__ocbp(v); v += L1_CACHE_BYTES;
+		__ocbp(v); v += L1_CACHE_BYTES;
+		__ocbp(v); v += L1_CACHE_BYTES;
+		__ocbp(v); v += L1_CACHE_BYTES;
+		__ocbp(v); v += L1_CACHE_BYTES;
+		__ocbp(v); v += L1_CACHE_BYTES;
+		__ocbp(v); v += L1_CACHE_BYTES;
+		__ocbp(v); v += L1_CACHE_BYTES;
+		cnt -= 8;
+	}
+	while (cnt) {
+		__ocbp(v); v += L1_CACHE_BYTES;
+		cnt--;
+	}
+}
+
+/*
+ * Write back the range of D-cache, and purge the I-cache.
+ *
+ * Called from kernel/module.c:sys_init_module and routine for a.out format,
+ * signal handler code and kprobes code
+ */
+static void __uses_jump_to_uncached sh4a_flush_icache_range(void *args)
+{
+	struct flusher_data *data = args;
+	unsigned long start, end;
+	unsigned long flags, v;
+
+	start = data->addr1;
+	end = data->addr2;
+
+	/* If there are too many pages then just blow away the caches */
+	if (((end - start) >> PAGE_SHIFT) >= MAX_ICACHE_PAGES) {
+		local_flush_cache_all(NULL);
+		return;
+	}
+
+	/*
+	 * Selectively flush d-cache then invalidate the i-cache.
+	 * This is inefficient, so only use this for small ranges.
+	 */
+	start &= ~(L1_CACHE_BYTES-1);
+	end += L1_CACHE_BYTES-1;
+	end &= ~(L1_CACHE_BYTES-1);
+
+	local_irq_save(flags);
+	jump_to_uncached();
+
+	for (v = start; v < end; v += L1_CACHE_BYTES) {
+		__ocbwb(v);
+		__icbi(v);
+	}
+
+	back_to_cached();
+	local_irq_restore(flags);
+}
+
+/*
+ * Write back & invalidate the D-cache of the page.
+ * (To avoid "alias" issues)
+ */
+static void sh4a_flush_dcache_page(void *arg)
+{
+	struct page *page = arg;
+	struct address_space *mapping = page_mapping(page);
+
+#ifndef CONFIG_SMP
+	if (mapping && !mapping_mapped(mapping))
+		set_bit(PG_dcache_dirty, &page->flags);
+	else
+#endif
+	{
+		sh4a_purge_dcache(page_address(page), PAGE_SIZE);
+		sh4a_invalidate_icache(page_address(page), PAGE_SIZE);
+	}
+}
+
+
+/*
+ * SH-4 has virtually indexed and physically tagged cache.
+ */
+void __init sh4a_cache_init(void)
+{
+	printk("SH4A cache optimization\n");
+
+	local_flush_icache_range = sh4a_flush_icache_range;
+	/* Not sure about alias cases - not checked yet */
+	if (boot_cpu_data.dcache.n_aliases == 0) {
+		local_flush_dcache_page = sh4a_flush_dcache_page;
+	}
+
+}
diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c
index 4aa9260..72904d9 100644
--- a/arch/sh/mm/cache.c
+++ b/arch/sh/mm/cache.c
@@ -310,6 +310,12 @@ void __init cpu_cache_init(void)
 		extern void __weak sh4_cache_init(void);
 
 		sh4_cache_init();
+
+		if(boot_cpu_data.family == CPU_FAMILY_SH4A) {
+			extern void __weak sh4a_cache_init(void);
+
+			sh4a_cache_init();
+		}
 	}
 
 	if (boot_cpu_data.family == CPU_FAMILY_SH5) {
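
For readers who want to try the flush logic outside the kernel: below is a minimal,
stand-alone sketch of the cache-line range arithmetic that sh4a_invalidate_icache()
and sh4a_purge_dcache() share. It is illustrative only; L1_CACHE_BYTES is assumed to
be 32 bytes (a typical SH-4A line size), register_align() is replaced by a plain cast,
and the icbi/ocbp instructions, which exist only on SH hardware, are stubbed out with
printf().

/*
 * Stand-alone sketch of the cache-line range walk used by the sh4a_*
 * flushers.  Assumptions: 32-byte cache lines, a plain cast in place of
 * register_align(), printf() in place of the real icbi operation.
 */
#include <stdio.h>
#include <stdint.h>

#define L1_CACHE_BYTES	32	/* assumed SH-4A L1 line size */

static void stub_icbi(uintptr_t addr)
{
	printf("icbi @ 0x%lx\n", (unsigned long)addr);
}

static void invalidate_icache_range(void *start, int size)
{
	uintptr_t aligned_start, v, end, cnt;

	aligned_start = (uintptr_t)start;
	/* Round the start down and the end up to a cache-line boundary. */
	v = aligned_start & ~(uintptr_t)(L1_CACHE_BYTES - 1);
	end = (aligned_start + size + L1_CACHE_BYTES - 1)
		& ~(uintptr_t)(L1_CACHE_BYTES - 1);
	cnt = (end - v) / L1_CACHE_BYTES;

	/* One cache op per line; the patch unrolls this loop by eight. */
	while (cnt--) {
		stub_icbi(v);
		v += L1_CACHE_BYTES;
	}
}

int main(void)
{
	static char buf[100];

	/* A 100-byte buffer touches at most five 32-byte lines. */
	invalidate_icache_range(buf, sizeof(buf));
	return 0;
}

The unrolled-by-eight while loops in the patch perform exactly this walk, just issuing
eight icbi/ocbp operations per iteration to cut loop overhead.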