@@ -21,6 +21,7 @@
#define NODE3_ADDRSPACE_OFFSET 0x300000000000UL
#define pa_to_nid(addr) (((addr) & 0xf00000000000) >> NODE_ADDRSPACE_SHIFT)
+/* Base offset of a node's address window; widen nid before shifting so an int argument is not shifted past its width. */
+#define nid_to_addrbase(nid) ((unsigned long)(nid) << NODE_ADDRSPACE_SHIFT)
#define LEVELS_PER_SLICE 128
@@ -9,6 +9,14 @@
#include <asm/page.h>
#include <mmzone.h>
+#ifndef pa_to_nid /* only if nothing earlier (e.g. the <mmzone.h> included above) defined it */
+#define pa_to_nid(addr) 0 /* fallback: every address evaluates to node 0 */
+#endif /* pa_to_nid */
+
+#ifndef nid_to_addrbase /* only if nothing earlier (e.g. the <mmzone.h> included above) defined it */
+#define nid_to_addrbase(nid) 0 /* fallback: every node evaluates to base offset 0 */
+#endif /* nid_to_addrbase */
+
#ifdef CONFIG_DISCONTIGMEM
#define pfn_to_nid(pfn) pa_to_nid((pfn) << PAGE_SHIFT)
@@ -674,4 +674,25 @@ __BUILD_BLAST_CACHE_RANGE(s, scache, Hit_Writeback_Inv_SD, , )
__BUILD_BLAST_CACHE_RANGE(inv_d, dcache, Hit_Invalidate_D, , )
__BUILD_BLAST_CACHE_RANGE(inv_s, scache, Hit_Invalidate_SD, , )
+/*
+ * Currently, this is very specific to Loongson-3.
+ *
+ * Generates blast_<pfx>cache<lsize>_node(node).  The generated function
+ * takes start = CAC_BASE | nid_to_addrbase(node) and end = start + the
+ * cache's waysize, then for every way offset (0 up to ways << waybit, in
+ * steps of 1 << waybit) walks [start, end) in steps of lsize * 32 and
+ * calls cache<lsize>_unroll32(addr | ws, indexop).
+ *
+ * The hitop argument is accepted but not referenced by the generated body.
+ * Instantiated below for the scache with line sizes 16, 32, 64 and 128.
+ */
+#define __BUILD_BLAST_CACHE_NODE(pfx, desc, indexop, hitop, lsize) \
+static inline void blast_##pfx##cache##lsize##_node(long node) \
+{ \
+ unsigned long start = CAC_BASE | nid_to_addrbase(node); \
+ unsigned long end = start + current_cpu_data.desc.waysize; \
+ unsigned long ws_inc = 1UL << current_cpu_data.desc.waybit; \
+ unsigned long ws_end = current_cpu_data.desc.ways << \
+ current_cpu_data.desc.waybit; \
+ unsigned long ws, addr; \
+ \
+ for (ws = 0; ws < ws_end; ws += ws_inc) \
+ for (addr = start; addr < end; addr += lsize * 32) \
+ cache##lsize##_unroll32(addr|ws, indexop); \
+}
+
+__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 16)
+__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 32)
+__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 64)
+__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 128)
+
#endif /* _ASM_R4KCACHE_H */
@@ -459,11 +459,28 @@ static void r4k_blast_scache_setup(void)
r4k_blast_scache = blast_scache128;
}
+/* Per-node S-cache blast routine; assigned once by r4k_blast_scache_node_setup(). */
+static void (*r4k_blast_scache_node)(long node);
+
+/*
+ * Select the blast_scache<N>_node variant that matches the S-cache line
+ * size reported by cpu_scache_line_size().
+ *
+ * CPUs other than Loongson-3 get cache_noop.  The pointer is defaulted to
+ * cache_noop up front so that a Loongson-3 line size outside 16/32/64/128
+ * (e.g. 0 if no line size was probed) cannot leave it NULL for callers
+ * that invoke it unconditionally.
+ */
+static void r4k_blast_scache_node_setup(void)
+{
+	unsigned long sc_lsize = cpu_scache_line_size();
+
+	r4k_blast_scache_node = (void *)cache_noop;
+
+	if (current_cpu_type() != CPU_LOONGSON3)
+		return;
+
+	switch (sc_lsize) {
+	case 16:
+		r4k_blast_scache_node = blast_scache16_node;
+		break;
+	case 32:
+		r4k_blast_scache_node = blast_scache32_node;
+		break;
+	case 64:
+		r4k_blast_scache_node = blast_scache64_node;
+		break;
+	case 128:
+		r4k_blast_scache_node = blast_scache128_node;
+		break;
+	default:
+		/* Unhandled line size: keep the no-op assigned above. */
+		break;
+	}
+}
+
static inline void local_r4k___flush_cache_all(void * args)
{
switch (current_cpu_type()) {
case CPU_LOONGSON2:
- case CPU_LOONGSON3:
case CPU_R4000SC:
case CPU_R4000MC:
case CPU_R4400SC:
@@ -480,6 +497,11 @@ static inline void local_r4k___flush_cache_all(void * args)
r4k_blast_scache();
break;
+ case CPU_LOONGSON3:
+ /* Use get_ebase_cpunum() for both NUMA=y/n */
+ r4k_blast_scache_node(get_ebase_cpunum() >> 2);
+ break;
+
case CPU_BMIPS5000:
r4k_blast_scache();
__sync();
@@ -840,10 +862,14 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
preempt_disable();
if (cpu_has_inclusive_pcaches) {
- if (size >= scache_size)
- r4k_blast_scache();
- else
+ if (size >= scache_size) {
+ if (current_cpu_type() != CPU_LOONGSON3)
+ r4k_blast_scache();
+ else
+ r4k_blast_scache_node(pa_to_nid(addr));
+ } else {
blast_scache_range(addr, addr + size);
+ }
preempt_enable();
__sync();
return;
@@ -877,9 +903,12 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
preempt_disable();
if (cpu_has_inclusive_pcaches) {
- if (size >= scache_size)
- r4k_blast_scache();
- else {
+ if (size >= scache_size) {
+ if (current_cpu_type() != CPU_LOONGSON3)
+ r4k_blast_scache();
+ else
+ r4k_blast_scache_node(pa_to_nid(addr));
+ } else {
/*
* There is no clearly documented alignment requirement
* for the cache instruction on MIPS processors and
@@ -1918,6 +1947,7 @@ void r4k_cache_init(void)
r4k_blast_scache_page_setup();
r4k_blast_scache_page_indexed_setup();
r4k_blast_scache_setup();
+ r4k_blast_scache_node_setup();
#ifdef CONFIG_EVA
r4k_blast_dcache_user_page_setup();
r4k_blast_icache_user_page_setup();
For multi-node Loongson-3 (NUMA configuration), r4k_blast_scache() can
only flush Node-0's scache. So we add r4k_blast_scache_node(), which uses
(CAC_BASE | (node_id << NODE_ADDRSPACE_SHIFT)) instead of CKSEG0 as the
start address.

Cc: <stable@vger.kernel.org> # 3.15+
Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
 arch/mips/include/asm/mach-loongson64/mmzone.h |  1 +
 arch/mips/include/asm/mmzone.h                 |  8 +++++
 arch/mips/include/asm/r4kcache.h               | 25 +++++++++++++++
 arch/mips/mm/c-r4k.c                           | 44 ++++++++++++++++++++++----
 4 files changed, 71 insertions(+), 7 deletions(-)