MIPS: Provide unroll() macro, use it for cache ops

Message ID	20191008182149.2082503-1-paul.burton@mips.com (mailing list archive)
State	Mainlined
Commit	6baaeadae911ba9cedfead881f3bf305a18fd011
Delegated to:	Paul Burton
Headers	show Return-Path: <SRS0=W1ji=YB=vger.kernel.org=linux-mips-owner@kernel.org> From: Paul Burton <paul.burton@mips.com> To: "linux-mips@vger.kernel.org" <linux-mips@vger.kernel.org> CC: Paul Burton <pburton@wavecomp.com> Subject: [PATCH] MIPS: Provide unroll() macro, use it for cache ops Thread-Topic: [PATCH] MIPS: Provide unroll() macro, use it for cache ops Thread-Index: AQHVfgVGUUFiUFsiX0+VhTzRiJkYfg== Date: Tue, 8 Oct 2019 18:22:00 +0000 Message-ID: <20191008182149.2082503-1-paul.burton@mips.com> Accept-Language: en-US Content-Language: en-US received-spf: None (protection.outlook.com: wavecomp.com does not designate permitted sender hosts) Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Sender: linux-mips-owner@vger.kernel.org Precedence: bulk
Series	MIPS: Provide unroll() macro, use it for cache ops \| expand MIPS: Provide unroll() macro, use it for cache ops

diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h index 7f4a32d3345a..e73fc9e899d2 100644 --- a/arch/mips/include/asm/r4kcache.h +++ b/arch/mips/include/asm/r4kcache.h @@ -15,12 +15,14 @@ #include <linux/stringify.h> #include <asm/asm.h> +#include <asm/asm-eva.h> #include <asm/cacheops.h> #include <asm/compiler.h> #include <asm/cpu-features.h> #include <asm/cpu-type.h> #include <asm/mipsmtregs.h> #include <asm/mmzone.h> +#include <asm/unroll.h> #include <linux/uaccess.h> /* for uaccess_kernel() */ extern void (*r4k_blast_dcache)(void); @@ -39,16 +41,19 @@ extern void (*r4k_blast_icache)(void); */ #define INDEX_BASE CKSEG0 -#define cache_op(op,addr) \ +#define _cache_op(insn, op, addr) \ __asm__ __volatile__( \ " .set push \n" \ " .set noreorder \n" \ " .set "MIPS_ISA_ARCH_LEVEL" \n" \ - " cache %0, %1 \n" \ + " " insn("%0", "%1") " \n" \ " .set pop \n" \ : \ : "i" (op), "R" (*(unsigned char *)(addr))) +#define cache_op(op, addr) \ + _cache_op(kernel_cache, op, addr) + static inline void flush_icache_line_indexed(unsigned long addr) { cache_op(Index_Invalidate_I, addr); @@ -193,338 +198,10 @@ static inline void invalidate_tcache_page(unsigned long addr) cache_op(Page_Invalidate_T, addr); } -#ifndef CONFIG_CPU_MIPSR6 -#define cache16_unroll32(base,op) \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set noreorder \n" \ - " .set mips3 \n" \ - " cache %1, 0x000(%0); cache %1, 0x010(%0) \n" \ - " cache %1, 0x020(%0); cache %1, 0x030(%0) \n" \ - " cache %1, 0x040(%0); cache %1, 0x050(%0) \n" \ - " cache %1, 0x060(%0); cache %1, 0x070(%0) \n" \ - " cache %1, 0x080(%0); cache %1, 0x090(%0) \n" \ - " cache %1, 0x0a0(%0); cache %1, 0x0b0(%0) \n" \ - " cache %1, 0x0c0(%0); cache %1, 0x0d0(%0) \n" \ - " cache %1, 0x0e0(%0); cache %1, 0x0f0(%0) \n" \ - " cache %1, 0x100(%0); cache %1, 0x110(%0) \n" \ - " cache %1, 0x120(%0); cache %1, 0x130(%0) \n" \ - " cache %1, 0x140(%0); cache %1, 0x150(%0) \n" \ - " cache %1, 0x160(%0); cache %1, 0x170(%0) \n" \ - " cache %1, 0x180(%0); cache %1, 0x190(%0) \n" \ - " cache %1, 0x1a0(%0); cache %1, 0x1b0(%0) \n" \ - " cache %1, 0x1c0(%0); cache %1, 0x1d0(%0) \n" \ - " cache %1, 0x1e0(%0); cache %1, 0x1f0(%0) \n" \ - " .set pop \n" \ - : \ - : "r" (base), \ - "i" (op)); - -#define cache32_unroll32(base,op) \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set noreorder \n" \ - " .set mips3 \n" \ - " cache %1, 0x000(%0); cache %1, 0x020(%0) \n" \ - " cache %1, 0x040(%0); cache %1, 0x060(%0) \n" \ - " cache %1, 0x080(%0); cache %1, 0x0a0(%0) \n" \ - " cache %1, 0x0c0(%0); cache %1, 0x0e0(%0) \n" \ - " cache %1, 0x100(%0); cache %1, 0x120(%0) \n" \ - " cache %1, 0x140(%0); cache %1, 0x160(%0) \n" \ - " cache %1, 0x180(%0); cache %1, 0x1a0(%0) \n" \ - " cache %1, 0x1c0(%0); cache %1, 0x1e0(%0) \n" \ - " cache %1, 0x200(%0); cache %1, 0x220(%0) \n" \ - " cache %1, 0x240(%0); cache %1, 0x260(%0) \n" \ - " cache %1, 0x280(%0); cache %1, 0x2a0(%0) \n" \ - " cache %1, 0x2c0(%0); cache %1, 0x2e0(%0) \n" \ - " cache %1, 0x300(%0); cache %1, 0x320(%0) \n" \ - " cache %1, 0x340(%0); cache %1, 0x360(%0) \n" \ - " cache %1, 0x380(%0); cache %1, 0x3a0(%0) \n" \ - " cache %1, 0x3c0(%0); cache %1, 0x3e0(%0) \n" \ - " .set pop \n" \ - : \ - : "r" (base), \ - "i" (op)); - -#define cache64_unroll32(base,op) \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set noreorder \n" \ - " .set mips3 \n" \ - " cache %1, 0x000(%0); cache %1, 0x040(%0) \n" \ - " cache %1, 0x080(%0); cache %1, 0x0c0(%0) \n" \ - " cache %1, 0x100(%0); cache %1, 0x140(%0) \n" \ - " cache %1, 0x180(%0); cache %1, 0x1c0(%0) \n" \ - " cache %1, 0x200(%0); cache %1, 0x240(%0) \n" \ - " cache %1, 0x280(%0); cache %1, 0x2c0(%0) \n" \ - " cache %1, 0x300(%0); cache %1, 0x340(%0) \n" \ - " cache %1, 0x380(%0); cache %1, 0x3c0(%0) \n" \ - " cache %1, 0x400(%0); cache %1, 0x440(%0) \n" \ - " cache %1, 0x480(%0); cache %1, 0x4c0(%0) \n" \ - " cache %1, 0x500(%0); cache %1, 0x540(%0) \n" \ - " cache %1, 0x580(%0); cache %1, 0x5c0(%0) \n" \ - " cache %1, 0x600(%0); cache %1, 0x640(%0) \n" \ - " cache %1, 0x680(%0); cache %1, 0x6c0(%0) \n" \ - " cache %1, 0x700(%0); cache %1, 0x740(%0) \n" \ - " cache %1, 0x780(%0); cache %1, 0x7c0(%0) \n" \ - " .set pop \n" \ - : \ - : "r" (base), \ - "i" (op)); - -#define cache128_unroll32(base,op) \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set noreorder \n" \ - " .set mips3 \n" \ - " cache %1, 0x000(%0); cache %1, 0x080(%0) \n" \ - " cache %1, 0x100(%0); cache %1, 0x180(%0) \n" \ - " cache %1, 0x200(%0); cache %1, 0x280(%0) \n" \ - " cache %1, 0x300(%0); cache %1, 0x380(%0) \n" \ - " cache %1, 0x400(%0); cache %1, 0x480(%0) \n" \ - " cache %1, 0x500(%0); cache %1, 0x580(%0) \n" \ - " cache %1, 0x600(%0); cache %1, 0x680(%0) \n" \ - " cache %1, 0x700(%0); cache %1, 0x780(%0) \n" \ - " cache %1, 0x800(%0); cache %1, 0x880(%0) \n" \ - " cache %1, 0x900(%0); cache %1, 0x980(%0) \n" \ - " cache %1, 0xa00(%0); cache %1, 0xa80(%0) \n" \ - " cache %1, 0xb00(%0); cache %1, 0xb80(%0) \n" \ - " cache %1, 0xc00(%0); cache %1, 0xc80(%0) \n" \ - " cache %1, 0xd00(%0); cache %1, 0xd80(%0) \n" \ - " cache %1, 0xe00(%0); cache %1, 0xe80(%0) \n" \ - " cache %1, 0xf00(%0); cache %1, 0xf80(%0) \n" \ - " .set pop \n" \ - : \ - : "r" (base), \ - "i" (op)); - -#else -/* - * MIPS R6 changed the cache opcode and moved to a 8-bit offset field. - * This means we now need to increment the base register before we flush - * more cache lines - */ -#define cache16_unroll32(base,op) \ - __asm__ __volatile__( \ - " .set push\n" \ - " .set noreorder\n" \ - " .set mips64r6\n" \ - " .set noat\n" \ - " cache %1, 0x000(%0); cache %1, 0x010(%0)\n" \ - " cache %1, 0x020(%0); cache %1, 0x030(%0)\n" \ - " cache %1, 0x040(%0); cache %1, 0x050(%0)\n" \ - " cache %1, 0x060(%0); cache %1, 0x070(%0)\n" \ - " cache %1, 0x080(%0); cache %1, 0x090(%0)\n" \ - " cache %1, 0x0a0(%0); cache %1, 0x0b0(%0)\n" \ - " cache %1, 0x0c0(%0); cache %1, 0x0d0(%0)\n" \ - " cache %1, 0x0e0(%0); cache %1, 0x0f0(%0)\n" \ - " "__stringify(LONG_ADDIU)" $1, %0, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x010($1)\n" \ - " cache %1, 0x020($1); cache %1, 0x030($1)\n" \ - " cache %1, 0x040($1); cache %1, 0x050($1)\n" \ - " cache %1, 0x060($1); cache %1, 0x070($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x090($1)\n" \ - " cache %1, 0x0a0($1); cache %1, 0x0b0($1)\n" \ - " cache %1, 0x0c0($1); cache %1, 0x0d0($1)\n" \ - " cache %1, 0x0e0($1); cache %1, 0x0f0($1)\n" \ - " .set pop\n" \ - : \ - : "r" (base), \ - "i" (op)); - -#define cache32_unroll32(base,op) \ - __asm__ __volatile__( \ - " .set push\n" \ - " .set noreorder\n" \ - " .set mips64r6\n" \ - " .set noat\n" \ - " cache %1, 0x000(%0); cache %1, 0x020(%0)\n" \ - " cache %1, 0x040(%0); cache %1, 0x060(%0)\n" \ - " cache %1, 0x080(%0); cache %1, 0x0a0(%0)\n" \ - " cache %1, 0x0c0(%0); cache %1, 0x0e0(%0)\n" \ - " "__stringify(LONG_ADDIU)" $1, %0, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x020($1)\n" \ - " cache %1, 0x040($1); cache %1, 0x060($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0a0($1)\n" \ - " cache %1, 0x0c0($1); cache %1, 0x0e0($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x020($1)\n" \ - " cache %1, 0x040($1); cache %1, 0x060($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0a0($1)\n" \ - " cache %1, 0x0c0($1); cache %1, 0x0e0($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100\n" \ - " cache %1, 0x000($1); cache %1, 0x020($1)\n" \ - " cache %1, 0x040($1); cache %1, 0x060($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0a0($1)\n" \ - " cache %1, 0x0c0($1); cache %1, 0x0e0($1)\n" \ - " .set pop\n" \ - : \ - : "r" (base), \ - "i" (op)); - -#define cache64_unroll32(base,op) \ - __asm__ __volatile__( \ - " .set push\n" \ - " .set noreorder\n" \ - " .set mips64r6\n" \ - " .set noat\n" \ - " cache %1, 0x000(%0); cache %1, 0x040(%0)\n" \ - " cache %1, 0x080(%0); cache %1, 0x0c0(%0)\n" \ - " "__stringify(LONG_ADDIU)" $1, %0, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x040($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x040($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x040($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x040($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x040($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x040($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x040($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \ - " .set pop\n" \ - : \ - : "r" (base), \ - "i" (op)); - -#define cache128_unroll32(base,op) \ - __asm__ __volatile__( \ - " .set push\n" \ - " .set noreorder\n" \ - " .set mips64r6\n" \ - " .set noat\n" \ - " cache %1, 0x000(%0); cache %1, 0x080(%0)\n" \ - " "__stringify(LONG_ADDIU)" $1, %0, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " .set pop\n" \ - : \ - : "r" (base), \ - "i" (op)); -#endif /* CONFIG_CPU_MIPSR6 */ - -/* - * Perform the cache operation specified by op using a user mode virtual - * address while in kernel mode. - */ -#define cache16_unroll32_user(base,op) \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set noreorder \n" \ - " .set mips0 \n" \ - " .set eva \n" \ - " cachee %1, 0x000(%0); cachee %1, 0x010(%0) \n" \ - " cachee %1, 0x020(%0); cachee %1, 0x030(%0) \n" \ - " cachee %1, 0x040(%0); cachee %1, 0x050(%0) \n" \ - " cachee %1, 0x060(%0); cachee %1, 0x070(%0) \n" \ - " cachee %1, 0x080(%0); cachee %1, 0x090(%0) \n" \ - " cachee %1, 0x0a0(%0); cachee %1, 0x0b0(%0) \n" \ - " cachee %1, 0x0c0(%0); cachee %1, 0x0d0(%0) \n" \ - " cachee %1, 0x0e0(%0); cachee %1, 0x0f0(%0) \n" \ - " cachee %1, 0x100(%0); cachee %1, 0x110(%0) \n" \ - " cachee %1, 0x120(%0); cachee %1, 0x130(%0) \n" \ - " cachee %1, 0x140(%0); cachee %1, 0x150(%0) \n" \ - " cachee %1, 0x160(%0); cachee %1, 0x170(%0) \n" \ - " cachee %1, 0x180(%0); cachee %1, 0x190(%0) \n" \ - " cachee %1, 0x1a0(%0); cachee %1, 0x1b0(%0) \n" \ - " cachee %1, 0x1c0(%0); cachee %1, 0x1d0(%0) \n" \ - " cachee %1, 0x1e0(%0); cachee %1, 0x1f0(%0) \n" \ - " .set pop \n" \ - : \ - : "r" (base), \ - "i" (op)); - -#define cache32_unroll32_user(base, op) \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set noreorder \n" \ - " .set mips0 \n" \ - " .set eva \n" \ - " cachee %1, 0x000(%0); cachee %1, 0x020(%0) \n" \ - " cachee %1, 0x040(%0); cachee %1, 0x060(%0) \n" \ - " cachee %1, 0x080(%0); cachee %1, 0x0a0(%0) \n" \ - " cachee %1, 0x0c0(%0); cachee %1, 0x0e0(%0) \n" \ - " cachee %1, 0x100(%0); cachee %1, 0x120(%0) \n" \ - " cachee %1, 0x140(%0); cachee %1, 0x160(%0) \n" \ - " cachee %1, 0x180(%0); cachee %1, 0x1a0(%0) \n" \ - " cachee %1, 0x1c0(%0); cachee %1, 0x1e0(%0) \n" \ - " cachee %1, 0x200(%0); cachee %1, 0x220(%0) \n" \ - " cachee %1, 0x240(%0); cachee %1, 0x260(%0) \n" \ - " cachee %1, 0x280(%0); cachee %1, 0x2a0(%0) \n" \ - " cachee %1, 0x2c0(%0); cachee %1, 0x2e0(%0) \n" \ - " cachee %1, 0x300(%0); cachee %1, 0x320(%0) \n" \ - " cachee %1, 0x340(%0); cachee %1, 0x360(%0) \n" \ - " cachee %1, 0x380(%0); cachee %1, 0x3a0(%0) \n" \ - " cachee %1, 0x3c0(%0); cachee %1, 0x3e0(%0) \n" \ - " .set pop \n" \ - : \ - : "r" (base), \ - "i" (op)); - -#define cache64_unroll32_user(base, op) \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set noreorder \n" \ - " .set mips0 \n" \ - " .set eva \n" \ - " cachee %1, 0x000(%0); cachee %1, 0x040(%0) \n" \ - " cachee %1, 0x080(%0); cachee %1, 0x0c0(%0) \n" \ - " cachee %1, 0x100(%0); cachee %1, 0x140(%0) \n" \ - " cachee %1, 0x180(%0); cachee %1, 0x1c0(%0) \n" \ - " cachee %1, 0x200(%0); cachee %1, 0x240(%0) \n" \ - " cachee %1, 0x280(%0); cachee %1, 0x2c0(%0) \n" \ - " cachee %1, 0x300(%0); cachee %1, 0x340(%0) \n" \ - " cachee %1, 0x380(%0); cachee %1, 0x3c0(%0) \n" \ - " cachee %1, 0x400(%0); cachee %1, 0x440(%0) \n" \ - " cachee %1, 0x480(%0); cachee %1, 0x4c0(%0) \n" \ - " cachee %1, 0x500(%0); cachee %1, 0x540(%0) \n" \ - " cachee %1, 0x580(%0); cachee %1, 0x5c0(%0) \n" \ - " cachee %1, 0x600(%0); cachee %1, 0x640(%0) \n" \ - " cachee %1, 0x680(%0); cachee %1, 0x6c0(%0) \n" \ - " cachee %1, 0x700(%0); cachee %1, 0x740(%0) \n" \ - " cachee %1, 0x780(%0); cachee %1, 0x7c0(%0) \n" \ - " .set pop \n" \ - : \ - : "r" (base), \ - "i" (op)); +#define cache_unroll(times, insn, op, addr, lsize) do { \ + int i = 0; \ + unroll(times, _cache_op, insn, op, (addr) + (i++ * (lsize))); \ +} while (0) /* build blast_xxx, blast_xxx_page, blast_xxx_page_indexed */ #define __BUILD_BLAST_CACHE(pfx, desc, indexop, hitop, lsize, extra) \ @@ -539,7 +216,8 @@ static inline void extra##blast_##pfx##cache##lsize(void) \ \ for (ws = 0; ws < ws_end; ws += ws_inc) \ for (addr = start; addr < end; addr += lsize * 32) \ - cache##lsize##_unroll32(addr|ws, indexop); \ + cache_unroll(32, kernel_cache, indexop, \ + addr | ws, lsize); \ } \ \ static inline void extra##blast_##pfx##cache##lsize##_page(unsigned long page) \ @@ -548,7 +226,7 @@ static inline void extra##blast_##pfx##cache##lsize##_page(unsigned long page) \ unsigned long end = page + PAGE_SIZE; \ \ do { \ - cache##lsize##_unroll32(start, hitop); \ + cache_unroll(32, kernel_cache, hitop, start, lsize); \ start += lsize * 32; \ } while (start < end); \ } \ @@ -565,7 +243,8 @@ static inline void extra##blast_##pfx##cache##lsize##_page_indexed(unsigned long \ for (ws = 0; ws < ws_end; ws += ws_inc) \ for (addr = start; addr < end; addr += lsize * 32) \ - cache##lsize##_unroll32(addr|ws, indexop); \ + cache_unroll(32, kernel_cache, indexop, \ + addr | ws, lsize); \ } __BUILD_BLAST_CACHE(d, dcache, Index_Writeback_Inv_D, Hit_Writeback_Inv_D, 16, ) @@ -596,7 +275,7 @@ static inline void blast_##pfx##cache##lsize##_user_page(unsigned long page) \ unsigned long end = page + PAGE_SIZE; \ \ do { \ - cache##lsize##_unroll32_user(start, hitop); \ + cache_unroll(32, user_cache, hitop, start, lsize); \ start += lsize * 32; \ } while (start < end); \ } @@ -688,7 +367,8 @@ static inline void blast_##pfx##cache##lsize##_node(long node) \ \ for (ws = 0; ws < ws_end; ws += ws_inc) \ for (addr = start; addr < end; addr += lsize * 32) \ - cache##lsize##_unroll32(addr|ws, indexop); \ + cache_unroll(32, kernel_cache, indexop, \ + addr | ws, lsize); \ } __BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 16) diff --git a/arch/mips/include/asm/unroll.h b/arch/mips/include/asm/unroll.h new file mode 100644 index 000000000000..df1cdcfc5a47 --- /dev/null +++ b/arch/mips/include/asm/unroll.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_UNROLL_H__ +#define __ASM_UNROLL_H__ + +/* + * Explicitly unroll a loop, for use in cases where doing so is performance + * critical. + * + * Ideally we'd rely upon the compiler to provide this but there's no commonly + * available means to do so. For example GCC's "#pragma GCC unroll" + * functionality would be ideal but is only available from GCC 8 onwards. Using + * -funroll-loops is an option but GCC tends to make poor choices when + * compiling our string functions. -funroll-all-loops leads to massive code + * bloat, even if only applied to the string functions. + */ +#define unroll(times, fn, ...) do { \ + extern void bad_unroll(void) \ + __compiletime_error("Unsupported unroll"); \ + \ + /* \ + * We can't unroll if the number of iterations isn't \ + * compile-time constant. Unfortunately GCC versions \ + * up until 4.6 tend to miss obvious constants & cause \ + * this check to fail, even though they go on to \ + * generate reasonable code for the switch statement, \ + * so we skip the sanity check for those compilers. \ + */ \ + BUILD_BUG_ON(GCC_VERSION >= 40700 && \ + !__builtin_constant_p(times)); \ + \ + switch (times) { \ + case 32: fn(__VA_ARGS__); /* fall through */ \ + case 31: fn(__VA_ARGS__); /* fall through */ \ + case 30: fn(__VA_ARGS__); /* fall through */ \ + case 29: fn(__VA_ARGS__); /* fall through */ \ + case 28: fn(__VA_ARGS__); /* fall through */ \ + case 27: fn(__VA_ARGS__); /* fall through */ \ + case 26: fn(__VA_ARGS__); /* fall through */ \ + case 25: fn(__VA_ARGS__); /* fall through */ \ + case 24: fn(__VA_ARGS__); /* fall through */ \ + case 23: fn(__VA_ARGS__); /* fall through */ \ + case 22: fn(__VA_ARGS__); /* fall through */ \ + case 21: fn(__VA_ARGS__); /* fall through */ \ + case 20: fn(__VA_ARGS__); /* fall through */ \ + case 19: fn(__VA_ARGS__); /* fall through */ \ + case 18: fn(__VA_ARGS__); /* fall through */ \ + case 17: fn(__VA_ARGS__); /* fall through */ \ + case 16: fn(__VA_ARGS__); /* fall through */ \ + case 15: fn(__VA_ARGS__); /* fall through */ \ + case 14: fn(__VA_ARGS__); /* fall through */ \ + case 13: fn(__VA_ARGS__); /* fall through */ \ + case 12: fn(__VA_ARGS__); /* fall through */ \ + case 11: fn(__VA_ARGS__); /* fall through */ \ + case 10: fn(__VA_ARGS__); /* fall through */ \ + case 9: fn(__VA_ARGS__); /* fall through */ \ + case 8: fn(__VA_ARGS__); /* fall through */ \ + case 7: fn(__VA_ARGS__); /* fall through */ \ + case 6: fn(__VA_ARGS__); /* fall through */ \ + case 5: fn(__VA_ARGS__); /* fall through */ \ + case 4: fn(__VA_ARGS__); /* fall through */ \ + case 3: fn(__VA_ARGS__); /* fall through */ \ + case 2: fn(__VA_ARGS__); /* fall through */ \ + case 1: fn(__VA_ARGS__); /* fall through */ \ + case 0: break; \ + \ + default: \ + /* \ + * Either the iteration count is unreasonable \ + * or we need to add more cases above. \ + */ \ + bad_unroll(); \ + break; \ + } \ +} while (0) + +#endif /* __ASM_UNROLL_H__ */ diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 4bf990633135..378cbb02dcdd 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -271,12 +271,14 @@ static inline void tx49_blast_icache32(void) /* I'm in even chunk. blast odd chunks */ for (ws = 0; ws < ws_end; ws += ws_inc) for (addr = start + 0x400; addr < end; addr += 0x400 * 2) - cache32_unroll32(addr|ws, Index_Invalidate_I); + cache_unroll(32, kernel_cache, Index_Invalidate_I, + addr | ws, 32); CACHE32_UNROLL32_ALIGN; /* I'm in odd chunk. blast even chunks */ for (ws = 0; ws < ws_end; ws += ws_inc) for (addr = start; addr < end; addr += 0x400 * 2) - cache32_unroll32(addr|ws, Index_Invalidate_I); + cache_unroll(32, kernel_cache, Index_Invalidate_I, + addr | ws, 32); } static inline void blast_icache32_r4600_v1_page_indexed(unsigned long page) @@ -302,12 +304,14 @@ static inline void tx49_blast_icache32_page_indexed(unsigned long page) /* I'm in even chunk. blast odd chunks */ for (ws = 0; ws < ws_end; ws += ws_inc) for (addr = start + 0x400; addr < end; addr += 0x400 * 2) - cache32_unroll32(addr|ws, Index_Invalidate_I); + cache_unroll(32, kernel_cache, Index_Invalidate_I, + addr | ws, 32); CACHE32_UNROLL32_ALIGN; /* I'm in odd chunk. blast even chunks */ for (ws = 0; ws < ws_end; ws += ws_inc) for (addr = start; addr < end; addr += 0x400 * 2) - cache32_unroll32(addr|ws, Index_Invalidate_I); + cache_unroll(32, kernel_cache, Index_Invalidate_I, + addr | ws, 32); } static void (* r4k_blast_icache_page)(unsigned long addr);

MIPS: Provide unroll() macro, use it for cache ops

Commit Message

Comments

Patch