diff mbox series

sh: Remove IO memcpy and memset from sh code

Message ID 20250128084254.1408815-1-julian@outer-limits.org (mailing list archive)
State New
Headers show
Series sh: Remove IO memcpy and memset from sh code | expand

Commit Message

Julian Vetter Jan. 28, 2025, 8:42 a.m. UTC
Remove IO memcpy and memset from sh specific code and fall back to the
new implementation from lib/iomem_copy.c. It uses word accesses if the
buffers are aligned and only falls back to byte accesses for potentially
unaligned parts of a buffer. Keep only the SH4 optimized memcpy_fromio.

Signed-off-by: Julian Vetter <julian@outer-limits.org>
---
 arch/sh/include/asm/io.h |  8 ++----
 arch/sh/kernel/io.c      | 62 ++--------------------------------------
 2 files changed, 6 insertions(+), 64 deletions(-)

Comments

Arnd Bergmann Jan. 28, 2025, 9:25 a.m. UTC | #1
On Tue, Jan 28, 2025, at 09:42, Julian Vetter wrote:
> Remove IO memcpy and memset from sh specific code and fall back to the
> new implementation from lib/iomem_copy.c. It uses word accesses if the
> buffers are aligned and only falls back to byte accesses for potentially
> unaligned parts of a buffer. Keep only the SH4 optimized memcpy_fromio.
>
> Signed-off-by: Julian Vetter <julian@outer-limits.org>

This looks good in principle, but I see one mistake:

> +#ifdef CONFIG_CPU_SH4
> +void memcpy_fromio(void *to, const volatile void __iomem *from, size_t 
> count)
>  {
>  	/*
>  	 * Would it be worthwhile doing byte and long transfers first
>  	 * to try and get aligned?
>  	 */
> -#ifdef CONFIG_CPU_SH4
>  	if ((count >= 0x20) &&
>  	     (((u32)to & 0x1f) == 0) && (((u32)from & 0x3) == 0)) {
>  		int tmp2, tmp3, tmp4, tmp5, tmp6;
> @@ -53,59 +50,6 @@ void memcpy_fromio(void *to, const volatile void 
> __iomem *from, unsigned long co
>  			: "7"(from), "0" (to), "1" (count)
>  			: "r0", "r7", "t", "memory");
>  	}
> -#endif
> -
> -	if ((((u32)to | (u32)from) & 0x3) == 0) {
> -		for (; count > 3; count -= 4) {
> -			*(u32 *)to = *(volatile u32 *)from;
> -			to += 4;
> -			from += 4;
> -		}
> -	}
> -

The SH4 version still needs the bottom of the function to
handle data that is not a multiple of 32 bytes long.

I would expect gcc to produce a properly optimized
version for sh4 from the generic code as well, so I would
suggest you remove it entirely and rely on the common code
here.

     Arnd
diff mbox series

Patch

diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index cf5eab840d57..89a9b86bf844 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -269,12 +269,10 @@  __BUILD_IOPORT_STRING(q, u64)
 #define IO_SPACE_LIMIT 0xffffffff
 
 /* We really want to try and get these to memcpy etc */
-#define memset_io memset_io
+#ifdef CONFIG_CPU_SH4
 #define memcpy_fromio memcpy_fromio
-#define memcpy_toio memcpy_toio
-void memcpy_fromio(void *, const volatile void __iomem *, unsigned long);
-void memcpy_toio(volatile void __iomem *, const void *, unsigned long);
-void memset_io(volatile void __iomem *, int, unsigned long);
+void memcpy_fromio(void *, const volatile void __iomem *, size_t);
+#endif
 
 /* Quad-word real-mode I/O, don't ask.. */
 unsigned long long peek_real_address_q(unsigned long long addr);
diff --git a/arch/sh/kernel/io.c b/arch/sh/kernel/io.c
index da22f3b32d30..a3e181643f75 100644
--- a/arch/sh/kernel/io.c
+++ b/arch/sh/kernel/io.c
@@ -5,21 +5,18 @@ 
  * Copyright (C) 2000 - 2009  Stuart Menefy
  * Copyright (C) 2005  Paul Mundt
  */
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <asm/machvec.h>
 #include <asm/io.h>
 
 /*
  * Copy data from IO memory space to "real" memory space.
  */
-void memcpy_fromio(void *to, const volatile void __iomem *from, unsigned long count)
+#ifdef CONFIG_CPU_SH4
+void memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
 {
 	/*
 	 * Would it be worthwhile doing byte and long transfers first
 	 * to try and get aligned?
 	 */
-#ifdef CONFIG_CPU_SH4
 	if ((count >= 0x20) &&
 	     (((u32)to & 0x1f) == 0) && (((u32)from & 0x3) == 0)) {
 		int tmp2, tmp3, tmp4, tmp5, tmp6;
@@ -53,59 +50,6 @@  void memcpy_fromio(void *to, const volatile void __iomem *from, unsigned long co
 			: "7"(from), "0" (to), "1" (count)
 			: "r0", "r7", "t", "memory");
 	}
-#endif
-
-	if ((((u32)to | (u32)from) & 0x3) == 0) {
-		for (; count > 3; count -= 4) {
-			*(u32 *)to = *(volatile u32 *)from;
-			to += 4;
-			from += 4;
-		}
-	}
-
-	for (; count > 0; count--) {
-		*(u8 *)to = *(volatile u8 *)from;
-		to++;
-		from++;
-	}
-
-	mb();
 }
 EXPORT_SYMBOL(memcpy_fromio);
-
-/*
- * Copy data from "real" memory space to IO memory space.
- */
-void memcpy_toio(volatile void __iomem *to, const void *from, unsigned long count)
-{
-	if ((((u32)to | (u32)from) & 0x3) == 0) {
-		for ( ; count > 3; count -= 4) {
-			*(volatile u32 *)to = *(u32 *)from;
-			to += 4;
-			from += 4;
-		}
-	}
-
-	for (; count > 0; count--) {
-		*(volatile u8 *)to = *(u8 *)from;
-		to++;
-		from++;
-	}
-
-	mb();
-}
-EXPORT_SYMBOL(memcpy_toio);
-
-/*
- * "memset" on IO memory space.
- * This needs to be optimized.
- */
-void memset_io(volatile void __iomem *dst, int c, unsigned long count)
-{
-        while (count) {
-                count--;
-                writeb(c, dst);
-                dst++;
-        }
-}
-EXPORT_SYMBOL(memset_io);
+#endif