diff mbox

simple framebuffer slower by factor of 20, on socfpga (arm) platform

Message ID 20150506104504.GM2067@n2100.arm.linux.org.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Russell King - ARM Linux May 6, 2015, 10:45 a.m. UTC
On Tue, Apr 28, 2015 at 11:28:53AM -0400, Nicolas Pitre wrote:
> On Tue, 28 Apr 2015, Russell King - ARM Linux wrote:
> 
> > On Fri, Apr 24, 2015 at 03:46:56PM +0200, Geert Uytterhoeven wrote:
> > > So please optimize ARM's _memcpy_fromio(), _memcpy_toio(), and _memset_io().
> > > That will benefit other drivers on ARM, too.
> > 
> > That's not going to happen.
> > 
> > I've had a patch which does that, but people are concerned that it changes
> > the behaviour of the functions by changing the access size, which could
> > cause regressions.  It seems people are far too worried about that to even
> > consider trying. :(
> 
> What about making the optimized implementation available via kconfig?

I'd prefer not to.  My personal feeling is to put the patch in and just be
done with it - these functions are supposed to be used on IO areas which
don't care about access size (in other words, are memory-like rather than
being register-like.)  Here's the rather old patch:

From: Russell King <rmk+kernel@arm.linux.org.uk>
Subject: [PATCH] ARM: optimize memset_io()/memcpy_fromio()/memcpy_toio()

If we are building for a LE platform, and we haven't overridden the
MMIO ops, then we can optimize the mem*io operations using the
standard string functions.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/io.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

Comments

Nicolas Pitre May 6, 2015, 8:32 p.m. UTC | #1
On Wed, 6 May 2015, Russell King - ARM Linux wrote:

> On Tue, Apr 28, 2015 at 11:28:53AM -0400, Nicolas Pitre wrote:
> > On Tue, 28 Apr 2015, Russell King - ARM Linux wrote:
> > 
> > > On Fri, Apr 24, 2015 at 03:46:56PM +0200, Geert Uytterhoeven wrote:
> > > > So please optimize ARM's _memcpy_fromio(), _memcpy_toio(), and _memset_io().
> > > > That will benefit other drivers on ARM, too.
> > > 
> > > That's not going to happen.
> > > 
> > > I've had a patch which does that, but people are concerned that it changes
> > > the behaviour of the functions by changing the access size, which could
> > > cause regressions.  It seems people are far too worried about that to even
> > > consider trying. :(
> > 
> > What about making the optimized implementation available via kconfig?
> 
> I'd prefer not to.  My personal feeling is to put the patch in and just be
> done with it - these functions are supposed to be used on IO areas which
> don't care about access size (in other words, are memory-like rather than
> being register-like.)  Here's the rather old patch:
> 
> From: Russell King <rmk+kernel@arm.linux.org.uk>
> Subject: [PATCH] ARM: optimize memset_io()/memcpy_fromio()/memcpy_toio()
> 
> If we are building for a LE platform, and we haven't overridden the
> MMIO ops, then we can optimize the mem*io operations using the
> standard string functions.
> 
> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

Acked-by: Nicolas Pitre <nico@linaro.org>

> ---
>  arch/arm/include/asm/io.h | 25 +++++++++++++++++++++++++
>  1 file changed, 25 insertions(+)
> 
> diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
> index d070741b2b37..358c8206419b 100644
> --- a/arch/arm/include/asm/io.h
> +++ b/arch/arm/include/asm/io.h
> @@ -23,6 +23,7 @@
>  
>  #ifdef __KERNEL__
>  
> +#include <linux/string.h>
>  #include <linux/types.h>
>  #include <asm/byteorder.h>
>  #include <asm/memory.h>
> @@ -312,9 +313,33 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
>  #define writesw(p,d,l)		__raw_writesw(p,d,l)
>  #define writesl(p,d,l)		__raw_writesl(p,d,l)
>  
> +#ifndef __ARMBE__
> +static inline void memset_io(volatile void __iomem *dst, unsigned c,
> +	size_t count)
> +{
> +	memset((void __force *)dst, c, count);
> +}
> +#define memset_io(dst,c,count) memset_io(dst,c,count)
> +
> +static inline void memcpy_fromio(void *to, const volatile void __iomem *from,
> +	size_t count)
> +{
> +	memcpy(to, (const void __force *)from, count);
> +}
> +#define memcpy_fromio(to,from,count) memcpy_fromio(to,from,count)
> +
> +static inline void memcpy_toio(volatile void __iomem *to, const void *from,
> +	size_t count)
> +{
> +	memcpy((void __force *)to, from, count);
> +}
> +#define memcpy_toio(to,from,count) memcpy_toio(to,from,count)
> +
> +#else
>  #define memset_io(c,v,l)	_memset_io(c,(v),(l))
>  #define memcpy_fromio(a,c,l)	_memcpy_fromio((a),c,(l))
>  #define memcpy_toio(c,a,l)	_memcpy_toio(c,(a),(l))
> +#endif
>  
>  #endif	/* readl */
>  
> -- 
> 1.8.3.1
> 
> 
> 
> -- 
> FTTC broadband for 0.8mile line: currently at 10.5Mbps down 400kbps up
> according to speedtest.net.
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-fbdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pavel Machek May 12, 2015, 8:52 a.m. UTC | #2
On Wed 2015-05-06 11:45:04, Russell King - ARM Linux wrote:
> On Tue, Apr 28, 2015 at 11:28:53AM -0400, Nicolas Pitre wrote:
> > On Tue, 28 Apr 2015, Russell King - ARM Linux wrote:
> > 
> > > On Fri, Apr 24, 2015 at 03:46:56PM +0200, Geert Uytterhoeven wrote:
> > > > So please optimize ARM's _memcpy_fromio(), _memcpy_toio(), and _memset_io().
> > > > That will benefit other drivers on ARM, too.
> > > 
> > > That's not going to happen.
> > > 
> > > I've had a patch which does that, but people are concerned that it changes
> > > the behaviour of the functions by changing the access size, which could
> > > cause regressions.  It seems people are far too worried about that to even
> > > consider trying. :(
> > 
> > What about making the optimized implementation available via kconfig?
> 
> I'd prefer not to.  My personal feeling is to put the patch in and just be
> done with it - these functions are supposed to be used on IO areas which
> don't care about access size (in other words, are memory-like rather than
> being register-like.)  Here's the rather old patch:
> 
> From: Russell King <rmk+kernel@arm.linux.org.uk>
> Subject: [PATCH] ARM: optimize memset_io()/memcpy_fromio()/memcpy_toio()
> 
> If we are building for a LE platform, and we haven't overridden the
> MMIO ops, then we can optimize the mem*io operations using the
> standard string functions.
> 
> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

Tested-by: Pavel Machek <pavel@ucw.cz>
Acked-by: Pavel Machek <pavel@ucw.cz>

Works for me, framebuffer performance is back in "too fast to measure"
range.

When this is merged, should 981409b25e2a99409b26daa67293ca1cfd5ea0a0
be reverted in -stable?

Thanks,
									Pavel
diff mbox

Patch

diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index d070741b2b37..358c8206419b 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -23,6 +23,7 @@ 
 
 #ifdef __KERNEL__
 
+#include <linux/string.h>
 #include <linux/types.h>
 #include <asm/byteorder.h>
 #include <asm/memory.h>
@@ -312,9 +313,33 @@  extern void _memset_io(volatile void __iomem *, int, size_t);
 #define writesw(p,d,l)		__raw_writesw(p,d,l)
 #define writesl(p,d,l)		__raw_writesl(p,d,l)
 
+#ifndef __ARMBE__
+static inline void memset_io(volatile void __iomem *dst, unsigned c,
+	size_t count)
+{
+	memset((void __force *)dst, c, count);
+}
+#define memset_io(dst,c,count) memset_io(dst,c,count)
+
+static inline void memcpy_fromio(void *to, const volatile void __iomem *from,
+	size_t count)
+{
+	memcpy(to, (const void __force *)from, count);
+}
+#define memcpy_fromio(to,from,count) memcpy_fromio(to,from,count)
+
+static inline void memcpy_toio(volatile void __iomem *to, const void *from,
+	size_t count)
+{
+	memcpy((void __force *)to, from, count);
+}
+#define memcpy_toio(to,from,count) memcpy_toio(to,from,count)
+
+#else
 #define memset_io(c,v,l)	_memset_io(c,(v),(l))
 #define memcpy_fromio(a,c,l)	_memcpy_fromio((a),c,(l))
 #define memcpy_toio(c,a,l)	_memcpy_toio(c,(a),(l))
+#endif
 
 #endif	/* readl */