diff mbox series

parisc: Add assembly implementations for strlen, strcpy, strncpy and strcat

Message ID 20190206222110.GA19527@ls3530.dellerweb.de (mailing list archive)
State Accepted, archived
Headers show
Series parisc: Add assembly implementations for strlen, strcpy, strncpy and strcat | expand

Commit Message

Helge Deller Feb. 6, 2019, 10:21 p.m. UTC
Add performance-optimized versions of the strlen, strcpy, strncpy and
strcat string functions.

Signed-off-by: Helge Deller <deller@gmx.de>

Comments

Sven Schnelle Feb. 7, 2019, 4:24 p.m. UTC | #1
Hi,

On Wed, Feb 06, 2019 at 11:21:10PM +0100, Helge Deller wrote:
> Add performance-optimized versions of the strlen, strcpy, strncpy and
> strcat string functions.
> 
> Signed-off-by: Helge Deller <deller@gmx.de>
> 

I did a quick test on my C3750 with the latest git plus this patch and found no
obvious problems.

Tested-by: Sven Schnelle <svens@stackframe.org>
Matt Turner Feb. 7, 2019, 5:46 p.m. UTC | #2
On Wed, Feb 6, 2019 at 2:21 PM Helge Deller <deller@gmx.de> wrote:
>
> Add performance-optimized versions of the strlen, strcpy, strncpy and
> strcat string functions.

Can you say anything else about them? E.g., are you using some feature
that the compiler isn't able to generate? Do you have performance
data?

Except for strlen, they look like straightforward translations of the
simple C implementations.
Helge Deller Feb. 7, 2019, 10:21 p.m. UTC | #3
On 07.02.19 18:46, Matt Turner wrote:
> On Wed, Feb 6, 2019 at 2:21 PM Helge Deller <deller@gmx.de> wrote:
>>
>> Add performance-optimized versions of the strlen, strcpy, strncpy and
>> strcat string functions.
> 
> Can you say anything else about them?

No.

> E.g., are you using some feature that the compiler isn't able to
> generate?

Some assembler instructions are used which the compiler doesn't
use by default for such use cases. 

> Do you have performance data?

Not yet.
Maybe someone wants to do that?

> Except for strlen, they look like straightforward translations of the
> simple C implementations.

The code is much smaller than what the compiler generates, and
the layout of the byte stores is targeted at the behaviour of
the parisc 64-bit CPUs.

Helge
Matt Turner Feb. 7, 2019, 10:53 p.m. UTC | #4
On Thu, Feb 7, 2019 at 2:22 PM Helge Deller <deller@gmx.de> wrote:
>
>
> On 07.02.19 18:46, Matt Turner wrote:
> > On Wed, Feb 6, 2019 at 2:21 PM Helge Deller <deller@gmx.de> wrote:
> >>
> >> Add performance-optimized versions of the strlen, strcpy, strncpy and
> >> strcat string functions.
> >
> > Can you say anything else about them?
>
> No.
>
> > E.g., are you using some feature that the compiler isn't able to
> > generate?
>
> Some assembler instructions are used which the compiler doesn't
> use by default for such use cases.
>
> > Do you have performance data?
>
> Not yet.
> Maybe someone wants to do that?
>
> > Except for strlen, they look like straightforward translations of the
> > simple C implementations.
>
> The code is much smaller than what the compiler generates, and
> the layout of the byte stores is targeted at the behaviour of
> the parisc 64-bit CPUs.

So is that the rationale? I'm not sure how anyone can offer review if
they don't know what you're trying to accomplish.
diff mbox series

Patch

diff --git a/arch/parisc/include/asm/string.h b/arch/parisc/include/asm/string.h
index f6e1132f4e35..62a9be5e55b2 100644
--- a/arch/parisc/include/asm/string.h
+++ b/arch/parisc/include/asm/string.h
@@ -8,4 +8,16 @@  extern void * memset(void *, int, size_t);
 #define __HAVE_ARCH_MEMCPY
 void * memcpy(void * dest,const void *src,size_t count);
 
+#define __HAVE_ARCH_STRLEN
+extern size_t strlen(const char *s);
+
+#define __HAVE_ARCH_STRCPY
+extern char *strcpy(char *dest, const char *src);
+
+#define __HAVE_ARCH_STRNCPY
+extern char *strncpy(char *dest, const char *src, size_t count);
+
+#define __HAVE_ARCH_STRCAT
+extern char *strcat(char *dest, const char *src);
+
 #endif
diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c
index 7baa2265d439..9a72823e939c 100644
--- a/arch/parisc/kernel/parisc_ksyms.c
+++ b/arch/parisc/kernel/parisc_ksyms.c
@@ -30,6 +30,10 @@ 
 
 #include <linux/string.h>
 EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(strlen);
+EXPORT_SYMBOL(strcpy);
+EXPORT_SYMBOL(strncpy);
+EXPORT_SYMBOL(strcat);
 
 #include <linux/atomic.h>
 EXPORT_SYMBOL(__xchg8);
diff --git a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile
index f2dac4d73b1b..c2d266358358 100644
--- a/arch/parisc/lib/Makefile
+++ b/arch/parisc/lib/Makefile
@@ -3,6 +3,6 @@ 
 #
 
 lib-y	:= lusercopy.o bitops.o checksum.o io.o memset.o memcpy.o \
-	   ucmpdi2.o delay.o
+	   ucmpdi2.o delay.o string.o
 
 obj-y	:= iomap.o
diff --git a/arch/parisc/lib/string.S b/arch/parisc/lib/string.S
new file mode 100644
index 000000000000..35b555b11c76
--- /dev/null
+++ b/arch/parisc/lib/string.S
@@ -0,0 +1,107 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    PA-RISC assembly string functions
+ *
+ *    Copyright (C) 2019 Helge Deller <deller@gmx.de>
+ */
+
+#include <asm/assembly.h>
+#include <linux/linkage.h>
+
+	.text
+	.level LEVEL
+
+	t0 = r20
+	t1 = r21
+	t2 = r22
+
+	.align 16
+ENTRY_CFI(strlen)	/* size_t strlen(const char *s): arg0 = s, length returned in ret0 */
+	or,COND(<>) arg0,r0,ret0	/* ret0 = s; nullify the next branch unless s is NULL */
+	b,l,n	.Lstrlen_null_ptr,r0	/* s == NULL: return with ret0 == 0 */
+	depwi	0,31,2,ret0		/* clear the low two bits: ret0 = s rounded down to 4 bytes */
+	cmpb,COND(<>) arg0,ret0,.Lstrlen_not_aligned
+	ldw,ma	4(ret0),t0		/* (delay slot, always runs) t0 = word at ret0, then ret0 += 4 */
+	cmpib,tr 0,r0,.Lstrlen_loop	/* aligned start: unconditional branch into the loop */
+	uxor,nbz r0,t0,r0		/* (delay slot) nullify the loop's exit branch if t0 has no zero byte */
+.Lstrlen_not_aligned:
+	uaddcm	arg0,ret0,t1		/* t1 = s + ~ret0 = s - ret0 - 1 (ret0 is already 4 past the word) */
+	shladd	t1,3,r0,t1		/* t1 = t1 * 8: byte offset to bit offset */
+	mtsar	t1
+	depwi	-1,%sar,32,t0		/* force ones into t0 up to the %sar position; presumably makes the bytes before s non-zero - NOTE(review): confirm %sar semantics */
+	uxor,nbz r0,t0,r0		/* nullify the exit branch below if t0 has no zero byte */
+.Lstrlen_loop:
+	b,l,n	.Lstrlen_end_loop,r0	/* executed only when the preceding uxor saw a zero byte */
+	ldw,ma	4(ret0),t0		/* t0 = next word, then ret0 += 4 */
+	cmpib,tr 0,r0,.Lstrlen_loop
+	uxor,nbz r0,t0,r0		/* (delay slot) test the new word for a zero byte */
+.Lstrlen_end_loop:			/* t0 holds a zero byte; ret0 is 4 past that word */
+	extrw,u,<> t0,7,8,r0		/* bits 0-7 non-zero: nullify the next line */
+	addib,tr,n -3,ret0,.Lstrlen_out	/* zero in bits 0-7: ret0 -= 3, done */
+	extrw,u,<> t0,15,8,r0		/* bits 8-15 non-zero: nullify the next line */
+	addib,tr,n -2,ret0,.Lstrlen_out	/* zero in bits 8-15: ret0 -= 2, done */
+	extrw,u,<> t0,23,8,r0		/* bits 16-23 non-zero: nullify the next line, leaving ret0 as is */
+	addi	-1,ret0,ret0		/* zero in bits 16-23: ret0 -= 1 */
+.Lstrlen_out:
+	bv r0(rp)
+	uaddcm ret0,arg0,ret0		/* (delay slot) ret0 = ret0 + ~s = ret0 - s - 1 */
+.Lstrlen_null_ptr:
+	bv,n r0(rp)
+ENDPROC_CFI(strlen)
+
+
+	.align 16
+ENTRY_CFI(strcpy)	/* char *strcpy(char *dest, const char *src): arg0 = dest, arg1 = src, returns dest in ret0 */
+	ldb	0(arg1),t0		/* t0 = src[0] */
+	stb	t0,0(arg0)		/* dest[0] = src[0] */
+	ldo	0(arg0),ret0		/* return value is the original dest */
+	ldo	1(arg1),t1		/* t1 = src + 1 (read cursor) */
+	cmpb,=	r0,t0,2f		/* first byte was NUL: nothing more to copy */
+	ldo	1(arg0),t2		/* (delay slot, always runs) t2 = dest + 1 (write cursor) */
+1:	ldb	0(t1),arg1		/* arg1 is reused as the byte scratch register */
+	stb	arg1,0(t2)
+	ldo	1(t1),t1		/* advance read cursor */
+	cmpb,<> r0,arg1,1b		/* keep looping until the byte just stored was NUL */
+	ldo	1(t2),t2		/* (delay slot, always runs) advance write cursor */
+2:	bv,n	r0(rp)
+ENDPROC_CFI(strcpy)
+
+
+	.align 16
+ENTRY_CFI(strncpy)
+	/* char *strncpy(char *dest, const char *src, size_t count)
+	 * In: arg0 = dest, arg1 = src, arg2 = count. Out: ret0 = dest.
+	 * Stores exactly count bytes: src up to and including its NUL, then
+	 * NUL padding. count == 0 stores nothing. Clobbers t0, arg0-arg2. */
+	cmpb,COND(=) r0,arg2,2f		/* count == 0: do not touch dest at all */
+	ldo	0(arg0),ret0		/* (delay slot, always runs) return value is dest */
+1:	ldb	0(arg1),t0		/* t0 = *src */
+	stb	t0,0(arg0)		/* *dest = t0 */
+	ldo	-1(arg2),arg2		/* count-- */
+	or,=	r0,t0,r0		/* t0 == NUL: nullify src++ so later stores pad with NUL */
+	ldo	1(arg1),arg1		/* src++ (only while the NUL has not been reached) */
+	cmpb,COND(<>) r0,arg2,1b	/* loop while count != 0 */
+	ldo	1(arg0),arg0		/* (delay slot, always runs) dest++ */
+2:	bv,n	r0(rp)
+ENDPROC_CFI(strncpy)
+
+
+	.align 16
+ENTRY_CFI(strcat)	/* char *strcat(char *dest, const char *src): arg0 = dest, arg1 = src, returns dest in ret0 */
+	ldb	0(arg0),t0		/* t0 = dest[0] */
+	cmpb,=	t0,r0,2f		/* dest is empty: append starting at dest itself */
+	ldo	0(arg0),ret0		/* (delay slot, always runs) return value is the original dest */
+	ldo	1(arg0),arg0
+1:	ldb	0(arg0),t1		/* scan forward for dest's terminating NUL */
+	cmpb,<>,n r0,t1,1b
+	ldo	1(arg0),arg0		/* (delay slot) nullified once the NUL is found, so arg0 stays on it */
+2:	ldb	0(arg1),t2		/* load each src byte exactly once ... */
+	stb	t2,0(arg0)
+	ldo	1(arg0),arg0
+	/* ... and test that same register, so the stored and tested byte always agree */
+	cmpb,<>	r0,t2,2b		/* loop until the byte just stored was NUL */
+	ldo	1(arg1),arg1		/* (delay slot, always runs) src++ */
+	bv,n	r0(rp)
+ENDPROC_CFI(strcat)
+
+	.end