Message ID | 20190206222110.GA19527@ls3530.dellerweb.de (mailing list archive) |
---|---|
State | Accepted, archived |
Headers | show |
Series | parisc: Add assembly implementations for strlen, strcpy, strncpy and strcat | expand |
Hi, On Wed, Feb 06, 2019 at 11:21:10PM +0100, Helge Deller wrote: > Add performance-optimized versions of the strlen, strcpy, strncpy and > strcat string functions. > > Signed-off-by: Helge Deller <deller@gmx.de> > I did a quick test on my C3750 with the latest git plus this patch and found no obvious problems. Tested-by: Sven Schnelle <svens@stackframe.org>
On Wed, Feb 6, 2019 at 2:21 PM Helge Deller <deller@gmx.de> wrote: > > Add performance-optimized versions of the strlen, strcpy, strncpy and > strcat string functions. Can you say anything else about them? E.g., are you using some feature that the compiler isn't able to generate? Do you have performance data? Except for strlen, they look like straightforward translations of the simple C implementations.
On 07.02.19 18:46, Matt Turner wrote: > On Wed, Feb 6, 2019 at 2:21 PM Helge Deller <deller@gmx.de> wrote: >> >> Add performance-optimized versions of the strlen, strcpy, strncpy and >> strcat string functions. > > Can you say anything else about them? No. > E.g., are you using some feature that the compiler isn't able to > generate? Some assembler instructions are used which the compiler doesn't use by default for such use cases. > Do you have performance data? Not yet. Maybe someone wants to do that? > Except for strlen, they look like straightforward translations of the > simple C implementations. The code is much smaller than what the compiler generates and the layout of the byte stores is targeted at the behaviour of the parisc 64-bit CPUs. Helge
On Thu, Feb 7, 2019 at 2:22 PM Helge Deller <deller@gmx.de> wrote: > > > On 07.02.19 18:46, Matt Turner wrote: > > On Wed, Feb 6, 2019 at 2:21 PM Helge Deller <deller@gmx.de> wrote: > >> > >> Add performance-optimized versions of the strlen, strcpy, strncpy and > >> strcat string functions. > > > > Can you say anything else about them? > > No. > > > E.g., are you using some feature that the compiler isn't able to > > generate? > > Some assembler instructions are used which the compiler doesn't > use by default for such use cases. > > > Do you have performance data? > > Not yet. > Maybe someone wants to do that? > > > Except for strlen, they look like straightforward translations of the > > simple C implementations. > > The code is much smaller than what the compiler generates and > the layout of the byte stores is targeted at the behaviour of > the parisc 64-bit CPUs. So is that the rationale? I'm not sure how anyone can offer review if they don't know what you're trying to accomplish.
diff --git a/arch/parisc/include/asm/string.h b/arch/parisc/include/asm/string.h index f6e1132f4e35..62a9be5e55b2 100644 --- a/arch/parisc/include/asm/string.h +++ b/arch/parisc/include/asm/string.h @@ -8,4 +8,16 @@ extern void * memset(void *, int, size_t); #define __HAVE_ARCH_MEMCPY void * memcpy(void * dest,const void *src,size_t count); +#define __HAVE_ARCH_STRLEN +extern size_t strlen(const char *s); + +#define __HAVE_ARCH_STRCPY +extern char *strcpy(char *dest, const char *src); + +#define __HAVE_ARCH_STRNCPY +extern char *strncpy(char *dest, const char *src, size_t count); + +#define __HAVE_ARCH_STRCAT +extern char *strcat(char *dest, const char *src); + #endif diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index 7baa2265d439..9a72823e939c 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -30,6 +30,10 @@ #include <linux/string.h> EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strcpy); +EXPORT_SYMBOL(strncpy); +EXPORT_SYMBOL(strcat); #include <linux/atomic.h> EXPORT_SYMBOL(__xchg8); diff --git a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile index f2dac4d73b1b..c2d266358358 100644 --- a/arch/parisc/lib/Makefile +++ b/arch/parisc/lib/Makefile @@ -3,6 +3,6 @@ # lib-y := lusercopy.o bitops.o checksum.o io.o memset.o memcpy.o \ - ucmpdi2.o delay.o + ucmpdi2.o delay.o string.o obj-y := iomap.o diff --git a/arch/parisc/lib/string.S b/arch/parisc/lib/string.S new file mode 100644 index 000000000000..35b555b11c76 --- /dev/null +++ b/arch/parisc/lib/string.S @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PA-RISC assembly string functions + * + * Copyright (C) 2019 Helge Deller <deller@gmx.de> + */ + +#include <asm/assembly.h> +#include <linux/linkage.h> + + .text + .level LEVEL + + t0 = r20 + t1 = r21 + t2 = r22 + + .align 16 +ENTRY_CFI(strlen) + or,COND(<>) arg0,r0,ret0 + b,l,n .Lstrlen_null_ptr,r0 + depwi 0,31,2,ret0 + cmpb,COND(<>) 
arg0,ret0,.Lstrlen_not_aligned + ldw,ma 4(ret0),t0 + cmpib,tr 0,r0,.Lstrlen_loop + uxor,nbz r0,t0,r0 +.Lstrlen_not_aligned: + uaddcm arg0,ret0,t1 + shladd t1,3,r0,t1 + mtsar t1 + depwi -1,%sar,32,t0 + uxor,nbz r0,t0,r0 +.Lstrlen_loop: + b,l,n .Lstrlen_end_loop,r0 + ldw,ma 4(ret0),t0 + cmpib,tr 0,r0,.Lstrlen_loop + uxor,nbz r0,t0,r0 +.Lstrlen_end_loop: + extrw,u,<> t0,7,8,r0 + addib,tr,n -3,ret0,.Lstrlen_out + extrw,u,<> t0,15,8,r0 + addib,tr,n -2,ret0,.Lstrlen_out + extrw,u,<> t0,23,8,r0 + addi -1,ret0,ret0 +.Lstrlen_out: + bv r0(rp) + uaddcm ret0,arg0,ret0 +.Lstrlen_null_ptr: + bv,n r0(rp) +ENDPROC_CFI(strlen) + + + .align 16 +ENTRY_CFI(strcpy) + ldb 0(arg1),t0 + stb t0,0(arg0) + ldo 0(arg0),ret0 + ldo 1(arg1),t1 + cmpb,= r0,t0,2f + ldo 1(arg0),t2 +1: ldb 0(t1),arg1 + stb arg1,0(t2) + ldo 1(t1),t1 + cmpb,<> r0,arg1,1b + ldo 1(t2),t2 +2: bv,n r0(rp) +ENDPROC_CFI(strcpy) + + + .align 16 +ENTRY_CFI(strncpy) + ldb 0(arg1),t0 + stb t0,0(arg0) + ldo 1(arg1),t1 + ldo 0(arg0),ret0 + cmpb,= r0,t0,2f + ldo 1(arg0),arg1 +1: ldo -1(arg2),arg2 + cmpb,COND(=),n r0,arg2,2f + ldb 0(t1),arg0 + stb arg0,0(arg1) + ldo 1(t1),t1 + cmpb,<> r0,arg0,1b + ldo 1(arg1),arg1 +2: bv,n r0(rp) +ENDPROC_CFI(strncpy) + + + .align 16 +ENTRY_CFI(strcat) + ldb 0(arg0),t0 + cmpb,= t0,r0,2f + ldo 0(arg0),ret0 + ldo 1(arg0),arg0 +1: ldb 0(arg0),t1 + cmpb,<>,n r0,t1,1b + ldo 1(arg0),arg0 +2: ldb 0(arg1),t2 + stb t2,0(arg0) + ldo 1(arg0),arg0 + ldb 0(arg1),t0 + cmpb,<> r0,t0,2b + ldo 1(arg1),arg1 + bv,n r0(rp) +ENDPROC_CFI(strcat) + + .end
Add performance-optimized versions of the strlen, strcpy, strncpy and strcat string functions. Signed-off-by: Helge Deller <deller@gmx.de>