Message ID | 20230109181755.2383085-5-heiko@sntech.de (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Palmer Dabbelt |
Headers | show |
Series | Zbb string optimizations and call support in alternatives | expand |
On Mon, Jan 09, 2023 at 07:17:54PM +0100, Heiko Stuebner wrote: > From: Heiko Stuebner <heiko.stuebner@vrull.eu> > > Depending on supported extensions on specific RISC-V cores, > optimized str* functions might make sense. > > This adds basic infrastructure to allow patching the function calls > via alternatives later on. > > The Linux kernel provides standard implementations for string functions > but when architectures want to extend them, they need to provide their > own. > > The added generic string functions are done in assembler (taken from > disassembling the main-kernel functions for now) to allow us to control > the used registers and extend them with optimized variants. > > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu> asm looks the same as what I previously provided a review for, but I assume you dropped cos of the EFI stub stuff? You can have it back I suppose.. Reviewed-by: Conor Dooley <conor.dooley@microchip.com> > --- > arch/riscv/include/asm/string.h | 10 +++++++++ > arch/riscv/kernel/riscv_ksyms.c | 3 +++ > arch/riscv/lib/Makefile | 3 +++ > arch/riscv/lib/strcmp.S | 37 ++++++++++++++++++++++++++++++ > arch/riscv/lib/strlen.S | 28 +++++++++++++++++++++++ > arch/riscv/lib/strncmp.S | 40 +++++++++++++++++++++++++++++++++ > arch/riscv/purgatory/Makefile | 13 +++++++++++ > 7 files changed, 134 insertions(+) > create mode 100644 arch/riscv/lib/strcmp.S > create mode 100644 arch/riscv/lib/strlen.S > create mode 100644 arch/riscv/lib/strncmp.S > > diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h > index 909049366555..a96b1fea24fe 100644 > --- a/arch/riscv/include/asm/string.h > +++ b/arch/riscv/include/asm/string.h > @@ -18,6 +18,16 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t); > #define __HAVE_ARCH_MEMMOVE > extern asmlinkage void *memmove(void *, const void *, size_t); > extern asmlinkage void *__memmove(void *, const void *, size_t); > + > +#define __HAVE_ARCH_STRCMP > +extern asmlinkage int strcmp(const char *cs, const char *ct); > + > +#define __HAVE_ARCH_STRLEN > +extern asmlinkage __kernel_size_t strlen(const char *); > + > +#define __HAVE_ARCH_STRNCMP > +extern asmlinkage int strncmp(const char *cs, const char *ct, size_t count); > + > /* For those files which don't want to check by kasan. */ > #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) > #define memcpy(dst, src, len) __memcpy(dst, src, len) > diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c > index 5ab1c7e1a6ed..a72879b4249a 100644 > --- a/arch/riscv/kernel/riscv_ksyms.c > +++ b/arch/riscv/kernel/riscv_ksyms.c > @@ -12,6 +12,9 @@ > EXPORT_SYMBOL(memset); > EXPORT_SYMBOL(memcpy); > EXPORT_SYMBOL(memmove); > +EXPORT_SYMBOL(strcmp); > +EXPORT_SYMBOL(strlen); > +EXPORT_SYMBOL(strncmp); > EXPORT_SYMBOL(__memset); > EXPORT_SYMBOL(__memcpy); > EXPORT_SYMBOL(__memmove); > diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile > index 25d5c9664e57..6c74b0bedd60 100644 > --- a/arch/riscv/lib/Makefile > +++ b/arch/riscv/lib/Makefile > @@ -3,6 +3,9 @@ lib-y += delay.o > lib-y += memcpy.o > lib-y += memset.o > lib-y += memmove.o > +lib-y += strcmp.o > +lib-y += strlen.o > +lib-y += strncmp.o > lib-$(CONFIG_MMU) += uaccess.o > lib-$(CONFIG_64BIT) += tishift.o > > diff --git a/arch/riscv/lib/strcmp.S b/arch/riscv/lib/strcmp.S > new file mode 100644 > index 000000000000..94440fb8390c > --- /dev/null > +++ b/arch/riscv/lib/strcmp.S > @@ -0,0 +1,37 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > + > +#include <linux/linkage.h> > +#include <asm/asm.h> > +#include <asm-generic/export.h> > + > +/* int strcmp(const char *cs, const char *ct) */ > +SYM_FUNC_START(strcmp) > + /* > + * Returns > + * a0 - comparison result, value like strcmp > + * > + * Parameters > + * a0 - string1 > + * a1 - string2 > + * > + * Clobbers > + * t0, t1, t2 > + */ > + mv t2, a1 > +1: > + lbu t1, 0(a0) > + lbu t0, 0(a1) > + addi a0, a0, 1 > + addi a1, a1, 1 > + beq t1, t0, 3f > + li a0, 1 > + bgeu t1, t0, 2f > + li a0, -1 > +2: > + mv a1, t2 > + ret > +3: > + bnez t1, 1b > + li a0, 0 > + j 2b > +SYM_FUNC_END(strcmp) > diff --git a/arch/riscv/lib/strlen.S b/arch/riscv/lib/strlen.S > new file mode 100644 > index 000000000000..09a7aaff26c8 > --- /dev/null > +++ b/arch/riscv/lib/strlen.S > @@ -0,0 +1,28 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > + > +#include <linux/linkage.h> > +#include <asm/asm.h> > +#include <asm-generic/export.h> > + > +/* int strlen(const char *s) */ > +SYM_FUNC_START(strlen) > + /* > + * Returns > + * a0 - string length > + * > + * Parameters > + * a0 - String to measure > + * > + * Clobbers: > + * t0, t1 > + */ > + mv t1, a0 > +1: > + lbu t0, 0(t1) > + bnez t0, 2f > + sub a0, t1, a0 > + ret > +2: > + addi t1, t1, 1 > + j 1b > +SYM_FUNC_END(strlen) > diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S > new file mode 100644 > index 000000000000..493ab6febcb2 > --- /dev/null > +++ b/arch/riscv/lib/strncmp.S > @@ -0,0 +1,40 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > + > +#include <linux/linkage.h> > +#include <asm/asm.h> > +#include <asm-generic/export.h> > + > +/* int strncmp(const char *cs, const char *ct, size_t count) */ > +SYM_FUNC_START(strncmp) > + /* > + * Returns > + * a0 - comparison result, value like strncmp > + * > + * Parameters > + * a0 - string1 > + * a1 - string2 > + * a2 - number of characters to compare > + * > + * Clobbers > + * t0, t1, t2 > + */ > + li t0, 0 > +1: > + beq a2, t0, 4f > + add t1, a0, t0 > + add t2, a1, t0 > + lbu t1, 0(t1) > + lbu t2, 0(t2) > + beq t1, t2, 3f > + li a0, 1 > + bgeu t1, t2, 2f > + li a0, -1 > +2: > + ret > +3: > + addi t0, t0, 1 > + bnez t1, 1b > +4: > + li a0, 0 > + j 2b > +SYM_FUNC_END(strncmp) > diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile > index dd58e1d99397..d16bf715a586 100644 > --- a/arch/riscv/purgatory/Makefile > +++ b/arch/riscv/purgatory/Makefile > @@ -2,6 +2,7 @@ > OBJECT_FILES_NON_STANDARD := y > > purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o > +purgatory-y += strcmp.o strlen.o strncmp.o > > targets += $(purgatory-y) > PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) > @@ -18,6 +19,15 @@ $(obj)/memcpy.o: $(srctree)/arch/riscv/lib/memcpy.S FORCE > $(obj)/memset.o: $(srctree)/arch/riscv/lib/memset.S FORCE > $(call if_changed_rule,as_o_S) > > +$(obj)/strcmp.o: $(srctree)/arch/riscv/lib/strcmp.S FORCE > + $(call if_changed_rule,as_o_S) > + > +$(obj)/strlen.o: $(srctree)/arch/riscv/lib/strlen.S FORCE > + $(call if_changed_rule,as_o_S) > + > +$(obj)/strncmp.o: $(srctree)/arch/riscv/lib/strncmp.S FORCE > + $(call if_changed_rule,as_o_S) > + > $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE > $(call if_changed_rule,cc_o_c) > > @@ -77,6 +87,9 @@ CFLAGS_ctype.o += $(PURGATORY_CFLAGS) > AFLAGS_REMOVE_entry.o += -Wa,-gdwarf-2 > AFLAGS_REMOVE_memcpy.o += -Wa,-gdwarf-2 > AFLAGS_REMOVE_memset.o += -Wa,-gdwarf-2 > +AFLAGS_REMOVE_strcmp.o += -Wa,-gdwarf-2 > +AFLAGS_REMOVE_strlen.o += -Wa,-gdwarf-2 > +AFLAGS_REMOVE_strncmp.o += -Wa,-gdwarf-2 > > $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE > $(call if_changed,ld) > -- > 2.35.1 >
Am Montag, 9. Januar 2023, 23:37:45 CET schrieb Conor Dooley: > On Mon, Jan 09, 2023 at 07:17:54PM +0100, Heiko Stuebner wrote: > > From: Heiko Stuebner <heiko.stuebner@vrull.eu> > > > > Depending on supported extensions on specific RISC-V cores, > > optimized str* functions might make sense. > > > > This adds basic infrastructure to allow patching the function calls > > via alternatives later on. > > > > The Linux kernel provides standard implementations for string functions > > but when architectures want to extend them, they need to provide their > > own. > > > > The added generic string functions are done in assembler (taken from > > disassembling the main-kernel functions for now) to allow us to control > > the used registers and extend them with optimized variants. > > > > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu> > > asm looks the same as what I previously provided a review for, but I > assume you dropped cos of the EFI stub stuff? You can have it back I > suppose.. > Reviewed-by: Conor Dooley <conor.dooley@microchip.com> That was the | Technically patch4 got a review from Andrew, but that was still with | the inline approach, so I didn't bring it over to v4. part in the cover-letter, thanks for bringing the rb back :-) Heiko > > --- > > arch/riscv/include/asm/string.h | 10 +++++++++ > > arch/riscv/kernel/riscv_ksyms.c | 3 +++ > > arch/riscv/lib/Makefile | 3 +++ > > arch/riscv/lib/strcmp.S | 37 ++++++++++++++++++++++++++++++ > > arch/riscv/lib/strlen.S | 28 +++++++++++++++++++++++ > > arch/riscv/lib/strncmp.S | 40 +++++++++++++++++++++++++++++++++ > > arch/riscv/purgatory/Makefile | 13 +++++++++++ > > 7 files changed, 134 insertions(+) > > create mode 100644 arch/riscv/lib/strcmp.S > > create mode 100644 arch/riscv/lib/strlen.S > > create mode 100644 arch/riscv/lib/strncmp.S > > > > diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h > > index 909049366555..a96b1fea24fe 100644 > > --- a/arch/riscv/include/asm/string.h > > +++ b/arch/riscv/include/asm/string.h > > @@ -18,6 +18,16 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t); > > #define __HAVE_ARCH_MEMMOVE > > extern asmlinkage void *memmove(void *, const void *, size_t); > > extern asmlinkage void *__memmove(void *, const void *, size_t); > > + > > +#define __HAVE_ARCH_STRCMP > > +extern asmlinkage int strcmp(const char *cs, const char *ct); > > + > > +#define __HAVE_ARCH_STRLEN > > +extern asmlinkage __kernel_size_t strlen(const char *); > > + > > +#define __HAVE_ARCH_STRNCMP > > +extern asmlinkage int strncmp(const char *cs, const char *ct, size_t count); > > + > > /* For those files which don't want to check by kasan. */ > > #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) > > #define memcpy(dst, src, len) __memcpy(dst, src, len) > > diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c > > index 5ab1c7e1a6ed..a72879b4249a 100644 > > --- a/arch/riscv/kernel/riscv_ksyms.c > > +++ b/arch/riscv/kernel/riscv_ksyms.c > > @@ -12,6 +12,9 @@ > > EXPORT_SYMBOL(memset); > > EXPORT_SYMBOL(memcpy); > > EXPORT_SYMBOL(memmove); > > +EXPORT_SYMBOL(strcmp); > > +EXPORT_SYMBOL(strlen); > > +EXPORT_SYMBOL(strncmp); > > EXPORT_SYMBOL(__memset); > > EXPORT_SYMBOL(__memcpy); > > EXPORT_SYMBOL(__memmove); > > diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile > > index 25d5c9664e57..6c74b0bedd60 100644 > > --- a/arch/riscv/lib/Makefile > > +++ b/arch/riscv/lib/Makefile > > @@ -3,6 +3,9 @@ lib-y += delay.o > > lib-y += memcpy.o > > lib-y += memset.o > > lib-y += memmove.o > > +lib-y += strcmp.o > > +lib-y += strlen.o > > +lib-y += strncmp.o > > lib-$(CONFIG_MMU) += uaccess.o > > lib-$(CONFIG_64BIT) += tishift.o > > > > diff --git a/arch/riscv/lib/strcmp.S b/arch/riscv/lib/strcmp.S > > new file mode 100644 > > index 000000000000..94440fb8390c > > --- /dev/null > > +++ b/arch/riscv/lib/strcmp.S > > @@ -0,0 +1,37 @@ > > +/* SPDX-License-Identifier: GPL-2.0-only */ > > + > > +#include <linux/linkage.h> > > +#include <asm/asm.h> > > +#include <asm-generic/export.h> > > + > > +/* int strcmp(const char *cs, const char *ct) */ > > +SYM_FUNC_START(strcmp) > > + /* > > + * Returns > > + * a0 - comparison result, value like strcmp > > + * > > + * Parameters > > + * a0 - string1 > > + * a1 - string2 > > + * > > + * Clobbers > > + * t0, t1, t2 > > + */ > > + mv t2, a1 > > +1: > > + lbu t1, 0(a0) > > + lbu t0, 0(a1) > > + addi a0, a0, 1 > > + addi a1, a1, 1 > > + beq t1, t0, 3f > > + li a0, 1 > > + bgeu t1, t0, 2f > > + li a0, -1 > > +2: > > + mv a1, t2 > > + ret > > +3: > > + bnez t1, 1b > > + li a0, 0 > > + j 2b > > +SYM_FUNC_END(strcmp) > > diff --git a/arch/riscv/lib/strlen.S b/arch/riscv/lib/strlen.S > > new file mode 100644 > > index 000000000000..09a7aaff26c8 > > --- /dev/null > > +++ b/arch/riscv/lib/strlen.S > > @@ -0,0 +1,28 @@ > > +/* SPDX-License-Identifier: GPL-2.0-only */ > > + > > +#include <linux/linkage.h> > > +#include <asm/asm.h> > > +#include <asm-generic/export.h> > > + > > +/* int strlen(const char *s) */ > > +SYM_FUNC_START(strlen) > > + /* > > + * Returns > > + * a0 - string length > > + * > > + * Parameters > > + * a0 - String to measure > > + * > > + * Clobbers: > > + * t0, t1 > > + */ > > + mv t1, a0 > > +1: > > + lbu t0, 0(t1) > > + bnez t0, 2f > > + sub a0, t1, a0 > > + ret > > +2: > > + addi t1, t1, 1 > > + j 1b > > +SYM_FUNC_END(strlen) > > diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S > > new file mode 100644 > > index 000000000000..493ab6febcb2 > > --- /dev/null > > +++ b/arch/riscv/lib/strncmp.S > > @@ -0,0 +1,40 @@ > > +/* SPDX-License-Identifier: GPL-2.0-only */ > > + > > +#include <linux/linkage.h> > > +#include <asm/asm.h> > > +#include <asm-generic/export.h> > > + > > +/* int strncmp(const char *cs, const char *ct, size_t count) */ > > +SYM_FUNC_START(strncmp) > > + /* > > + * Returns > > + * a0 - comparison result, value like strncmp > > + * > > + * Parameters > > + * a0 - string1 > > + * a1 - string2 > > + * a2 - number of characters to compare > > + * > > + * Clobbers > > + * t0, t1, t2 > > + */ > > + li t0, 0 > > +1: > > + beq a2, t0, 4f > > + add t1, a0, t0 > > + add t2, a1, t0 > > + lbu t1, 0(t1) > > + lbu t2, 0(t2) > > + beq t1, t2, 3f > > + li a0, 1 > > + bgeu t1, t2, 2f > > + li a0, -1 > > +2: > > + ret > > +3: > > + addi t0, t0, 1 > > + bnez t1, 1b > > +4: > > + li a0, 0 > > + j 2b > > +SYM_FUNC_END(strncmp) > > diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile > > index dd58e1d99397..d16bf715a586 100644 > > --- a/arch/riscv/purgatory/Makefile > > +++ b/arch/riscv/purgatory/Makefile > > @@ -2,6 +2,7 @@ > > OBJECT_FILES_NON_STANDARD := y > > > > purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o > > +purgatory-y += strcmp.o strlen.o strncmp.o > > > > targets += $(purgatory-y) > > PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) > > @@ -18,6 +19,15 @@ $(obj)/memcpy.o: $(srctree)/arch/riscv/lib/memcpy.S FORCE > > $(obj)/memset.o: $(srctree)/arch/riscv/lib/memset.S FORCE > > $(call if_changed_rule,as_o_S) > > > > +$(obj)/strcmp.o: $(srctree)/arch/riscv/lib/strcmp.S FORCE > > + $(call if_changed_rule,as_o_S) > > + > > +$(obj)/strlen.o: $(srctree)/arch/riscv/lib/strlen.S FORCE > > + $(call if_changed_rule,as_o_S) > > + > > +$(obj)/strncmp.o: $(srctree)/arch/riscv/lib/strncmp.S FORCE > > + $(call if_changed_rule,as_o_S) > > + > > $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE > > $(call if_changed_rule,cc_o_c) > > > > @@ -77,6 +87,9 @@ CFLAGS_ctype.o += $(PURGATORY_CFLAGS) > > AFLAGS_REMOVE_entry.o += -Wa,-gdwarf-2 > > AFLAGS_REMOVE_memcpy.o += -Wa,-gdwarf-2 > > AFLAGS_REMOVE_memset.o += -Wa,-gdwarf-2 > > +AFLAGS_REMOVE_strcmp.o += -Wa,-gdwarf-2 > > +AFLAGS_REMOVE_strlen.o += -Wa,-gdwarf-2 > > +AFLAGS_REMOVE_strncmp.o += -Wa,-gdwarf-2 > > > > $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE > > $(call if_changed,ld) >
On Mon, Jan 09, 2023 at 07:17:54PM +0100, Heiko Stuebner wrote: > From: Heiko Stuebner <heiko.stuebner@vrull.eu> > > Depending on supported extensions on specific RISC-V cores, > optimized str* functions might make sense. > > This adds basic infrastructure to allow patching the function calls > via alternatives later on. > > The Linux kernel provides standard implementations for string functions > but when architectures want to extend them, they need to provide their > own. And the compiler provides builtins. In the previous series it appeared to be a bad idea to compile the kernel with the compiler's builtins disabled. How will the optimized string functions which will be based on this patch be selected? Thanks, drew
Hi Andrew, Am Dienstag, 10. Januar 2023, 10:39:36 CET schrieb Andrew Jones: > On Mon, Jan 09, 2023 at 07:17:54PM +0100, Heiko Stuebner wrote: > > From: Heiko Stuebner <heiko.stuebner@vrull.eu> > > > > Depending on supported extensions on specific RISC-V cores, > > optimized str* functions might make sense. > > > > This adds basic infrastructure to allow patching the function calls > > via alternatives later on. > > > > The Linux kernel provides standard implementations for string functions > > but when architectures want to extend them, they need to provide their > > own. > > And the compiler provides builtins. In the previous series it appeared > to be a bad idea to compile the kernel with the compiler's builtins > disabled. How will the optimized string functions which will be based > on this patch be selected? yep, the consensus seemingly was that the compiler knows best when to use builtins for some cases (which is probably correct), hence the move away from the inline bases. So I guess the first decision is the compiler's wether to use a builtin or the kernel string function (same as for mem*) . In my tests, I did see both getting used - so it's definitly not lost work :-) . After that when landing in these here, we want to select the best variant for the host-system the kernel runs on. I.e. this one as baseline or for example using zbb as an "alternative". As for the "more" variants, I currently have more patches on top, that then use an ALTERNATIVE_2 ALTERNATIVE_2("nop", "j variant_zbb_unaligned", 0, CPUFEATURE_ZBB | CPUFEATURE_FAST_UNALIGNED, 0, CONFIG_RISCV_ISA_ZBB, "j variant_zbb", 0, CPUFEATURE_ZBB, CPUFEATURE_FAST_UNALIGNED, CONFIG_RISCV_ISA_ZBB) with the "errata_id" being used as a bitfield to use extension combinations. And a "not"-field, so I can do a has-zbb + has-not-fast-unaligned Heiko
On Tue, Jan 10, 2023 at 11:46:40AM +0100, Heiko Stübner wrote: > Hi Andrew, > > Am Dienstag, 10. Januar 2023, 10:39:36 CET schrieb Andrew Jones: > > On Mon, Jan 09, 2023 at 07:17:54PM +0100, Heiko Stuebner wrote: > > > From: Heiko Stuebner <heiko.stuebner@vrull.eu> > > > > > > Depending on supported extensions on specific RISC-V cores, > > > optimized str* functions might make sense. > > > > > > This adds basic infrastructure to allow patching the function calls > > > via alternatives later on. > > > > > > The Linux kernel provides standard implementations for string functions > > > but when architectures want to extend them, they need to provide their > > > own. > > > > And the compiler provides builtins. In the previous series it appeared > > to be a bad idea to compile the kernel with the compiler's builtins > > disabled. How will the optimized string functions which will be based > > on this patch be selected? > > yep, the consensus seemingly was that the compiler knows best when > to use builtins for some cases (which is probably correct), hence the move > away from the inline bases. > > So I guess the first decision is the compiler's wether to use a builtin or > the kernel string function (same as for mem*) . > > In my tests, I did see both getting used - so it's definitly not lost work :-) . > > After that when landing in these here, we want to select the best variant > for the host-system the kernel runs on. > > I.e. this one as baseline or for example using zbb as an "alternative". > > As for the "more" variants, I currently have more patches on top, that > then use an ALTERNATIVE_2 > > ALTERNATIVE_2("nop", > "j variant_zbb_unaligned", 0, CPUFEATURE_ZBB | CPUFEATURE_FAST_UNALIGNED, 0, CONFIG_RISCV_ISA_ZBB, > "j variant_zbb", 0, CPUFEATURE_ZBB, CPUFEATURE_FAST_UNALIGNED, CONFIG_RISCV_ISA_ZBB) > > with the "errata_id" being used as a bitfield to use extension combinations. > And a "not"-field, so I can do a has-zbb + has-not-fast-unaligned Thanks, Heiko. This was the info I needed. It might be nice to put some of it in the commit message too. drew
On Mon, Jan 09, 2023 at 07:17:54PM +0100, Heiko Stuebner wrote: > From: Heiko Stuebner <heiko.stuebner@vrull.eu> > > Depending on supported extensions on specific RISC-V cores, > optimized str* functions might make sense. > > This adds basic infrastructure to allow patching the function calls > via alternatives later on. > > The Linux kernel provides standard implementations for string functions > but when architectures want to extend them, they need to provide their > own. > > The added generic string functions are done in assembler (taken from > disassembling the main-kernel functions for now) to allow us to control > the used registers and extend them with optimized variants. > > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu> > --- > arch/riscv/include/asm/string.h | 10 +++++++++ > arch/riscv/kernel/riscv_ksyms.c | 3 +++ > arch/riscv/lib/Makefile | 3 +++ > arch/riscv/lib/strcmp.S | 37 ++++++++++++++++++++++++++++++ > arch/riscv/lib/strlen.S | 28 +++++++++++++++++++++++ > arch/riscv/lib/strncmp.S | 40 +++++++++++++++++++++++++++++++++ > arch/riscv/purgatory/Makefile | 13 +++++++++++ > 7 files changed, 134 insertions(+) > create mode 100644 arch/riscv/lib/strcmp.S > create mode 100644 arch/riscv/lib/strlen.S > create mode 100644 arch/riscv/lib/strncmp.S > > diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h > index 909049366555..a96b1fea24fe 100644 > --- a/arch/riscv/include/asm/string.h > +++ b/arch/riscv/include/asm/string.h > @@ -18,6 +18,16 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t); > #define __HAVE_ARCH_MEMMOVE > extern asmlinkage void *memmove(void *, const void *, size_t); > extern asmlinkage void *__memmove(void *, const void *, size_t); > + > +#define __HAVE_ARCH_STRCMP > +extern asmlinkage int strcmp(const char *cs, const char *ct); > + > +#define __HAVE_ARCH_STRLEN > +extern asmlinkage __kernel_size_t strlen(const char *); > + > +#define __HAVE_ARCH_STRNCMP > +extern asmlinkage int strncmp(const char *cs, const char *ct, size_t count); > + > /* For those files which don't want to check by kasan. */ > #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) > #define memcpy(dst, src, len) __memcpy(dst, src, len) > diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c > index 5ab1c7e1a6ed..a72879b4249a 100644 > --- a/arch/riscv/kernel/riscv_ksyms.c > +++ b/arch/riscv/kernel/riscv_ksyms.c > @@ -12,6 +12,9 @@ > EXPORT_SYMBOL(memset); > EXPORT_SYMBOL(memcpy); > EXPORT_SYMBOL(memmove); > +EXPORT_SYMBOL(strcmp); > +EXPORT_SYMBOL(strlen); > +EXPORT_SYMBOL(strncmp); > EXPORT_SYMBOL(__memset); > EXPORT_SYMBOL(__memcpy); > EXPORT_SYMBOL(__memmove); > diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile > index 25d5c9664e57..6c74b0bedd60 100644 > --- a/arch/riscv/lib/Makefile > +++ b/arch/riscv/lib/Makefile > @@ -3,6 +3,9 @@ lib-y += delay.o > lib-y += memcpy.o > lib-y += memset.o > lib-y += memmove.o > +lib-y += strcmp.o > +lib-y += strlen.o > +lib-y += strncmp.o > lib-$(CONFIG_MMU) += uaccess.o > lib-$(CONFIG_64BIT) += tishift.o > > diff --git a/arch/riscv/lib/strcmp.S b/arch/riscv/lib/strcmp.S > new file mode 100644 > index 000000000000..94440fb8390c > --- /dev/null > +++ b/arch/riscv/lib/strcmp.S > @@ -0,0 +1,37 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > + > +#include <linux/linkage.h> > +#include <asm/asm.h> > +#include <asm-generic/export.h> > + > +/* int strcmp(const char *cs, const char *ct) */ > +SYM_FUNC_START(strcmp) > + /* > + * Returns > + * a0 - comparison result, value like strcmp > + * > + * Parameters > + * a0 - string1 > + * a1 - string2 > + * > + * Clobbers > + * t0, t1, t2 > + */ > + mv t2, a1 The above instruction and the 'mv a1, t2' below appear to be attempting to preserve a1, but that shouldn't be necessary. > +1: > + lbu t1, 0(a0) > + lbu t0, 0(a1) I'd rather have t0 be 0(a0) and t1 be 0(a1) > + addi a0, a0, 1 > + addi a1, a1, 1 > + beq t1, t0, 3f > + li a0, 1 > + bgeu t1, t0, 2f > + li a0, -1 > +2: > + mv a1, t2 > + ret > +3: > + bnez t1, 1b > + li a0, 0 > + j 2b For fun I removed one conditional and one unconditional branch (untested) 1: lbu t0, 0(a0) lbu t1, 0(a1) addi a0, a0, 1 addi a1, a1, 1 bne t0, t1, 2f bnez t0, 1b li a0, 0 ret 2: slt a1, t1, t0 slli a1, a1, 1 li a0, -1 add a0, a0, a1 ret > +SYM_FUNC_END(strcmp) > diff --git a/arch/riscv/lib/strlen.S b/arch/riscv/lib/strlen.S > new file mode 100644 > index 000000000000..09a7aaff26c8 > --- /dev/null > +++ b/arch/riscv/lib/strlen.S > @@ -0,0 +1,28 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > + > +#include <linux/linkage.h> > +#include <asm/asm.h> > +#include <asm-generic/export.h> > + > +/* int strlen(const char *s) */ > +SYM_FUNC_START(strlen) > + /* > + * Returns > + * a0 - string length > + * > + * Parameters > + * a0 - String to measure > + * > + * Clobbers: > + * t0, t1 > + */ > + mv t1, a0 > +1: > + lbu t0, 0(t1) > + bnez t0, 2f > + sub a0, t1, a0 > + ret > +2: > + addi t1, t1, 1 > + j 1b Slightly reorganizing looks better (to me) mv t1, a0 1: lbu t0, 0(t1) beqz t0, 2f addi t1, t1, 1 j 1b 2: sub a0, t1, a0 ret > +SYM_FUNC_END(strlen) > diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S > new file mode 100644 > index 000000000000..493ab6febcb2 > --- /dev/null > +++ b/arch/riscv/lib/strncmp.S > @@ -0,0 +1,40 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > + > +#include <linux/linkage.h> > +#include <asm/asm.h> > +#include <asm-generic/export.h> > + > +/* int strncmp(const char *cs, const char *ct, size_t count) */ > +SYM_FUNC_START(strncmp) > + /* > + * Returns > + * a0 - comparison result, value like strncmp > + * > + * Parameters > + * a0 - string1 > + * a1 - string2 > + * a2 - number of characters to compare > + * > + * Clobbers > + * t0, t1, t2 > + */ > + li t0, 0 > +1: > + beq a2, t0, 4f > + add t1, a0, t0 > + add t2, a1, t0 > + lbu t1, 0(t1) > + lbu t2, 0(t2) > + beq t1, t2, 3f > + li a0, 1 > + bgeu t1, t2, 2f > + li a0, -1 > +2: > + ret > +3: > + addi t0, t0, 1 > + bnez t1, 1b > +4: > + li a0, 0 > + j 2b (untested) li t2, 0 1: beq a2, t2, 2f lbu t0, 0(a0) lbu t1, 0(a1) addi a0, a0, 1 addi a1, a1, 1 bne t0, t1, 3f addi t2, t2, 1 bnez t0, 1b 2: li a0, 0 ret 3: slt a1, t1, t0 slli a1, a1, 1 li a0, -1 add a0, a0, a1 ret > +SYM_FUNC_END(strncmp) > diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile > index dd58e1d99397..d16bf715a586 100644 > --- a/arch/riscv/purgatory/Makefile > +++ b/arch/riscv/purgatory/Makefile > @@ -2,6 +2,7 @@ > OBJECT_FILES_NON_STANDARD := y > > purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o > +purgatory-y += strcmp.o strlen.o strncmp.o > > targets += $(purgatory-y) > PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) > @@ -18,6 +19,15 @@ $(obj)/memcpy.o: $(srctree)/arch/riscv/lib/memcpy.S FORCE > $(obj)/memset.o: $(srctree)/arch/riscv/lib/memset.S FORCE > $(call if_changed_rule,as_o_S) > > +$(obj)/strcmp.o: $(srctree)/arch/riscv/lib/strcmp.S FORCE > + $(call if_changed_rule,as_o_S) > + > +$(obj)/strlen.o: $(srctree)/arch/riscv/lib/strlen.S FORCE > + $(call if_changed_rule,as_o_S) > + > +$(obj)/strncmp.o: $(srctree)/arch/riscv/lib/strncmp.S FORCE > + $(call if_changed_rule,as_o_S) > + > $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE > $(call if_changed_rule,cc_o_c) > > @@ -77,6 +87,9 @@ CFLAGS_ctype.o += $(PURGATORY_CFLAGS) > AFLAGS_REMOVE_entry.o += -Wa,-gdwarf-2 > AFLAGS_REMOVE_memcpy.o += -Wa,-gdwarf-2 > AFLAGS_REMOVE_memset.o += -Wa,-gdwarf-2 > +AFLAGS_REMOVE_strcmp.o += -Wa,-gdwarf-2 > +AFLAGS_REMOVE_strlen.o += -Wa,-gdwarf-2 > +AFLAGS_REMOVE_strncmp.o += -Wa,-gdwarf-2 > > $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE > $(call if_changed,ld) > -- > 2.35.1 > With at least the removal of the unnecessary preserving of a1 in strcmp, then it looks correct to me, so Reviewed-by: Andrew Jones <ajones@ventanamicro.com> But I think there's room for making it more readable, and maybe even optimized, as I've tried to do. Thanks, drew
On Tue, Jan 10, 2023 at 01:13:20PM +0100, Andrew Jones wrote: > On Mon, Jan 09, 2023 at 07:17:54PM +0100, Heiko Stuebner wrote: ... > > + > > +/* int strcmp(const char *cs, const char *ct) */ > > +SYM_FUNC_START(strcmp) > > + /* > > + * Returns > > + * a0 - comparison result, value like strcmp > > + * > > + * Parameters > > + * a0 - string1 > > + * a1 - string2 > > + * > > + * Clobbers > > + * t0, t1, t2 > > + */ > > + mv t2, a1 > > The above instruction and the 'mv a1, t2' below appear to be attempting > to preserve a1, but that shouldn't be necessary. > > > +1: > > + lbu t1, 0(a0) > > + lbu t0, 0(a1) > > I'd rather have t0 be 0(a0) and t1 be 0(a1) > > > + addi a0, a0, 1 > > + addi a1, a1, 1 > > + beq t1, t0, 3f > > + li a0, 1 > > + bgeu t1, t0, 2f > > + li a0, -1 > > +2: > > + mv a1, t2 > > + ret > > +3: > > + bnez t1, 1b > > + li a0, 0 > > + j 2b > > For fun I removed one conditional and one unconditional branch (untested) > > 1: > lbu t0, 0(a0) > lbu t1, 0(a1) > addi a0, a0, 1 > addi a1, a1, 1 > bne t0, t1, 2f > bnez t0, 1b > li a0, 0 > ret > 2: > slt a1, t1, t0 > slli a1, a1, 1 > li a0, -1 > add a0, a0, a1 If we just need < 0 and > 0 then the above four instructions can become sub a0, t0, t1 > ret > Similar comment for strncmp. Thanks, drew
On Tue, Jan 10, 2023 at 12:16:47PM +0100, Andrew Jones wrote: > On Tue, Jan 10, 2023 at 11:46:40AM +0100, Heiko Stübner wrote: > > Hi Andrew, > > > > Am Dienstag, 10. Januar 2023, 10:39:36 CET schrieb Andrew Jones: > > > On Mon, Jan 09, 2023 at 07:17:54PM +0100, Heiko Stuebner wrote: > > > > From: Heiko Stuebner <heiko.stuebner@vrull.eu> > > > > > > > > Depending on supported extensions on specific RISC-V cores, > > > > optimized str* functions might make sense. > > > > > > > > This adds basic infrastructure to allow patching the function calls > > > > via alternatives later on. > > > > > > > > The Linux kernel provides standard implementations for string functions > > > > but when architectures want to extend them, they need to provide their > > > > own. > > > > > > And the compiler provides builtins. In the previous series it appeared > > > to be a bad idea to compile the kernel with the compiler's builtins > > > disabled. How will the optimized string functions which will be based > > > on this patch be selected? > > > > yep, the consensus seemingly was that the compiler knows best when > > to use builtins for some cases (which is probably correct), hence the move > > away from the inline bases. > > > > So I guess the first decision is the compiler's wether to use a builtin or > > the kernel string function (same as for mem*) . Hi Heiko, Thinking about this some more, I'd still like to understand how/when the compiler makes this choice. Are compiler flags involved? Or do some heuristics dictate the choice? Thanks, drew
Drew, the reason why alignment is critical for correctness of optimisations in userspace applications is that exception semantics must be retained when crossing page boundaries. Philipp. On Wed, 11 Jan 2023 at 14:39, Christoph Müllner <christoph.muellner@vrull.eu> wrote: > > > > On Wed, Jan 11, 2023 at 1:34 PM Andrew Jones <ajones@ventanamicro.com> wrote: >> >> On Tue, Jan 10, 2023 at 12:16:47PM +0100, Andrew Jones wrote: >> > On Tue, Jan 10, 2023 at 11:46:40AM +0100, Heiko Stübner wrote: >> > > Hi Andrew, >> > > >> > > Am Dienstag, 10. Januar 2023, 10:39:36 CET schrieb Andrew Jones: >> > > > On Mon, Jan 09, 2023 at 07:17:54PM +0100, Heiko Stuebner wrote: >> > > > > From: Heiko Stuebner <heiko.stuebner@vrull.eu> >> > > > > >> > > > > Depending on supported extensions on specific RISC-V cores, >> > > > > optimized str* functions might make sense. >> > > > > >> > > > > This adds basic infrastructure to allow patching the function calls >> > > > > via alternatives later on. >> > > > > >> > > > > The Linux kernel provides standard implementations for string functions >> > > > > but when architectures want to extend them, they need to provide their >> > > > > own. >> > > > >> > > > And the compiler provides builtins. In the previous series it appeared >> > > > to be a bad idea to compile the kernel with the compiler's builtins >> > > > disabled. How will the optimized string functions which will be based >> > > > on this patch be selected? >> > > >> > > yep, the consensus seemingly was that the compiler knows best when >> > > to use builtins for some cases (which is probably correct), hence the move >> > > away from the inline bases. >> > > >> > > So I guess the first decision is the compiler's wether to use a builtin or >> > > the kernel string function (same as for mem*) . >> >> Hi Heiko, >> >> Thinking about this some more, I'd still like to understand how/when the >> compiler makes this choice. Are compiler flags involved? Or do some >> heuristics dictate the choice? > > > 1) Arch-independent expansion is used e.g. for calculating the result for statically known strings. > 2) Arch-specific expansion (not mainline, but my series can be found here: [1]) can do whatever is reasonable. > My series emits a bounded unrolled comparison loop for str(n)cmp with a bound that can be adjusted via flag. > But it only triggers if both strings are known to be aligned. > > [1] https://gcc.gnu.org/pipermail/gcc-patches/2022-November/605993.html > > BR > Christoph > >> >> Thanks, >> drew
On Wed, Jan 11, 2023 at 02:39:36PM +0100, Christoph Müllner wrote: > On Wed, Jan 11, 2023 at 1:34 PM Andrew Jones <ajones@ventanamicro.com> > wrote: > > > On Tue, Jan 10, 2023 at 12:16:47PM +0100, Andrew Jones wrote: > > > On Tue, Jan 10, 2023 at 11:46:40AM +0100, Heiko Stübner wrote: > > > > Hi Andrew, > > > > > > > > Am Dienstag, 10. Januar 2023, 10:39:36 CET schrieb Andrew Jones: > > > > > On Mon, Jan 09, 2023 at 07:17:54PM +0100, Heiko Stuebner wrote: > > > > > > From: Heiko Stuebner <heiko.stuebner@vrull.eu> > > > > > > > > > > > > Depending on supported extensions on specific RISC-V cores, > > > > > > optimized str* functions might make sense. > > > > > > > > > > > > This adds basic infrastructure to allow patching the function calls > > > > > > via alternatives later on. > > > > > > > > > > > > The Linux kernel provides standard implementations for string > > functions > > > > > > but when architectures want to extend them, they need to provide > > their > > > > > > own. > > > > > > > > > > And the compiler provides builtins. In the previous series it > > appeared > > > > > to be a bad idea to compile the kernel with the compiler's builtins > > > > > disabled. How will the optimized string functions which will be based > > > > > on this patch be selected? > > > > > > > > yep, the consensus seemingly was that the compiler knows best when > > > > to use builtins for some cases (which is probably correct), hence the > > move > > > > away from the inline bases. > > > > > > > > So I guess the first decision is the compiler's wether to use a > > builtin or > > > > the kernel string function (same as for mem*) . > > > > Hi Heiko, > > > > Thinking about this some more, I'd still like to understand how/when the > > compiler makes this choice. Are compiler flags involved? Or do some > > heuristics dictate the choice? > > > > 1) Arch-independent expansion is used e.g. for calculating the result for > statically known strings. > 2) Arch-specific expansion (not mainline, but my series can be found here: > [1]) can do whatever is reasonable. > My series emits a bounded unrolled comparison loop for str(n)cmp with a > bound that can be adjusted via flag. > But it only triggers if both strings are known to be aligned. > > [1] https://gcc.gnu.org/pipermail/gcc-patches/2022-November/605993.html > Thanks Christoph! drew
Hi Andrew, thanks a lot for taking the time to provide these extensive and really helpful comments. Am Dienstag, 10. Januar 2023, 13:13:20 CET schrieb Andrew Jones: > On Mon, Jan 09, 2023 at 07:17:54PM +0100, Heiko Stuebner wrote: > > From: Heiko Stuebner <heiko.stuebner@vrull.eu> > > > > Depending on supported extensions on specific RISC-V cores, > > optimized str* functions might make sense. > > > > This adds basic infrastructure to allow patching the function calls > > via alternatives later on. > > > > The Linux kernel provides standard implementations for string functions > > but when architectures want to extend them, they need to provide their > > own. > > > > The added generic string functions are done in assembler (taken from > > disassembling the main-kernel functions for now) to allow us to control > > the used registers and extend them with optimized variants. > > > > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu> > > --- > > arch/riscv/include/asm/string.h | 10 +++++++++ > > arch/riscv/kernel/riscv_ksyms.c | 3 +++ > > arch/riscv/lib/Makefile | 3 +++ > > arch/riscv/lib/strcmp.S | 37 ++++++++++++++++++++++++++++++ > > arch/riscv/lib/strlen.S | 28 +++++++++++++++++++++++ > > arch/riscv/lib/strncmp.S | 40 +++++++++++++++++++++++++++++++++ > > arch/riscv/purgatory/Makefile | 13 +++++++++++ > > 7 files changed, 134 insertions(+) > > create mode 100644 arch/riscv/lib/strcmp.S > > create mode 100644 arch/riscv/lib/strlen.S > > create mode 100644 arch/riscv/lib/strncmp.S > > > > diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h > > index 909049366555..a96b1fea24fe 100644 > > --- a/arch/riscv/include/asm/string.h > > +++ b/arch/riscv/include/asm/string.h > > @@ -18,6 +18,16 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t); > > #define __HAVE_ARCH_MEMMOVE > > extern asmlinkage void *memmove(void *, const void *, size_t); > > extern asmlinkage void *__memmove(void *, const void *, size_t); > > + > > +#define __HAVE_ARCH_STRCMP > > +extern asmlinkage int strcmp(const char *cs, const char *ct); > > + > > +#define __HAVE_ARCH_STRLEN > > +extern asmlinkage __kernel_size_t strlen(const char *); > > + > > +#define __HAVE_ARCH_STRNCMP > > +extern asmlinkage int strncmp(const char *cs, const char *ct, size_t count); > > + > > /* For those files which don't want to check by kasan. */ > > #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) > > #define memcpy(dst, src, len) __memcpy(dst, src, len) > > diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c > > index 5ab1c7e1a6ed..a72879b4249a 100644 > > --- a/arch/riscv/kernel/riscv_ksyms.c > > +++ b/arch/riscv/kernel/riscv_ksyms.c > > @@ -12,6 +12,9 @@ > > EXPORT_SYMBOL(memset); > > EXPORT_SYMBOL(memcpy); > > EXPORT_SYMBOL(memmove); > > +EXPORT_SYMBOL(strcmp); > > +EXPORT_SYMBOL(strlen); > > +EXPORT_SYMBOL(strncmp); > > EXPORT_SYMBOL(__memset); > > EXPORT_SYMBOL(__memcpy); > > EXPORT_SYMBOL(__memmove); > > diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile > > index 25d5c9664e57..6c74b0bedd60 100644 > > --- a/arch/riscv/lib/Makefile > > +++ b/arch/riscv/lib/Makefile > > @@ -3,6 +3,9 @@ lib-y += delay.o > > lib-y += memcpy.o > > lib-y += memset.o > > lib-y += memmove.o > > +lib-y += strcmp.o > > +lib-y += strlen.o > > +lib-y += strncmp.o > > lib-$(CONFIG_MMU) += uaccess.o > > lib-$(CONFIG_64BIT) += tishift.o > > > > diff --git a/arch/riscv/lib/strcmp.S b/arch/riscv/lib/strcmp.S > > new file mode 100644 > > index 000000000000..94440fb8390c > > --- /dev/null > > +++ b/arch/riscv/lib/strcmp.S > > @@ -0,0 +1,37 @@ > > +/* SPDX-License-Identifier: GPL-2.0-only */ > > + > > +#include <linux/linkage.h> > > +#include <asm/asm.h> > > +#include <asm-generic/export.h> > > + > > +/* int strcmp(const char *cs, const char *ct) */ > > +SYM_FUNC_START(strcmp) > > + /* > > + * Returns > > + * a0 - comparison result, value like strcmp > > + * > > + * Parameters > > + * a0 - string1 > > + * a1 - string2 > > + * > > + * Clobbers > > + * t0, t1, t2 > > + */ > > + mv t2, a1 > > The above instruction and the 'mv a1, t2' below appear to be attempting > to preserve a1, but that shouldn't be necessary. correct and gone now > > +1: > > + lbu t1, 0(a0) > > + lbu t0, 0(a1) > > I'd rather have t0 be 0(a0) and t1 be 0(a1) ok > > + addi a0, a0, 1 > > + addi a1, a1, 1 > > + beq t1, t0, 3f > > + li a0, 1 > > + bgeu t1, t0, 2f > > + li a0, -1 > > +2: > > + mv a1, t2 > > + ret > > +3: > > + bnez t1, 1b > > + li a0, 0 > > + j 2b > > For fun I removed one conditional and one unconditional branch (untested) > > 1: > lbu t0, 0(a0) > lbu t1, 0(a1) > addi a0, a0, 1 > addi a1, a1, 1 > bne t0, t1, 2f > bnez t0, 1b > li a0, 0 > ret > 2: > slt a1, t1, t0 > slli a1, a1, 1 > li a0, -1 > add a0, a0, a1 > ret yep that - looks correct - and also seems to produce correct results also including your sub a0, t0, t1 comment from the followup that then produces the same result als the zbb-variant. And I've also verified the - 0, if the s1 and s2 are equal; - a negative value if s1 is less than s2; - a positive value if s1 is greater than s2. return value calling convention with documentation And added a comment above it, pointing out this fact for the next person stumbling over this :-) > > +SYM_FUNC_END(strcmp) > > diff --git a/arch/riscv/lib/strlen.S b/arch/riscv/lib/strlen.S > > new file mode 100644 > > index 000000000000..09a7aaff26c8 > > --- /dev/null > > +++ b/arch/riscv/lib/strlen.S > > @@ -0,0 +1,28 @@ > > +/* SPDX-License-Identifier: GPL-2.0-only */ > > + > > +#include <linux/linkage.h> > > +#include <asm/asm.h> > > +#include <asm-generic/export.h> > > + > > +/* int strlen(const char *s) */ > > +SYM_FUNC_START(strlen) > > + /* > > + * Returns > > + * a0 - string length > > + * > > + * Parameters > > + * a0 - String to measure > > + * > > + * Clobbers: > > + * t0, t1 > > + */ > > + mv t1, a0 > > +1: > > + lbu t0, 0(t1) > > + bnez t0, 2f > > + sub a0, t1, a0 > > + ret > > +2: > > + addi t1, t1, 1 > > + j 1b > > Slightly reorganizing looks better (to me) > > mv t1, a0 > 1: > lbu t0, 0(t1) > beqz t0, 2f > addi t1, t1, 1 > j 1b > 2: > sub a0, t1, a0 > ret ok > > +SYM_FUNC_END(strlen) > > diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S > > new file mode 100644 > > index 000000000000..493ab6febcb2 > > --- /dev/null > > +++ b/arch/riscv/lib/strncmp.S > > @@ -0,0 +1,40 @@ > > +/* SPDX-License-Identifier: GPL-2.0-only */ > > + > > +#include <linux/linkage.h> > > +#include <asm/asm.h> > > +#include <asm-generic/export.h> > > + > > +/* int strncmp(const char *cs, const char *ct, size_t count) */ > > +SYM_FUNC_START(strncmp) > > + /* > > + * Returns > > + * a0 - comparison result, value like strncmp > > + * > > + * Parameters > > + * a0 - string1 > > + * a1 - string2 > > + * a2 - number of characters to compare > > + * > > + * Clobbers > > + * t0, t1, t2 > > + */ > > + li t0, 0 > > +1: > > + beq a2, t0, 4f > > + add t1, a0, t0 > > + add t2, a1, t0 > > + lbu t1, 0(t1) > > + lbu t2, 0(t2) > > + beq t1, t2, 3f > > + li a0, 1 > > + bgeu t1, t2, 2f > > + li a0, -1 > > +2: > > + ret > > +3: > > + addi t0, t0, 1 > > + bnez t1, 1b > > +4: > > + li a0, 0 > > + j 2b > > (untested) > > li t2, 0 > 1: > beq a2, t2, 2f > lbu t0, 0(a0) > lbu t1, 0(a1) > addi a0, a0, 1 > addi a1, a1, 1 > bne t0, t1, 3f > addi t2, t2, 1 > bnez t0, 1b > 2: > li a0, 0 > ret > 3: > slt a1, t1, t0 > slli a1, a1, 1 > li a0, -1 > add a0, a0, a1 > ret same here, I did go over the changed assembly and verified that it produces the same results as the original and then did a second pass to also add the sub a0, t0, t1 replacment. > > +SYM_FUNC_END(strncmp) > > diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile > > index dd58e1d99397..d16bf715a586 100644 > > --- a/arch/riscv/purgatory/Makefile > > +++ b/arch/riscv/purgatory/Makefile > > @@ -2,6 +2,7 @@ > > OBJECT_FILES_NON_STANDARD := y > > > > purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o > > +purgatory-y += strcmp.o strlen.o strncmp.o > > > > targets += $(purgatory-y) > > PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) > > @@ -18,6 +19,15 @@ $(obj)/memcpy.o: $(srctree)/arch/riscv/lib/memcpy.S FORCE > > $(obj)/memset.o: $(srctree)/arch/riscv/lib/memset.S FORCE > > $(call if_changed_rule,as_o_S) > > > > +$(obj)/strcmp.o: $(srctree)/arch/riscv/lib/strcmp.S FORCE > > + $(call if_changed_rule,as_o_S) > > + > > +$(obj)/strlen.o: $(srctree)/arch/riscv/lib/strlen.S FORCE > > + $(call if_changed_rule,as_o_S) > > + > > +$(obj)/strncmp.o: $(srctree)/arch/riscv/lib/strncmp.S FORCE > > + $(call if_changed_rule,as_o_S) > > + > > $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE > > $(call if_changed_rule,cc_o_c) > > > > @@ -77,6 +87,9 @@ CFLAGS_ctype.o += $(PURGATORY_CFLAGS) > > AFLAGS_REMOVE_entry.o += -Wa,-gdwarf-2 > > AFLAGS_REMOVE_memcpy.o += -Wa,-gdwarf-2 > > AFLAGS_REMOVE_memset.o += -Wa,-gdwarf-2 > > +AFLAGS_REMOVE_strcmp.o += -Wa,-gdwarf-2 > > +AFLAGS_REMOVE_strlen.o += -Wa,-gdwarf-2 > > +AFLAGS_REMOVE_strncmp.o += -Wa,-gdwarf-2 > > > > $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE > > $(call if_changed,ld) > > > > With at least the removal of the unnecessary preserving of a1 in strcmp, > then it looks correct to me, so > > Reviewed-by: Andrew Jones <ajones@ventanamicro.com> > > But I think there's room for making it more readable, and maybe even > optimized, as I've tried to do. Thanks a lot Heiko
diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h index 909049366555..a96b1fea24fe 100644 --- a/arch/riscv/include/asm/string.h +++ b/arch/riscv/include/asm/string.h @@ -18,6 +18,16 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t); #define __HAVE_ARCH_MEMMOVE extern asmlinkage void *memmove(void *, const void *, size_t); extern asmlinkage void *__memmove(void *, const void *, size_t); + +#define __HAVE_ARCH_STRCMP +extern asmlinkage int strcmp(const char *cs, const char *ct); + +#define __HAVE_ARCH_STRLEN +extern asmlinkage __kernel_size_t strlen(const char *); + +#define __HAVE_ARCH_STRNCMP +extern asmlinkage int strncmp(const char *cs, const char *ct, size_t count); + /* For those files which don't want to check by kasan. */ #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) #define memcpy(dst, src, len) __memcpy(dst, src, len) diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c index 5ab1c7e1a6ed..a72879b4249a 100644 --- a/arch/riscv/kernel/riscv_ksyms.c +++ b/arch/riscv/kernel/riscv_ksyms.c @@ -12,6 +12,9 @@ EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(strcmp); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strncmp); EXPORT_SYMBOL(__memset); EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(__memmove); diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 25d5c9664e57..6c74b0bedd60 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -3,6 +3,9 @@ lib-y += delay.o lib-y += memcpy.o lib-y += memset.o lib-y += memmove.o +lib-y += strcmp.o +lib-y += strlen.o +lib-y += strncmp.o lib-$(CONFIG_MMU) += uaccess.o lib-$(CONFIG_64BIT) += tishift.o diff --git a/arch/riscv/lib/strcmp.S b/arch/riscv/lib/strcmp.S new file mode 100644 index 000000000000..94440fb8390c --- /dev/null +++ b/arch/riscv/lib/strcmp.S @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include <linux/linkage.h> +#include <asm/asm.h> +#include <asm-generic/export.h> + +/* int strcmp(const char *cs, const char *ct) */ +SYM_FUNC_START(strcmp) + /* + * Returns + * a0 - comparison result, value like strcmp + * + * Parameters + * a0 - string1 + * a1 - string2 + * + * Clobbers + * t0, t1, t2 + */ + mv t2, a1 +1: + lbu t1, 0(a0) + lbu t0, 0(a1) + addi a0, a0, 1 + addi a1, a1, 1 + beq t1, t0, 3f + li a0, 1 + bgeu t1, t0, 2f + li a0, -1 +2: + mv a1, t2 + ret +3: + bnez t1, 1b + li a0, 0 + j 2b +SYM_FUNC_END(strcmp) diff --git a/arch/riscv/lib/strlen.S b/arch/riscv/lib/strlen.S new file mode 100644 index 000000000000..09a7aaff26c8 --- /dev/null +++ b/arch/riscv/lib/strlen.S @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include <linux/linkage.h> +#include <asm/asm.h> +#include <asm-generic/export.h> + +/* int strlen(const char *s) */ +SYM_FUNC_START(strlen) + /* + * Returns + * a0 - string length + * + * Parameters + * a0 - String to measure + * + * Clobbers: + * t0, t1 + */ + mv t1, a0 +1: + lbu t0, 0(t1) + bnez t0, 2f + sub a0, t1, a0 + ret +2: + addi t1, t1, 1 + j 1b +SYM_FUNC_END(strlen) diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S new file mode 100644 index 000000000000..493ab6febcb2 --- /dev/null +++ b/arch/riscv/lib/strncmp.S @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include <linux/linkage.h> +#include <asm/asm.h> +#include <asm-generic/export.h> + +/* int strncmp(const char *cs, const char *ct, size_t count) */ +SYM_FUNC_START(strncmp) + /* + * Returns + * a0 - comparison result, value like strncmp + * + * Parameters + * a0 - string1 + * a1 - string2 + * a2 - number of characters to compare + * + * Clobbers + * t0, t1, t2 + */ + li t0, 0 +1: + beq a2, t0, 4f + add t1, a0, t0 + add t2, a1, t0 + lbu t1, 0(t1) + lbu t2, 0(t2) + beq t1, t2, 3f + li a0, 1 + bgeu t1, t2, 2f + li a0, -1 +2: + ret +3: + addi t0, t0, 1 + bnez t1, 1b +4: + li a0, 0 + j 2b +SYM_FUNC_END(strncmp) diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile index dd58e1d99397..d16bf715a586 100644 --- a/arch/riscv/purgatory/Makefile +++ b/arch/riscv/purgatory/Makefile @@ -2,6 +2,7 @@ OBJECT_FILES_NON_STANDARD := y purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o +purgatory-y += strcmp.o strlen.o strncmp.o targets += $(purgatory-y) PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) @@ -18,6 +19,15 @@ $(obj)/memcpy.o: $(srctree)/arch/riscv/lib/memcpy.S FORCE $(obj)/memset.o: $(srctree)/arch/riscv/lib/memset.S FORCE $(call if_changed_rule,as_o_S) +$(obj)/strcmp.o: $(srctree)/arch/riscv/lib/strcmp.S FORCE + $(call if_changed_rule,as_o_S) + +$(obj)/strlen.o: $(srctree)/arch/riscv/lib/strlen.S FORCE + $(call if_changed_rule,as_o_S) + +$(obj)/strncmp.o: $(srctree)/arch/riscv/lib/strncmp.S FORCE + $(call if_changed_rule,as_o_S) + $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE $(call if_changed_rule,cc_o_c) @@ -77,6 +87,9 @@ CFLAGS_ctype.o += $(PURGATORY_CFLAGS) AFLAGS_REMOVE_entry.o += -Wa,-gdwarf-2 AFLAGS_REMOVE_memcpy.o += -Wa,-gdwarf-2 AFLAGS_REMOVE_memset.o += -Wa,-gdwarf-2 +AFLAGS_REMOVE_strcmp.o += -Wa,-gdwarf-2 +AFLAGS_REMOVE_strlen.o += -Wa,-gdwarf-2 +AFLAGS_REMOVE_strncmp.o += -Wa,-gdwarf-2 $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(call if_changed,ld)