diff mbox series

[7/7] RISC-V: add zbb support to string functions

Message ID 20221110164924.529386-8-heiko@sntech.de (mailing list archive)
State Superseded
Headers show
Series Zbb string optimizations and call support in alternatives | expand

Checks

Context Check Description
conchuod/tree_selection fail Guessing tree name failed

Commit Message

Heiko Stuebner Nov. 10, 2022, 4:49 p.m. UTC
From: Heiko Stuebner <heiko.stuebner@vrull.eu>

Add handling for ZBB extension and add support for using it as a
variant for optimized string functions.

Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
---
 arch/riscv/Kconfig                   |  23 ++++++
 arch/riscv/include/asm/errata_list.h |   3 +-
 arch/riscv/include/asm/hwcap.h       |   1 +
 arch/riscv/include/asm/string.h      |  29 ++++++--
 arch/riscv/kernel/cpu.c              |   1 +
 arch/riscv/kernel/cpufeature.c       |  18 +++++
 arch/riscv/lib/Makefile              |   3 +
 arch/riscv/lib/strcmp_zbb.S          |  91 +++++++++++++++++++++++
 arch/riscv/lib/strlen_zbb.S          |  98 +++++++++++++++++++++++++
 arch/riscv/lib/strncmp_zbb.S         | 106 +++++++++++++++++++++++++++
 10 files changed, 366 insertions(+), 7 deletions(-)
 create mode 100644 arch/riscv/lib/strcmp_zbb.S
 create mode 100644 arch/riscv/lib/strlen_zbb.S
 create mode 100644 arch/riscv/lib/strncmp_zbb.S

Comments

Conor Dooley Nov. 13, 2022, 11:29 p.m. UTC | #1
Hey Heiko,
I always seem to forget to open my mails.. I swear I'm not being rude!

In the back of my head while looking at this series, I've been wondering
about Palmer's hwcap stuff. Wonder what the craic is there - I assume
he's just been too busy with the profile stuff. Ditto Ztso, but iirc
that's related. Anyway, that's just an aside as I was reading through
the patch & typed it somewhere lest I forget to bring it up elsewhere.

On Thu, Nov 10, 2022 at 05:49:24PM +0100, Heiko Stuebner wrote:
> From: Heiko Stuebner <heiko.stuebner@vrull.eu>
> 
> Add handling for ZBB extension and add support for using it as a
> variant for optimized string functions.
> 
> Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
> ---
>  arch/riscv/Kconfig                   |  23 ++++++
>  arch/riscv/include/asm/errata_list.h |   3 +-
>  arch/riscv/include/asm/hwcap.h       |   1 +
>  arch/riscv/include/asm/string.h      |  29 ++++++--
>  arch/riscv/kernel/cpu.c              |   1 +
>  arch/riscv/kernel/cpufeature.c       |  18 +++++
>  arch/riscv/lib/Makefile              |   3 +
>  arch/riscv/lib/strcmp_zbb.S          |  91 +++++++++++++++++++++++
>  arch/riscv/lib/strlen_zbb.S          |  98 +++++++++++++++++++++++++
>  arch/riscv/lib/strncmp_zbb.S         | 106 +++++++++++++++++++++++++++
>  10 files changed, 366 insertions(+), 7 deletions(-)
>  create mode 100644 arch/riscv/lib/strcmp_zbb.S
>  create mode 100644 arch/riscv/lib/strlen_zbb.S
>  create mode 100644 arch/riscv/lib/strncmp_zbb.S
> 
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index acfc4d298aab..56633931e808 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -411,6 +411,29 @@ config RISCV_ISA_SVPBMT
>  
>  	   If you don't know what to do here, say Y.
>  
> +config TOOLCHAIN_HAS_ZBB
> +	bool
> +	default y
> +	depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zbb)
> +	depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zbb)
> +	depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900

Drew wants to switch away from this method and use insn defs instead -
and I must admit it does look cleaner! Check out his Zicboz series and
see what you think of it.

> +config RISCV_ISA_ZBB
> +	bool "Zbb extension support for "

Missing some words in this line?

> +	depends on TOOLCHAIN_HAS_ZBB
> +	depends on !XIP_KERNEL && MMU
> +	select RISCV_ALTERNATIVE
> +	default y
> +	help
> +	   Adds support to dynamically detect the presence of the ZBB
> +	   extension (basic bit manipulation) and enable its usage.
> +
> +	   The Zbb extension provides instructions to accelerate a number
> +	   of bit-specific operations (count bit population, sign extending,
> +	   bitrotation, etc).
> +
> +	   If you don't know what to do here, say Y.
> +
>  config TOOLCHAIN_HAS_ZICBOM
>  	bool
>  	default y
> diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
> index 4180312d2a70..95e626b7281e 100644
> --- a/arch/riscv/include/asm/errata_list.h
> +++ b/arch/riscv/include/asm/errata_list.h
> @@ -24,7 +24,8 @@
>  
>  #define	CPUFEATURE_SVPBMT 0
>  #define	CPUFEATURE_ZICBOM 1
> -#define	CPUFEATURE_NUMBER 2
> +#define	CPUFEATURE_ZBB 2
> +#define	CPUFEATURE_NUMBER 3
>  
>  #ifdef __ASSEMBLY__
>  
> diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
> index b22525290073..ac5555fd9788 100644
> --- a/arch/riscv/include/asm/hwcap.h
> +++ b/arch/riscv/include/asm/hwcap.h
> @@ -59,6 +59,7 @@ enum riscv_isa_ext_id {
>  	RISCV_ISA_EXT_ZIHINTPAUSE,
>  	RISCV_ISA_EXT_SSTC,
>  	RISCV_ISA_EXT_SVINVAL,
> +	RISCV_ISA_EXT_ZBB,

With ZIHINTPAUSE before SSTC and SVINIVAL I assume this is not something
we are canonically ordering but I never, ever know which ones we are
allowed to re-order at will.

>  	RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX,
>  };

> diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
> index bf9dd6764bad..66ff36a57e20 100644
> --- a/arch/riscv/kernel/cpu.c
> +++ b/arch/riscv/kernel/cpu.c
> @@ -166,6 +166,7 @@ static struct riscv_isa_ext_data isa_ext_arr[] = {
>  	__RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
>  	__RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
>  	__RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
> +	__RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
>  	__RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
>  	__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
>  	__RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),

This one I do know that Palmer wants canonically ordered.

> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index 026512ca9c4c..f19b9d4e2dca 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -201,6 +201,7 @@ void __init riscv_fill_hwcap(void)
>  			} else {
>  				SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF);
>  				SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT);
> +				SET_ISA_EXT_MAP("zbb", RISCV_ISA_EXT_ZBB);
>  				SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM);
>  				SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE);
>  				SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC);

This one looks like it is, sstc aside. Same question as above, can I
reorder this one? I'll send a patch for it if I can...

> diff --git a/arch/riscv/lib/strcmp_zbb.S b/arch/riscv/lib/strcmp_zbb.S
> new file mode 100644
> index 000000000000..aff9b941d3ee
> --- /dev/null
> +++ b/arch/riscv/lib/strcmp_zbb.S
> @@ -0,0 +1,91 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (c) 2022 VRULL GmbH
> + * Author: Christoph Muellner <christoph.muellner@vrull.eu>

Is a Co-developed-by: appropriate then?

> + */
> +
> +#include <linux/linkage.h>
> +#include <asm/asm.h>
> +#include <asm-generic/export.h>
> +
> +#define src1		a0
> +#define result		a0
> +#define src2		t5
> +#define data1		t0
> +#define data2		t1
> +#define align		t2
> +#define data1_orcb	t3
> +#define m1		t4
> +
> +.option push
> +.option arch,+zbb
> +
> +/* int __strcmp_zbb(const char *cs, const char *ct) */
> +ENTRY(__strcmp_zbb)
> +	/*
> +	 * Returns
> +	 *   a0 - comparison result, like strncmp
> +	 *
> +	 * Parameters
> +	 *   a0 - string1
> +	 *   a1 - string2
> +	 *   a2 - number of characters to compare

Same copy paste mistake here with a2?

> +	 *
> +	 * Clobbers
> +	 *   t0, t1, t2, t3, t4, t5
> +	 */
> +	mv	src2, a1
> +
> +	or	align, src1, src2
> +	li	m1, -1
> +	and	align, align, SZREG-1
> +	bnez	align, 3f

Line of whitespace here would be nice.

> +	/* Main loop for aligned string.  */
> +	.p2align 3
> +1:
> +	REG_L	data1, 0(src1)
> +	REG_L	data2, 0(src2)
> +	orc.b	data1_orcb, data1
> +	bne	data1_orcb, m1, 2f
> +	addi	src1, src1, SZREG
> +	addi	src2, src2, SZREG
> +	beq	data1, data2, 1b
> +
> +	/* Words don't match, and no null byte in the first
> +	 * word. Get bytes in big-endian order and compare.  */
> +#ifndef CONFIG_CPU_BIG_ENDIAN

I know this is a lift from the reference implementation in the spec, but
do we actually need this ifndef section?

> +	rev8	data1, data1
> +	rev8	data2, data2
> +#endif
> +	/* Synthesize (data1 >= data2) ? 1 : -1 in a branchless sequence.  */
> +	sltu	result, data1, data2
> +	neg	result, result
> +	ori	result, result, 1
> +	ret
> +
> +2:
> +	/* Found a null byte.
> +	 * If words don't match, fall back to simple loop.  */

Can the multiline comments match the usual multiline comment style that
you used as the start of the function?

> +	bne	data1, data2, 3f
> +
> +	/* Otherwise, strings are equal.  */
> +	li	result, 0
> +	ret
> +
> +	/* Simple loop for misaligned strings.  */
> +	.p2align 3
> +3:
> +	lbu	data1, 0(src1)
> +	lbu	data2, 0(src2)
> +	addi	src1, src1, 1
> +	addi	src2, src2, 1
> +	bne	data1, data2, 4f
> +	bnez	data1, 3b
> +
> +4:
> +	sub	result, data1, data2
> +	ret
> +END(__strcmp_zbb)
> +EXPORT_SYMBOL(__strcmp_zbb)
> +
> +.option pop
> diff --git a/arch/riscv/lib/strlen_zbb.S b/arch/riscv/lib/strlen_zbb.S
> new file mode 100644
> index 000000000000..bc8d3607a32f
> --- /dev/null
> +++ b/arch/riscv/lib/strlen_zbb.S
> @@ -0,0 +1,98 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (c) 2022 VRULL GmbH
> + * Author: Christoph Muellner <christoph.muellner@vrull.eu>
> + */
> +
> +#include <linux/linkage.h>
> +#include <asm/asm.h>
> +#include <asm-generic/export.h>
> +
> +#define src		a0
> +#define result		a0
> +#define addr		t0
> +#define data		t1
> +#define offset		t2
> +#define offset_bits	t2
> +#define valid_bytes	t3
> +#define m1		t3
> +
> +#ifdef CONFIG_CPU_BIG_ENDIAN
> +# define CZ	clz
> +# define SHIFT	sll
> +#else
> +# define CZ	ctz
> +# define SHIFT	srl
> +#endif
> +
> +.option push
> +.option arch,+zbb
> +
> +/* int __strlen_zbb(const char *s) */
> +ENTRY(__strlen_zbb)
> +	/*
> +	 * Returns
> +	 *   a0 - string length
> +	 *
> +	 * Parameters
> +	 *   a0 - String to measure
> +	 *
> +	 * Clobbers
> +	 *   t0, t1, t2, t3
> +	 */
> +
> +	/* Number of irrelevant bytes in the first word.  */
> +	andi	offset, src, SZREG-1
> +	/* Align pointer.  */
> +	andi	addr, src, -SZREG
> +
> +	li	valid_bytes, SZREG
> +	sub	valid_bytes, valid_bytes, offset
> +	slli	offset_bits, offset, RISCV_LGPTR
> +
> +	/* Get the first word.  */
> +	REG_L	data, 0(addr)

A line of whitespace prior to the comments would go a long way here with
making this a little more readable - especially as a diff in a
mailclient.

I am oh-so-far from an expert on this kind of stuff, but these three
functions seem to match up with the reference implementations in the
spec. With the couple different nit pick bits fixed, feel free to tack
on:
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>

Anyways, hopefully I've not missed a bunch of should-be-obvious things
while trying to review the series..

Thanks,
Conor.

> +	/* Shift away the partial data we loaded to remove the irrelevant bytes
> +	 * preceding the string with the effect of adding NUL bytes at the
> +	 * end of the string.  */
> +	SHIFT	data, data, offset_bits
> +	/* Convert non-NUL into 0xff and NUL into 0x00.  */
> +	orc.b	data, data
> +	/* Convert non-NUL into 0x00 and NUL into 0xff.  */
> +	not	data, data
> +	/* Search for the first set bit (corresponding to a NUL byte in the
> +	 * original chunk).  */
> +	CZ	data, data
> +	/* The first chunk is special: commpare against the number
> +	 * of valid bytes in this chunk.  */
> +	srli	result, data, 3
> +	bgtu	valid_bytes, result, 3f
> +
> +	/* Prepare for the word comparison loop.  */
> +	addi	offset, addr, SZREG
> +	li	m1, -1
> +
> +	/* Our critical loop is 4 instructions and processes data in
> +	 * 4 byte or 8 byte chunks.  */
> +	.p2align 3
> +1:
> +	REG_L	data, SZREG(addr)
> +	addi	addr, addr, SZREG
> +	orc.b	data, data
> +	beq	data, m1, 1b
> +2:
> +	not	data, data
> +	CZ	data, data
> +	/* Get number of processed words.  */
> +	sub	offset, addr, offset
> +	/* Add number of characters in the first word.  */
> +	add	result, result, offset
> +	srli	data, data, 3
> +	/* Add number of characters in the last word.  */
> +	add	result, result, data
> +3:
> +	ret
> +END(__strlen_zbb)
> +EXPORT_SYMBOL(__strlen_zbb)
> +
> +.option pop
> diff --git a/arch/riscv/lib/strncmp_zbb.S b/arch/riscv/lib/strncmp_zbb.S
> new file mode 100644
> index 000000000000..852c8425d238
> --- /dev/null
> +++ b/arch/riscv/lib/strncmp_zbb.S
> @@ -0,0 +1,106 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (c) 2022 VRULL GmbH
> + * Author: Christoph Muellner <christoph.muellner@vrull.eu>
> + */
> +
> +#include <linux/linkage.h>
> +#include <asm/asm.h>
> +#include <asm-generic/export.h>
> +
> +#define src1		a0
> +#define result		a0
> +#define src2		t6
> +#define len		a2
> +#define data1		t0
> +#define data2		t1
> +#define align		t2
> +#define data1_orcb	t3
> +#define limit		t4
> +#define m1		t5
> +
> +.option push
> +.option arch,+zbb
> +
> +/* int __strncmp_zbb(const char *cs, const char *ct, size_t count) */
> +ENTRY(__strncmp_zbb)
> +	/*
> +	 * Returns
> +	 *   a0 - comparison result, like strncmp
> +	 *
> +	 * Parameters
> +	 *   a0 - string1
> +	 *   a1 - string2
> +	 *   a2 - number of characters to compare
> +	 *
> +	 * Clobbers
> +	 *   t0, t1, t2, t3, t4, t5, t6
> +	 */
> +	mv	src2, a1
> +
> +	or	align, src1, src2
> +	li	m1, -1
> +	and	align, align, SZREG-1
> +	add	limit, src1, len
> +	bnez	align, 4f
> +
> +	/* Adjust limit for fast-path.  */
> +	addi	limit, limit, -SZREG
> +	/* Main loop for aligned string.  */
> +	.p2align 3
> +1:
> +	bgt	src1, limit, 3f
> +	REG_L	data1, 0(src1)
> +	REG_L	data2, 0(src2)
> +	orc.b	data1_orcb, data1
> +	bne	data1_orcb, m1, 2f
> +	addi	src1, src1, SZREG
> +	addi	src2, src2, SZREG
> +	beq	data1, data2, 1b
> +
> +	/* Words don't match, and no null byte in the first
> +	 * word. Get bytes in big-endian order and compare.  */
> +#ifndef CONFIG_CPU_BIG_ENDIAN
> +	rev8	data1, data1
> +	rev8	data2, data2
> +#endif
> +	/* Synthesize (data1 >= data2) ? 1 : -1 in a branchless sequence.  */
> +	sltu	result, data1, data2
> +	neg	result, result
> +	ori	result, result, 1
> +	ret
> +
> +2:
> +	/* Found a null byte.
> +	 * If words don't match, fall back to simple loop.  */
> +	bne	data1, data2, 3f
> +
> +	/* Otherwise, strings are equal.  */
> +	li	result, 0
> +	ret
> +
> +	/* Simple loop for misaligned strings.  */
> +3:
> +	/* Restore limit for slow-path.  */
> +	addi	limit, limit, SZREG
> +	.p2align 3
> +4:
> +	bge	src1, limit, 6f
> +	lbu	data1, 0(src1)
> +	lbu	data2, 0(src2)
> +	addi	src1, src1, 1
> +	addi	src2, src2, 1
> +	bne	data1, data2, 5f
> +	bnez	data1, 4b
> +
> +5:
> +	sub	result, data1, data2
> +	ret
> +
> +6:
> +	li	result, 0
> +	ret
> +END(__strncmp_zbb)
> +EXPORT_SYMBOL(__strncmp_zbb)
> +
> +.option pop
> -- 
> 2.35.1
> 
> 
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
Heiko Stuebner Nov. 13, 2022, 11:47 p.m. UTC | #2
Hi Conor,

Am Montag, 14. November 2022, 00:29:35 CET schrieb Conor Dooley:
> I always seem to forget to open my mails.. I swear I'm not being rude!

no worries :-) .

I also seem to miss one or another channel of communication way more often
than I'd like.


> In the back of my head while looking at this series, I've been wondering
> about Palmer's hwcap stuff. Wonder what the craic is there - I assume
> he's just been too busy with the profile stuff. Ditto Ztso, but iirc
> that's related. Anyway, that's just an aside as I was reading through
> the patch & typed it somewhere lest I forget to bring it up elsewhere.

That is essentially the next step for me.

The far away goal is doing the stuff we talked about at LPC - i.e. allowing
multiple variants for this and then selecting the hopefully fastest one
to patch in :-) .

Though the whole thing has way to many moving pieces for my taste,
so I'm trying to solve this step by step.

This series "simply" provides an optimization for cores supporting the
zbb extension. It works standalone and "solves" my sub-problems of
how to patch these core functions and also allowing function calls
from alternatives :-) .
[and by that may also help the people working on those more
 involved dma-operation variants, that aren't zicbom :-) ]


Next up is the mem* were I essentially have variants for "fast unaligned"
access systems - which then will use Palmer's hwcap work.
His series describes the necessary dt-property to declare if a core
can support said fast unaligned access.


Heiko

> On Thu, Nov 10, 2022 at 05:49:24PM +0100, Heiko Stuebner wrote:
> > From: Heiko Stuebner <heiko.stuebner@vrull.eu>
> > 
> > Add handling for ZBB extension and add support for using it as a
> > variant for optimized string functions.
> > 
> > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
> > ---
> >  arch/riscv/Kconfig                   |  23 ++++++
> >  arch/riscv/include/asm/errata_list.h |   3 +-
> >  arch/riscv/include/asm/hwcap.h       |   1 +
> >  arch/riscv/include/asm/string.h      |  29 ++++++--
> >  arch/riscv/kernel/cpu.c              |   1 +
> >  arch/riscv/kernel/cpufeature.c       |  18 +++++
> >  arch/riscv/lib/Makefile              |   3 +
> >  arch/riscv/lib/strcmp_zbb.S          |  91 +++++++++++++++++++++++
> >  arch/riscv/lib/strlen_zbb.S          |  98 +++++++++++++++++++++++++
> >  arch/riscv/lib/strncmp_zbb.S         | 106 +++++++++++++++++++++++++++
> >  10 files changed, 366 insertions(+), 7 deletions(-)
> >  create mode 100644 arch/riscv/lib/strcmp_zbb.S
> >  create mode 100644 arch/riscv/lib/strlen_zbb.S
> >  create mode 100644 arch/riscv/lib/strncmp_zbb.S
> > 
> > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > index acfc4d298aab..56633931e808 100644
> > --- a/arch/riscv/Kconfig
> > +++ b/arch/riscv/Kconfig
> > @@ -411,6 +411,29 @@ config RISCV_ISA_SVPBMT
> >  
> >  	   If you don't know what to do here, say Y.
> >  
> > +config TOOLCHAIN_HAS_ZBB
> > +	bool
> > +	default y
> > +	depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zbb)
> > +	depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zbb)
> > +	depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900
> 
> Drew wants to switch away from this method and use insn defs instead -
> and I must admit it does look cleaner! Check out his Zicboz series and
> see what you think of it.
> 
> > +config RISCV_ISA_ZBB
> > +	bool "Zbb extension support for "
> 
> Missing some words in this line?
> 
> > +	depends on TOOLCHAIN_HAS_ZBB
> > +	depends on !XIP_KERNEL && MMU
> > +	select RISCV_ALTERNATIVE
> > +	default y
> > +	help
> > +	   Adds support to dynamically detect the presence of the ZBB
> > +	   extension (basic bit manipulation) and enable its usage.
> > +
> > +	   The Zbb extension provides instructions to accelerate a number
> > +	   of bit-specific operations (count bit population, sign extending,
> > +	   bitrotation, etc).
> > +
> > +	   If you don't know what to do here, say Y.
> > +
> >  config TOOLCHAIN_HAS_ZICBOM
> >  	bool
> >  	default y
> > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
> > index 4180312d2a70..95e626b7281e 100644
> > --- a/arch/riscv/include/asm/errata_list.h
> > +++ b/arch/riscv/include/asm/errata_list.h
> > @@ -24,7 +24,8 @@
> >  
> >  #define	CPUFEATURE_SVPBMT 0
> >  #define	CPUFEATURE_ZICBOM 1
> > -#define	CPUFEATURE_NUMBER 2
> > +#define	CPUFEATURE_ZBB 2
> > +#define	CPUFEATURE_NUMBER 3
> >  
> >  #ifdef __ASSEMBLY__
> >  
> > diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
> > index b22525290073..ac5555fd9788 100644
> > --- a/arch/riscv/include/asm/hwcap.h
> > +++ b/arch/riscv/include/asm/hwcap.h
> > @@ -59,6 +59,7 @@ enum riscv_isa_ext_id {
> >  	RISCV_ISA_EXT_ZIHINTPAUSE,
> >  	RISCV_ISA_EXT_SSTC,
> >  	RISCV_ISA_EXT_SVINVAL,
> > +	RISCV_ISA_EXT_ZBB,
> 
> With ZIHINTPAUSE before SSTC and SVINIVAL I assume this is not something
> we are canonically ordering but I never, ever know which ones we are
> allowed to re-order at will.
> 
> >  	RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX,
> >  };
> 
> > diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
> > index bf9dd6764bad..66ff36a57e20 100644
> > --- a/arch/riscv/kernel/cpu.c
> > +++ b/arch/riscv/kernel/cpu.c
> > @@ -166,6 +166,7 @@ static struct riscv_isa_ext_data isa_ext_arr[] = {
> >  	__RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
> >  	__RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
> >  	__RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
> > +	__RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
> >  	__RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
> >  	__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
> >  	__RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
> 
> This one I do know that Palmer wants canonically ordered.
> 
> > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> > index 026512ca9c4c..f19b9d4e2dca 100644
> > --- a/arch/riscv/kernel/cpufeature.c
> > +++ b/arch/riscv/kernel/cpufeature.c
> > @@ -201,6 +201,7 @@ void __init riscv_fill_hwcap(void)
> >  			} else {
> >  				SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF);
> >  				SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT);
> > +				SET_ISA_EXT_MAP("zbb", RISCV_ISA_EXT_ZBB);
> >  				SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM);
> >  				SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE);
> >  				SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC);
> 
> This one looks like it is, sstc aside. Same question as above, can I
> reorder this one? I'll send a patch for it if I can...
> 
> > diff --git a/arch/riscv/lib/strcmp_zbb.S b/arch/riscv/lib/strcmp_zbb.S
> > new file mode 100644
> > index 000000000000..aff9b941d3ee
> > --- /dev/null
> > +++ b/arch/riscv/lib/strcmp_zbb.S
> > @@ -0,0 +1,91 @@
> > +/* SPDX-License-Identifier: GPL-2.0-only */
> > +/*
> > + * Copyright (c) 2022 VRULL GmbH
> > + * Author: Christoph Muellner <christoph.muellner@vrull.eu>
> 
> Is a Co-developed-by: appropriate then?
> 
> > + */
> > +
> > +#include <linux/linkage.h>
> > +#include <asm/asm.h>
> > +#include <asm-generic/export.h>
> > +
> > +#define src1		a0
> > +#define result		a0
> > +#define src2		t5
> > +#define data1		t0
> > +#define data2		t1
> > +#define align		t2
> > +#define data1_orcb	t3
> > +#define m1		t4
> > +
> > +.option push
> > +.option arch,+zbb
> > +
> > +/* int __strcmp_zbb(const char *cs, const char *ct) */
> > +ENTRY(__strcmp_zbb)
> > +	/*
> > +	 * Returns
> > +	 *   a0 - comparison result, like strncmp
> > +	 *
> > +	 * Parameters
> > +	 *   a0 - string1
> > +	 *   a1 - string2
> > +	 *   a2 - number of characters to compare
> 
> Same copy paste mistake here with a2?
> 
> > +	 *
> > +	 * Clobbers
> > +	 *   t0, t1, t2, t3, t4, t5
> > +	 */
> > +	mv	src2, a1
> > +
> > +	or	align, src1, src2
> > +	li	m1, -1
> > +	and	align, align, SZREG-1
> > +	bnez	align, 3f
> 
> Line of whitespace here would be nice.
> 
> > +	/* Main loop for aligned string.  */
> > +	.p2align 3
> > +1:
> > +	REG_L	data1, 0(src1)
> > +	REG_L	data2, 0(src2)
> > +	orc.b	data1_orcb, data1
> > +	bne	data1_orcb, m1, 2f
> > +	addi	src1, src1, SZREG
> > +	addi	src2, src2, SZREG
> > +	beq	data1, data2, 1b
> > +
> > +	/* Words don't match, and no null byte in the first
> > +	 * word. Get bytes in big-endian order and compare.  */
> > +#ifndef CONFIG_CPU_BIG_ENDIAN
> 
> I know this is a lift from the reference implementation in the spec, but
> do we actually need this ifndef section?
> 
> > +	rev8	data1, data1
> > +	rev8	data2, data2
> > +#endif
> > +	/* Synthesize (data1 >= data2) ? 1 : -1 in a branchless sequence.  */
> > +	sltu	result, data1, data2
> > +	neg	result, result
> > +	ori	result, result, 1
> > +	ret
> > +
> > +2:
> > +	/* Found a null byte.
> > +	 * If words don't match, fall back to simple loop.  */
> 
> Can the multiline comments match the usual multiline comment style that
> you used as the start of the function?
> 
> > +	bne	data1, data2, 3f
> > +
> > +	/* Otherwise, strings are equal.  */
> > +	li	result, 0
> > +	ret
> > +
> > +	/* Simple loop for misaligned strings.  */
> > +	.p2align 3
> > +3:
> > +	lbu	data1, 0(src1)
> > +	lbu	data2, 0(src2)
> > +	addi	src1, src1, 1
> > +	addi	src2, src2, 1
> > +	bne	data1, data2, 4f
> > +	bnez	data1, 3b
> > +
> > +4:
> > +	sub	result, data1, data2
> > +	ret
> > +END(__strcmp_zbb)
> > +EXPORT_SYMBOL(__strcmp_zbb)
> > +
> > +.option pop
> > diff --git a/arch/riscv/lib/strlen_zbb.S b/arch/riscv/lib/strlen_zbb.S
> > new file mode 100644
> > index 000000000000..bc8d3607a32f
> > --- /dev/null
> > +++ b/arch/riscv/lib/strlen_zbb.S
> > @@ -0,0 +1,98 @@
> > +/* SPDX-License-Identifier: GPL-2.0-only */
> > +/*
> > + * Copyright (c) 2022 VRULL GmbH
> > + * Author: Christoph Muellner <christoph.muellner@vrull.eu>
> > + */
> > +
> > +#include <linux/linkage.h>
> > +#include <asm/asm.h>
> > +#include <asm-generic/export.h>
> > +
> > +#define src		a0
> > +#define result		a0
> > +#define addr		t0
> > +#define data		t1
> > +#define offset		t2
> > +#define offset_bits	t2
> > +#define valid_bytes	t3
> > +#define m1		t3
> > +
> > +#ifdef CONFIG_CPU_BIG_ENDIAN
> > +# define CZ	clz
> > +# define SHIFT	sll
> > +#else
> > +# define CZ	ctz
> > +# define SHIFT	srl
> > +#endif
> > +
> > +.option push
> > +.option arch,+zbb
> > +
> > +/* int __strlen_zbb(const char *s) */
> > +ENTRY(__strlen_zbb)
> > +	/*
> > +	 * Returns
> > +	 *   a0 - string length
> > +	 *
> > +	 * Parameters
> > +	 *   a0 - String to measure
> > +	 *
> > +	 * Clobbers
> > +	 *   t0, t1, t2, t3
> > +	 */
> > +
> > +	/* Number of irrelevant bytes in the first word.  */
> > +	andi	offset, src, SZREG-1
> > +	/* Align pointer.  */
> > +	andi	addr, src, -SZREG
> > +
> > +	li	valid_bytes, SZREG
> > +	sub	valid_bytes, valid_bytes, offset
> > +	slli	offset_bits, offset, RISCV_LGPTR
> > +
> > +	/* Get the first word.  */
> > +	REG_L	data, 0(addr)
> 
> A line of whitespace prior to the comments would go a long way here with
> making this a little more readable - especially as a diff in a
> mailclient.
> 
> I am oh-so-far from an expert on this kind of stuff, but these three
> functions seem to match up with the reference implementations in the
> spec. With the couple different nit pick bits fixed, feel free to tack
> on:
> Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
> 
> Anyways, hopefully I've not missed a bunch of should-be-obvious things
> while trying to review the series..
> 
> Thanks,
> Conor.
> 
> > +	/* Shift away the partial data we loaded to remove the irrelevant bytes
> > +	 * preceding the string with the effect of adding NUL bytes at the
> > +	 * end of the string.  */
> > +	SHIFT	data, data, offset_bits
> > +	/* Convert non-NUL into 0xff and NUL into 0x00.  */
> > +	orc.b	data, data
> > +	/* Convert non-NUL into 0x00 and NUL into 0xff.  */
> > +	not	data, data
> > +	/* Search for the first set bit (corresponding to a NUL byte in the
> > +	 * original chunk).  */
> > +	CZ	data, data
> > +	/* The first chunk is special: commpare against the number
> > +	 * of valid bytes in this chunk.  */
> > +	srli	result, data, 3
> > +	bgtu	valid_bytes, result, 3f
> > +
> > +	/* Prepare for the word comparison loop.  */
> > +	addi	offset, addr, SZREG
> > +	li	m1, -1
> > +
> > +	/* Our critical loop is 4 instructions and processes data in
> > +	 * 4 byte or 8 byte chunks.  */
> > +	.p2align 3
> > +1:
> > +	REG_L	data, SZREG(addr)
> > +	addi	addr, addr, SZREG
> > +	orc.b	data, data
> > +	beq	data, m1, 1b
> > +2:
> > +	not	data, data
> > +	CZ	data, data
> > +	/* Get number of processed words.  */
> > +	sub	offset, addr, offset
> > +	/* Add number of characters in the first word.  */
> > +	add	result, result, offset
> > +	srli	data, data, 3
> > +	/* Add number of characters in the last word.  */
> > +	add	result, result, data
> > +3:
> > +	ret
> > +END(__strlen_zbb)
> > +EXPORT_SYMBOL(__strlen_zbb)
> > +
> > +.option pop
> > diff --git a/arch/riscv/lib/strncmp_zbb.S b/arch/riscv/lib/strncmp_zbb.S
> > new file mode 100644
> > index 000000000000..852c8425d238
> > --- /dev/null
> > +++ b/arch/riscv/lib/strncmp_zbb.S
> > @@ -0,0 +1,106 @@
> > +/* SPDX-License-Identifier: GPL-2.0-only */
> > +/*
> > + * Copyright (c) 2022 VRULL GmbH
> > + * Author: Christoph Muellner <christoph.muellner@vrull.eu>
> > + */
> > +
> > +#include <linux/linkage.h>
> > +#include <asm/asm.h>
> > +#include <asm-generic/export.h>
> > +
> > +#define src1		a0
> > +#define result		a0
> > +#define src2		t6
> > +#define len		a2
> > +#define data1		t0
> > +#define data2		t1
> > +#define align		t2
> > +#define data1_orcb	t3
> > +#define limit		t4
> > +#define m1		t5
> > +
> > +.option push
> > +.option arch,+zbb
> > +
> > +/* int __strncmp_zbb(const char *cs, const char *ct, size_t count) */
> > +ENTRY(__strncmp_zbb)
> > +	/*
> > +	 * Returns
> > +	 *   a0 - comparison result, like strncmp
> > +	 *
> > +	 * Parameters
> > +	 *   a0 - string1
> > +	 *   a1 - string2
> > +	 *   a2 - number of characters to compare
> > +	 *
> > +	 * Clobbers
> > +	 *   t0, t1, t2, t3, t4, t5, t6
> > +	 */
> > +	mv	src2, a1
> > +
> > +	or	align, src1, src2
> > +	li	m1, -1
> > +	and	align, align, SZREG-1
> > +	add	limit, src1, len
> > +	bnez	align, 4f
> > +
> > +	/* Adjust limit for fast-path.  */
> > +	addi	limit, limit, -SZREG
> > +	/* Main loop for aligned string.  */
> > +	.p2align 3
> > +1:
> > +	bgt	src1, limit, 3f
> > +	REG_L	data1, 0(src1)
> > +	REG_L	data2, 0(src2)
> > +	orc.b	data1_orcb, data1
> > +	bne	data1_orcb, m1, 2f
> > +	addi	src1, src1, SZREG
> > +	addi	src2, src2, SZREG
> > +	beq	data1, data2, 1b
> > +
> > +	/* Words don't match, and no null byte in the first
> > +	 * word. Get bytes in big-endian order and compare.  */
> > +#ifndef CONFIG_CPU_BIG_ENDIAN
> > +	rev8	data1, data1
> > +	rev8	data2, data2
> > +#endif
> > +	/* Synthesize (data1 >= data2) ? 1 : -1 in a branchless sequence.  */
> > +	sltu	result, data1, data2
> > +	neg	result, result
> > +	ori	result, result, 1
> > +	ret
> > +
> > +2:
> > +	/* Found a null byte.
> > +	 * If words don't match, fall back to simple loop.  */
> > +	bne	data1, data2, 3f
> > +
> > +	/* Otherwise, strings are equal.  */
> > +	li	result, 0
> > +	ret
> > +
> > +	/* Simple loop for misaligned strings.  */
> > +3:
> > +	/* Restore limit for slow-path.  */
> > +	addi	limit, limit, SZREG
> > +	.p2align 3
> > +4:
> > +	bge	src1, limit, 6f
> > +	lbu	data1, 0(src1)
> > +	lbu	data2, 0(src2)
> > +	addi	src1, src1, 1
> > +	addi	src2, src2, 1
> > +	bne	data1, data2, 5f
> > +	bnez	data1, 4b
> > +
> > +5:
> > +	sub	result, data1, data2
> > +	ret
> > +
> > +6:
> > +	li	result, 0
> > +	ret
> > +END(__strncmp_zbb)
> > +EXPORT_SYMBOL(__strncmp_zbb)
> > +
> > +.option pop
>
Heiko Stuebner Nov. 24, 2022, 10:23 p.m. UTC | #3
> > diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
> > index b22525290073..ac5555fd9788 100644
> > --- a/arch/riscv/include/asm/hwcap.h
> > +++ b/arch/riscv/include/asm/hwcap.h
> > @@ -59,6 +59,7 @@ enum riscv_isa_ext_id {
> >  	RISCV_ISA_EXT_ZIHINTPAUSE,
> >  	RISCV_ISA_EXT_SSTC,
> >  	RISCV_ISA_EXT_SVINVAL,
> > +	RISCV_ISA_EXT_ZBB,
> 
> With ZIHINTPAUSE before SSTC and SVINIVAL I assume this is not something
> we are canonically ordering but I never, ever know which ones we are
> allowed to re-order at will.

I guess we could extend the comments with suitable hints,
which could help in the future.

> 
> >  	RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX,
> >  };
> 
> > diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
> > index bf9dd6764bad..66ff36a57e20 100644
> > --- a/arch/riscv/kernel/cpu.c
> > +++ b/arch/riscv/kernel/cpu.c
> > @@ -166,6 +166,7 @@ static struct riscv_isa_ext_data isa_ext_arr[] = {
> >  	__RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
> >  	__RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
> >  	__RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
> > +	__RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
> >  	__RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
> >  	__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
> >  	__RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
> 
> This one I do know that Palmer wants canonically ordered.
> 
> > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> > index 026512ca9c4c..f19b9d4e2dca 100644
> > --- a/arch/riscv/kernel/cpufeature.c
> > +++ b/arch/riscv/kernel/cpufeature.c
> > @@ -201,6 +201,7 @@ void __init riscv_fill_hwcap(void)
> >  			} else {
> >  				SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF);
> >  				SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT);
> > +				SET_ISA_EXT_MAP("zbb", RISCV_ISA_EXT_ZBB);
> >  				SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM);
> >  				SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE);
> >  				SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC);
> 
> This one looks like it is, sstc aside. Same question as above, can I
> reorder this one? I'll send a patch for it if I can...

hmm, I don't see the difference between cpu.c above
(..., svpbmt, zbb, zicbom, ...) and here
(..., svpbmt, zbb, zicbom, ...)


> > diff --git a/arch/riscv/lib/strcmp_zbb.S b/arch/riscv/lib/strcmp_zbb.S
> > new file mode 100644
> > index 000000000000..aff9b941d3ee
> > --- /dev/null
> > +++ b/arch/riscv/lib/strcmp_zbb.S
> > @@ -0,0 +1,91 @@
> > +/* SPDX-License-Identifier: GPL-2.0-only */
> > +/*
> > + * Copyright (c) 2022 VRULL GmbH
> > + * Author: Christoph Muellner <christoph.muellner@vrull.eu>
> 
> Is a Co-developed-by: appropriate then?

I'd think so ... i.e. the assembly is from Christoph, but is originally
part of a pending glibc patchset, hence Christoph and me
decided on the co-developed thingy :-) .

> > +	/* Main loop for aligned string.  */
> > +	.p2align 3
> > +1:
> > +	REG_L	data1, 0(src1)
> > +	REG_L	data2, 0(src2)
> > +	orc.b	data1_orcb, data1
> > +	bne	data1_orcb, m1, 2f
> > +	addi	src1, src1, SZREG
> > +	addi	src2, src2, SZREG
> > +	beq	data1, data2, 1b
> > +
> > +	/* Words don't match, and no null byte in the first
> > +	 * word. Get bytes in big-endian order and compare.  */
> > +#ifndef CONFIG_CPU_BIG_ENDIAN
> 
> I know this is a lift from the reference implementation in the spec, but
> do we actually need this ifndef section?

I don't know, but _if_ big endian support comes in the future,
having one place less to break also might be helpful? :-)



Heiko
Conor Dooley Nov. 24, 2022, 10:32 p.m. UTC | #4
On Thu, Nov 24, 2022 at 11:23:08PM +0100, Heiko Stübner wrote:
> > > diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
> > > index b22525290073..ac5555fd9788 100644
> > > --- a/arch/riscv/include/asm/hwcap.h
> > > +++ b/arch/riscv/include/asm/hwcap.h
> > > @@ -59,6 +59,7 @@ enum riscv_isa_ext_id {
> > >  	RISCV_ISA_EXT_ZIHINTPAUSE,
> > >  	RISCV_ISA_EXT_SSTC,
> > >  	RISCV_ISA_EXT_SVINVAL,
> > > +	RISCV_ISA_EXT_ZBB,
> > 
> > With ZIHINTPAUSE before SSTC and SVINIVAL I assume this is not something
> > we are canonically ordering but I never, ever know which ones we are
> > allowed to re-order at will.
> 
> I guess we could extend the comments with suitable hints,
> which could help in the future.

Aye, for the likes of me that will never, ever remember I like the idea!

> > >  	RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX,
> > >  };
> > 
> > > diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
> > > index bf9dd6764bad..66ff36a57e20 100644
> > > --- a/arch/riscv/kernel/cpu.c
> > > +++ b/arch/riscv/kernel/cpu.c
> > > @@ -166,6 +166,7 @@ static struct riscv_isa_ext_data isa_ext_arr[] = {
> > >  	__RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
> > >  	__RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
> > >  	__RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
> > > +	__RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
> > >  	__RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
> > >  	__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
> > >  	__RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
> > 
> > This one I do know that Palmer wants canonically ordered.

btw, idk if you noticed but I appear to have picked canonical ordering
as today's thing to get confused about a lot.

You put zbb after the S extentions - does that meant it is *not* an
"Additional Standard Extension" but rather a regular Z one?

> > > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> > > index 026512ca9c4c..f19b9d4e2dca 100644
> > > --- a/arch/riscv/kernel/cpufeature.c
> > > +++ b/arch/riscv/kernel/cpufeature.c
> > > @@ -201,6 +201,7 @@ void __init riscv_fill_hwcap(void)
> > >  			} else {
> > >  				SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF);
> > >  				SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT);
> > > +				SET_ISA_EXT_MAP("zbb", RISCV_ISA_EXT_ZBB);
> > >  				SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM);
> > >  				SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE);
> > >  				SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC);
> > 
> > This one looks like it is, sstc aside. Same question as above, can I
> > reorder this one? I'll send a patch for it if I can...
> 
> hmm, I don't see the difference between cpu.c above
> (..., svpbmt, zbb, zicbom, ...) and here
> (..., svpbmt, zbb, zicbom, ...)

sstc appears last here but first in the cpu.c hunk above.

> > > diff --git a/arch/riscv/lib/strcmp_zbb.S b/arch/riscv/lib/strcmp_zbb.S
> > > new file mode 100644
> > > index 000000000000..aff9b941d3ee
> > > --- /dev/null
> > > +++ b/arch/riscv/lib/strcmp_zbb.S
> > > @@ -0,0 +1,91 @@
> > > +/* SPDX-License-Identifier: GPL-2.0-only */
> > > +/*
> > > + * Copyright (c) 2022 VRULL GmbH
> > > + * Author: Christoph Muellner <christoph.muellner@vrull.eu>
> > 
> > Is a Co-developed-by: appropriate then?
> 
> I'd think so ... i.e. the assembly is from Christoph, but is originally
> part of a pending glibc patchset, hence Christoph and me
> decided on the co-developed thingy :-) .

Check your patch again, I don't see a Co-developed-by: tag. (That's what
I was getting at, not the validity of "Author: Christoph...")

> > > +	/* Main loop for aligned string.  */
> > > +	.p2align 3
> > > +1:
> > > +	REG_L	data1, 0(src1)
> > > +	REG_L	data2, 0(src2)
> > > +	orc.b	data1_orcb, data1
> > > +	bne	data1_orcb, m1, 2f
> > > +	addi	src1, src1, SZREG
> > > +	addi	src2, src2, SZREG
> > > +	beq	data1, data2, 1b
> > > +
> > > +	/* Words don't match, and no null byte in the first
> > > +	 * word. Get bytes in big-endian order and compare.  */
> > > +#ifndef CONFIG_CPU_BIG_ENDIAN
> > 
> > I know this is a lift from the reference implementation in the spec, but
> > do we actually need this ifndef section?
> 
> I don't know, but _if_ big endian support comes in the future,
> having one place less to break also might be helpful? :-)

One less place to have to go and fix it up, but I hope it never comes to
pass! And no harm is being close to the one in the spec...
Heiko Stuebner Nov. 24, 2022, 11:51 p.m. UTC | #5
Am Donnerstag, 24. November 2022, 23:32:58 CET schrieb Conor Dooley:
> On Thu, Nov 24, 2022 at 11:23:08PM +0100, Heiko Stübner wrote:
> > > > diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
> > > > index b22525290073..ac5555fd9788 100644
> > > > --- a/arch/riscv/include/asm/hwcap.h
> > > > +++ b/arch/riscv/include/asm/hwcap.h
> > > > @@ -59,6 +59,7 @@ enum riscv_isa_ext_id {
> > > >  	RISCV_ISA_EXT_ZIHINTPAUSE,
> > > >  	RISCV_ISA_EXT_SSTC,
> > > >  	RISCV_ISA_EXT_SVINVAL,
> > > > +	RISCV_ISA_EXT_ZBB,
> > > 
> > > With ZIHINTPAUSE before SSTC and SVINIVAL I assume this is not something
> > > we are canonically ordering but I never, ever know which ones we are
> > > allowed to re-order at will.
> > 
> > I guess we could extend the comments with suitable hints,
> > which could help in the future.
> 
> Aye, for the likes of me that will never, ever remember I like the idea!

I'm 100% with you on this. I remember that this came up either with
svpbmt or zicbom in the past, but I still again forgot how the ordering
goes.

> 
> > > >  	RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX,
> > > >  };
> > > 
> > > > diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
> > > > index bf9dd6764bad..66ff36a57e20 100644
> > > > --- a/arch/riscv/kernel/cpu.c
> > > > +++ b/arch/riscv/kernel/cpu.c
> > > > @@ -166,6 +166,7 @@ static struct riscv_isa_ext_data isa_ext_arr[] = {
> > > >  	__RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
> > > >  	__RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
> > > >  	__RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
> > > > +	__RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
> > > >  	__RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
> > > >  	__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
> > > >  	__RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
> > > 
> > > This one I do know that Palmer wants canonically ordered.
> 
> btw, idk if you noticed but I appear to have picked canonical ordering
> as today's thing to get confused about a lot.
> 
> You put zbb after the S extentions - does that meant it is *not* an
> "Additional Standard Extension" but rather a regular Z one?

This confuses me completely now :-) .

The list is still too short to see where other extensions
are placed. I guess I need to find that stuff again about
extension ordering.


> > > > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> > > > index 026512ca9c4c..f19b9d4e2dca 100644
> > > > --- a/arch/riscv/kernel/cpufeature.c
> > > > +++ b/arch/riscv/kernel/cpufeature.c
> > > > @@ -201,6 +201,7 @@ void __init riscv_fill_hwcap(void)
> > > >  			} else {
> > > >  				SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF);
> > > >  				SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT);
> > > > +				SET_ISA_EXT_MAP("zbb", RISCV_ISA_EXT_ZBB);
> > > >  				SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM);
> > > >  				SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE);
> > > >  				SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC);
> > > 
> > > This one looks like it is, sstc aside. Same question as above, can I
> > > reorder this one? I'll send a patch for it if I can...
> > 
> > hmm, I don't see the difference between cpu.c above
> > (..., svpbmt, zbb, zicbom, ...) and here
> > (..., svpbmt, zbb, zicbom, ...)
> 
> sstc appears last here but first in the cpu.c hunk above.
> 
> > > > diff --git a/arch/riscv/lib/strcmp_zbb.S b/arch/riscv/lib/strcmp_zbb.S
> > > > new file mode 100644
> > > > index 000000000000..aff9b941d3ee
> > > > --- /dev/null
> > > > +++ b/arch/riscv/lib/strcmp_zbb.S
> > > > @@ -0,0 +1,91 @@
> > > > +/* SPDX-License-Identifier: GPL-2.0-only */
> > > > +/*
> > > > + * Copyright (c) 2022 VRULL GmbH
> > > > + * Author: Christoph Muellner <christoph.muellner@vrull.eu>
> > > 
> > > Is a Co-developed-by: appropriate then?
> > 
> > I'd think so ... i.e. the assembly is from Christoph, but is originally
> > part of a pending glibc patchset, hence Christoph and me
> > decided on the co-developed thingy :-) .
> 
> Check your patch again, I don't see a Co-developed-by: tag. (That's what
> I was getting at, not the validity of "Author: Christoph...")

Now I remember, I talked with Christoph about that _after_ sending
this series. So my "git log" did show the Co-developed-by
all the time, which then confused me :-)


Heiko
Andrew Jones Nov. 25, 2022, 7:49 a.m. UTC | #6
On Fri, Nov 25, 2022 at 12:51:54AM +0100, Heiko Stuebner wrote:
> Am Donnerstag, 24. November 2022, 23:32:58 CET schrieb Conor Dooley:
> > On Thu, Nov 24, 2022 at 11:23:08PM +0100, Heiko Stübner wrote:
...
> > > > > diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
> > > > > index bf9dd6764bad..66ff36a57e20 100644
> > > > > --- a/arch/riscv/kernel/cpu.c
> > > > > +++ b/arch/riscv/kernel/cpu.c
> > > > > @@ -166,6 +166,7 @@ static struct riscv_isa_ext_data isa_ext_arr[] = {
> > > > >  	__RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
> > > > >  	__RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
> > > > >  	__RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
> > > > > +	__RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
> > > > >  	__RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
> > > > >  	__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
> > > > >  	__RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
> > > > 
> > > > This one I do know that Palmer wants canonically ordered.
> > 
> > btw, idk if you noticed but I appear to have picked canonical ordering
> > as today's thing to get confused about a lot.
> > 
> > You put zbb after the S extentions - does that meant it is *not* an
> > "Additional Standard Extension" but rather a regular Z one?
> 
> This confuses me completely now :-) .
>

Can we instead post a patch to the spec that changes the order to
alphabetical? The only other option I see is to develop a tool which sorts
extensions and every RISC-V developer keeps it in their back pocket. A
kernel specific tool to check each list we want to keep sorted would be
nice too.

My preference would be to change the spec to alphabetical order, though,
because the spec isn't explicit[*] enough to write a tool that can handle
all cases. We'll end up needing to have conversations like this one to
write the tool and eventually the tool will be what everyone looks to,
rather than the spec...

[*] The spec uses words like 'can', 'should', and 'conventional'.

Thanks,
drew
Conor Dooley Nov. 25, 2022, 8:17 a.m. UTC | #7
On 25/11/2022 07:49, Andrew Jones wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> 
> On Fri, Nov 25, 2022 at 12:51:54AM +0100, Heiko Stuebner wrote:
>> Am Donnerstag, 24. November 2022, 23:32:58 CET schrieb Conor Dooley:
>>> On Thu, Nov 24, 2022 at 11:23:08PM +0100, Heiko Stübner wrote:
> ...
>>>>>> diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
>>>>>> index bf9dd6764bad..66ff36a57e20 100644
>>>>>> --- a/arch/riscv/kernel/cpu.c
>>>>>> +++ b/arch/riscv/kernel/cpu.c
>>>>>> @@ -166,6 +166,7 @@ static struct riscv_isa_ext_data isa_ext_arr[] = {
>>>>>>        __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
>>>>>>        __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
>>>>>>        __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
>>>>>> +     __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
>>>>>>        __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
>>>>>>        __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
>>>>>>        __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
>>>>>
>>>>> This one I do know that Palmer wants canonically ordered.
>>>
>>> btw, idk if you noticed but I appear to have picked canonical ordering
>>> as today's thing to get confused about a lot.
>>>
>>> You put zbb after the S extentions - does that meant it is *not* an
>>> "Additional Standard Extension" but rather a regular Z one?
>>
>> This confuses me completely now :-) .
>>
> 
> Can we instead post a patch to the spec that changes the order to
> alphabetical? The only other option I see is to develop a tool which sorts
> extensions and every RISC-V developer keeps it in their back pocket. A
> kernel specific tool to check each list we want to keep sorted would be
> nice too.

Is there some reason that these things need to be output in canonical
order in the first place by the kernel?
Could we say to hell with even trying to figure out what the correct
order is (since yeah, it'll be a conversation ~every time this comes up)
or is that breaking uAPI since someone's parser in userland may expect to
see canonical order only?
That's been my assumption for why it was re-sorted, @Palmer?

> My preference would be to change the spec to alphabetical order, though,
> because the spec isn't explicit[*] enough to write a tool that can handle
> all cases. We'll end up needing to have conversations like this one to
> write the tool and eventually the tool will be what everyone looks to,
> rather than the spec...

If it could be explicitly clear what constitutes an "additional standard
extension" that'd be good enough for me! Say:

diff --git a/src/naming.tex b/src/naming.tex
index bfd67d4..9d63a86 100644
--- a/src/naming.tex
+++ b/src/naming.tex
@@ -80,7 +80,9 @@ Standard extensions can also be named using a single ``Z'' followed by an
  alphabetical name and an optional version number.  For example,
  ``Zifencei'' names the instruction-fetch fence extension described in
  Chapter~\ref{chap:zifencei}; ``Zifencei2'' and ``Zifencei2p0'' name version
-2.0 of same.
+2.0 of same. The entire set of Additional Standard Extensions are documented
+in the Standard Unprivileged Extensions section of Table~\ref{isanametable}
+below.
  
  The first letter following the ``Z'' conventionally indicates the most closely
  related alphabetical extension category, IMAFDQCVH.  For the ``Zam''
Andrew Jones Nov. 25, 2022, 3:28 p.m. UTC | #8
On Fri, Nov 25, 2022 at 12:26:42PM +0100, Christoph Müllner wrote:
> On Fri, Nov 25, 2022 at 8:49 AM Andrew Jones <ajones@ventanamicro.com>
> wrote:
> 
> > On Fri, Nov 25, 2022 at 12:51:54AM +0100, Heiko Stuebner wrote:
> > > Am Donnerstag, 24. November 2022, 23:32:58 CET schrieb Conor Dooley:
> > > > On Thu, Nov 24, 2022 at 11:23:08PM +0100, Heiko Stübner wrote:
> > ...
> > > > > > > diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
> > > > > > > index bf9dd6764bad..66ff36a57e20 100644
> > > > > > > --- a/arch/riscv/kernel/cpu.c
> > > > > > > +++ b/arch/riscv/kernel/cpu.c
> > > > > > > @@ -166,6 +166,7 @@ static struct riscv_isa_ext_data
> > isa_ext_arr[] = {
> > > > > > >       __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
> > > > > > >       __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
> > > > > > >       __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
> > > > > > > +     __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
> > > > > > >       __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
> > > > > > >       __RISCV_ISA_EXT_DATA(zihintpause,
> > RISCV_ISA_EXT_ZIHINTPAUSE),
> > > > > > >       __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
> > > > > >
> > > > > > This one I do know that Palmer wants canonically ordered.
> > > >
> > > > btw, idk if you noticed but I appear to have picked canonical ordering
> > > > as today's thing to get confused about a lot.
> > > >
> > > > You put zbb after the S extentions - does that meant it is *not* an
> > > > "Additional Standard Extension" but rather a regular Z one?
> > >
> > > This confuses me completely now :-) .
> > >
> >
> > Can we instead post a patch to the spec that changes the order to
> > alphabetical? The only other option I see is to develop a tool which sorts
> > extensions and every RISC-V developer keeps it in their back pocket. A
> > kernel specific tool to check each list we want to keep sorted would be
> > nice too.
> >
> > My preference would be to change the spec to alphabetical order, though,
> > because the spec isn't explicit[*] enough to write a tool that can handle
> > all cases. We'll end up needing to have conversations like this one to
> > write the tool and eventually the tool will be what everyone looks to,
> > rather than the spec...
> >
> > [*] The spec uses words like 'can', 'should', and 'conventional'.
> >
> 
> The unpriv spec is clear about the canonical order (table "Standard ISA
> extension names"):

The caption under table 27.1 does indeed declare the table defines the
canonical order and that it *must* be used for the name string, but
almost everywhere else in chapter 27 the word "should" is used to suggest
how extensions be ordered (only X-extensions say where they must be).

> 1) Base ISA
> 2) Standard Unpriv Extension (non alphabetical)

The 'non alphabetical' part makes this a PITA.

And section 27.6 says the first letter "conventionally indicates...". I
suppose we can assume it "must indicate"?

> 3) Standard Supervisor-Level Extensions

Are the conventions for the first character of S-extensions defined? I've
seen 'Ss' for "Privileged arch and Supervisor-level extensions", e.g.
Sscofpmf. 'Sv' for virtual memory (I guess) related extensions, e.g.
Svinval, and we appear to be using alphabetical order for them.

> 4) Standard Machine-Level Extensions
> 5) Non-Standard (Vendor) Extensions

Anyway, for the relatively easier problem of this kernel list and this
patch, we could do something with defines like below in order to try and
keep the order right.

/*
 * Each sub-list is sorted alphabetically.
 */
#define S_EXTENSIONS                                                    \
        __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),         \
        __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),                 \
        __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),           \
        __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),

#define Zi_EXTENSIONS                                                   \
        __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),             \
        __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),

#define Zb_EXTENSIONS                                                   \
        __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZICBOM),                \

static struct riscv_isa_ext_data isa_ext_arr[] = {
        Zi_EXTENSIONS
        Zb_EXTENSIONS
        S_EXTENSIONS
        __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
};

Thanks,
drew
Christoph Müllner Nov. 25, 2022, 4:35 p.m. UTC | #9
On Fri, Nov 25, 2022 at 4:28 PM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Fri, Nov 25, 2022 at 12:26:42PM +0100, Christoph Müllner wrote:
> > On Fri, Nov 25, 2022 at 8:49 AM Andrew Jones <ajones@ventanamicro.com>
> > wrote:
> >
> > > On Fri, Nov 25, 2022 at 12:51:54AM +0100, Heiko Stuebner wrote:
> > > > Am Donnerstag, 24. November 2022, 23:32:58 CET schrieb Conor Dooley:
> > > > > On Thu, Nov 24, 2022 at 11:23:08PM +0100, Heiko Stübner wrote:
> > > ...
> > > > > > > > diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
> > > > > > > > index bf9dd6764bad..66ff36a57e20 100644
> > > > > > > > --- a/arch/riscv/kernel/cpu.c
> > > > > > > > +++ b/arch/riscv/kernel/cpu.c
> > > > > > > > @@ -166,6 +166,7 @@ static struct riscv_isa_ext_data
> > > isa_ext_arr[] = {
> > > > > > > >       __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
> > > > > > > >       __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
> > > > > > > >       __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
> > > > > > > > +     __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
> > > > > > > >       __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
> > > > > > > >       __RISCV_ISA_EXT_DATA(zihintpause,
> > > RISCV_ISA_EXT_ZIHINTPAUSE),
> > > > > > > >       __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
> > > > > > >
> > > > > > > This one I do know that Palmer wants canonically ordered.
> > > > >
> > > > > btw, idk if you noticed but I appear to have picked canonical ordering
> > > > > as today's thing to get confused about a lot.
> > > > >
> > > > > You put zbb after the S extentions - does that meant it is *not* an
> > > > > "Additional Standard Extension" but rather a regular Z one?
> > > >
> > > > This confuses me completely now :-) .
> > > >
> > >
> > > Can we instead post a patch to the spec that changes the order to
> > > alphabetical? The only other option I see is to develop a tool which sorts
> > > extensions and every RISC-V developer keeps it in their back pocket. A
> > > kernel specific tool to check each list we want to keep sorted would be
> > > nice too.
> > >
> > > My preference would be to change the spec to alphabetical order, though,
> > > because the spec isn't explicit[*] enough to write a tool that can handle
> > > all cases. We'll end up needing to have conversations like this one to
> > > write the tool and eventually the tool will be what everyone looks to,
> > > rather than the spec...
> > >
> > > [*] The spec uses words like 'can', 'should', and 'conventional'.
> > >
> >
> > The unpriv spec is clear about the canonical order (table "Standard ISA
> > extension names"):
>
> The caption under table 27.1 does indeed declare the table defines the
> canonical order and that it *must* be used for the name string, but
> almost everywhere else in chapter 27 the word "should" is used to suggest
> how extensions be ordered (only X-extensions say where they must be).
>
> > 1) Base ISA
> > 2) Standard Unpriv Extension (non alphabetical)
>
> The 'non alphabetical' part makes this a PITA.
>
> And section 27.6 says the first letter "conventionally indicates...". I
> suppose we can assume it "must indicate"?

I think it is what it is and changing it would just open pointless discussions.
Further, I think that consistency is more important than trying to establish a
new second ordering scheme (regardless if that is simpler and/or better)
unless we have to overcome technical issues that would otherwise not
be possible.

>
> > 3) Standard Supervisor-Level Extensions
>
> Are the conventions for the first character of S-extensions defined? I've
> seen 'Ss' for "Privileged arch and Supervisor-level extensions", e.g.
> Sscofpmf. 'Sv' for virtual memory (I guess) related extensions, e.g.
> Svinval, and we appear to be using alphabetical order for them.
>
> > 4) Standard Machine-Level Extensions
> > 5) Non-Standard (Vendor) Extensions
>
> Anyway, for the relatively easier problem of this kernel list and this
> patch, we could do something with defines like below in order to try and
> keep the order right.
>
> /*
>  * Each sub-list is sorted alphabetically.
>  */
> #define S_EXTENSIONS                                                    \
>         __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),         \
>         __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),                 \
>         __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),           \
>         __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
>
> #define Zi_EXTENSIONS                                                   \
>         __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),             \
>         __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
>
> #define Zb_EXTENSIONS                                                   \
>         __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZICBOM),                \
>
> static struct riscv_isa_ext_data isa_ext_arr[] = {
>         Zi_EXTENSIONS
>         Zb_EXTENSIONS
>         S_EXTENSIONS
>         __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
> };

It might be worth to look how Binutils are grouping them:
  https://sourceware.org/git/?p=binutils-gdb.git;a=blob;f=bfd/elfxx-riscv.c#l1261

They also group extensions together but use the following groups:

const struct riscv_supported_ext *riscv_all_supported_ext[] =
{
  riscv_supported_std_ext,
  riscv_supported_std_z_ext,
  riscv_supported_std_s_ext,
  riscv_supported_std_zxm_ext,
  riscv_supported_vendor_x_ext,
  NULL
};

BR
Christoph
Conor Dooley Nov. 25, 2022, 4:36 p.m. UTC | #10
Hey Drew, Christoph,

@Christoph, I did not receive your mail unfortunately as it appears to
be html. I assume you know why that's a problem and had some mailer
issues etc.

also + CC Samuel Ortiz since we discussed this elsewhere...

On Fri, Nov 25, 2022 at 04:28:21PM +0100, Andrew Jones wrote:
> On Fri, Nov 25, 2022 at 12:26:42PM +0100, Christoph Müllner wrote:
> > On Fri, Nov 25, 2022 at 8:49 AM Andrew Jones <ajones@ventanamicro.com>
> > wrote:
> > 
> > > On Fri, Nov 25, 2022 at 12:51:54AM +0100, Heiko Stuebner wrote:
> > > > Am Donnerstag, 24. November 2022, 23:32:58 CET schrieb Conor Dooley:
> > > > > On Thu, Nov 24, 2022 at 11:23:08PM +0100, Heiko Stübner wrote:
> > > ...
> > > > > > > > diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
> > > > > > > > index bf9dd6764bad..66ff36a57e20 100644
> > > > > > > > --- a/arch/riscv/kernel/cpu.c
> > > > > > > > +++ b/arch/riscv/kernel/cpu.c
> > > > > > > > @@ -166,6 +166,7 @@ static struct riscv_isa_ext_data
> > > isa_ext_arr[] = {
> > > > > > > >       __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
> > > > > > > >       __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
> > > > > > > >       __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
> > > > > > > > +     __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
> > > > > > > >       __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
> > > > > > > >       __RISCV_ISA_EXT_DATA(zihintpause,
> > > RISCV_ISA_EXT_ZIHINTPAUSE),
> > > > > > > >       __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
> > > > > > >
> > > > > > > This one I do know that Palmer wants canonically ordered.
> > > > >
> > > > > btw, idk if you noticed but I appear to have picked canonical ordering
> > > > > as today's thing to get confused about a lot.
> > > > >
> > > > > You put zbb after the S extentions - does that meant it is *not* an
> > > > > "Additional Standard Extension" but rather a regular Z one?
> > > >
> > > > This confuses me completely now :-) .
> > > >
> > >
> > > Can we instead post a patch to the spec that changes the order to
> > > alphabetical? The only other option I see is to develop a tool which sorts
> > > extensions and every RISC-V developer keeps it in their back pocket. A
> > > kernel specific tool to check each list we want to keep sorted would be
> > > nice too.
> > >
> > > My preference would be to change the spec to alphabetical order, though,
> > > because the spec isn't explicit[*] enough to write a tool that can handle
> > > all cases. We'll end up needing to have conversations like this one to
> > > write the tool and eventually the tool will be what everyone looks to,
> > > rather than the spec...
> > >
> > > [*] The spec uses words like 'can', 'should', and 'conventional'.
> > >
> > 
> > The unpriv spec is clear about the canonical order (table "Standard ISA
> > extension names"):

So I went reading the isa-manual again for the nth time.. It seems that I
missed the sentence:
Standard machine-level instruction-set extensions are prefixed with the
three letters ``Zxm''. Woops & apologies @Samuel!

However, that table is only clear (as pointed out by Drew) on a
categorical level.

> The caption under table 27.1 does indeed declare the table defines the
> canonical order and that it *must* be used for the name string, but
> almost everywhere else in chapter 27 the word "should" is used to suggest
> how extensions be ordered (only X-extensions say where they must be).
> 
> > 1) Base ISA
> > 2) Standard Unpriv Extension (non alphabetical)
> 
> The 'non alphabetical' part makes this a PITA.
> 
> And section 27.6 says the first letter "conventionally indicates...". I
> suppose we can assume it "must indicate"?

Convention would imply it *should* but not that it must. I think, for
the kernel, we take a stronger view and say that we *will* put them in
the listed order in that table.

> > 3) Standard Supervisor-Level Extensions
> 
> Are the conventions for the first character of S-extensions defined? I've
> seen 'Ss' for "Privileged arch and Supervisor-level extensions", e.g.
> Sscofpmf. 'Sv' for virtual memory (I guess) related extensions, e.g.
> Svinval, and we appear to be using alphabetical order for them.

Again, appears to be a "should". I'd vote for doing everyone a favour
and making it "must" in this case kernel wise.
> 
> > 4) Standard Machine-Level Extensions
> > 5) Non-Standard (Vendor) Extensions
> 
> Anyway, for the relatively easier problem of this kernel list and this
> patch, we could do something with defines like below in order to try and
> keep the order right.
> 
> /*
>  * Each sub-list is sorted alphabetically.
>  */
> #define S_EXTENSIONS                                                    \
>         __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),         \
>         __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),                 \
>         __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),           \
>         __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
> 
> #define Zi_EXTENSIONS                                                   \
>         __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),             \
>         __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
> 
> #define Zb_EXTENSIONS                                                   \
>         __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZICBOM),                \
> 
> static struct riscv_isa_ext_data isa_ext_arr[] = {
>         Zi_EXTENSIONS
>         Zb_EXTENSIONS
>         S_EXTENSIONS
>         __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX)

The above LGTM (apart from the accident in the zbb entry!)

Thanks for all putting up with my confusion here - although the
lack of clarity appears to be rather widespread hahaha.

Thanks,
Conor.
Conor Dooley Nov. 25, 2022, 4:39 p.m. UTC | #11
On Fri, Nov 25, 2022 at 05:35:39PM +0100, Christoph Müllner wrote:
> On Fri, Nov 25, 2022 at 4:28 PM Andrew Jones <ajones@ventanamicro.com> wrote:

> > /*
> >  * Each sub-list is sorted alphabetically.
> >  */
> > #define S_EXTENSIONS                                                    \
> >         __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),         \
> >         __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),                 \
> >         __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),           \
> >         __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
> >
> > #define Zi_EXTENSIONS                                                   \
> >         __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),             \
> >         __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
> >
> > #define Zb_EXTENSIONS                                                   \
> >         __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZICBOM),                \
> >
> > static struct riscv_isa_ext_data isa_ext_arr[] = {
> >         Zi_EXTENSIONS
> >         Zb_EXTENSIONS
> >         S_EXTENSIONS
> >         __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
> > };
> 
> It might be worth to look how Binutils are grouping them:
>   https://sourceware.org/git/?p=binutils-gdb.git;a=blob;f=bfd/elfxx-riscv.c#l1261
> 
> They also group extensions together but use the following groups:
> 
> const struct riscv_supported_ext *riscv_all_supported_ext[] =
> {
>   riscv_supported_std_ext,
>   riscv_supported_std_z_ext,
>   riscv_supported_std_s_ext,
>   riscv_supported_std_zxm_ext,
>   riscv_supported_vendor_x_ext,
>   NULL
> };

I think in this case, Drew's differentiation between Zi & Zb is an
improvement :)
Christoph Müllner Nov. 25, 2022, 5:02 p.m. UTC | #12
On Fri, Nov 25, 2022 at 5:39 PM Conor Dooley <conor@kernel.org> wrote:
>
> On Fri, Nov 25, 2022 at 05:35:39PM +0100, Christoph Müllner wrote:
> > On Fri, Nov 25, 2022 at 4:28 PM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> > > /*
> > >  * Each sub-list is sorted alphabetically.
> > >  */
> > > #define S_EXTENSIONS                                                    \
> > >         __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),         \
> > >         __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),                 \
> > >         __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),           \
> > >         __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
> > >
> > > #define Zi_EXTENSIONS                                                   \
> > >         __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),             \
> > >         __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
> > >
> > > #define Zb_EXTENSIONS                                                   \
> > >         __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZICBOM),                \
> > >
> > > static struct riscv_isa_ext_data isa_ext_arr[] = {
> > >         Zi_EXTENSIONS
> > >         Zb_EXTENSIONS
> > >         S_EXTENSIONS
> > >         __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
> > > };
> >
> > It might be worth to look how Binutils are grouping them:
> >   https://sourceware.org/git/?p=binutils-gdb.git;a=blob;f=bfd/elfxx-riscv.c#l1261
> >
> > They also group extensions together but use the following groups:
> >
> > const struct riscv_supported_ext *riscv_all_supported_ext[] =
> > {
> >   riscv_supported_std_ext,
> >   riscv_supported_std_z_ext,
> >   riscv_supported_std_s_ext,
> >   riscv_supported_std_zxm_ext,
> >   riscv_supported_vendor_x_ext,
> >   NULL
> > };
>
> I think in this case, Drew's differentiation between Zi & Zb is an
> improvement :)


That might be the case, but it also brings up the following questions:
Where to put Zf* extensions (between Zi and Zb)?
Where to put Zk* and Zv* (after Zb*)?
Adding a subgroup for each Z{N}* extension might result in many small
groups with little benefit.
Conor Dooley Nov. 25, 2022, 5:11 p.m. UTC | #13
On Fri, Nov 25, 2022 at 06:02:12PM +0100, Christoph Müllner wrote:
> On Fri, Nov 25, 2022 at 5:39 PM Conor Dooley <conor@kernel.org> wrote:
> >
> > On Fri, Nov 25, 2022 at 05:35:39PM +0100, Christoph Müllner wrote:
> > > On Fri, Nov 25, 2022 at 4:28 PM Andrew Jones <ajones@ventanamicro.com> wrote:
> >
> > > > /*
> > > >  * Each sub-list is sorted alphabetically.
> > > >  */
> > > > #define S_EXTENSIONS                                                    \
> > > >         __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),         \
> > > >         __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),                 \
> > > >         __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),           \
> > > >         __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
> > > >
> > > > #define Zi_EXTENSIONS                                                   \
> > > >         __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),             \
> > > >         __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
> > > >
> > > > #define Zb_EXTENSIONS                                                   \
> > > >         __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZICBOM),                \
> > > >
> > > > static struct riscv_isa_ext_data isa_ext_arr[] = {
> > > >         Zi_EXTENSIONS
> > > >         Zb_EXTENSIONS
> > > >         S_EXTENSIONS
> > > >         __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
> > > > };
> > >
> > > It might be worth to look how Binutils are grouping them:
> > >   https://sourceware.org/git/?p=binutils-gdb.git;a=blob;f=bfd/elfxx-riscv.c#l1261
> > >
> > > They also group extensions together but use the following groups:
> > >
> > > const struct riscv_supported_ext *riscv_all_supported_ext[] =
> > > {
> > >   riscv_supported_std_ext,
> > >   riscv_supported_std_z_ext,
> > >   riscv_supported_std_s_ext,
> > >   riscv_supported_std_zxm_ext,
> > >   riscv_supported_vendor_x_ext,
> > >   NULL
> > > };
> >
> > I think in this case, Drew's differentiation between Zi & Zb is an
> > improvement :)
> 
> 
> That might be the case, but it also brings up the following questions:
> Where to put Zf* extensions (between Zi and Zb)?

I don't think that differs either way?

> Where to put Zk* and Zv* (after Zb*)?

Ditto?

> Adding a subgroup for each Z{N}* extension might result in many small
> groups with little benefit.

If it prevents another one of these ordering episodes, I think it's
worth doing! If a "group" only has one element it could alternatively be
added directly also.

Also neat, you got your mailer sorted :)
Christoph Müllner Nov. 25, 2022, 5:42 p.m. UTC | #14
On Fri, Nov 25, 2022 at 6:11 PM Conor Dooley <conor@kernel.org> wrote:
>
> On Fri, Nov 25, 2022 at 06:02:12PM +0100, Christoph Müllner wrote:
> > On Fri, Nov 25, 2022 at 5:39 PM Conor Dooley <conor@kernel.org> wrote:
> > >
> > > On Fri, Nov 25, 2022 at 05:35:39PM +0100, Christoph Müllner wrote:
> > > > On Fri, Nov 25, 2022 at 4:28 PM Andrew Jones <ajones@ventanamicro.com> wrote:
> > >
> > > > > /*
> > > > >  * Each sub-list is sorted alphabetically.
> > > > >  */
> > > > > #define S_EXTENSIONS                                                    \
> > > > >         __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),         \
> > > > >         __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),                 \
> > > > >         __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),           \
> > > > >         __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
> > > > >
> > > > > #define Zi_EXTENSIONS                                                   \
> > > > >         __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),             \
> > > > >         __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
> > > > >
> > > > > #define Zb_EXTENSIONS                                                   \
> > > > >         __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZICBOM),                \
> > > > >
> > > > > static struct riscv_isa_ext_data isa_ext_arr[] = {
> > > > >         Zi_EXTENSIONS
> > > > >         Zb_EXTENSIONS
> > > > >         S_EXTENSIONS
> > > > >         __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
> > > > > };
> > > >
> > > > It might be worth to look how Binutils are grouping them:
> > > >   https://sourceware.org/git/?p=binutils-gdb.git;a=blob;f=bfd/elfxx-riscv.c#l1261
> > > >
> > > > They also group extensions together but use the following groups:
> > > >
> > > > const struct riscv_supported_ext *riscv_all_supported_ext[] =
> > > > {
> > > >   riscv_supported_std_ext,
> > > >   riscv_supported_std_z_ext,
> > > >   riscv_supported_std_s_ext,
> > > >   riscv_supported_std_zxm_ext,
> > > >   riscv_supported_vendor_x_ext,
> > > >   NULL
> > > > };
> > >
> > > I think in this case, Drew's differentiation between Zi & Zb is an
> > > improvement :)
> >
> >
> > That might be the case, but it also brings up the following questions:
> > Where to put Zf* extensions (between Zi and Zb)?
>
> I don't think that differs either way?
>
> > Where to put Zk* and Zv* (after Zb*)?
>
> Ditto?
>
> > Adding a subgroup for each Z{N}* extension might result in many small
> > groups with little benefit.
>
> If it prevents another one of these ordering episodes, I think it's
> worth doing! If a "group" only has one element it could alternatively be
> added directly also.

Sounds good then.

>
> Also neat, you got your mailer sorted :)

One just needs to remember to check the "Plain text mode" box.
diff mbox series

Patch

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index acfc4d298aab..56633931e808 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -411,6 +411,29 @@  config RISCV_ISA_SVPBMT
 
 	   If you don't know what to do here, say Y.
 
+config TOOLCHAIN_HAS_ZBB
+	bool
+	default y
+	depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zbb)
+	depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zbb)
+	depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900
+
+config RISCV_ISA_ZBB
+	bool "Zbb extension support for "
+	depends on TOOLCHAIN_HAS_ZBB
+	depends on !XIP_KERNEL && MMU
+	select RISCV_ALTERNATIVE
+	default y
+	help
+	   Adds support to dynamically detect the presence of the ZBB
+	   extension (basic bit manipulation) and enable its usage.
+
+	   The Zbb extension provides instructions to accelerate a number
+	   of bit-specific operations (count bit population, sign extending,
+	   bitrotation, etc).
+
+	   If you don't know what to do here, say Y.
+
 config TOOLCHAIN_HAS_ZICBOM
 	bool
 	default y
diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
index 4180312d2a70..95e626b7281e 100644
--- a/arch/riscv/include/asm/errata_list.h
+++ b/arch/riscv/include/asm/errata_list.h
@@ -24,7 +24,8 @@ 
 
 #define	CPUFEATURE_SVPBMT 0
 #define	CPUFEATURE_ZICBOM 1
-#define	CPUFEATURE_NUMBER 2
+#define	CPUFEATURE_ZBB 2
+#define	CPUFEATURE_NUMBER 3
 
 #ifdef __ASSEMBLY__
 
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index b22525290073..ac5555fd9788 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -59,6 +59,7 @@  enum riscv_isa_ext_id {
 	RISCV_ISA_EXT_ZIHINTPAUSE,
 	RISCV_ISA_EXT_SSTC,
 	RISCV_ISA_EXT_SVINVAL,
+	RISCV_ISA_EXT_ZBB,
 	RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX,
 };
 
diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
index b98481d2d154..806c402c874e 100644
--- a/arch/riscv/include/asm/string.h
+++ b/arch/riscv/include/asm/string.h
@@ -6,6 +6,8 @@ 
 #ifndef _ASM_RISCV_STRING_H
 #define _ASM_RISCV_STRING_H
 
+#include <asm/alternative-macros.h>
+#include <asm/errata_list.h>
 #include <linux/types.h>
 #include <linux/linkage.h>
 
@@ -21,6 +23,7 @@  extern asmlinkage void *__memmove(void *, const void *, size_t);
 
 #define __HAVE_ARCH_STRCMP
 extern asmlinkage int __strcmp_generic(const char *cs, const char *ct);
+extern asmlinkage int __strcmp_zbb(const char *cs, const char *ct);
 
 static inline int strcmp(const char *cs, const char *ct)
 {
@@ -31,10 +34,14 @@  static inline int strcmp(const char *cs, const char *ct)
 	register const char *a1 asm("a1") = ct;
 	register int a0_out asm("a0");
 
-	asm volatile("call __strcmp_generic\n\t"
+	asm volatile(
+		ALTERNATIVE(
+			"call __strcmp_generic\n\t",
+			"call __strcmp_zbb\n\t",
+			0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB)
 		: "=r"(a0_out)
 		: "r"(a0), "r"(a1)
-		: "ra", "t0", "t1", "t2");
+		: "ra", "t0", "t1", "t2", "t3", "t4", "t5");
 
 	return a0_out;
 #endif
@@ -43,6 +50,8 @@  static inline int strcmp(const char *cs, const char *ct)
 #define __HAVE_ARCH_STRNCMP
 extern asmlinkage int __strncmp_generic(const char *cs,
 					const char *ct, size_t count);
+extern asmlinkage int __strncmp_zbb(const char *cs,
+				    const char *ct, size_t count);
 
 static inline int strncmp(const char *cs, const char *ct, size_t count)
 {
@@ -54,10 +63,14 @@  static inline int strncmp(const char *cs, const char *ct, size_t count)
 	register size_t a2 asm("a2") = count;
 	register int a0_out asm("a0");
 
-	asm volatile("call __strncmp_generic\n\t"
+	asm volatile(
+		ALTERNATIVE(
+			"call __strncmp_generic\n\t",
+			"call __strncmp_zbb\n\t",
+			0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB)
 		: "=r"(a0_out)
 		: "r"(a0), "r"(a1), "r"(a2)
-		: "ra", "t0", "t1", "t2");
+		: "ra", "t0", "t1", "t2", "t3", "t4", "t5", "t6");
 
 	return a0_out;
 #endif
@@ -65,6 +78,7 @@  static inline int strncmp(const char *cs, const char *ct, size_t count)
 
 #define __HAVE_ARCH_STRLEN
 extern asmlinkage __kernel_size_t __strlen_generic(const char *);
+extern asmlinkage __kernel_size_t __strlen_zbb(const char *);
 
 static inline __kernel_size_t strlen(const char *s)
 {
@@ -75,10 +89,13 @@  static inline __kernel_size_t strlen(const char *s)
 	register int a0_out asm("a0");
 
 	asm volatile(
-		"call __strlen_generic\n\t"
+		ALTERNATIVE(
+			"call __strlen_generic\n\t",
+			"call __strlen_zbb\n\t",
+			0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB)
 		: "=r"(a0_out)
 		: "r"(a0)
-		: "ra", "t0", "t1");
+		: "ra", "t0", "t1", "t2", "t3");
 
 	return a0_out;
 #endif
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index bf9dd6764bad..66ff36a57e20 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -166,6 +166,7 @@  static struct riscv_isa_ext_data isa_ext_arr[] = {
 	__RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
 	__RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
 	__RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
+	__RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
 	__RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
 	__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
 	__RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 026512ca9c4c..f19b9d4e2dca 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -201,6 +201,7 @@  void __init riscv_fill_hwcap(void)
 			} else {
 				SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF);
 				SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT);
+				SET_ISA_EXT_MAP("zbb", RISCV_ISA_EXT_ZBB);
 				SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM);
 				SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE);
 				SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC);
@@ -278,6 +279,20 @@  static bool __init_or_module cpufeature_probe_zicbom(unsigned int stage)
 	return true;
 }
 
+static bool __init_or_module cpufeature_probe_zbb(unsigned int stage)
+{
+	if (!IS_ENABLED(CONFIG_RISCV_ISA_ZBB))
+		return false;
+
+	if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
+		return false;
+
+	if (!riscv_isa_extension_available(NULL, ZBB))
+		return false;
+
+	return true;
+}
+
 /*
  * Probe presence of individual extensions.
  *
@@ -295,6 +310,9 @@  static u32 __init_or_module cpufeature_probe(unsigned int stage)
 	if (cpufeature_probe_zicbom(stage))
 		cpu_req_feature |= BIT(CPUFEATURE_ZICBOM);
 
+	if (cpufeature_probe_zbb(stage))
+		cpu_req_feature |= BIT(CPUFEATURE_ZBB);
+
 	return cpu_req_feature;
 }
 
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 6c74b0bedd60..b632483f851c 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -4,8 +4,11 @@  lib-y			+= memcpy.o
 lib-y			+= memset.o
 lib-y			+= memmove.o
 lib-y			+= strcmp.o
+lib-$(CONFIG_RISCV_ISA_ZBB) += strcmp_zbb.o
 lib-y			+= strlen.o
+lib-$(CONFIG_RISCV_ISA_ZBB) += strlen_zbb.o
 lib-y			+= strncmp.o
+lib-$(CONFIG_RISCV_ISA_ZBB) += strncmp_zbb.o
 lib-$(CONFIG_MMU)	+= uaccess.o
 lib-$(CONFIG_64BIT)	+= tishift.o
 
diff --git a/arch/riscv/lib/strcmp_zbb.S b/arch/riscv/lib/strcmp_zbb.S
new file mode 100644
index 000000000000..aff9b941d3ee
--- /dev/null
+++ b/arch/riscv/lib/strcmp_zbb.S
@@ -0,0 +1,91 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 VRULL GmbH
+ * Author: Christoph Muellner <christoph.muellner@vrull.eu>
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm-generic/export.h>
+
+#define src1		a0
+#define result		a0
+#define src2		t5
+#define data1		t0
+#define data2		t1
+#define align		t2
+#define data1_orcb	t3
+#define m1		t4
+
+.option push
+.option arch,+zbb
+
+/* int __strcmp_zbb(const char *cs, const char *ct) */
+ENTRY(__strcmp_zbb)
+	/*
+	 * Returns
+	 *   a0 - comparison result, like strncmp
+	 *
+	 * Parameters
+	 *   a0 - string1
+	 *   a1 - string2
+	 *   a2 - number of characters to compare
+	 *
+	 * Clobbers
+	 *   t0, t1, t2, t3, t4, t5
+	 */
+	mv	src2, a1
+
+	or	align, src1, src2
+	li	m1, -1
+	and	align, align, SZREG-1
+	bnez	align, 3f
+	/* Main loop for aligned string.  */
+	.p2align 3
+1:
+	REG_L	data1, 0(src1)
+	REG_L	data2, 0(src2)
+	orc.b	data1_orcb, data1
+	bne	data1_orcb, m1, 2f
+	addi	src1, src1, SZREG
+	addi	src2, src2, SZREG
+	beq	data1, data2, 1b
+
+	/* Words don't match, and no null byte in the first
+	 * word. Get bytes in big-endian order and compare.  */
+#ifndef CONFIG_CPU_BIG_ENDIAN
+	rev8	data1, data1
+	rev8	data2, data2
+#endif
+	/* Synthesize (data1 >= data2) ? 1 : -1 in a branchless sequence.  */
+	sltu	result, data1, data2
+	neg	result, result
+	ori	result, result, 1
+	ret
+
+2:
+	/* Found a null byte.
+	 * If words don't match, fall back to simple loop.  */
+	bne	data1, data2, 3f
+
+	/* Otherwise, strings are equal.  */
+	li	result, 0
+	ret
+
+	/* Simple loop for misaligned strings.  */
+	.p2align 3
+3:
+	lbu	data1, 0(src1)
+	lbu	data2, 0(src2)
+	addi	src1, src1, 1
+	addi	src2, src2, 1
+	bne	data1, data2, 4f
+	bnez	data1, 3b
+
+4:
+	sub	result, data1, data2
+	ret
+END(__strcmp_zbb)
+EXPORT_SYMBOL(__strcmp_zbb)
+
+.option pop
diff --git a/arch/riscv/lib/strlen_zbb.S b/arch/riscv/lib/strlen_zbb.S
new file mode 100644
index 000000000000..bc8d3607a32f
--- /dev/null
+++ b/arch/riscv/lib/strlen_zbb.S
@@ -0,0 +1,98 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 VRULL GmbH
+ * Author: Christoph Muellner <christoph.muellner@vrull.eu>
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm-generic/export.h>
+
+#define src		a0
+#define result		a0
+#define addr		t0
+#define data		t1
+#define offset		t2
+#define offset_bits	t2
+#define valid_bytes	t3
+#define m1		t3
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+# define CZ	clz
+# define SHIFT	sll
+#else
+# define CZ	ctz
+# define SHIFT	srl
+#endif
+
+.option push
+.option arch,+zbb
+
+/* int __strlen_zbb(const char *s) */
+ENTRY(__strlen_zbb)
+	/*
+	 * Returns
+	 *   a0 - string length
+	 *
+	 * Parameters
+	 *   a0 - String to measure
+	 *
+	 * Clobbers
+	 *   t0, t1, t2, t3
+	 */
+
+	/* Number of irrelevant bytes in the first word.  */
+	andi	offset, src, SZREG-1
+	/* Align pointer.  */
+	andi	addr, src, -SZREG
+
+	li	valid_bytes, SZREG
+	sub	valid_bytes, valid_bytes, offset
+	slli	offset_bits, offset, RISCV_LGPTR
+
+	/* Get the first word.  */
+	REG_L	data, 0(addr)
+	/* Shift away the partial data we loaded to remove the irrelevant bytes
+	 * preceding the string with the effect of adding NUL bytes at the
+	 * end of the string.  */
+	SHIFT	data, data, offset_bits
+	/* Convert non-NUL into 0xff and NUL into 0x00.  */
+	orc.b	data, data
+	/* Convert non-NUL into 0x00 and NUL into 0xff.  */
+	not	data, data
+	/* Search for the first set bit (corresponding to a NUL byte in the
+	 * original chunk).  */
+	CZ	data, data
+	/* The first chunk is special: commpare against the number
+	 * of valid bytes in this chunk.  */
+	srli	result, data, 3
+	bgtu	valid_bytes, result, 3f
+
+	/* Prepare for the word comparison loop.  */
+	addi	offset, addr, SZREG
+	li	m1, -1
+
+	/* Our critical loop is 4 instructions and processes data in
+	 * 4 byte or 8 byte chunks.  */
+	.p2align 3
+1:
+	REG_L	data, SZREG(addr)
+	addi	addr, addr, SZREG
+	orc.b	data, data
+	beq	data, m1, 1b
+2:
+	not	data, data
+	CZ	data, data
+	/* Get number of processed words.  */
+	sub	offset, addr, offset
+	/* Add number of characters in the first word.  */
+	add	result, result, offset
+	srli	data, data, 3
+	/* Add number of characters in the last word.  */
+	add	result, result, data
+3:
+	ret
+END(__strlen_zbb)
+EXPORT_SYMBOL(__strlen_zbb)
+
+.option pop
diff --git a/arch/riscv/lib/strncmp_zbb.S b/arch/riscv/lib/strncmp_zbb.S
new file mode 100644
index 000000000000..852c8425d238
--- /dev/null
+++ b/arch/riscv/lib/strncmp_zbb.S
@@ -0,0 +1,106 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 VRULL GmbH
+ * Author: Christoph Muellner <christoph.muellner@vrull.eu>
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm-generic/export.h>
+
+#define src1		a0
+#define result		a0
+#define src2		t6
+#define len		a2
+#define data1		t0
+#define data2		t1
+#define align		t2
+#define data1_orcb	t3
+#define limit		t4
+#define m1		t5
+
+.option push
+.option arch,+zbb
+
+/* int __strncmp_zbb(const char *cs, const char *ct, size_t count) */
+ENTRY(__strncmp_zbb)
+	/*
+	 * Returns
+	 *   a0 - comparison result, like strncmp
+	 *
+	 * Parameters
+	 *   a0 - string1
+	 *   a1 - string2
+	 *   a2 - number of characters to compare
+	 *
+	 * Clobbers
+	 *   t0, t1, t2, t3, t4, t5, t6
+	 */
+	mv	src2, a1
+
+	or	align, src1, src2
+	li	m1, -1
+	and	align, align, SZREG-1
+	add	limit, src1, len
+	bnez	align, 4f
+
+	/* Adjust limit for fast-path.  */
+	addi	limit, limit, -SZREG
+	/* Main loop for aligned string.  */
+	.p2align 3
+1:
+	bgt	src1, limit, 3f
+	REG_L	data1, 0(src1)
+	REG_L	data2, 0(src2)
+	orc.b	data1_orcb, data1
+	bne	data1_orcb, m1, 2f
+	addi	src1, src1, SZREG
+	addi	src2, src2, SZREG
+	beq	data1, data2, 1b
+
+	/* Words don't match, and no null byte in the first
+	 * word. Get bytes in big-endian order and compare.  */
+#ifndef CONFIG_CPU_BIG_ENDIAN
+	rev8	data1, data1
+	rev8	data2, data2
+#endif
+	/* Synthesize (data1 >= data2) ? 1 : -1 in a branchless sequence.  */
+	sltu	result, data1, data2
+	neg	result, result
+	ori	result, result, 1
+	ret
+
+2:
+	/* Found a null byte.
+	 * If words don't match, fall back to simple loop.  */
+	bne	data1, data2, 3f
+
+	/* Otherwise, strings are equal.  */
+	li	result, 0
+	ret
+
+	/* Simple loop for misaligned strings.  */
+3:
+	/* Restore limit for slow-path.  */
+	addi	limit, limit, SZREG
+	.p2align 3
+4:
+	bge	src1, limit, 6f
+	lbu	data1, 0(src1)
+	lbu	data2, 0(src2)
+	addi	src1, src1, 1
+	addi	src2, src2, 1
+	bne	data1, data2, 5f
+	bnez	data1, 4b
+
+5:
+	sub	result, data1, data2
+	ret
+
+6:
+	li	result, 0
+	ret
+END(__strncmp_zbb)
+EXPORT_SYMBOL(__strncmp_zbb)
+
+.option pop