diff mbox series

[1/5] riscv: Checksum header

Message ID 20230826-optimize_checksum-v1-1-937501b4522a@rivosinc.com (mailing list archive)
State Superseded
Headers show
Series riscv: Add fine-tuned checksum functions | expand

Checks

Context Check Description
conchuod/cover_letter success Series has a cover letter
conchuod/tree_selection success Guessed tree name to be for-next at HEAD 9f944d2e0ab3
conchuod/fixes_present success Fixes tag not required for -next series
conchuod/maintainers_pattern success MAINTAINERS pattern errors before the patch: 4 and now 4
conchuod/verify_signedoff success Signed-off-by tag matches author and committer
conchuod/kdoc success Errors and warnings before: 0 this patch: 0
conchuod/build_rv64_clang_allmodconfig fail Failed to build the tree with this patch.
conchuod/module_param success Was 0 now: 0
conchuod/build_rv64_gcc_allmodconfig fail Failed to build the tree with this patch.
conchuod/build_rv32_defconfig success Build OK
conchuod/dtb_warn_rv64 success Errors and warnings before: 12 this patch: 12
conchuod/header_inline success No static functions without inline keyword in header files
conchuod/checkpatch warning CHECK: extern prototypes should be avoided in .h files WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
conchuod/build_rv64_nommu_k210_defconfig fail Build failed
conchuod/verify_fixes success No Fixes tag
conchuod/build_rv64_nommu_virt_defconfig fail Build failed

Commit Message

Charlie Jenkins Aug. 27, 2023, 1:26 a.m. UTC
Provide checksum algorithms that have been designed to leverage riscv
instructions such as rotate. In 64-bit, can take advantage of the larger
register to avoid some overflow checking.

Add configuration for Zba extension and add march for Zba and Zbb.

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
---
 arch/riscv/Kconfig                | 23 +++++++++++
 arch/riscv/Makefile               |  2 +
 arch/riscv/include/asm/checksum.h | 86 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 111 insertions(+)

Comments

Conor Dooley Aug. 27, 2023, 1:42 a.m. UTC | #1
On Sat, Aug 26, 2023 at 06:26:06PM -0700, Charlie Jenkins wrote:
> Provide checksum algorithms that have been designed to leverage riscv
> instructions such as rotate. In 64-bit, can take advantage of the larger
> register to avoid some overflow checking.
> 
> Add configuration for Zba extension and add march for Zba and Zbb.
> 
> Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
> ---
>  arch/riscv/Kconfig                | 23 +++++++++++
>  arch/riscv/Makefile               |  2 +
>  arch/riscv/include/asm/checksum.h | 86 +++++++++++++++++++++++++++++++++++++++
>  3 files changed, 111 insertions(+)
> 
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 4c07b9189c86..8d7e475ca28d 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -507,6 +507,29 @@ config RISCV_ISA_V_DEFAULT_ENABLE
>  
>  	  If you don't know what to do here, say Y.
>  
> +config TOOLCHAIN_HAS_ZBA
> +	bool
> +	default y
> +	depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zba)
> +	depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zba)
> +	depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900
> +	depends on AS_HAS_OPTION_ARCH
> +
> +config RISCV_ISA_ZBA
> +	bool "Zba extension support for bit manipulation instructions"
> +	depends on TOOLCHAIN_HAS_ZBA
> +	depends on MMU
> +	depends on RISCV_ALTERNATIVE
> +	default y
> +	help
> +	   Adds support to dynamically detect the presence of the ZBA
> +	   extension (basic bit manipulation) and enable its usage.
> +
> +	   The Zba extension provides instructions to accelerate a number
> +	   of bit-specific address creation operations.
> +
> +	   If you don't know what to do here, say Y.
> +
>  config TOOLCHAIN_HAS_ZBB
>  	bool
>  	default y
> diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
> index 6ec6d52a4180..51fa3f67fc9a 100644
> --- a/arch/riscv/Makefile
> +++ b/arch/riscv/Makefile
> @@ -61,6 +61,8 @@ riscv-march-$(CONFIG_ARCH_RV64I)	:= rv64ima
>  riscv-march-$(CONFIG_FPU)		:= $(riscv-march-y)fd
>  riscv-march-$(CONFIG_RISCV_ISA_C)	:= $(riscv-march-y)c
>  riscv-march-$(CONFIG_RISCV_ISA_V)	:= $(riscv-march-y)v
> +riscv-march-$(CONFIG_RISCV_ISA_ZBA)	:= $(riscv-march-y)_zba
> +riscv-march-$(CONFIG_RISCV_ISA_ZBB)	:= $(riscv-march-y)_zbb

AFAICT, this is going to break immediately on any system that enables
RISCV_ISA_ZBA (which will happen by default) but does not support the
extension. You made the option depend on RISCV_ALTERNATIVE, but I do
not see any use of alternatives in the code to actually perform the
dynamic detection of Zba.
Note that for fd & v, we add them to riscv-march-y, but then immediately
remove them again before passing to the compiler, only allowing them in
AFLAGS:
	# Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
	# matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
	KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')

What am I missing?

Thanks,
Conor.
Palmer Dabbelt Aug. 27, 2023, 2 a.m. UTC | #2
On Sat, 26 Aug 2023 18:42:41 PDT (-0700), Conor Dooley wrote:
> On Sat, Aug 26, 2023 at 06:26:06PM -0700, Charlie Jenkins wrote:
>> Provide checksum algorithms that have been designed to leverage riscv
>> instructions such as rotate. In 64-bit, can take advantage of the larger
>> register to avoid some overflow checking.
>> 
>> Add configuration for Zba extension and add march for Zba and Zbb.
>> 
>> Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
>> ---
>>  arch/riscv/Kconfig                | 23 +++++++++++
>>  arch/riscv/Makefile               |  2 +
>>  arch/riscv/include/asm/checksum.h | 86 +++++++++++++++++++++++++++++++++++++++
>>  3 files changed, 111 insertions(+)
>> 
>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>> index 4c07b9189c86..8d7e475ca28d 100644
>> --- a/arch/riscv/Kconfig
>> +++ b/arch/riscv/Kconfig
>> @@ -507,6 +507,29 @@ config RISCV_ISA_V_DEFAULT_ENABLE
>>  
>>  	  If you don't know what to do here, say Y.
>>  
>> +config TOOLCHAIN_HAS_ZBA
>> +	bool
>> +	default y
>> +	depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zba)
>> +	depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zba)
>> +	depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900
>> +	depends on AS_HAS_OPTION_ARCH
>> +
>> +config RISCV_ISA_ZBA
>> +	bool "Zba extension support for bit manipulation instructions"
>> +	depends on TOOLCHAIN_HAS_ZBA
>> +	depends on MMU
>> +	depends on RISCV_ALTERNATIVE
>> +	default y
>> +	help
>> +	   Adds support to dynamically detect the presence of the ZBA
>> +	   extension (basic bit manipulation) and enable its usage.
>> +
>> +	   The Zba extension provides instructions to accelerate a number
>> +	   of bit-specific address creation operations.
>> +
>> +	   If you don't know what to do here, say Y.
>> +
>>  config TOOLCHAIN_HAS_ZBB
>>  	bool
>>  	default y
>> diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
>> index 6ec6d52a4180..51fa3f67fc9a 100644
>> --- a/arch/riscv/Makefile
>> +++ b/arch/riscv/Makefile
>> @@ -61,6 +61,8 @@ riscv-march-$(CONFIG_ARCH_RV64I)	:= rv64ima
>>  riscv-march-$(CONFIG_FPU)		:= $(riscv-march-y)fd
>>  riscv-march-$(CONFIG_RISCV_ISA_C)	:= $(riscv-march-y)c
>>  riscv-march-$(CONFIG_RISCV_ISA_V)	:= $(riscv-march-y)v
>> +riscv-march-$(CONFIG_RISCV_ISA_ZBA)	:= $(riscv-march-y)_zba
>> +riscv-march-$(CONFIG_RISCV_ISA_ZBB)	:= $(riscv-march-y)_zbb
>
> AFAICT, this is going to break immediately on any system that enables
> RISCV_ISA_ZBA (which will happen by default) but does not support the
> extension. You made the option depend on RISCV_ALTERNATIVE, but I do
> not see any use of alternatives in the code to actually perform the
> dynamic detection of Zba.

I guess we kind of have an ambiguity here: for stuff like C we just 
unconditionally use the instructions, but for the rest we probe first.  
We should probably have three states for each extension: disabled, 
dynamically detected, and assumed.

> Note that for fd & v, we add it to riscv-march-y, but then immediately
> remove it again before passing to the compiler, only allow them in
> AFLAGS:
> 	# Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
> 	# matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
> 	KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
>
> What am I missing?

FD and V both have state that can be saved lazily, so we can't let
arbitrary code use them.  The extensions formerly known as B don't add
state, so they are safe to flip on in arbitrary places (aside from the
issues you pointed out above).

>
> Thanks,
> Conor.
Conor Dooley Aug. 27, 2023, 10:28 a.m. UTC | #3
On Sat, Aug 26, 2023 at 07:00:47PM -0700, Palmer Dabbelt wrote:
> On Sat, 26 Aug 2023 18:42:41 PDT (-0700), Conor Dooley wrote:
> > On Sat, Aug 26, 2023 at 06:26:06PM -0700, Charlie Jenkins wrote:
> > > Provide checksum algorithms that have been designed to leverage riscv
> > > instructions such as rotate. In 64-bit, can take advantage of the larger
> > > register to avoid some overflow checking.
> > > 
> > > Add configuration for Zba extension and add march for Zba and Zbb.
> > > 
> > > Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
> > > ---
> > >  arch/riscv/Kconfig                | 23 +++++++++++
> > >  arch/riscv/Makefile               |  2 +
> > >  arch/riscv/include/asm/checksum.h | 86 +++++++++++++++++++++++++++++++++++++++
> > >  3 files changed, 111 insertions(+)
> > > 
> > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > index 4c07b9189c86..8d7e475ca28d 100644
> > > --- a/arch/riscv/Kconfig
> > > +++ b/arch/riscv/Kconfig
> > > @@ -507,6 +507,29 @@ config RISCV_ISA_V_DEFAULT_ENABLE
> > >  	  If you don't know what to do here, say Y.
> > > +config TOOLCHAIN_HAS_ZBA
> > > +	bool
> > > +	default y
> > > +	depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zba)
> > > +	depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zba)
> > > +	depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900
> > > +	depends on AS_HAS_OPTION_ARCH
> > > +
> > > +config RISCV_ISA_ZBA
> > > +	bool "Zba extension support for bit manipulation instructions"
> > > +	depends on TOOLCHAIN_HAS_ZBA
> > > +	depends on MMU
> > > +	depends on RISCV_ALTERNATIVE
> > > +	default y
> > > +	help
> > > +	   Adds support to dynamically detect the presence of the ZBA
> > > +	   extension (basic bit manipulation) and enable its usage.
> > > +
> > > +	   The Zba extension provides instructions to accelerate a number
> > > +	   of bit-specific address creation operations.
> > > +
> > > +	   If you don't know what to do here, say Y.
> > > +
> > >  config TOOLCHAIN_HAS_ZBB
> > >  	bool
> > >  	default y
> > > diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
> > > index 6ec6d52a4180..51fa3f67fc9a 100644
> > > --- a/arch/riscv/Makefile
> > > +++ b/arch/riscv/Makefile
> > > @@ -61,6 +61,8 @@ riscv-march-$(CONFIG_ARCH_RV64I)	:= rv64ima
> > >  riscv-march-$(CONFIG_FPU)		:= $(riscv-march-y)fd
> > >  riscv-march-$(CONFIG_RISCV_ISA_C)	:= $(riscv-march-y)c
> > >  riscv-march-$(CONFIG_RISCV_ISA_V)	:= $(riscv-march-y)v
> > > +riscv-march-$(CONFIG_RISCV_ISA_ZBA)	:= $(riscv-march-y)_zba
> > > +riscv-march-$(CONFIG_RISCV_ISA_ZBB)	:= $(riscv-march-y)_zbb
> > 
> > AFAICT, this is going to break immediately on any system that enables
> > RISCV_ISA_ZBA (which will happen by default) but does not support the
> > extension. You made the option depend on RISCV_ALTERNATIVE, but I do
> > not see any use of alternatives in the code to actually perform the
> > dynamic detection of Zba.
> 
> I guess we kind of have an ambiguity here: for stuff like C we just
> unconditionally use the instructions, but for the rest we probe first.  We
> should probably have three states for each extension: disabled, dynamically
> detected, and assumed.

You mean, just add some comments to the makefile surrounding each
section or to some rst documentation?

> > Note that for fd & v, we add it to riscv-march-y, but then immediately
> > remove it again before passing to the compiler, only allow them in
> > AFLAGS:
> > 	# Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
> > 	# matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
> > 	KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
> > 
> > What am I missing?
> 
> FD and V both have state that can be saved lazily, so we can't let arbitrary
> code use them.  The extensions formally known as B don't add state, so they
> are safe to flip on in arbitrary places (aside from the issues you pointed
> out above).

I probably went about this badly since you missed the point. I was
trying to point out that, for anything other than the compressed
extensions in the block above, we only pass them in march to the
assembler, and not to the compiler, in contrast to this patch which just
always passes them. I should have pointed to how we handled the
in-kernel Zbb stuff & asked how this was any different; that would
probably have been clearer.
Conor Dooley Aug. 27, 2023, 12:25 p.m. UTC | #4
On Sun, Aug 27, 2023 at 11:28:33AM +0100, Conor Dooley wrote:
> On Sat, Aug 26, 2023 at 07:00:47PM -0700, Palmer Dabbelt wrote:
> > On Sat, 26 Aug 2023 18:42:41 PDT (-0700), Conor Dooley wrote:
> > > On Sat, Aug 26, 2023 at 06:26:06PM -0700, Charlie Jenkins wrote:
> > > > Provide checksum algorithms that have been designed to leverage riscv
> > > > instructions such as rotate. In 64-bit, can take advantage of the larger
> > > > register to avoid some overflow checking.
> > > > 
> > > > Add configuration for Zba extension and add march for Zba and Zbb.
> > > > 
> > > > Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
> > > > ---
> > > >  arch/riscv/Kconfig                | 23 +++++++++++
> > > >  arch/riscv/Makefile               |  2 +
> > > >  arch/riscv/include/asm/checksum.h | 86 +++++++++++++++++++++++++++++++++++++++
> > > >  3 files changed, 111 insertions(+)
> > > > 
> > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > > index 4c07b9189c86..8d7e475ca28d 100644
> > > > --- a/arch/riscv/Kconfig
> > > > +++ b/arch/riscv/Kconfig
> > > > @@ -507,6 +507,29 @@ config RISCV_ISA_V_DEFAULT_ENABLE
> > > >  	  If you don't know what to do here, say Y.
> > > > +config TOOLCHAIN_HAS_ZBA
> > > > +	bool
> > > > +	default y
> > > > +	depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zba)
> > > > +	depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zba)
> > > > +	depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900
> > > > +	depends on AS_HAS_OPTION_ARCH
> > > > +
> > > > +config RISCV_ISA_ZBA
> > > > +	bool "Zba extension support for bit manipulation instructions"
> > > > +	depends on TOOLCHAIN_HAS_ZBA
> > > > +	depends on MMU
> > > > +	depends on RISCV_ALTERNATIVE
> > > > +	default y
> > > > +	help
> > > > +	   Adds support to dynamically detect the presence of the ZBA
> > > > +	   extension (basic bit manipulation) and enable its usage.
> > > > +
> > > > +	   The Zba extension provides instructions to accelerate a number
> > > > +	   of bit-specific address creation operations.
> > > > +
> > > > +	   If you don't know what to do here, say Y.
> > > > +
> > > >  config TOOLCHAIN_HAS_ZBB
> > > >  	bool
> > > >  	default y
> > > > diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
> > > > index 6ec6d52a4180..51fa3f67fc9a 100644
> > > > --- a/arch/riscv/Makefile
> > > > +++ b/arch/riscv/Makefile
> > > > @@ -61,6 +61,8 @@ riscv-march-$(CONFIG_ARCH_RV64I)	:= rv64ima
> > > >  riscv-march-$(CONFIG_FPU)		:= $(riscv-march-y)fd
> > > >  riscv-march-$(CONFIG_RISCV_ISA_C)	:= $(riscv-march-y)c
> > > >  riscv-march-$(CONFIG_RISCV_ISA_V)	:= $(riscv-march-y)v
> > > > +riscv-march-$(CONFIG_RISCV_ISA_ZBA)	:= $(riscv-march-y)_zba
> > > > +riscv-march-$(CONFIG_RISCV_ISA_ZBB)	:= $(riscv-march-y)_zbb
> > > 
> > > AFAICT, this is going to break immediately on any system that enables
> > > RISCV_ISA_ZBA (which will happen by default) but does not support the
> > > extension. You made the option depend on RISCV_ALTERNATIVE, but I do
> > > not see any use of alternatives in the code to actually perform the
> > > dynamic detection of Zba.
> > 
> > I guess we kind of have an ambiguity here: for stuff like C we just
> > unconditionally use the instructions, but for the rest we probe first.  We
> > should probably have three states for each extension: disabled, dynamically
> > detected, and assumed.
> 
> You mean, just add some comments to the makefile surrounding each
> section or to some rst documentation?

Also, the code here doesn't build w/
	warning: invalid argument to '-march': '_zba_zbb_zicsr_zifencei_zihintpause'
so there's something else wrong with TOOLCHAIN_HAS_ZBA :)

> 
> > > Note that for fd & v, we add it to riscv-march-y, but then immediately
> > > remove it again before passing to the compiler, only allow them in
> > > AFLAGS:
> > > 	# Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
> > > 	# matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
> > > 	KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
> > > 
> > > What am I missing?
> > 
> > FD and V both have state that can be saved lazily, so we can't let arbitrary
> > code use them.  The extensions formally known as B don't add state, so they
> > are safe to flip on in arbitrary places (aside from the issues you pointed
> > out above).
> 
> I probably went about this badly since you missed the point. I was
> trying to point out that for anything other than the compressed
> extensions in the block above that we only pass them in march to the
> assembler, and not to the compiler, in contrast to this patch which just
> always passes them. I should have pointed to how we handled the
> in-kernel Zbb stuff & asked how this was any different, would probably
> have been clearer.
> 



> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
Charlie Jenkins Aug. 28, 2023, 4:55 p.m. UTC | #5
On Sun, Aug 27, 2023 at 01:25:27PM +0100, Conor Dooley wrote:
> On Sun, Aug 27, 2023 at 11:28:33AM +0100, Conor Dooley wrote:
> > On Sat, Aug 26, 2023 at 07:00:47PM -0700, Palmer Dabbelt wrote:
> > > On Sat, 26 Aug 2023 18:42:41 PDT (-0700), Conor Dooley wrote:
> > > > On Sat, Aug 26, 2023 at 06:26:06PM -0700, Charlie Jenkins wrote:
> > > > > Provide checksum algorithms that have been designed to leverage riscv
> > > > > instructions such as rotate. In 64-bit, can take advantage of the larger
> > > > > register to avoid some overflow checking.
> > > > > 
> > > > > Add configuration for Zba extension and add march for Zba and Zbb.
> > > > > 
> > > > > Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
> > > > > ---
> > > > >  arch/riscv/Kconfig                | 23 +++++++++++
> > > > >  arch/riscv/Makefile               |  2 +
> > > > >  arch/riscv/include/asm/checksum.h | 86 +++++++++++++++++++++++++++++++++++++++
> > > > >  3 files changed, 111 insertions(+)
> > > > > 
> > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > > > index 4c07b9189c86..8d7e475ca28d 100644
> > > > > --- a/arch/riscv/Kconfig
> > > > > +++ b/arch/riscv/Kconfig
> > > > > @@ -507,6 +507,29 @@ config RISCV_ISA_V_DEFAULT_ENABLE
> > > > >  	  If you don't know what to do here, say Y.
> > > > > +config TOOLCHAIN_HAS_ZBA
> > > > > +	bool
> > > > > +	default y
> > > > > +	depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zba)
> > > > > +	depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zba)
> > > > > +	depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900
> > > > > +	depends on AS_HAS_OPTION_ARCH
> > > > > +
> > > > > +config RISCV_ISA_ZBA
> > > > > +	bool "Zba extension support for bit manipulation instructions"
> > > > > +	depends on TOOLCHAIN_HAS_ZBA
> > > > > +	depends on MMU
> > > > > +	depends on RISCV_ALTERNATIVE
> > > > > +	default y
> > > > > +	help
> > > > > +	   Adds support to dynamically detect the presence of the ZBA
> > > > > +	   extension (basic bit manipulation) and enable its usage.
> > > > > +
> > > > > +	   The Zba extension provides instructions to accelerate a number
> > > > > +	   of bit-specific address creation operations.
> > > > > +
> > > > > +	   If you don't know what to do here, say Y.
> > > > > +
> > > > >  config TOOLCHAIN_HAS_ZBB
> > > > >  	bool
> > > > >  	default y
> > > > > diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
> > > > > index 6ec6d52a4180..51fa3f67fc9a 100644
> > > > > --- a/arch/riscv/Makefile
> > > > > +++ b/arch/riscv/Makefile
> > > > > @@ -61,6 +61,8 @@ riscv-march-$(CONFIG_ARCH_RV64I)	:= rv64ima
> > > > >  riscv-march-$(CONFIG_FPU)		:= $(riscv-march-y)fd
> > > > >  riscv-march-$(CONFIG_RISCV_ISA_C)	:= $(riscv-march-y)c
> > > > >  riscv-march-$(CONFIG_RISCV_ISA_V)	:= $(riscv-march-y)v
> > > > > +riscv-march-$(CONFIG_RISCV_ISA_ZBA)	:= $(riscv-march-y)_zba
> > > > > +riscv-march-$(CONFIG_RISCV_ISA_ZBB)	:= $(riscv-march-y)_zbb
> > > > 
> > > > AFAICT, this is going to break immediately on any system that enables
> > > > RISCV_ISA_ZBA (which will happen by default) but does not support the
> > > > extension. You made the option depend on RISCV_ALTERNATIVE, but I do
> > > > not see any use of alternatives in the code to actually perform the
> > > > dynamic detection of Zba.
> > > 
> > > I guess we kind of have an ambiguity here: for stuff like C we just
> > > unconditionally use the instructions, but for the rest we probe first.  We
> > > should probably have three states for each extension: disabled, dynamically
> > > detected, and assumed.
> > 
> > You mean, just add some comments to the makefile surrounding each
> > section or to some rst documentation?
> 
> Also, the code here doesn't build w/
> 	warning: invalid argument to '-march': '_zba_zbb_zicsr_zifencei_zihintpause'
> so there's something else wrong with TOOLCHAIN_HAS_ZBA :)
It is odd that this is missing 'rv64ima' or 'rv32ima' at the beginning of
this string. What configuration are you using that could cause that to
be left off?

Compiling with defconfig automatically enables Zba and appears to not
cause this issue. I realized that I put the header definitions for
do_csum and csum_ipv6_magic in this patch instead of the next one so the
code will fail to compile from this but not due to march settings.
> 
> > 
> > > > Note that for fd & v, we add it to riscv-march-y, but then immediately
> > > > remove it again before passing to the compiler, only allow them in
> > > > AFLAGS:
> > > > 	# Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
> > > > 	# matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
> > > > 	KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
> > > > 
> > > > What am I missing?
> > > 
> > > FD and V both have state that can be saved lazily, so we can't let arbitrary
> > > code use them.  The extensions formally known as B don't add state, so they
> > > are safe to flip on in arbitrary places (aside from the issues you pointed
> > > out above).
> > 
> > I probably went about this badly since you missed the point. I was
> > trying to point out that for anything other than the compressed
> > extensions in the block above that we only pass them in march to the
> > assembler, and not to the compiler, in contrast to this patch which just
> > always passes them. I should have pointed to how we handled the
> > in-kernel Zbb stuff & asked how this was any different, would probably
> > have been clearer.
> > 
I suppose it might be better if I submit these changes in a different
patch so we can have more discussion there. Zbb was previously only used
by assembly files (arch/riscv/lib/strcmp.S, arch/riscv/lib/strlen.S,
arch/riscv/lib/strncmp.S). I wanted to add them to the compiler so
that C programs could leverage these extensions. However, I neglected to
consider machines that compile the kernel with these extensions but have
cores without these extensions. The purpose of using these extensions is
to save a couple of clock cycles, so if it is necessary to first
check if the extension is enabled it may not be worth it for these
functions.

> 
> 
> 
> > _______________________________________________
> > linux-riscv mailing list
> > linux-riscv@lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-riscv
>
Conor Dooley Aug. 28, 2023, 5:08 p.m. UTC | #6
On Mon, Aug 28, 2023 at 09:55:49AM -0700, Charlie Jenkins wrote:
> On Sun, Aug 27, 2023 at 01:25:27PM +0100, Conor Dooley wrote:
> > On Sun, Aug 27, 2023 at 11:28:33AM +0100, Conor Dooley wrote:
> > > On Sat, Aug 26, 2023 at 07:00:47PM -0700, Palmer Dabbelt wrote:
> > > > On Sat, 26 Aug 2023 18:42:41 PDT (-0700), Conor Dooley wrote:
> > > > > On Sat, Aug 26, 2023 at 06:26:06PM -0700, Charlie Jenkins wrote:
> > > > > > Provide checksum algorithms that have been designed to leverage riscv
> > > > > > instructions such as rotate. In 64-bit, can take advantage of the larger
> > > > > > register to avoid some overflow checking.
> > > > > > 
> > > > > > Add configuration for Zba extension and add march for Zba and Zbb.
> > > > > > 
> > > > > > Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
> > > > > > ---
> > > > > >  arch/riscv/Kconfig                | 23 +++++++++++
> > > > > >  arch/riscv/Makefile               |  2 +
> > > > > >  arch/riscv/include/asm/checksum.h | 86 +++++++++++++++++++++++++++++++++++++++
> > > > > >  3 files changed, 111 insertions(+)
> > > > > > 
> > > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > > > > index 4c07b9189c86..8d7e475ca28d 100644
> > > > > > --- a/arch/riscv/Kconfig
> > > > > > +++ b/arch/riscv/Kconfig
> > > > > > @@ -507,6 +507,29 @@ config RISCV_ISA_V_DEFAULT_ENABLE
> > > > > >  	  If you don't know what to do here, say Y.
> > > > > > +config TOOLCHAIN_HAS_ZBA
> > > > > > +	bool
> > > > > > +	default y
> > > > > > +	depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zba)
> > > > > > +	depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zba)
> > > > > > +	depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900
> > > > > > +	depends on AS_HAS_OPTION_ARCH
> > > > > > +
> > > > > > +config RISCV_ISA_ZBA
> > > > > > +	bool "Zba extension support for bit manipulation instructions"
> > > > > > +	depends on TOOLCHAIN_HAS_ZBA
> > > > > > +	depends on MMU
> > > > > > +	depends on RISCV_ALTERNATIVE
> > > > > > +	default y
> > > > > > +	help
> > > > > > +	   Adds support to dynamically detect the presence of the ZBA
> > > > > > +	   extension (basic bit manipulation) and enable its usage.
> > > > > > +
> > > > > > +	   The Zba extension provides instructions to accelerate a number
> > > > > > +	   of bit-specific address creation operations.
> > > > > > +
> > > > > > +	   If you don't know what to do here, say Y.
> > > > > > +
> > > > > >  config TOOLCHAIN_HAS_ZBB
> > > > > >  	bool
> > > > > >  	default y
> > > > > > diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
> > > > > > index 6ec6d52a4180..51fa3f67fc9a 100644
> > > > > > --- a/arch/riscv/Makefile
> > > > > > +++ b/arch/riscv/Makefile
> > > > > > @@ -61,6 +61,8 @@ riscv-march-$(CONFIG_ARCH_RV64I)	:= rv64ima
> > > > > >  riscv-march-$(CONFIG_FPU)		:= $(riscv-march-y)fd
> > > > > >  riscv-march-$(CONFIG_RISCV_ISA_C)	:= $(riscv-march-y)c
> > > > > >  riscv-march-$(CONFIG_RISCV_ISA_V)	:= $(riscv-march-y)v
> > > > > > +riscv-march-$(CONFIG_RISCV_ISA_ZBA)	:= $(riscv-march-y)_zba
> > > > > > +riscv-march-$(CONFIG_RISCV_ISA_ZBB)	:= $(riscv-march-y)_zbb
> > > > > 
> > > > > AFAICT, this is going to break immediately on any system that enables
> > > > > RISCV_ISA_ZBA (which will happen by default) but does not support the
> > > > > extension. You made the option depend on RISCV_ALTERNATIVE, but I do
> > > > > not see any use of alternatives in the code to actually perform the
> > > > > dynamic detection of Zba.
> > > > 
> > > > I guess we kind of have an ambiguity here: for stuff like C we just
> > > > unconditionally use the instructions, but for the rest we probe first.  We
> > > > should probably have three states for each extension: disabled, dynamically
> > > > detected, and assumed.
> > > 
> > > You mean, just add some comments to the makefile surrounding each
> > > section or to some rst documentation?
> > 
> > Also, the code here doesn't build w/
> > 	warning: invalid argument to '-march': '_zba_zbb_zicsr_zifencei_zihintpause'
> > so there's something else wrong with TOOLCHAIN_HAS_ZBA :)
> It is odd that this is missing 'rv64ima' or 'rv32ima' at the beginning of
> this string. What configuration are you using that could cause that to
> be left off?

I don't know, but that configuration is pretty pervasive. The patchwork
CI blew up too & that is using kernel.org toolchains built by Arnd:
https://mirrors.edge.kernel.org/pub/tools/crosstool/

> Compiling with defconfig automatically enables Zba and appears to not
> cause this issue. I realized that I put the header definitions for
> do_csum and csum_ipv6_magic in this patch instead of the next one so the
> code will fail to compile from this but not due to march settings.
> > 
> > > 
> > > > > Note that for fd & v, we add it to riscv-march-y, but then immediately
> > > > > remove it again before passing to the compiler, only allow them in
> > > > > AFLAGS:
> > > > > 	# Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
> > > > > 	# matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
> > > > > 	KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
> > > > > 
> > > > > What am I missing?
> > > > 
> > > > FD and V both have state that can be saved lazily, so we can't let arbitrary
> > > > code use them.  The extensions formally known as B don't add state, so they
> > > > are safe to flip on in arbitrary places (aside from the issues you pointed
> > > > out above).
> > > 
> > > I probably went about this badly since you missed the point. I was
> > > trying to point out that for anything other than the compressed
> > > extensions in the block above that we only pass them in march to the
> > > assembler, and not to the compiler, in contrast to this patch which just
> > > always passes them. I should have pointed to how we handled the
> > > in-kernel Zbb stuff & asked how this was any different, would probably
> > > have been clearer.
> > > 
> I supposed it might be better if I submit these changes in a different
> patch so we can have more discussion there. Zbb was previously only used
> by assembly files (arch/riscv/lib/strcmp.S, arch/riscv/lib/strlen.S,
> arch/riscv/lib/strncmp.S). I wanted to add them to the compiler so that
> that C programs could leverage these extensions. However, I neglected to
> consider machines that compile the kernel with these extensions but have
> cores without these extensions.

Less so cores, since we don't support heterogeneous stuff, and more so
platforms that do not support the extensions. It's expected that the
same kernel could in theory be used across a wide variety of systems.

> The purpose of using these extensions is
> to save a couple of clock cycles, so if it is necessary to first
> check if the extension is enabled it may not be worth it for these
> functions.

That's still possible; it's what the alternatives mechanism exists for.
During boot the codepaths are patched to use what works for a given
machine. Check out the code that makes use of Zbb or
riscv_has_extension_[un]likely(). You'd need to do something like what
the existing users of Zbb instructions do, with an alternative used to
avoid the custom asm implementations when the hardware does not support
them. (That's what the CONFIG_ALTERNATIVE & CONFIG_AS_HAS_OPTION_ARCH
options you made TOOLCHAIN_HAS_ZBA depend on are for).
Charlie Jenkins Aug. 28, 2023, 6:20 p.m. UTC | #7
On Mon, Aug 28, 2023 at 06:08:40PM +0100, Conor Dooley wrote:
> On Mon, Aug 28, 2023 at 09:55:49AM -0700, Charlie Jenkins wrote:
> > On Sun, Aug 27, 2023 at 01:25:27PM +0100, Conor Dooley wrote:
> > > On Sun, Aug 27, 2023 at 11:28:33AM +0100, Conor Dooley wrote:
> > > > On Sat, Aug 26, 2023 at 07:00:47PM -0700, Palmer Dabbelt wrote:
> > > > > On Sat, 26 Aug 2023 18:42:41 PDT (-0700), Conor Dooley wrote:
> > > > > > On Sat, Aug 26, 2023 at 06:26:06PM -0700, Charlie Jenkins wrote:
> > > > > > > Provide checksum algorithms that have been designed to leverage riscv
> > > > > > > instructions such as rotate. In 64-bit, can take advantage of the larger
> > > > > > > register to avoid some overflow checking.
> > > > > > > 
> > > > > > > Add configuration for Zba extension and add march for Zba and Zbb.
> > > > > > > 
> > > > > > > Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
> > > > > > > ---
> > > > > > >  arch/riscv/Kconfig                | 23 +++++++++++
> > > > > > >  arch/riscv/Makefile               |  2 +
> > > > > > >  arch/riscv/include/asm/checksum.h | 86 +++++++++++++++++++++++++++++++++++++++
> > > > > > >  3 files changed, 111 insertions(+)
> > > > > > > 
> > > > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > > > > > index 4c07b9189c86..8d7e475ca28d 100644
> > > > > > > --- a/arch/riscv/Kconfig
> > > > > > > +++ b/arch/riscv/Kconfig
> > > > > > > @@ -507,6 +507,29 @@ config RISCV_ISA_V_DEFAULT_ENABLE
> > > > > > >  	  If you don't know what to do here, say Y.
> > > > > > > +config TOOLCHAIN_HAS_ZBA
> > > > > > > +	bool
> > > > > > > +	default y
> > > > > > > +	depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zba)
> > > > > > > +	depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zba)
> > > > > > > +	depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900
> > > > > > > +	depends on AS_HAS_OPTION_ARCH
> > > > > > > +
> > > > > > > +config RISCV_ISA_ZBA
> > > > > > > +	bool "Zba extension support for bit manipulation instructions"
> > > > > > > +	depends on TOOLCHAIN_HAS_ZBA
> > > > > > > +	depends on MMU
> > > > > > > +	depends on RISCV_ALTERNATIVE
> > > > > > > +	default y
> > > > > > > +	help
> > > > > > > +	   Adds support to dynamically detect the presence of the ZBA
> > > > > > > +	   extension (basic bit manipulation) and enable its usage.
> > > > > > > +
> > > > > > > +	   The Zba extension provides instructions to accelerate a number
> > > > > > > +	   of bit-specific address creation operations.
> > > > > > > +
> > > > > > > +	   If you don't know what to do here, say Y.
> > > > > > > +
> > > > > > >  config TOOLCHAIN_HAS_ZBB
> > > > > > >  	bool
> > > > > > >  	default y
> > > > > > > diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
> > > > > > > index 6ec6d52a4180..51fa3f67fc9a 100644
> > > > > > > --- a/arch/riscv/Makefile
> > > > > > > +++ b/arch/riscv/Makefile
> > > > > > > @@ -61,6 +61,8 @@ riscv-march-$(CONFIG_ARCH_RV64I)	:= rv64ima
> > > > > > >  riscv-march-$(CONFIG_FPU)		:= $(riscv-march-y)fd
> > > > > > >  riscv-march-$(CONFIG_RISCV_ISA_C)	:= $(riscv-march-y)c
> > > > > > >  riscv-march-$(CONFIG_RISCV_ISA_V)	:= $(riscv-march-y)v
> > > > > > > +riscv-march-$(CONFIG_RISCV_ISA_ZBA)	:= $(riscv-march-y)_zba
> > > > > > > +riscv-march-$(CONFIG_RISCV_ISA_ZBB)	:= $(riscv-march-y)_zbb
> > > > > > 
> > > > > > AFAICT, this is going to break immediately on any system that enables
> > > > > > RISCV_ISA_ZBA (which will happen by default) but does not support the
> > > > > > extension. You made the option depend on RISCV_ALTERNATIVE, but I do
> > > > > > not see any use of alternatives in the code to actually perform the
> > > > > > dynamic detection of Zba.
> > > > > 
> > > > > I guess we kind of have an ambiguity here: for stuff like C we just
> > > > > unconditionally use the instructions, but for the rest we probe first.  We
> > > > > should probably have three states for each extension: disabled, dynamically
> > > > > detected, and assumed.
> > > > 
> > > > You mean, just add some comments to the makefile surrounding each
> > > > section or to some rst documentation?
> > > 
> > > Also, the code here doesn't build w/
> > > 	warning: invalid argument to '-march': '_zba_zbb_zicsr_zifencei_zihintpause'
> > > so there's something else wrong with TOOLCHAIN_HAS_ZBA :)
> > It is odd that this is missing 'rv64ima' or 'rv32ima' at the beginning of
> > this string. What configuration are you using that could cause that to
> > be left off?
> 
> I don't know, but that configuration is pretty pervasive. The patchwork
> CI blew up too & that is using kernel.org toolchains built by Arnd:
> https://mirrors.edge.kernel.org/pub/tools/crosstool/
> 
> > Compiling with defconfig automatically enables Zba and appears to not
> > cause this issue. I realized that I put the header definitions for
> > do_csum and csum_ipv6_magic in this patch instead of the next one so the
> > code will fail to compile from this but not due to march settings.
> > > 
> > > > 
> > > > > > Note that for fd & v, we add it to riscv-march-y, but then immediately
> > > > > > remove it again before passing to the compiler, only allow them in
> > > > > > AFLAGS:
> > > > > > 	# Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
> > > > > > 	# matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
> > > > > > 	KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
> > > > > > 
> > > > > > What am I missing?
> > > > > 
> > > > > FD and V both have state that can be saved lazily, so we can't let arbitrary
> > > > > code use them.  The extensions formerly known as B don't add state, so they
> > > > > are safe to flip on in arbitrary places (aside from the issues you pointed
> > > > > out above).
> > > > 
> > > > I probably went about this badly since you missed the point. I was
> > > > trying to point out that for anything other than the compressed
> > > > extensions in the block above that we only pass them in march to the
> > > > assembler, and not to the compiler, in contrast to this patch which just
> > > > always passes them. I should have pointed to how we handled the
> > > > in-kernel Zbb stuff & asked how this was any different, would probably
> > > > have been clearer.
> > > > 
> > I suppose it might be better if I submit these changes in a different
> > patch so we can have more discussion there. Zbb was previously only used
> > by assembly files (arch/riscv/lib/strcmp.S, arch/riscv/lib/strlen.S,
> > arch/riscv/lib/strncmp.S). I wanted to add them to the compiler so that
> > C programs could leverage these extensions. However, I neglected to
> > consider machines that compile the kernel with these extensions but have
> > cores without these extensions.
> 
> Less so cores, since we don't support heterogeneous stuff, and more so
> platforms that do not support the extensions. It's expected that the
> same kernel could in theory be used across a wide variety of systems.
> 
> > The purpose of using these extensions is
> > to save a couple of clock cycles, so if it is necessary to first
> > check if the extension is enabled it may not be worth it for these
> > functions.
> 
> That's still possible, it's what the alternatives mechanism exists for.
> During boot the codepaths are patched to use what works for a given
> machine, check out the code that makes use of Zbb or
> riscv_has_extension_[un]likely(). You'd need to do something like what the
> existing users of Zbb instructions do, with an alternative used to
> avoid the custom asm implementations when the hardware does not support
> them. (That's what the CONFIG_RISCV_ALTERNATIVE & CONFIG_AS_HAS_OPTION_ARCH
> options you made RISCV_ISA_ZBA & TOOLCHAIN_HAS_ZBA depend on are for).

I can see how to get this to work if I port this code into assembly and
write two different versions (one with Zbb and one without), but
I don't see how this would work in C. Unless I am mistaken, there would
need to be some sort of wrapper around the C code that told the compiler
to compile it multiple times for different extension combinations and
then use the riscv_has_extension_[un]likely() functions to determine
which version to use at runtime. Is this feasible?
Conor Dooley Aug. 28, 2023, 6:56 p.m. UTC | #8
On Mon, Aug 28, 2023 at 11:20:39AM -0700, Charlie Jenkins wrote:
> On Mon, Aug 28, 2023 at 06:08:40PM +0100, Conor Dooley wrote:
> > On Mon, Aug 28, 2023 at 09:55:49AM -0700, Charlie Jenkins wrote:
> > > On Sun, Aug 27, 2023 at 01:25:27PM +0100, Conor Dooley wrote:
> > > > On Sun, Aug 27, 2023 at 11:28:33AM +0100, Conor Dooley wrote:
> > > > > On Sat, Aug 26, 2023 at 07:00:47PM -0700, Palmer Dabbelt wrote:
> > > > > > On Sat, 26 Aug 2023 18:42:41 PDT (-0700), Conor Dooley wrote:
> > > > > > > On Sat, Aug 26, 2023 at 06:26:06PM -0700, Charlie Jenkins wrote:
> > > > > > > > Provide checksum algorithms that have been designed to leverage riscv
> > > > > > > > instructions such as rotate. In 64-bit, can take advantage of the larger
> > > > > > > > register to avoid some overflow checking.
> > > > > > > > 
> > > > > > > > Add configuration for Zba extension and add march for Zba and Zbb.
> > > > > > > > 
> > > > > > > > Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
> > > > > > > > ---
> > > > > > > >  arch/riscv/Kconfig                | 23 +++++++++++
> > > > > > > >  arch/riscv/Makefile               |  2 +
> > > > > > > >  arch/riscv/include/asm/checksum.h | 86 +++++++++++++++++++++++++++++++++++++++
> > > > > > > >  3 files changed, 111 insertions(+)
> > > > > > > > 
> > > > > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > > > > > > index 4c07b9189c86..8d7e475ca28d 100644
> > > > > > > > --- a/arch/riscv/Kconfig
> > > > > > > > +++ b/arch/riscv/Kconfig
> > > > > > > > @@ -507,6 +507,29 @@ config RISCV_ISA_V_DEFAULT_ENABLE
> > > > > > > >  	  If you don't know what to do here, say Y.
> > > > > > > > +config TOOLCHAIN_HAS_ZBA
> > > > > > > > +	bool
> > > > > > > > +	default y
> > > > > > > > +	depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zba)
> > > > > > > > +	depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zba)
> > > > > > > > +	depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900
> > > > > > > > +	depends on AS_HAS_OPTION_ARCH
> > > > > > > > +
> > > > > > > > +config RISCV_ISA_ZBA
> > > > > > > > +	bool "Zba extension support for bit manipulation instructions"
> > > > > > > > +	depends on TOOLCHAIN_HAS_ZBA
> > > > > > > > +	depends on MMU
> > > > > > > > +	depends on RISCV_ALTERNATIVE
> > > > > > > > +	default y
> > > > > > > > +	help
> > > > > > > > +	   Adds support to dynamically detect the presence of the ZBA
> > > > > > > > +	   extension (basic bit manipulation) and enable its usage.
> > > > > > > > +
> > > > > > > > +	   The Zba extension provides instructions to accelerate a number
> > > > > > > > +	   of bit-specific address creation operations.
> > > > > > > > +
> > > > > > > > +	   If you don't know what to do here, say Y.
> > > > > > > > +
> > > > > > > >  config TOOLCHAIN_HAS_ZBB
> > > > > > > >  	bool
> > > > > > > >  	default y
> > > > > > > > diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
> > > > > > > > index 6ec6d52a4180..51fa3f67fc9a 100644
> > > > > > > > --- a/arch/riscv/Makefile
> > > > > > > > +++ b/arch/riscv/Makefile
> > > > > > > > @@ -61,6 +61,8 @@ riscv-march-$(CONFIG_ARCH_RV64I)	:= rv64ima
> > > > > > > >  riscv-march-$(CONFIG_FPU)		:= $(riscv-march-y)fd
> > > > > > > >  riscv-march-$(CONFIG_RISCV_ISA_C)	:= $(riscv-march-y)c
> > > > > > > >  riscv-march-$(CONFIG_RISCV_ISA_V)	:= $(riscv-march-y)v
> > > > > > > > +riscv-march-$(CONFIG_RISCV_ISA_ZBA)	:= $(riscv-march-y)_zba
> > > > > > > > +riscv-march-$(CONFIG_RISCV_ISA_ZBB)	:= $(riscv-march-y)_zbb
> > > > > > > 
> > > > > > > AFAICT, this is going to break immediately on any system that enables
> > > > > > > RISCV_ISA_ZBA (which will happen by default) but does not support the
> > > > > > > extension. You made the option depend on RISCV_ALTERNATIVE, but I do
> > > > > > > not see any use of alternatives in the code to actually perform the
> > > > > > > dynamic detection of Zba.
> > > > > > 
> > > > > > I guess we kind of have an ambiguity here: for stuff like C we just
> > > > > > unconditionally use the instructions, but for the rest we probe first.  We
> > > > > > should probably have three states for each extension: disabled, dynamically
> > > > > > detected, and assumed.
> > > > > 
> > > > > You mean, just add some comments to the makefile surrounding each
> > > > > section or to some rst documentation?
> > > > 
> > > > Also, the code here doesn't build w/
> > > > 	warning: invalid argument to '-march': '_zba_zbb_zicsr_zifencei_zihintpause'
> > > > so there's something else wrong with TOOLCHAIN_HAS_ZBA :)
> > > It is odd that this is missing 'rv64ima' or 'rv32ima' at the beginning of
> > > this string. What configuration are you using that could cause that to
> > > be left off?
> > 
> > I don't know, but that configuration is pretty pervasive. The patchwork
> > CI blew up too & that is using kernel.org toolchains built by Arnd:
> > https://mirrors.edge.kernel.org/pub/tools/crosstool/
> > 
> > > Compiling with defconfig automatically enables Zba and appears to not
> > > cause this issue. I realized that I put the header definitions for
> > > do_csum and csum_ipv6_magic in this patch instead of the next one so the
> > > code will fail to compile from this but not due to march settings.
> > > > 
> > > > > 
> > > > > > > Note that for fd & v, we add it to riscv-march-y, but then immediately
> > > > > > > remove it again before passing to the compiler, only allow them in
> > > > > > > AFLAGS:
> > > > > > > 	# Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
> > > > > > > 	# matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
> > > > > > > 	KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
> > > > > > > 
> > > > > > > What am I missing?
> > > > > > 
> > > > > > FD and V both have state that can be saved lazily, so we can't let arbitrary
> > > > > > code use them.  The extensions formerly known as B don't add state, so they
> > > > > > are safe to flip on in arbitrary places (aside from the issues you pointed
> > > > > > out above).
> > > > > 
> > > > > I probably went about this badly since you missed the point. I was
> > > > > trying to point out that for anything other than the compressed
> > > > > extensions in the block above that we only pass them in march to the
> > > > > assembler, and not to the compiler, in contrast to this patch which just
> > > > > always passes them. I should have pointed to how we handled the
> > > > > in-kernel Zbb stuff & asked how this was any different, would probably
> > > > > have been clearer.
> > > > > 
> > > I suppose it might be better if I submit these changes in a different
> > > patch so we can have more discussion there. Zbb was previously only used
> > > by assembly files (arch/riscv/lib/strcmp.S, arch/riscv/lib/strlen.S,
> > > arch/riscv/lib/strncmp.S). I wanted to add them to the compiler so that
> > > C programs could leverage these extensions. However, I neglected to
> > > consider machines that compile the kernel with these extensions but have
> > > cores without these extensions.
> > 
> > Less so cores, since we don't support heterogeneous stuff, and more so
> > platforms that do not support the extensions. It's expected that the
> > same kernel could in theory be used across a wide variety of systems.
> > 
> > > The purpose of using these extensions is
> > > to save a couple of clock cycles, so if it is necessary to first
> > > check if the extension is enabled it may not be worth it for these
> > > functions.
> > 
> > That's still possible, it's what the alternatives mechanism exists for.
> > During boot the codepaths are patched to use what works for a given
> > machine, check out the code that makes use of Zbb or
> > riscv_has_extension_[un]likely(). You'd need to do something like what the
> > existing users of Zbb instructions do, with an alternative used to
> > avoid the custom asm implementations when the hardware does not support
> > them. (That's what the CONFIG_RISCV_ALTERNATIVE & CONFIG_AS_HAS_OPTION_ARCH
> > options you made RISCV_ISA_ZBA & TOOLCHAIN_HAS_ZBA depend on are for).
> 
> I can see how to get this to work if I port this code into assembly and
> write two different versions (one with Zbb and one without), but
> I don't see how this would work in C. Unless I am mistaken, there would
> need to be some sort of wrapper around the C code that told the compiler
> to compile it multiple times for different extension combinations and
> then use the riscv_has_extension_[un]likely() functions to determine
> which version to use at runtime. Is this feasible?

IIRC, if you put all the code using Zbb etc. into a compilation unit of
its own then you can set -march for that unit alone, but it may well just
be easier to write a custom asm one for the Zbb case & use the C
implementation from this patch for the non-Zbb case.
Charlie Jenkins Aug. 28, 2023, 9:39 p.m. UTC | #9
On Mon, Aug 28, 2023 at 07:56:13PM +0100, Conor Dooley wrote:
> On Mon, Aug 28, 2023 at 11:20:39AM -0700, Charlie Jenkins wrote:
> > On Mon, Aug 28, 2023 at 06:08:40PM +0100, Conor Dooley wrote:
> > > On Mon, Aug 28, 2023 at 09:55:49AM -0700, Charlie Jenkins wrote:
> > > > On Sun, Aug 27, 2023 at 01:25:27PM +0100, Conor Dooley wrote:
> > > > > On Sun, Aug 27, 2023 at 11:28:33AM +0100, Conor Dooley wrote:
> > > > > > On Sat, Aug 26, 2023 at 07:00:47PM -0700, Palmer Dabbelt wrote:
> > > > > > > On Sat, 26 Aug 2023 18:42:41 PDT (-0700), Conor Dooley wrote:
> > > > > > > > On Sat, Aug 26, 2023 at 06:26:06PM -0700, Charlie Jenkins wrote:
> > > > > > > > > Provide checksum algorithms that have been designed to leverage riscv
> > > > > > > > > instructions such as rotate. In 64-bit, can take advantage of the larger
> > > > > > > > > register to avoid some overflow checking.
> > > > > > > > > 
> > > > > > > > > Add configuration for Zba extension and add march for Zba and Zbb.
> > > > > > > > > 
> > > > > > > > > Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
> > > > > > > > > ---
> > > > > > > > >  arch/riscv/Kconfig                | 23 +++++++++++
> > > > > > > > >  arch/riscv/Makefile               |  2 +
> > > > > > > > >  arch/riscv/include/asm/checksum.h | 86 +++++++++++++++++++++++++++++++++++++++
> > > > > > > > >  3 files changed, 111 insertions(+)
> > > > > > > > > 
> > > > > > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > > > > > > > index 4c07b9189c86..8d7e475ca28d 100644
> > > > > > > > > --- a/arch/riscv/Kconfig
> > > > > > > > > +++ b/arch/riscv/Kconfig
> > > > > > > > > @@ -507,6 +507,29 @@ config RISCV_ISA_V_DEFAULT_ENABLE
> > > > > > > > >  	  If you don't know what to do here, say Y.
> > > > > > > > > +config TOOLCHAIN_HAS_ZBA
> > > > > > > > > +	bool
> > > > > > > > > +	default y
> > > > > > > > > +	depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zba)
> > > > > > > > > +	depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zba)
> > > > > > > > > +	depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900
> > > > > > > > > +	depends on AS_HAS_OPTION_ARCH
> > > > > > > > > +
> > > > > > > > > +config RISCV_ISA_ZBA
> > > > > > > > > +	bool "Zba extension support for bit manipulation instructions"
> > > > > > > > > +	depends on TOOLCHAIN_HAS_ZBA
> > > > > > > > > +	depends on MMU
> > > > > > > > > +	depends on RISCV_ALTERNATIVE
> > > > > > > > > +	default y
> > > > > > > > > +	help
> > > > > > > > > +	   Adds support to dynamically detect the presence of the ZBA
> > > > > > > > > +	   extension (basic bit manipulation) and enable its usage.
> > > > > > > > > +
> > > > > > > > > +	   The Zba extension provides instructions to accelerate a number
> > > > > > > > > +	   of bit-specific address creation operations.
> > > > > > > > > +
> > > > > > > > > +	   If you don't know what to do here, say Y.
> > > > > > > > > +
> > > > > > > > >  config TOOLCHAIN_HAS_ZBB
> > > > > > > > >  	bool
> > > > > > > > >  	default y
> > > > > > > > > diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
> > > > > > > > > index 6ec6d52a4180..51fa3f67fc9a 100644
> > > > > > > > > --- a/arch/riscv/Makefile
> > > > > > > > > +++ b/arch/riscv/Makefile
> > > > > > > > > @@ -61,6 +61,8 @@ riscv-march-$(CONFIG_ARCH_RV64I)	:= rv64ima
> > > > > > > > >  riscv-march-$(CONFIG_FPU)		:= $(riscv-march-y)fd
> > > > > > > > >  riscv-march-$(CONFIG_RISCV_ISA_C)	:= $(riscv-march-y)c
> > > > > > > > >  riscv-march-$(CONFIG_RISCV_ISA_V)	:= $(riscv-march-y)v
> > > > > > > > > +riscv-march-$(CONFIG_RISCV_ISA_ZBA)	:= $(riscv-march-y)_zba
> > > > > > > > > +riscv-march-$(CONFIG_RISCV_ISA_ZBB)	:= $(riscv-march-y)_zbb
> > > > > > > > 
> > > > > > > > AFAICT, this is going to break immediately on any system that enables
> > > > > > > > RISCV_ISA_ZBA (which will happen by default) but does not support the
> > > > > > > > extension. You made the option depend on RISCV_ALTERNATIVE, but I do
> > > > > > > > not see any use of alternatives in the code to actually perform the
> > > > > > > > dynamic detection of Zba.
> > > > > > > 
> > > > > > > I guess we kind of have an ambiguity here: for stuff like C we just
> > > > > > > unconditionally use the instructions, but for the rest we probe first.  We
> > > > > > > should probably have three states for each extension: disabled, dynamically
> > > > > > > detected, and assumed.
> > > > > > 
> > > > > > You mean, just add some comments to the makefile surrounding each
> > > > > > section or to some rst documentation?
> > > > > 
> > > > > Also, the code here doesn't build w/
> > > > > 	warning: invalid argument to '-march': '_zba_zbb_zicsr_zifencei_zihintpause'
> > > > > so there's something else wrong with TOOLCHAIN_HAS_ZBA :)
> > > > It is odd that this is missing 'rv64ima' or 'rv32ima' at the beginning of
> > > > this string. What configuration are you using that could cause that to
> > > > be left off?
> > > 
> > > I don't know, but that configuration is pretty pervasive. The patchwork
> > > CI blew up too & that is using kernel.org toolchains built by Arnd:
> > > https://mirrors.edge.kernel.org/pub/tools/crosstool/
> > > 
> > > > Compiling with defconfig automatically enables Zba and appears to not
> > > > cause this issue. I realized that I put the header definitions for
> > > > do_csum and csum_ipv6_magic in this patch instead of the next one so the
> > > > code will fail to compile from this but not due to march settings.
> > > > > 
> > > > > > 
> > > > > > > > Note that for fd & v, we add it to riscv-march-y, but then immediately
> > > > > > > > remove it again before passing to the compiler, only allow them in
> > > > > > > > AFLAGS:
> > > > > > > > 	# Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
> > > > > > > > 	# matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
> > > > > > > > 	KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
> > > > > > > > 
> > > > > > > > What am I missing?
> > > > > > > 
> > > > > > > FD and V both have state that can be saved lazily, so we can't let arbitrary
> > > > > > > code use them.  The extensions formerly known as B don't add state, so they
> > > > > > > are safe to flip on in arbitrary places (aside from the issues you pointed
> > > > > > > out above).
> > > > > > 
> > > > > > I probably went about this badly since you missed the point. I was
> > > > > > trying to point out that for anything other than the compressed
> > > > > > extensions in the block above that we only pass them in march to the
> > > > > > assembler, and not to the compiler, in contrast to this patch which just
> > > > > > always passes them. I should have pointed to how we handled the
> > > > > > in-kernel Zbb stuff & asked how this was any different, would probably
> > > > > > have been clearer.
> > > > > > 
> > > > I suppose it might be better if I submit these changes in a different
> > > > patch so we can have more discussion there. Zbb was previously only used
> > > > by assembly files (arch/riscv/lib/strcmp.S, arch/riscv/lib/strlen.S,
> > > > arch/riscv/lib/strncmp.S). I wanted to add them to the compiler so that
> > > > C programs could leverage these extensions. However, I neglected to
> > > > consider machines that compile the kernel with these extensions but have
> > > > cores without these extensions.
> > > 
> > > Less so cores, since we don't support heterogeneous stuff, and more so
> > > platforms that do not support the extensions. It's expected that the
> > > same kernel could in theory be used across a wide variety of systems.
> > > 
> > > > The purpose of using these extensions is
> > > > to save a couple of clock cycles, so if it is necessary to first
> > > > check if the extension is enabled it may not be worth it for these
> > > > functions.
> > > 
> > > That's still possible, it's what the alternatives mechanism exists for.
> > > During boot the codepaths are patched to use what works for a given
> > > machine, check out the code that makes use of Zbb or
> > > riscv_has_extension_[un]likely(). You'd need to do something like what the
> > > existing users of Zbb instructions do, with an alternative used to
> > > avoid the custom asm implementations when the hardware does not support
> > > them. (That's what the CONFIG_RISCV_ALTERNATIVE & CONFIG_AS_HAS_OPTION_ARCH
> > > options you made RISCV_ISA_ZBA & TOOLCHAIN_HAS_ZBA depend on are for).
> > 
> > I can see how to get this to work if I port this code into assembly and
> > write two different versions (one with Zbb and one without), but
> > I don't see how this would work in C. Unless I am mistaken, there would
> > need to be some sort of wrapper around the C code that told the compiler
> > to compile it multiple times for different extension combinations and
> > then use the riscv_has_extension_[un]likely() functions to determine
> > which version to use at runtime. Is this feasible?
> 
> IIRC, if you put all the code using Zbb etc. into a compilation unit of
> its own then you can set -march for that unit alone, but it may well just
> be easier to write a custom asm one for the Zbb case & use the C
> implementation from this patch for the non-Zbb case.
In include/linux/bitops.h there are implementations of rotations (i.e.
ror32, ror16). Do you think it would be an acceptable solution to move
those definitions into asm-generic and have riscv implement these
functions in assembly with Zbb and non-Zbb versions and use the
alternatives macro? Every other architecture would just use the
existing definitions but could implement their own if they wanted. The
code that would benefit from Zbb is a rotation.
diff mbox series

Patch

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 4c07b9189c86..8d7e475ca28d 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -507,6 +507,29 @@  config RISCV_ISA_V_DEFAULT_ENABLE
 
 	  If you don't know what to do here, say Y.
 
+config TOOLCHAIN_HAS_ZBA
+	bool
+	default y
+	depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zba)
+	depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zba)
+	depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900
+	depends on AS_HAS_OPTION_ARCH
+
+config RISCV_ISA_ZBA
+	bool "Zba extension support for bit manipulation instructions"
+	depends on TOOLCHAIN_HAS_ZBA
+	depends on MMU
+	depends on RISCV_ALTERNATIVE
+	default y
+	help
+	   Adds support to dynamically detect the presence of the ZBA
+	   extension (basic bit manipulation) and enable its usage.
+
+	   The Zba extension provides instructions to accelerate a number
+	   of bit-specific address creation operations.
+
+	   If you don't know what to do here, say Y.
+
 config TOOLCHAIN_HAS_ZBB
 	bool
 	default y
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 6ec6d52a4180..51fa3f67fc9a 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -61,6 +61,8 @@  riscv-march-$(CONFIG_ARCH_RV64I)	:= rv64ima
 riscv-march-$(CONFIG_FPU)		:= $(riscv-march-y)fd
 riscv-march-$(CONFIG_RISCV_ISA_C)	:= $(riscv-march-y)c
 riscv-march-$(CONFIG_RISCV_ISA_V)	:= $(riscv-march-y)v
+riscv-march-$(CONFIG_RISCV_ISA_ZBA)	:= $(riscv-march-y)_zba
+riscv-march-$(CONFIG_RISCV_ISA_ZBB)	:= $(riscv-march-y)_zbb
 
 ifdef CONFIG_TOOLCHAIN_NEEDS_OLD_ISA_SPEC
 KBUILD_CFLAGS += -Wa,-misa-spec=2.2
diff --git a/arch/riscv/include/asm/checksum.h b/arch/riscv/include/asm/checksum.h
new file mode 100644
index 000000000000..cd98f8cde888
--- /dev/null
+++ b/arch/riscv/include/asm/checksum.h
@@ -0,0 +1,86 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * IP checksum routines
+ *
+ * Copyright (C) 2023 Rivos Inc.
+ */
+#ifndef __ASM_RISCV_CHECKSUM_H
+#define __ASM_RISCV_CHECKSUM_H
+
+#include <linux/in6.h>
+#include <linux/uaccess.h>
+
+/* Default version is sufficient for 32 bit */
+#ifdef CONFIG_64BIT
+#define _HAVE_ARCH_IPV6_CSUM
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+			const struct in6_addr *daddr,
+			__u32 len, __u8 proto, __wsum sum);
+#endif
+
+/*
+ *	Fold a partial checksum without adding pseudo headers
+ */
+static inline __sum16 csum_fold(__wsum sum)
+{
+	sum += (sum >> 16) | (sum << 16);
+	return (__force __sum16)(~(sum >> 16));
+}
+
+#define csum_fold csum_fold
+
+/*
+ *	This is a version of ip_compute_csum() optimized for IP headers,
+ *	which always checksum on 4 octet boundaries.
+ *	Optimized for 32 and 64 bit platforms, with and without vector, with and
+ *	without the bitmanip extensions zba/zbb.
+ */
+#ifdef CONFIG_32BIT
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+	__wsum csum = 0;
+	int pos = 0;
+
+	do {
+		csum += ((const __wsum *)iph)[pos];
+		csum += csum < ((const __wsum *)iph)[pos];
+	} while (++pos < ihl);
+	return csum_fold(csum);
+}
+#else
+
+/*
+ * Quickly compute an IP checksum with the assumption that IPv4 headers will
+ * always be in multiples of 32-bits, and have an ihl of at least 5.
+ * @ihl is the number of 32 bit segments and must be greater than or equal to 5.
+ * @iph is also assumed to be word aligned.
+ */
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+	unsigned long beginning;
+	unsigned long csum = 0;
+
+	beginning = ((const unsigned long *)iph)[0];
+	beginning += ((const unsigned long *)iph)[1];
+	beginning += beginning < ((const unsigned long *)iph)[1];
+	int pos = 4;
+
+	do {
+		csum += ((const unsigned int *)iph)[pos];
+	} while (++pos < ihl);
+	csum += beginning;
+	csum += csum < beginning;
+	csum += (csum >> 32) | (csum << 32); // Calculate overflow
+	return csum_fold((__force __wsum)(csum >> 32));
+}
+#endif
+#define ip_fast_csum ip_fast_csum
+
+#ifdef CONFIG_64BIT
+extern unsigned int do_csum(const unsigned char *buff, int len);
+#define do_csum do_csum
+#endif
+
+#include <asm-generic/checksum.h>
+
+#endif