diff mbox series

[v3,05/11] riscv: Implement arch_cmpxchg128() using Zacas

Message ID 20240717061957.140712-6-alexghiti@rivosinc.com (mailing list archive)
State Superseded
Headers show
Series Zacas/Zabha support and qspinlocks | expand

Checks

Context Check Description
conchuod/vmtest-fixes-PR fail PR summary
conchuod/patch-5-test-1 success .github/scripts/patches/tests/build_rv32_defconfig.sh
conchuod/patch-5-test-2 success .github/scripts/patches/tests/build_rv64_clang_allmodconfig.sh
conchuod/patch-5-test-3 success .github/scripts/patches/tests/build_rv64_gcc_allmodconfig.sh
conchuod/patch-5-test-4 success .github/scripts/patches/tests/build_rv64_nommu_k210_defconfig.sh
conchuod/patch-5-test-5 success .github/scripts/patches/tests/build_rv64_nommu_virt_defconfig.sh
conchuod/patch-5-test-6 success .github/scripts/patches/tests/checkpatch.sh
conchuod/patch-5-test-7 success .github/scripts/patches/tests/dtb_warn_rv64.sh
conchuod/patch-5-test-8 success .github/scripts/patches/tests/header_inline.sh
conchuod/patch-5-test-9 success .github/scripts/patches/tests/kdoc.sh
conchuod/patch-5-test-10 success .github/scripts/patches/tests/module_param.sh
conchuod/patch-5-test-11 success .github/scripts/patches/tests/verify_fixes.sh
conchuod/patch-5-test-12 success .github/scripts/patches/tests/verify_signedoff.sh

Commit Message

Alexandre Ghiti July 17, 2024, 6:19 a.m. UTC
Now that Zacas is supported in the kernel, let's use the double word
atomic version of amocas to improve the SLUB allocator.

Note that we have to select fixed registers, otherwise gcc fails to pick
even registers and then produces a reserved encoding which fails to
assemble.

Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
 arch/riscv/Kconfig               |  1 +
 arch/riscv/include/asm/cmpxchg.h | 39 ++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

Comments

Andrew Jones July 17, 2024, 8:34 p.m. UTC | #1
On Wed, Jul 17, 2024 at 08:19:51AM GMT, Alexandre Ghiti wrote:
> Now that Zacas is supported in the kernel, let's use the double word
> atomic version of amocas to improve the SLUB allocator.
> 
> Note that we have to select fixed registers, otherwise gcc fails to pick
> even registers and then produces a reserved encoding which fails to
> assemble.

Oh, that's quite unfortunate... I guess we should try to get some new
RISC-V inline assembly register constraints added to support register
pairs.

> 
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> ---
>  arch/riscv/Kconfig               |  1 +
>  arch/riscv/include/asm/cmpxchg.h | 39 ++++++++++++++++++++++++++++++++
>  2 files changed, 40 insertions(+)
> 
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index d3b0f92f92da..0bbaec0444d0 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -104,6 +104,7 @@ config RISCV
>  	select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
>  	select HARDIRQS_SW_RESEND
>  	select HAS_IOPORT if MMU
> +	select HAVE_ALIGNED_STRUCT_PAGE
>  	select HAVE_ARCH_AUDITSYSCALL
>  	select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
>  	select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT
> diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> index 97b24da38897..608d98522557 100644
> --- a/arch/riscv/include/asm/cmpxchg.h
> +++ b/arch/riscv/include/asm/cmpxchg.h
> @@ -289,4 +289,43 @@ end:;									\
>  	arch_cmpxchg_release((ptr), (o), (n));				\
>  })
>  
> +#ifdef CONFIG_RISCV_ISA_ZACAS

This is also 64-bit only, so needs a CONFIG_64BIT check too.

> +
> +#define system_has_cmpxchg128()						\
> +			riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)

nit: let's let this stick out since we have 100 chars

> +
> +union __u128_halves {
> +	u128 full;
> +	struct {
> +		u64 low, high;

Should we consider big endian too?

> +	};
> +};
> +
> +#define __arch_cmpxchg128(p, o, n, cas_sfx)					\
> +({										\
> +	__typeof__(*(p)) __o = (o);						\
> +	union __u128_halves __hn = { .full = (n) };				\
> +	union __u128_halves __ho = { .full = (__o) };				\
> +	register unsigned long x6 asm ("x6") = __hn.low;			\
> +	register unsigned long x7 asm ("x7") = __hn.high;			\
> +	register unsigned long x28 asm ("x28") = __ho.low;			\
> +	register unsigned long x29 asm ("x29") = __ho.high;			\

Can we use t1,t2,t3,t4 rather than the x names?

> +										\
> +	__asm__ __volatile__ (							\
> +		"	amocas.q" cas_sfx " %0, %z3, %2"			\
> +		: "+&r" (x28), "+&r" (x29), "+A" (*(p))				\
> +		: "rJ" (x6), "rJ" (x7)						\
> +		: "memory");							\
> +										\
> +	((u128)x29 << 64) | x28;						\
> +})
> +
> +#define arch_cmpxchg128(ptr, o, n)						\
> +	__arch_cmpxchg128((ptr), (o), (n), ".aqrl")
> +
> +#define arch_cmpxchg128_local(ptr, o, n)					\
> +	__arch_cmpxchg128((ptr), (o), (n), "")
> +
> +#endif /* CONFIG_RISCV_ISA_ZACAS */
> +
>  #endif /* _ASM_RISCV_CMPXCHG_H */
> -- 
> 2.39.2

Thanks,
drew
Alexandre Ghiti July 18, 2024, 7:48 a.m. UTC | #2
Hi Drew,

On Wed, Jul 17, 2024 at 10:34 PM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Wed, Jul 17, 2024 at 08:19:51AM GMT, Alexandre Ghiti wrote:
> > Now that Zacas is supported in the kernel, let's use the double word
> > atomic version of amocas to improve the SLUB allocator.
> >
> > Note that we have to select fixed registers, otherwise gcc fails to pick
> > even registers and then produces a reserved encoding which fails to
> > assemble.
>
> Oh, that's quite unfortunate... I guess we should try to get some new
> RISC-V inline assembly register constraints added to support register
> pairs.

I internally informed the compilers people, I'll check their progress.

>
> >
> > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > ---
> >  arch/riscv/Kconfig               |  1 +
> >  arch/riscv/include/asm/cmpxchg.h | 39 ++++++++++++++++++++++++++++++++
> >  2 files changed, 40 insertions(+)
> >
> > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > index d3b0f92f92da..0bbaec0444d0 100644
> > --- a/arch/riscv/Kconfig
> > +++ b/arch/riscv/Kconfig
> > @@ -104,6 +104,7 @@ config RISCV
> >       select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
> >       select HARDIRQS_SW_RESEND
> >       select HAS_IOPORT if MMU
> > +     select HAVE_ALIGNED_STRUCT_PAGE
> >       select HAVE_ARCH_AUDITSYSCALL
> >       select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
> >       select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT
> > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> > index 97b24da38897..608d98522557 100644
> > --- a/arch/riscv/include/asm/cmpxchg.h
> > +++ b/arch/riscv/include/asm/cmpxchg.h
> > @@ -289,4 +289,43 @@ end:;                                                                    \
> >       arch_cmpxchg_release((ptr), (o), (n));                          \
> >  })
> >
> > +#ifdef CONFIG_RISCV_ISA_ZACAS
>
> This is also 64-bit only, so needs a CONFIG_64BIT check too.

Yep, thanks

>
> > +
> > +#define system_has_cmpxchg128()                                              \
> > +                     riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)
>
> nit: let's let this stick out since we have 100 chars

Ok

>
> > +
> > +union __u128_halves {
> > +     u128 full;
> > +     struct {
> > +             u64 low, high;
>
> Should we consider big endian too?

Should we care about big endian? We don't deal with big endian
anywhere in our kernel right now.

>
> > +     };
> > +};
> > +
> > +#define __arch_cmpxchg128(p, o, n, cas_sfx)                                  \
> > +({                                                                           \
> > +     __typeof__(*(p)) __o = (o);                                             \
> > +     union __u128_halves __hn = { .full = (n) };                             \
> > +     union __u128_halves __ho = { .full = (__o) };                           \
> > +     register unsigned long x6 asm ("x6") = __hn.low;                        \
> > +     register unsigned long x7 asm ("x7") = __hn.high;                       \
> > +     register unsigned long x28 asm ("x28") = __ho.low;                      \
> > +     register unsigned long x29 asm ("x29") = __ho.high;                     \
>
> Can we use t1,t2,t3,t4 rather than the x names?

We can, I did not because it was a bit misleading in the sense that
amocas expects an *even* register and using the tX registers, we'll
pass an *odd* register (which actually is even but still).

Anyway, I'll change that, I don't like the xX notation.

Thanks for the review,

Alex

>
> > +                                                                             \
> > +     __asm__ __volatile__ (                                                  \
> > +             "       amocas.q" cas_sfx " %0, %z3, %2"                        \
> > +             : "+&r" (x28), "+&r" (x29), "+A" (*(p))                         \
> > +             : "rJ" (x6), "rJ" (x7)                                          \
> > +             : "memory");                                                    \
> > +                                                                             \
> > +     ((u128)x29 << 64) | x28;                                                \
> > +})
> > +
> > +#define arch_cmpxchg128(ptr, o, n)                                           \
> > +     __arch_cmpxchg128((ptr), (o), (n), ".aqrl")
> > +
> > +#define arch_cmpxchg128_local(ptr, o, n)                                     \
> > +     __arch_cmpxchg128((ptr), (o), (n), "")
> > +
> > +#endif /* CONFIG_RISCV_ISA_ZACAS */
> > +
> >  #endif /* _ASM_RISCV_CMPXCHG_H */
> > --
> > 2.39.2
>
> Thanks,
> drew
Conor Dooley July 18, 2024, 8:33 a.m. UTC | #3
On Thu, Jul 18, 2024 at 09:48:42AM +0200, Alexandre Ghiti wrote:
> On Wed, Jul 17, 2024 at 10:34 PM Andrew Jones <ajones@ventanamicro.com> wrote:
> > On Wed, Jul 17, 2024 at 08:19:51AM GMT, Alexandre Ghiti wrote:
> > > +
> > > +union __u128_halves {
> > > +     u128 full;
> > > +     struct {
> > > +             u64 low, high;
> >
> > Should we consider big endian too?
> 
> Should we care about big endian? We don't deal with big endian
> anywhere in our kernel right now.

There's one or two places I think that we do actually have some
conditional stuff for BE. The Zbb string routines I believe is one such
place, and maybe there are one or two others. In general I'm not of the
opinion that it is worth adding complexity for BE until there's
linux-capable hardware that supports it (so not QEMU or people's toy
implementations), unless it's something that userspace is able to see.
Arnd Bergmann July 18, 2024, 9:35 a.m. UTC | #4
On Thu, Jul 18, 2024, at 10:33, Conor Dooley wrote:
> On Thu, Jul 18, 2024 at 09:48:42AM +0200, Alexandre Ghiti wrote:
>> On Wed, Jul 17, 2024 at 10:34 PM Andrew Jones <ajones@ventanamicro.com> wrote:
>> > On Wed, Jul 17, 2024 at 08:19:51AM GMT, Alexandre Ghiti wrote:
>> > > +
>> > > +union __u128_halves {
>> > > +     u128 full;
>> > > +     struct {
>> > > +             u64 low, high;
>> >
>> > Should we consider big endian too?
>> 
>> Should we care about big endian? We don't deal with big endian
>> anywhere in our kernel right now.
>
> There's one or two places I think that we do actually have some
> conditional stuff for BE. The Zbb string routines I believe is one such
> place, and maybe there are one or two others. In general I'm not of the
> opinion that it is worth adding complexity for BE until there's
> linux-capable hardware that supports it (so not QEMU or people's toy
> implementations), unless it's something that userspace is able to see.

I don't think you want to go there at all: maintaining an
extra user space ABI (or two if you add 32-bit BE as well)
has a huge long-term cost, and there is pretty much zero
benefit for a BE ABI these days.

Adding it to arm64 turned out to be a mistake. We did have
a handful of users in the first year, and it technically
still works, but I don't think there are any users left
after they managed to fix their nonportable legacy
userspace from that was ported from big-endian mips or
powerpc.

     Arnd
diff mbox series

Patch

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index d3b0f92f92da..0bbaec0444d0 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -104,6 +104,7 @@  config RISCV
 	select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
 	select HARDIRQS_SW_RESEND
 	select HAS_IOPORT if MMU
+	select HAVE_ALIGNED_STRUCT_PAGE
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
 	select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 97b24da38897..608d98522557 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -289,4 +289,43 @@  end:;									\
 	arch_cmpxchg_release((ptr), (o), (n));				\
 })
 
+#ifdef CONFIG_RISCV_ISA_ZACAS
+
+#define system_has_cmpxchg128()						\
+			riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)
+
+union __u128_halves {
+	u128 full;
+	struct {
+		u64 low, high;
+	};
+};
+
+#define __arch_cmpxchg128(p, o, n, cas_sfx)					\
+({										\
+	__typeof__(*(p)) __o = (o);						\
+	union __u128_halves __hn = { .full = (n) };				\
+	union __u128_halves __ho = { .full = (__o) };				\
+	register unsigned long x6 asm ("x6") = __hn.low;			\
+	register unsigned long x7 asm ("x7") = __hn.high;			\
+	register unsigned long x28 asm ("x28") = __ho.low;			\
+	register unsigned long x29 asm ("x29") = __ho.high;			\
+										\
+	__asm__ __volatile__ (							\
+		"	amocas.q" cas_sfx " %0, %z3, %2"			\
+		: "+&r" (x28), "+&r" (x29), "+A" (*(p))				\
+		: "rJ" (x6), "rJ" (x7)						\
+		: "memory");							\
+										\
+	((u128)x29 << 64) | x28;						\
+})
+
+#define arch_cmpxchg128(ptr, o, n)						\
+	__arch_cmpxchg128((ptr), (o), (n), ".aqrl")
+
+#define arch_cmpxchg128_local(ptr, o, n)					\
+	__arch_cmpxchg128((ptr), (o), (n), "")
+
+#endif /* CONFIG_RISCV_ISA_ZACAS */
+
 #endif /* _ASM_RISCV_CMPXCHG_H */