diff mbox series

[v2,3/6] riscv: Add Zawrs support for spinlocks

Message ID 20240419135321.70781-11-ajones@ventanamicro.com (mailing list archive)
State Superseded
Headers show
Series riscv: Apply Zawrs when available | expand

Checks

Context Check Description
conchuod/vmtest-for-next-PR success PR summary
conchuod/patch-3-test-1 success .github/scripts/patches/tests/build_rv32_defconfig.sh
conchuod/patch-3-test-2 success .github/scripts/patches/tests/build_rv64_clang_allmodconfig.sh
conchuod/patch-3-test-3 success .github/scripts/patches/tests/build_rv64_gcc_allmodconfig.sh
conchuod/patch-3-test-4 success .github/scripts/patches/tests/build_rv64_nommu_k210_defconfig.sh
conchuod/patch-3-test-5 success .github/scripts/patches/tests/build_rv64_nommu_virt_defconfig.sh
conchuod/patch-3-test-6 warning .github/scripts/patches/tests/checkpatch.sh
conchuod/patch-3-test-7 success .github/scripts/patches/tests/dtb_warn_rv64.sh
conchuod/patch-3-test-8 success .github/scripts/patches/tests/header_inline.sh
conchuod/patch-3-test-9 success .github/scripts/patches/tests/kdoc.sh
conchuod/patch-3-test-10 success .github/scripts/patches/tests/module_param.sh
conchuod/patch-3-test-11 success .github/scripts/patches/tests/verify_fixes.sh
conchuod/patch-3-test-12 success .github/scripts/patches/tests/verify_signedoff.sh

Commit Message

Andrew Jones April 19, 2024, 1:53 p.m. UTC
From: Christoph Müllner <christoph.muellner@vrull.eu>

RISC-V code uses the generic ticket lock implementation, which calls
the macros smp_cond_load_relaxed() and smp_cond_load_acquire().
Introduce a RISC-V specific implementation of smp_cond_load_relaxed()
which applies WRS.NTO of the Zawrs extension in order to reduce power
consumption while waiting and allows hypervisors to enable guests to
trap while waiting. smp_cond_load_acquire() doesn't need a RISC-V
specific implementation as the generic implementation is based on
smp_cond_load_relaxed() and smp_acquire__after_ctrl_dep() sufficiently
provides the acquire semantics.

This implementation is heavily based on Arm's approach which is the
approach Andrea Parri also suggested.

The Zawrs specification can be found here:
https://github.com/riscv/riscv-zawrs/blob/main/zawrs.adoc

Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
Co-developed-by: Andrew Jones <ajones@ventanamicro.com>
Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
---
 arch/riscv/Kconfig                | 13 ++++++++
 arch/riscv/include/asm/barrier.h  | 45 ++++++++++++++++++---------
 arch/riscv/include/asm/cmpxchg.h  | 51 +++++++++++++++++++++++++++++++
 arch/riscv/include/asm/hwcap.h    |  1 +
 arch/riscv/include/asm/insn-def.h |  2 ++
 arch/riscv/kernel/cpufeature.c    |  1 +
 6 files changed, 98 insertions(+), 15 deletions(-)

Comments

Conor Dooley April 19, 2024, 3:22 p.m. UTC | #1
On Fri, Apr 19, 2024 at 03:53:25PM +0200, Andrew Jones wrote:

> +config RISCV_ISA_ZAWRS
> +	bool "Zawrs extension support for more efficient busy waiting"
> +	depends on RISCV_ALTERNATIVE
> +	default y
> +	help
> +	  The Zawrs extension defines instructions to be used in polling loops
> +	  which allow a hart to enter a low-power state or to trap to the
> +	  hypervisor while waiting on a store to a memory location. Enable the
> +	  use of these instructions in the kernel when the Zawrs extension is
> +	  detected at boot.

Ignoring the rest of the patch, and focusing on the bit relevant to our
other conversation, I think this description satisfies what I was trying
to do with the other options in terms of being clear about what exactly
it does.
Andrea Parri April 21, 2024, 9:16 p.m. UTC | #2
On Fri, Apr 19, 2024 at 03:53:25PM +0200, Andrew Jones wrote:
> From: Christoph M??llner <christoph.muellner@vrull.eu>
> 
> RISC-V code uses the generic ticket lock implementation, which calls
> the macros smp_cond_load_relaxed() and smp_cond_load_acquire().
> Introduce a RISC-V specific implementation of smp_cond_load_relaxed()
> which applies WRS.NTO of the Zawrs extension in order to reduce power
> consumption while waiting and allows hypervisors to enable guests to
> trap while waiting. smp_cond_load_acquire() doesn't need a RISC-V
> specific implementation as the generic implementation is based on
> smp_cond_load_relaxed() and smp_acquire__after_ctrl_dep() sufficiently
> provides the acquire semantics.
> 
> This implementation is heavily based on Arm's approach which is the
> approach Andrea Parri also suggested.
> 
> The Zawrs specification can be found here:
> https://github.com/riscv/riscv-zawrs/blob/main/zawrs.adoc
> 
> Signed-off-by: Christoph M??llner <christoph.muellner@vrull.eu>
> Co-developed-by: Andrew Jones <ajones@ventanamicro.com>
> Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
> ---
>  arch/riscv/Kconfig                | 13 ++++++++
>  arch/riscv/include/asm/barrier.h  | 45 ++++++++++++++++++---------
>  arch/riscv/include/asm/cmpxchg.h  | 51 +++++++++++++++++++++++++++++++
>  arch/riscv/include/asm/hwcap.h    |  1 +
>  arch/riscv/include/asm/insn-def.h |  2 ++
>  arch/riscv/kernel/cpufeature.c    |  1 +
>  6 files changed, 98 insertions(+), 15 deletions(-)

Doesn't apply to riscv/for-next (due to, AFAIU,

  https://lore.kernel.org/all/171275883330.18495.10110341843571163280.git-patchwork-notify@kernel.org/ ).

But other than that, this LGTM.  One nit below.


> -#define __smp_store_release(p, v)					\
> -do {									\
> -	compiletime_assert_atomic_type(*p);				\
> -	RISCV_FENCE(rw, w);						\
> -	WRITE_ONCE(*p, v);						\
> -} while (0)
> -
> -#define __smp_load_acquire(p)						\
> -({									\
> -	typeof(*p) ___p1 = READ_ONCE(*p);				\
> -	compiletime_assert_atomic_type(*p);				\
> -	RISCV_FENCE(r, rw);						\
> -	___p1;								\
> -})
> -
>  /*
>   * This is a very specific barrier: it's currently only used in two places in
>   * the kernel, both in the scheduler.  See include/linux/spinlock.h for the two
> @@ -70,6 +56,35 @@ do {									\
>   */
>  #define smp_mb__after_spinlock()	RISCV_FENCE(iorw, iorw)
>  
> +#define __smp_store_release(p, v)					\
> +do {									\
> +	compiletime_assert_atomic_type(*p);				\
> +	RISCV_FENCE(rw, w);						\
> +	WRITE_ONCE(*p, v);						\
> +} while (0)
> +
> +#define __smp_load_acquire(p)						\
> +({									\
> +	typeof(*p) ___p1 = READ_ONCE(*p);				\
> +	compiletime_assert_atomic_type(*p);				\
> +	RISCV_FENCE(r, rw);						\
> +	___p1;								\
> +})

Unrelated/unmotivated changes.

  Andrea
Andrew Jones April 22, 2024, 8:36 a.m. UTC | #3
On Sun, Apr 21, 2024 at 11:16:47PM +0200, Andrea Parri wrote:
> On Fri, Apr 19, 2024 at 03:53:25PM +0200, Andrew Jones wrote:
> > From: Christoph M??llner <christoph.muellner@vrull.eu>
> > 
> > RISC-V code uses the generic ticket lock implementation, which calls
> > the macros smp_cond_load_relaxed() and smp_cond_load_acquire().
> > Introduce a RISC-V specific implementation of smp_cond_load_relaxed()
> > which applies WRS.NTO of the Zawrs extension in order to reduce power
> > consumption while waiting and allows hypervisors to enable guests to
> > trap while waiting. smp_cond_load_acquire() doesn't need a RISC-V
> > specific implementation as the generic implementation is based on
> > smp_cond_load_relaxed() and smp_acquire__after_ctrl_dep() sufficiently
> > provides the acquire semantics.
> > 
> > This implementation is heavily based on Arm's approach which is the
> > approach Andrea Parri also suggested.
> > 
> > The Zawrs specification can be found here:
> > https://github.com/riscv/riscv-zawrs/blob/main/zawrs.adoc
> > 
> > Signed-off-by: Christoph M??llner <christoph.muellner@vrull.eu>
> > Co-developed-by: Andrew Jones <ajones@ventanamicro.com>
> > Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
> > ---
> >  arch/riscv/Kconfig                | 13 ++++++++
> >  arch/riscv/include/asm/barrier.h  | 45 ++++++++++++++++++---------
> >  arch/riscv/include/asm/cmpxchg.h  | 51 +++++++++++++++++++++++++++++++
> >  arch/riscv/include/asm/hwcap.h    |  1 +
> >  arch/riscv/include/asm/insn-def.h |  2 ++
> >  arch/riscv/kernel/cpufeature.c    |  1 +
> >  6 files changed, 98 insertions(+), 15 deletions(-)
> 
> Doesn't apply to riscv/for-next (due to, AFAIU,
> 
>   https://lore.kernel.org/all/171275883330.18495.10110341843571163280.git-patchwork-notify@kernel.org/ ).

I based it on -rc1. We recently discussed what we should base on, but
I couldn't recall the final decision, so I fell back to the old approach.
I can rebase on for-next or the latest rc if that's the new, improved
approach.

> 
> But other than that, this LGTM.  One nit below.
> 
> 
> > -#define __smp_store_release(p, v)					\
> > -do {									\
> > -	compiletime_assert_atomic_type(*p);				\
> > -	RISCV_FENCE(rw, w);						\
> > -	WRITE_ONCE(*p, v);						\
> > -} while (0)
> > -
> > -#define __smp_load_acquire(p)						\
> > -({									\
> > -	typeof(*p) ___p1 = READ_ONCE(*p);				\
> > -	compiletime_assert_atomic_type(*p);				\
> > -	RISCV_FENCE(r, rw);						\
> > -	___p1;								\
> > -})
> > -
> >  /*
> >   * This is a very specific barrier: it's currently only used in two places in
> >   * the kernel, both in the scheduler.  See include/linux/spinlock.h for the two
> > @@ -70,6 +56,35 @@ do {									\
> >   */
> >  #define smp_mb__after_spinlock()	RISCV_FENCE(iorw, iorw)
> >  
> > +#define __smp_store_release(p, v)					\
> > +do {									\
> > +	compiletime_assert_atomic_type(*p);				\
> > +	RISCV_FENCE(rw, w);						\
> > +	WRITE_ONCE(*p, v);						\
> > +} while (0)
> > +
> > +#define __smp_load_acquire(p)						\
> > +({									\
> > +	typeof(*p) ___p1 = READ_ONCE(*p);				\
> > +	compiletime_assert_atomic_type(*p);				\
> > +	RISCV_FENCE(r, rw);						\
> > +	___p1;								\
> > +})
> 
> Unrelated/unmotivated changes.

The relation/motivation was to get the load/store macros in one part of
the file with the barrier macros in another. With this change we have

 __mb
 __rmb
 __wmb

 __smp_mb
 __smp_rmb
 __smp_wmb

 smp_mb__after_spinlock

 __smp_store_release
 __smp_load_acquire
 smp_cond_load_relaxed

Without the change, smp_mb__after_spinlock is either after all the
load/stores or in between them.

I didn't think the reorganization was worth its own patch, but I could
split it out (or just drop it).

Thanks,
drew
diff mbox series

Patch

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 7427d8088337..34bbe6b70546 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -578,6 +578,19 @@  config RISCV_ISA_V_PREEMPTIVE
 	  preemption. Enabling this config will result in higher memory
 	  consumption due to the allocation of per-task's kernel Vector context.
 
+config RISCV_ISA_ZAWRS
+	bool "Zawrs extension support for more efficient busy waiting"
+	depends on RISCV_ALTERNATIVE
+	default y
+	help
+	  The Zawrs extension defines instructions to be used in polling loops
+	  which allow a hart to enter a low-power state or to trap to the
+	  hypervisor while waiting on a store to a memory location. Enable the
+	  use of these instructions in the kernel when the Zawrs extension is
+	  detected at boot.
+
+	  If you don't know what to do here, say Y.
+
 config TOOLCHAIN_HAS_ZBB
 	bool
 	default y
diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h
index 880b56d8480d..e1d9bf1deca6 100644
--- a/arch/riscv/include/asm/barrier.h
+++ b/arch/riscv/include/asm/barrier.h
@@ -11,6 +11,7 @@ 
 #define _ASM_RISCV_BARRIER_H
 
 #ifndef __ASSEMBLY__
+#include <asm/cmpxchg.h>
 #include <asm/fence.h>
 
 #define nop()		__asm__ __volatile__ ("nop")
@@ -28,21 +29,6 @@ 
 #define __smp_rmb()	RISCV_FENCE(r, r)
 #define __smp_wmb()	RISCV_FENCE(w, w)
 
-#define __smp_store_release(p, v)					\
-do {									\
-	compiletime_assert_atomic_type(*p);				\
-	RISCV_FENCE(rw, w);						\
-	WRITE_ONCE(*p, v);						\
-} while (0)
-
-#define __smp_load_acquire(p)						\
-({									\
-	typeof(*p) ___p1 = READ_ONCE(*p);				\
-	compiletime_assert_atomic_type(*p);				\
-	RISCV_FENCE(r, rw);						\
-	___p1;								\
-})
-
 /*
  * This is a very specific barrier: it's currently only used in two places in
  * the kernel, both in the scheduler.  See include/linux/spinlock.h for the two
@@ -70,6 +56,35 @@  do {									\
  */
 #define smp_mb__after_spinlock()	RISCV_FENCE(iorw, iorw)
 
+#define __smp_store_release(p, v)					\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	RISCV_FENCE(rw, w);						\
+	WRITE_ONCE(*p, v);						\
+} while (0)
+
+#define __smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	RISCV_FENCE(r, rw);						\
+	___p1;								\
+})
+
+#ifdef CONFIG_RISCV_ISA_ZAWRS
+#define smp_cond_load_relaxed(ptr, cond_expr) ({			\
+	typeof(ptr) __PTR = (ptr);					\
+	__unqual_scalar_typeof(*ptr) VAL;				\
+	for (;;) {							\
+		VAL = READ_ONCE(*__PTR);				\
+		if (cond_expr)						\
+			break;						\
+		__cmpwait_relaxed(ptr, VAL);				\
+	}								\
+	(typeof(*ptr))VAL;						\
+})
+#endif
+
 #include <asm-generic/barrier.h>
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 2fee65cc8443..0926ac7f4ca6 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -8,7 +8,10 @@ 
 
 #include <linux/bug.h>
 
+#include <asm/alternative-macros.h>
 #include <asm/fence.h>
+#include <asm/hwcap.h>
+#include <asm/insn-def.h>
 
 #define __xchg_relaxed(ptr, new, size)					\
 ({									\
@@ -359,4 +362,52 @@ 
 	arch_cmpxchg_relaxed((ptr), (o), (n));				\
 })
 
+#ifdef CONFIG_RISCV_ISA_ZAWRS
+static __always_inline void __cmpwait(volatile void *ptr,
+				      unsigned long val,
+				      int size)
+{
+	unsigned long tmp;
+
+	asm goto(ALTERNATIVE("j %l[no_zawrs]", "nop",
+			     0, RISCV_ISA_EXT_ZAWRS, 1)
+		 : : : : no_zawrs);
+
+	switch (size) {
+	case 4:
+		asm volatile(
+		"	lr.w	%0, %1\n"
+		"	xor	%0, %0, %2\n"
+		"	bnez	%0, 1f\n"
+			ZAWRS_WRS_NTO "\n"
+		"1:"
+		: "=&r" (tmp), "+A" (*(u32 *)ptr)
+		: "r" (val));
+		break;
+#if __riscv_xlen == 64
+	case 8:
+		asm volatile(
+		"	lr.d	%0, %1\n"
+		"	xor	%0, %0, %2\n"
+		"	bnez	%0, 1f\n"
+			ZAWRS_WRS_NTO "\n"
+		"1:"
+		: "=&r" (tmp), "+A" (*(u64 *)ptr)
+		: "r" (val));
+		break;
+#endif
+	default:
+		BUILD_BUG();
+	}
+
+	return;
+
+no_zawrs:
+	asm volatile(RISCV_PAUSE : : : "memory");
+}
+
+#define __cmpwait_relaxed(ptr, val) \
+	__cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
+#endif
+
 #endif /* _ASM_RISCV_CMPXCHG_H */
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index e17d0078a651..5b358c3cf212 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -81,6 +81,7 @@ 
 #define RISCV_ISA_EXT_ZTSO		72
 #define RISCV_ISA_EXT_ZACAS		73
 #define RISCV_ISA_EXT_XANDESPMU		74
+#define RISCV_ISA_EXT_ZAWRS		75
 
 #define RISCV_ISA_EXT_XLINUXENVCFG	127
 
diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h
index 64dffaa21bfa..9a913010cdd9 100644
--- a/arch/riscv/include/asm/insn-def.h
+++ b/arch/riscv/include/asm/insn-def.h
@@ -197,5 +197,7 @@ 
 	       RS1(base), SIMM12(4))
 
 #define RISCV_PAUSE	".4byte 0x100000f"
+#define ZAWRS_WRS_NTO	".4byte 0x00d00073"
+#define ZAWRS_WRS_STO	".4byte 0x01d00073"
 
 #endif /* __ASM_INSN_DEF_H */
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 3ed2359eae35..02de9eaa3f42 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -257,6 +257,7 @@  const struct riscv_isa_ext_data riscv_isa_ext[] = {
 	__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
 	__RISCV_ISA_EXT_DATA(zihpm, RISCV_ISA_EXT_ZIHPM),
 	__RISCV_ISA_EXT_DATA(zacas, RISCV_ISA_EXT_ZACAS),
+	__RISCV_ISA_EXT_DATA(zawrs, RISCV_ISA_EXT_ZAWRS),
 	__RISCV_ISA_EXT_DATA(zfa, RISCV_ISA_EXT_ZFA),
 	__RISCV_ISA_EXT_DATA(zfh, RISCV_ISA_EXT_ZFH),
 	__RISCV_ISA_EXT_DATA(zfhmin, RISCV_ISA_EXT_ZFHMIN),