
riscv: Rewrite AMO instructions via lr and sc.

Message ID 20241129144319.74257-1-arikalo@gmail.com (mailing list archive)
State New
Series riscv: Rewrite AMO instructions via lr and sc.

Commit Message

Aleksandar Rikalo Nov. 29, 2024, 2:43 p.m. UTC
From: Chao-ying Fu <cfu@mips.com>

Use lr and sc to implement all atomic functions. Some CPUs have
native support for lr and sc, but emulate AMO instructions through
trap handlers that are slow.

Add config RISCV_ISA_ZALRSC_ONLY.

Signed-off-by: Chao-ying Fu <cfu@mips.com>
Signed-off-by: Aleksandar Rikalo <arikalo@gmail.com>
---
 arch/riscv/Kconfig               | 10 ++++++
 arch/riscv/include/asm/atomic.h  | 52 +++++++++++++++++++++++++++++++-
 arch/riscv/include/asm/bitops.h  | 45 +++++++++++++++++++++++++++
 arch/riscv/include/asm/cmpxchg.h | 16 ++++++++++
 arch/riscv/include/asm/futex.h   | 46 ++++++++++++++++++++++++++++
 5 files changed, 168 insertions(+), 1 deletion(-)
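
For illustration, the lr/sc form of ATOMIC_OP introduced by this patch
expands roughly as follows for the 32-bit add case. This is an
illustrative expansion based on the macro below, not code copied from
the patch; an lr/sc retry loop replaces the single AMO instruction.

	static __always_inline void arch_atomic_add(int i, atomic_t *v)
	{
		register int ret, temp;

		__asm__ __volatile__ (
			"1:	lr.w %1, %0\n"		/* ret = v->counter, acquire reservation */
			"	add %2, %1, %3\n"	/* temp = ret + i */
			"	sc.w %2, %2, %0\n"	/* try to store temp back */
			"	bnez %2, 1b\n"		/* retry if the reservation was lost */
			: "+A" (v->counter), "=&r" (ret), "=&r" (temp)
			: "r" (i)
			: "memory");
	}

The result is functionally equivalent to the amoadd.w form; the
trade-off is a multi-instruction sequence that may retry instead of a
single AMO instruction.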

Comments

Guo Ren Nov. 29, 2024, 3:12 p.m. UTC | #1
On Fri, Nov 29, 2024 at 10:43 PM Aleksandar Rikalo <arikalo@gmail.com> wrote:
>
> From: Chao-ying Fu <cfu@mips.com>
>
> Use lr and sc to implement all atomic functions. Some CPUs have
> native support for lr and sc, but emulate AMO instructions through
> trap handlers that are slow.
>
> Add config RISCV_ISA_ZALRSC_ONLY.
>
> Signed-off-by: Chao-ying Fu <cfu@mips.com>
> Signed-off-by: Aleksandar Rikalo <arikalo@gmail.com>
> ---
>  arch/riscv/Kconfig               | 10 ++++++
>  arch/riscv/include/asm/atomic.h  | 52 +++++++++++++++++++++++++++++++-
>  arch/riscv/include/asm/bitops.h  | 45 +++++++++++++++++++++++++++
>  arch/riscv/include/asm/cmpxchg.h | 16 ++++++++++
>  arch/riscv/include/asm/futex.h   | 46 ++++++++++++++++++++++++++++
>  5 files changed, 168 insertions(+), 1 deletion(-)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index cc63aef41e94..767538c27875 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -715,6 +715,16 @@ config RISCV_ISA_ZACAS
>
>           If you don't know what to do here, say Y.
>
> +config RISCV_ISA_ZALRSC_ONLY
> +       bool "Zalrsc extension support only"
> +       default n
> +       help
> +          Use lr and sc to build all atomic functions. Some CPUs have
> +          native support for lr and sc, but emulate amo instructions through
> +          trap handlers that are slow.
What's the actual hardware?
Is it emulated in m-mode?
Where is the code?

> +
> +          If you don't know what to do here, say n.
> +
>  config TOOLCHAIN_HAS_ZBB
>         bool
>         default y
> diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
> index 5b96c2f61adb..f484babecb9e 100644
> --- a/arch/riscv/include/asm/atomic.h
> +++ b/arch/riscv/include/asm/atomic.h
> @@ -50,6 +50,7 @@ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
>   * have the AQ or RL bits set.  These don't return anything, so there's only
>   * one version to worry about.
>   */
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
>  #define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix)             \
>  static __always_inline                                                 \
>  void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)       \
> @@ -59,7 +60,23 @@ void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)     \
>                 : "+A" (v->counter)                                     \
>                 : "r" (I)                                               \
>                 : "memory");                                            \
> -}                                                                      \
> +}
> +#else
> +#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix)             \
> +static __always_inline                                                 \
> +void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)       \
> +{                                                                      \
> +       register c_type ret, temp;                                      \
> +       __asm__ __volatile__ (                                          \
> +               "1:     lr." #asm_type " %1, %0\n"                      \
> +               "       " #asm_op " %2, %1, %3\n"                       \
> +               "       sc." #asm_type " %2, %2, %0\n"                  \
> +               "       bnez %2, 1b\n"                                  \
> +               : "+A" (v->counter), "=&r" (ret), "=&r" (temp)          \
> +               : "r" (I)                                               \
> +               : "memory");                                            \
> +}
> +#endif
>
>  #ifdef CONFIG_GENERIC_ATOMIC64
>  #define ATOMIC_OPS(op, asm_op, I)                                      \
> @@ -84,6 +101,7 @@ ATOMIC_OPS(xor, xor,  i)
>   * There's two flavors of these: the arithmatic ops have both fetch and return
>   * versions, while the logical ops only have fetch versions.
>   */
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
>  #define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix)       \
>  static __always_inline                                                 \
>  c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i,            \
> @@ -108,6 +126,38 @@ c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v)   \
>                 : "memory");                                            \
>         return ret;                                                     \
>  }
> +#else
> +#define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix)       \
> +static __always_inline                                                 \
> +c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i,            \
> +                                            atomic##prefix##_t *v)     \
> +{                                                                      \
> +       register c_type ret, temp;                                      \
> +       __asm__ __volatile__ (                                          \
> +               "1:     lr." #asm_type " %1, %0\n"                      \
> +               "       " #asm_op " %2, %1, %3\n"                       \
> +               "       sc." #asm_type " %2, %2, %0\n"                  \
> +               "       bnez %2, 1b\n"                                  \
> +               : "+A" (v->counter), "=&r" (ret), "=&r" (temp)          \
> +               : "r" (I)                                               \
> +               : "memory");                                            \
> +       return ret;                                                     \
> +}                                                                      \
> +static __always_inline                                                 \
> +c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v)       \
> +{                                                                      \
> +       register c_type ret, temp;                                      \
> +       __asm__ __volatile__ (                                          \
> +               "1:     lr." #asm_type ".aqrl %1, %0\n"                 \
> +               "       " #asm_op " %2, %1, %3\n"                       \
> +               "       sc." #asm_type ".aqrl %2, %2, %0\n"             \
> +               "       bnez %2, 1b\n"                                  \
> +               : "+A" (v->counter), "=&r" (ret), "=&r" (temp)          \
> +               : "r" (I)                                               \
> +               : "memory");                                            \
> +       return ret;                                                     \
> +}
> +#endif
>
>  #define ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_type, c_type, prefix)        \
>  static __always_inline                                                 \
> diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
> index fae152ea0508..b51cb18f7d9e 100644
> --- a/arch/riscv/include/asm/bitops.h
> +++ b/arch/riscv/include/asm/bitops.h
> @@ -187,12 +187,17 @@ static __always_inline int variable_fls(unsigned int x)
>
>  #if (BITS_PER_LONG == 64)
>  #define __AMO(op)      "amo" #op ".d"
> +#define __LR   "lr.d"
> +#define __SC   "sc.d"
>  #elif (BITS_PER_LONG == 32)
>  #define __AMO(op)      "amo" #op ".w"
> +#define __LR   "lr.w"
> +#define __SC   "sc.w"
>  #else
>  #error "Unexpected BITS_PER_LONG"
>  #endif
>
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
>  #define __test_and_op_bit_ord(op, mod, nr, addr, ord)          \
>  ({                                                             \
>         unsigned long __res, __mask;                            \
> @@ -211,6 +216,33 @@ static __always_inline int variable_fls(unsigned int x)
>                 : "+A" (addr[BIT_WORD(nr)])                     \
>                 : "r" (mod(BIT_MASK(nr)))                       \
>                 : "memory");
> +#else
> +#define __test_and_op_bit_ord(op, mod, nr, addr, ord)          \
> +({                                                             \
> +       unsigned long __res, __mask, __temp;                    \
> +       __mask = BIT_MASK(nr);                                  \
> +       __asm__ __volatile__ (                                  \
> +               "1: " __LR #ord " %0, %1\n"                     \
> +               #op " %2, %0, %3\n"                             \
> +               __SC #ord " %2, %2, %1\n"                       \
> +               "bnez %2, 1b\n"                                 \
> +               : "=&r" (__res), "+A" (addr[BIT_WORD(nr)]), "=&r" (__temp)      \
> +               : "r" (mod(__mask))                             \
> +               : "memory");                                    \
> +       ((__res & __mask) != 0);                                \
> +})
> +
> +#define __op_bit_ord(op, mod, nr, addr, ord)                   \
> +       unsigned long __res, __temp;                            \
> +       __asm__ __volatile__ (                                  \
> +               "1: " __LR #ord " %0, %1\n"                     \
> +               #op " %2, %0, %3\n"                             \
> +               __SC #ord " %2, %2, %1\n"                       \
> +               "bnez %2, 1b\n"                                 \
> +               : "=&r" (__res), "+A" (addr[BIT_WORD(nr)]), "=&r" (__temp)      \
> +               : "r" (mod(BIT_MASK(nr)))                       \
> +               : "memory")
> +#endif
>
>  #define __test_and_op_bit(op, mod, nr, addr)                   \
>         __test_and_op_bit_ord(op, mod, nr, addr, .aqrl)
> @@ -354,12 +386,25 @@ static inline void arch___clear_bit_unlock(
>  static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
>                 volatile unsigned long *addr)
>  {
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
>         unsigned long res;
>         __asm__ __volatile__ (
>                 __AMO(xor) ".rl %0, %2, %1"
>                 : "=r" (res), "+A" (*addr)
>                 : "r" (__NOP(mask))
>                 : "memory");
> +#else
> +       unsigned long res, temp;
> +
> +       __asm__ __volatile__ (
> +               "1: " __LR ".rl %0, %1\n"
> +               "xor %2, %0, %3\n"
> +               __SC ".rl %2, %2, %1\n"
> +               "bnez %2, 1b\n"
> +               : "=&r" (res), "+A" (*addr), "=&r" (temp)
> +               : "r" (__NOP(mask))
> +               : "memory");
> +#endif
>         return (res & BIT(7)) != 0;
>  }
>
> diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> index 4cadc56220fe..881082b05110 100644
> --- a/arch/riscv/include/asm/cmpxchg.h
> +++ b/arch/riscv/include/asm/cmpxchg.h
> @@ -51,6 +51,7 @@
>         }                                                                       \
>  })
>
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
>  #define __arch_xchg(sfx, prepend, append, r, p, n)                     \
>  ({                                                                     \
>         __asm__ __volatile__ (                                          \
> @@ -61,6 +62,21 @@
>                 : "r" (n)                                               \
>                 : "memory");                                            \
>  })
> +#else
> +#define __arch_xchg(sfx, prepend, append, r, p, n)                     \
> +({                                                                     \
> +       __typeof__(*(__ptr)) temp;                                      \
> +       __asm__ __volatile__ (                                          \
> +               prepend                                                 \
> +               "1:     lr" sfx " %0, %1\n"                             \
> +               "       sc" sfx " %2, %3, %1\n"                         \
> +               "       bnez %2, 1b\n"                                  \
> +               append                                                  \
> +               : "=&r" (r), "+A" (*(p)), "=&r" (temp)                  \
> +               : "r" (n)                                               \
> +               : "memory");                                            \
> +})
> +#endif
>
>  #define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend,                        \
>                    sc_append, swap_append)                              \
> diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h
> index fc8130f995c1..47297f47ec35 100644
> --- a/arch/riscv/include/asm/futex.h
> +++ b/arch/riscv/include/asm/futex.h
> @@ -19,6 +19,7 @@
>  #define __disable_user_access()                do { } while (0)
>  #endif
>
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
>  #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)     \
>  {                                                              \
>         __enable_user_access();                                 \
> @@ -32,16 +33,39 @@
>         : "memory");                                            \
>         __disable_user_access();                                \
>  }
> +#else
> +#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)     \
> +{                                                              \
> +       __enable_user_access();                                 \
> +       __asm__ __volatile__ (                                  \
> +       "1:     lr.w.aqrl %[ov], %[u]\n"                        \
> +       "       " insn "\n"                                     \
> +       "       sc.w.aqrl %[t], %[t], %[u]\n"                   \
> +       "       bnez %[t], 1b\n"                                \
> +       "2:\n"                                                  \
> +       _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %[r])                  \
> +       : [r] "+r" (ret), [ov] "=&r" (oldval),                  \
> +         [t] "=&r" (temp), [u] "+m" (*uaddr)                   \
> +       : [op] "Jr" (oparg)                                     \
> +       : "memory");                                            \
> +       __disable_user_access();                                \
> +}
> +#endif
>
>  static inline int
>  arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
>  {
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
>         int oldval = 0, ret = 0;
> +#else
> +       int oldval = 0, ret = 0, temp = 0;
> +#endif
>
>         if (!access_ok(uaddr, sizeof(u32)))
>                 return -EFAULT;
>
>         switch (op) {
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
>         case FUTEX_OP_SET:
>                 __futex_atomic_op("amoswap.w.aqrl %[ov],%z[op],%[u]",
>                                   ret, oldval, uaddr, oparg);
> @@ -62,6 +86,28 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
>                 __futex_atomic_op("amoxor.w.aqrl %[ov],%z[op],%[u]",
>                                   ret, oldval, uaddr, oparg);
>                 break;
> +#else
> +       case FUTEX_OP_SET:
> +               __futex_atomic_op("mv %[t], %z[op]",
> +                                 ret, oldval, uaddr, oparg);
> +               break;
> +       case FUTEX_OP_ADD:
> +               __futex_atomic_op("add %[t], %[ov], %z[op]",
> +                                 ret, oldval, uaddr, oparg);
> +               break;
> +       case FUTEX_OP_OR:
> +               __futex_atomic_op("or %[t], %[ov], %z[op]",
> +                                 ret, oldval, uaddr, oparg);
> +               break;
> +       case FUTEX_OP_ANDN:
> +               __futex_atomic_op("and %[t], %[ov], %z[op]",
> +                                 ret, oldval, uaddr, ~oparg);
> +               break;
> +       case FUTEX_OP_XOR:
> +               __futex_atomic_op("xor %[t], %[ov], %z[op]",
> +                                 ret, oldval, uaddr, oparg);
> +               break;
> +#endif
>         default:
>                 ret = -ENOSYS;
>         }
> --
> 2.25.1
>
Conor Dooley Nov. 29, 2024, 4:29 p.m. UTC | #2
Hey,

On Fri, Nov 29, 2024 at 03:43:19PM +0100, Aleksandar Rikalo wrote:
> From: Chao-ying Fu <cfu@mips.com>
> 
> Use lr and sc to implement all atomic functions. Some CPUs have
> native support for lr and sc, but emulate AMO instructions through
> trap handlers that are slow.
> 
> Add config RISCV_ISA_ZALRSC_ONLY.
> 
> Signed-off-by: Chao-ying Fu <cfu@mips.com>
> Signed-off-by: Aleksandar Rikalo <arikalo@gmail.com>
> ---
>  arch/riscv/Kconfig               | 10 ++++++
>  arch/riscv/include/asm/atomic.h  | 52 +++++++++++++++++++++++++++++++-
>  arch/riscv/include/asm/bitops.h  | 45 +++++++++++++++++++++++++++
>  arch/riscv/include/asm/cmpxchg.h | 16 ++++++++++
>  arch/riscv/include/asm/futex.h   | 46 ++++++++++++++++++++++++++++
>  5 files changed, 168 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index cc63aef41e94..767538c27875 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -715,6 +715,16 @@ config RISCV_ISA_ZACAS
>  
>  	  If you don't know what to do here, say Y.
>  
> +config RISCV_ISA_ZALRSC_ONLY
> +	bool "Zalrsc extension support only"

I don't agree with the naming of, or the description for, this option.
From the description below I'd imagine that you only care about a
platform where the use of AMO instructions is merely undesirable, but
overall the option implies that it can be used to build a kernel that
runs on systems that only implement Zalrsc, which, even with your patch
applied, it cannot. I think, if we are going to merge something like
this, we should go the whole way and permit platforms that don't even
emulate the A extension. If not, the Kconfig option should be
explicitly clear that the A extension is still mandatory.
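
As an illustration of that last point (a sketch only, not wording
proposed in the thread), a help text that keeps the option but spells
out the dependency might read:

	config RISCV_ISA_ZALRSC_ONLY
		bool "Build kernel atomics from lr/sc (Zalrsc) sequences only"
		help
		  Build all kernel atomics out of lr/sc retry loops instead of
		  AMO instructions. The A extension is still mandatory: AMO
		  instructions must still be executable, natively or through
		  emulation; this option only stops the kernel itself from
		  issuing them on platforms where they trap into a slow
		  emulation path.

		  If you don't know what to do here, say N.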

> +	default n

"default n" is the default, you shouldn't need this line.

> +	help
> +	   Use lr and sc to build all atomic functions. Some CPUs have
> +	   native support for lr and sc, but emulate amo instructions through
> +	   trap handlers that are slow.

Since you mention trap handlers here, it sounds like it may not be the
CPU itself that emulates them, but rather the firmware?

Cheers,
Conor.
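
For context on the trap handlers both reviewers ask about, here is a
minimal sketch (purely illustrative, not code from the patch or the
thread, and assuming a hypothetical regs[] array holding the trapped
hart's GPRs) of how M-mode firmware could emulate a faulting amoadd.w
with lr/sc:

	/* Hypothetical M-mode illegal-instruction handler fragment. */
	static void emulate_amoadd_w(unsigned long *regs, unsigned int insn)
	{
		unsigned int rd  = (insn >> 7)  & 0x1f;	/* bits 11:7 */
		unsigned int rs1 = (insn >> 15) & 0x1f;	/* bits 19:15 */
		unsigned int rs2 = (insn >> 20) & 0x1f;	/* bits 24:20 */
		int *addr = (int *)regs[rs1];
		int old, temp;

		__asm__ __volatile__ (
			"1:	lr.w.aqrl %0, %2\n"
			"	add %1, %0, %3\n"
			"	sc.w.aqrl %1, %1, %2\n"
			"	bnez %1, 1b\n"
			: "=&r" (old), "=&r" (temp), "+A" (*addr)
			: "r" ((int)regs[rs2])
			: "memory");

		if (rd)
			regs[rd] = old;	/* x0 stays hardwired to zero */
	}

A real handler would also decode funct3/funct5 for the other widths and
operations, validate the access, and advance mepc past the instruction;
the point is that every emulated AMO costs a full trap round-trip, which
is the overhead this patch tries to avoid.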

Patch

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index cc63aef41e94..767538c27875 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -715,6 +715,16 @@  config RISCV_ISA_ZACAS
 
 	  If you don't know what to do here, say Y.
 
+config RISCV_ISA_ZALRSC_ONLY
+	bool "Zalrsc extension support only"
+	default n
+	help
+	   Use lr and sc to build all atomic functions. Some CPUs have
+	   native support for lr and sc, but emulate amo instructions through
+	   trap handlers that are slow.
+
+	   If you don't know what to do here, say n.
+
 config TOOLCHAIN_HAS_ZBB
 	bool
 	default y
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 5b96c2f61adb..f484babecb9e 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -50,6 +50,7 @@  static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
  * have the AQ or RL bits set.  These don't return anything, so there's only
  * one version to worry about.
  */
+#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
 #define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix)		\
 static __always_inline							\
 void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)	\
@@ -59,7 +60,23 @@  void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)	\
 		: "+A" (v->counter)					\
 		: "r" (I)						\
 		: "memory");						\
-}									\
+}
+#else
+#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix)		\
+static __always_inline							\
+void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)	\
+{									\
+	register c_type ret, temp;					\
+	__asm__ __volatile__ (						\
+		"1:	lr." #asm_type " %1, %0\n"			\
+		"	" #asm_op " %2, %1, %3\n"			\
+		"	sc." #asm_type " %2, %2, %0\n"			\
+		"	bnez %2, 1b\n"					\
+		: "+A" (v->counter), "=&r" (ret), "=&r" (temp)		\
+		: "r" (I)						\
+		: "memory");						\
+}
+#endif
 
 #ifdef CONFIG_GENERIC_ATOMIC64
 #define ATOMIC_OPS(op, asm_op, I)					\
@@ -84,6 +101,7 @@  ATOMIC_OPS(xor, xor,  i)
  * There's two flavors of these: the arithmatic ops have both fetch and return
  * versions, while the logical ops only have fetch versions.
  */
+#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
 #define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix)	\
 static __always_inline							\
 c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i,		\
@@ -108,6 +126,38 @@  c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v)	\
 		: "memory");						\
 	return ret;							\
 }
+#else
+#define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix)	\
+static __always_inline							\
+c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i,		\
+					     atomic##prefix##_t *v)	\
+{									\
+	register c_type ret, temp;					\
+	__asm__ __volatile__ (						\
+		"1:	lr." #asm_type " %1, %0\n"			\
+		"	" #asm_op " %2, %1, %3\n"			\
+		"	sc." #asm_type " %2, %2, %0\n"			\
+		"	bnez %2, 1b\n"					\
+		: "+A" (v->counter), "=&r" (ret), "=&r" (temp)		\
+		: "r" (I)						\
+		: "memory");						\
+	return ret;							\
+}									\
+static __always_inline							\
+c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v)	\
+{									\
+	register c_type ret, temp;					\
+	__asm__ __volatile__ (						\
+		"1:	lr." #asm_type ".aqrl %1, %0\n"			\
+		"	" #asm_op " %2, %1, %3\n"			\
+		"	sc." #asm_type ".aqrl %2, %2, %0\n"		\
+		"	bnez %2, 1b\n"					\
+		: "+A" (v->counter), "=&r" (ret), "=&r" (temp)		\
+		: "r" (I)						\
+		: "memory");						\
+	return ret;							\
+}
+#endif
 
 #define ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_type, c_type, prefix)	\
 static __always_inline							\
diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index fae152ea0508..b51cb18f7d9e 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -187,12 +187,17 @@  static __always_inline int variable_fls(unsigned int x)
 
 #if (BITS_PER_LONG == 64)
 #define __AMO(op)	"amo" #op ".d"
+#define __LR	"lr.d"
+#define __SC	"sc.d"
 #elif (BITS_PER_LONG == 32)
 #define __AMO(op)	"amo" #op ".w"
+#define __LR	"lr.w"
+#define __SC	"sc.w"
 #else
 #error "Unexpected BITS_PER_LONG"
 #endif
 
+#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
 #define __test_and_op_bit_ord(op, mod, nr, addr, ord)		\
 ({								\
 	unsigned long __res, __mask;				\
@@ -211,6 +216,33 @@  static __always_inline int variable_fls(unsigned int x)
 		: "+A" (addr[BIT_WORD(nr)])			\
 		: "r" (mod(BIT_MASK(nr)))			\
 		: "memory");
+#else
+#define __test_and_op_bit_ord(op, mod, nr, addr, ord)		\
+({								\
+	unsigned long __res, __mask, __temp;			\
+	__mask = BIT_MASK(nr);					\
+	__asm__ __volatile__ (					\
+		"1: " __LR #ord " %0, %1\n"			\
+		#op " %2, %0, %3\n"				\
+		__SC #ord " %2, %2, %1\n"			\
+		"bnez %2, 1b\n"					\
+		: "=&r" (__res), "+A" (addr[BIT_WORD(nr)]), "=&r" (__temp)	\
+		: "r" (mod(__mask))				\
+		: "memory");					\
+	((__res & __mask) != 0);				\
+})
+
+#define __op_bit_ord(op, mod, nr, addr, ord)			\
+	unsigned long __res, __temp;				\
+	__asm__ __volatile__ (					\
+		"1: " __LR #ord " %0, %1\n"			\
+		#op " %2, %0, %3\n"				\
+		__SC #ord " %2, %2, %1\n"			\
+		"bnez %2, 1b\n"					\
+		: "=&r" (__res), "+A" (addr[BIT_WORD(nr)]), "=&r" (__temp)	\
+		: "r" (mod(BIT_MASK(nr)))			\
+		: "memory")
+#endif
 
 #define __test_and_op_bit(op, mod, nr, addr) 			\
 	__test_and_op_bit_ord(op, mod, nr, addr, .aqrl)
@@ -354,12 +386,25 @@  static inline void arch___clear_bit_unlock(
 static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
 		volatile unsigned long *addr)
 {
+#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
 	unsigned long res;
 	__asm__ __volatile__ (
 		__AMO(xor) ".rl %0, %2, %1"
 		: "=r" (res), "+A" (*addr)
 		: "r" (__NOP(mask))
 		: "memory");
+#else
+	unsigned long res, temp;
+
+	__asm__ __volatile__ (
+		"1: " __LR ".rl %0, %1\n"
+		"xor %2, %0, %3\n"
+		__SC ".rl %2, %2, %1\n"
+		"bnez %2, 1b\n"
+		: "=&r" (res), "+A" (*addr), "=&r" (temp)
+		: "r" (__NOP(mask))
+		: "memory");
+#endif
 	return (res & BIT(7)) != 0;
 }
 
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 4cadc56220fe..881082b05110 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -51,6 +51,7 @@ 
 	}									\
 })
 
+#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
 #define __arch_xchg(sfx, prepend, append, r, p, n)			\
 ({									\
 	__asm__ __volatile__ (						\
@@ -61,6 +62,21 @@ 
 		: "r" (n)						\
 		: "memory");						\
 })
+#else
+#define __arch_xchg(sfx, prepend, append, r, p, n)			\
+({									\
+	__typeof__(*(__ptr)) temp;					\
+	__asm__ __volatile__ (						\
+		prepend							\
+		"1:	lr" sfx " %0, %1\n"				\
+		"	sc" sfx " %2, %3, %1\n"				\
+		"	bnez %2, 1b\n"					\
+		append							\
+		: "=&r" (r), "+A" (*(p)), "=&r" (temp)			\
+		: "r" (n)						\
+		: "memory");						\
+})
+#endif
 
 #define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend,			\
 		   sc_append, swap_append)				\
diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h
index fc8130f995c1..47297f47ec35 100644
--- a/arch/riscv/include/asm/futex.h
+++ b/arch/riscv/include/asm/futex.h
@@ -19,6 +19,7 @@ 
 #define __disable_user_access()		do { } while (0)
 #endif
 
+#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
 #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)	\
 {								\
 	__enable_user_access();					\
@@ -32,16 +33,39 @@ 
 	: "memory");						\
 	__disable_user_access();				\
 }
+#else
+#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)	\
+{								\
+	__enable_user_access();					\
+	__asm__ __volatile__ (					\
+	"1:	lr.w.aqrl %[ov], %[u]\n"			\
+	"	" insn "\n"					\
+	"	sc.w.aqrl %[t], %[t], %[u]\n"			\
+	"	bnez %[t], 1b\n"				\
+	"2:\n"							\
+	_ASM_EXTABLE_UACCESS_ERR(1b, 2b, %[r])			\
+	: [r] "+r" (ret), [ov] "=&r" (oldval),			\
+	  [t] "=&r" (temp), [u] "+m" (*uaddr)			\
+	: [op] "Jr" (oparg)					\
+	: "memory");						\
+	__disable_user_access();				\
+}
+#endif
 
 static inline int
 arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 {
+#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
 	int oldval = 0, ret = 0;
+#else
+	int oldval = 0, ret = 0, temp = 0;
+#endif
 
 	if (!access_ok(uaddr, sizeof(u32)))
 		return -EFAULT;
 
 	switch (op) {
+#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
 	case FUTEX_OP_SET:
 		__futex_atomic_op("amoswap.w.aqrl %[ov],%z[op],%[u]",
 				  ret, oldval, uaddr, oparg);
@@ -62,6 +86,28 @@  arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 		__futex_atomic_op("amoxor.w.aqrl %[ov],%z[op],%[u]",
 				  ret, oldval, uaddr, oparg);
 		break;
+#else
+	case FUTEX_OP_SET:
+		__futex_atomic_op("mv %[t], %z[op]",
+				  ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ADD:
+		__futex_atomic_op("add %[t], %[ov], %z[op]",
+				  ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_OR:
+		__futex_atomic_op("or %[t], %[ov], %z[op]",
+				  ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ANDN:
+		__futex_atomic_op("and %[t], %[ov], %z[op]",
+				  ret, oldval, uaddr, ~oparg);
+		break;
+	case FUTEX_OP_XOR:
+		__futex_atomic_op("xor %[t], %[ov], %z[op]",
+				  ret, oldval, uaddr, oparg);
+		break;
+#endif
 	default:
 		ret = -ENOSYS;
 	}