diff mbox series

[-fixes] riscv: Fix fully ordered LR/SC xchg[8|16]() implementations

Message ID 20240530075424.380557-1-alexghiti@rivosinc.com (mailing list archive)
State Superseded
Headers show
Series [-fixes] riscv: Fix fully ordered LR/SC xchg[8|16]() implementations | expand

Checks

Context Check Description
conchuod/vmtest-fixes-PR fail PR summary
conchuod/patch-1-test-1 success .github/scripts/patches/tests/build_rv32_defconfig.sh
conchuod/patch-1-test-2 fail .github/scripts/patches/tests/build_rv64_clang_allmodconfig.sh
conchuod/patch-1-test-3 fail .github/scripts/patches/tests/build_rv64_gcc_allmodconfig.sh
conchuod/patch-1-test-4 success .github/scripts/patches/tests/build_rv64_nommu_k210_defconfig.sh
conchuod/patch-1-test-5 success .github/scripts/patches/tests/build_rv64_nommu_virt_defconfig.sh
conchuod/patch-1-test-6 warning .github/scripts/patches/tests/checkpatch.sh
conchuod/patch-1-test-7 success .github/scripts/patches/tests/dtb_warn_rv64.sh
conchuod/patch-1-test-8 success .github/scripts/patches/tests/header_inline.sh
conchuod/patch-1-test-9 success .github/scripts/patches/tests/kdoc.sh
conchuod/patch-1-test-10 success .github/scripts/patches/tests/module_param.sh
conchuod/patch-1-test-11 success .github/scripts/patches/tests/verify_fixes.sh
conchuod/patch-1-test-12 success .github/scripts/patches/tests/verify_signedoff.sh

Commit Message

Alexandre Ghiti May 30, 2024, 7:54 a.m. UTC
The fully ordered versions of xchg[8|16]() using LR/SC lack the
necessary memory barriers to guarantee the order.

Fix this by matching what is already implemented in the fully ordered
versions of cmpxchg() using LR/SC.

Suggested-by: Andrea Parri <parri.andrea@gmail.com>
Reported-by: Andrea Parri <parri.andrea@gmail.com>
Closes: https://lore.kernel.org/linux-riscv/ZlYbupL5XgzgA0MX@andrea/T/#u
Fixes: a8ed2b7a2c13 ("riscv/cmpxchg: Implement xchg for variables of size 1 and 2")
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
 arch/riscv/include/asm/cmpxchg.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

Comments

Andrea Parri May 30, 2024, 11:54 a.m. UTC | #1
> -#define _arch_xchg(ptr, new, sfx, prepend, append)			\
> +#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend, append)		\
>  ({									\
>  	__typeof__(ptr) __ptr = (ptr);					\
>  	__typeof__(*(__ptr)) __new = (new);				\
> @@ -55,15 +55,15 @@
>  	switch (sizeof(*__ptr)) {					\
>  	case 1:								\
>  	case 2:								\
> -		__arch_xchg_masked(prepend, append,			\
> +		__arch_xchg_masked(sc_sfx, prepend, append,		\
>  				   __ret, __ptr, __new);		\
>  		break;							\
>  	case 4:								\
> -		__arch_xchg(".w" sfx, prepend, append,			\
> +		__arch_xchg(".w" swap_sfx, prepend, append,		\
>  			      __ret, __ptr, __new);			\
>  		break;							\
>  	case 8:								\
> -		__arch_xchg(".d" sfx, prepend, append,			\
> +		__arch_xchg(".d" swap_sfx, prepend, append,		\
>  			      __ret, __ptr, __new);			\
>  		break;							\
>  	default:							\
> @@ -73,16 +73,16 @@
>  })
>  
>  #define arch_xchg_relaxed(ptr, x)					\
> -	_arch_xchg(ptr, x, "", "", "")
> +	_arch_xchg(ptr, x, "", "", "", "")
>  
>  #define arch_xchg_acquire(ptr, x)					\
> -	_arch_xchg(ptr, x, "", "", RISCV_ACQUIRE_BARRIER)
> +	_arch_xchg(ptr, x, "", "", "", RISCV_ACQUIRE_BARRIER)
>  
>  #define arch_xchg_release(ptr, x)					\
> -	_arch_xchg(ptr, x, "", RISCV_RELEASE_BARRIER, "")
> +	_arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "")
>  
>  #define arch_xchg(ptr, x)						\
> -	_arch_xchg(ptr, x, ".aqrl", "", "")
> +	_arch_xchg(ptr, x, ".rl", ".aqrl", "", "     fence rw, rw\n")

This does indeed fix the fully-ordered variant of xchg8/16().  But this
also changes the fully-ordered xchg32() to

  amoswap.w.aqrl  a4,a5,(s1)
  fence   rw,rw

(and similarly for xchg64()); we should be able to restore the original
mapping with the diff below on top of this patch.

  Andrea

P.S. Perhaps expand the width of the macros to avoid newlines (I didn't
do it, to keep the diff smaller).

P.S. With Zabha, we'd probably like to pass swap_sfx and swap_append as
well to __arch_xchg_masked().


diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index e1e564f5dc7ba..88c8bb7ec1c34 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -46,7 +46,8 @@
 		: "memory");						\
 })
 
-#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend, append)		\
+#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend,			\
+		   sc_append, swap_append)				\
 ({									\
 	__typeof__(ptr) __ptr = (ptr);					\
 	__typeof__(*(__ptr)) __new = (new);				\
@@ -55,15 +56,15 @@
 	switch (sizeof(*__ptr)) {					\
 	case 1:								\
 	case 2:								\
-		__arch_xchg_masked(sc_sfx, prepend, append,		\
+		__arch_xchg_masked(sc_sfx, prepend, sc_append,		\
 				   __ret, __ptr, __new);		\
 		break;							\
 	case 4:								\
-		__arch_xchg(".w" swap_sfx, prepend, append,		\
+		__arch_xchg(".w" swap_sfx, prepend, swap_append,	\
 			      __ret, __ptr, __new);			\
 		break;							\
 	case 8:								\
-		__arch_xchg(".d" swap_sfx, prepend, append,		\
+		__arch_xchg(".d" swap_sfx, prepend, swap_append,	\
 			      __ret, __ptr, __new);			\
 		break;							\
 	default:							\
@@ -73,16 +74,16 @@
 })
 
 #define arch_xchg_relaxed(ptr, x)					\
-	_arch_xchg(ptr, x, "", "", "", "")
+	_arch_xchg(ptr, x, "", "", "", "", "")
 
 #define arch_xchg_acquire(ptr, x)					\
-	_arch_xchg(ptr, x, "", "", "", RISCV_ACQUIRE_BARRIER)
+	_arch_xchg(ptr, x, "", "", "", RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)
 
 #define arch_xchg_release(ptr, x)					\
-	_arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "")
+	_arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "")
 
 #define arch_xchg(ptr, x)						\
-	_arch_xchg(ptr, x, ".rl", ".aqrl", "", "     fence rw, rw\n")
+	_arch_xchg(ptr, x, ".rl", ".aqrl", "", "     fence rw, rw\n", "")
 
 #define xchg32(ptr, x)							\
 ({									\
Alexandre Ghiti May 30, 2024, 12:05 p.m. UTC | #2
Andrea,

On Thu, May 30, 2024 at 1:54 PM Andrea Parri <parri.andrea@gmail.com> wrote:
>
> > -#define _arch_xchg(ptr, new, sfx, prepend, append)                   \
> > +#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend, append)              \
> >  ({                                                                   \
> >       __typeof__(ptr) __ptr = (ptr);                                  \
> >       __typeof__(*(__ptr)) __new = (new);                             \
> > @@ -55,15 +55,15 @@
> >       switch (sizeof(*__ptr)) {                                       \
> >       case 1:                                                         \
> >       case 2:                                                         \
> > -             __arch_xchg_masked(prepend, append,                     \
> > +             __arch_xchg_masked(sc_sfx, prepend, append,             \
> >                                  __ret, __ptr, __new);                \
> >               break;                                                  \
> >       case 4:                                                         \
> > -             __arch_xchg(".w" sfx, prepend, append,                  \
> > +             __arch_xchg(".w" swap_sfx, prepend, append,             \
> >                             __ret, __ptr, __new);                     \
> >               break;                                                  \
> >       case 8:                                                         \
> > -             __arch_xchg(".d" sfx, prepend, append,                  \
> > +             __arch_xchg(".d" swap_sfx, prepend, append,             \
> >                             __ret, __ptr, __new);                     \
> >               break;                                                  \
> >       default:                                                        \
> > @@ -73,16 +73,16 @@
> >  })
> >
> >  #define arch_xchg_relaxed(ptr, x)                                    \
> > -     _arch_xchg(ptr, x, "", "", "")
> > +     _arch_xchg(ptr, x, "", "", "", "")
> >
> >  #define arch_xchg_acquire(ptr, x)                                    \
> > -     _arch_xchg(ptr, x, "", "", RISCV_ACQUIRE_BARRIER)
> > +     _arch_xchg(ptr, x, "", "", "", RISCV_ACQUIRE_BARRIER)
> >
> >  #define arch_xchg_release(ptr, x)                                    \
> > -     _arch_xchg(ptr, x, "", RISCV_RELEASE_BARRIER, "")
> > +     _arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "")
> >
> >  #define arch_xchg(ptr, x)                                            \
> > -     _arch_xchg(ptr, x, ".aqrl", "", "")
> > +     _arch_xchg(ptr, x, ".rl", ".aqrl", "", "     fence rw, rw\n")
>
> This does indeed fix the fully-ordered variant of xchg8/16().  But this
> also changes the fully-ordered xchg32() to
>
>   amoswap.w.aqrl  a4,a5,(s1)
>   fence   rw,rw
>
> (and similarly for xchg64()); we should be able to restore the original
> mapping with the diff below on top of this patch.

And you already told me that privately... Sorry, my mind has been
elsewhere lately... I'll fix that right now.

Sorry again and thanks,

Alex

>
>   Andrea
>
> P.S. Perhaps expand the width of the macros to avoid newlines (I didn't
> do it, to keep the diff smaller).
>
> P.S. With Zabha, we'd probably like to pass swap_sfx and swap_append as
> well to __arch_xchg_masked().
>
>
> diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> index e1e564f5dc7ba..88c8bb7ec1c34 100644
> --- a/arch/riscv/include/asm/cmpxchg.h
> +++ b/arch/riscv/include/asm/cmpxchg.h
> @@ -46,7 +46,8 @@
>                 : "memory");                                            \
>  })
>
> -#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend, append)                \
> +#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend,                        \
> +                  sc_append, swap_append)                              \
>  ({                                                                     \
>         __typeof__(ptr) __ptr = (ptr);                                  \
>         __typeof__(*(__ptr)) __new = (new);                             \
> @@ -55,15 +56,15 @@
>         switch (sizeof(*__ptr)) {                                       \
>         case 1:                                                         \
>         case 2:                                                         \
> -               __arch_xchg_masked(sc_sfx, prepend, append,             \
> +               __arch_xchg_masked(sc_sfx, prepend, sc_append,          \
>                                    __ret, __ptr, __new);                \
>                 break;                                                  \
>         case 4:                                                         \
> -               __arch_xchg(".w" swap_sfx, prepend, append,             \
> +               __arch_xchg(".w" swap_sfx, prepend, swap_append,        \
>                               __ret, __ptr, __new);                     \
>                 break;                                                  \
>         case 8:                                                         \
> -               __arch_xchg(".d" swap_sfx, prepend, append,             \
> +               __arch_xchg(".d" swap_sfx, prepend, swap_append,        \
>                               __ret, __ptr, __new);                     \
>                 break;                                                  \
>         default:                                                        \
> @@ -73,16 +74,16 @@
>  })
>
>  #define arch_xchg_relaxed(ptr, x)                                      \
> -       _arch_xchg(ptr, x, "", "", "", "")
> +       _arch_xchg(ptr, x, "", "", "", "", "")
>
>  #define arch_xchg_acquire(ptr, x)                                      \
> -       _arch_xchg(ptr, x, "", "", "", RISCV_ACQUIRE_BARRIER)
> +       _arch_xchg(ptr, x, "", "", "", RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)
>
>  #define arch_xchg_release(ptr, x)                                      \
> -       _arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "")
> +       _arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "")
>
>  #define arch_xchg(ptr, x)                                              \
> -       _arch_xchg(ptr, x, ".rl", ".aqrl", "", "     fence rw, rw\n")
> +       _arch_xchg(ptr, x, ".rl", ".aqrl", "", "     fence rw, rw\n", "")
>
>  #define xchg32(ptr, x)                                                 \
>  ({                                                                     \
>
Andrea Parri May 30, 2024, 2:09 p.m. UTC | #3
> And you already told me that privately...Sorry, my mind has been
> elsewhere lately...I'll fix that right now.

Np.  While at it, one nit below.


> >  #define arch_xchg_relaxed(ptr, x)                                      \
> > -       _arch_xchg(ptr, x, "", "", "", "")
> > +       _arch_xchg(ptr, x, "", "", "", "", "")
> >
> >  #define arch_xchg_acquire(ptr, x)                                      \
> > -       _arch_xchg(ptr, x, "", "", "", RISCV_ACQUIRE_BARRIER)
> > +       _arch_xchg(ptr, x, "", "", "", RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)
> >
> >  #define arch_xchg_release(ptr, x)                                      \
> > -       _arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "")
> > +       _arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "")
> >
> >  #define arch_xchg(ptr, x)                                              \
> > -       _arch_xchg(ptr, x, ".rl", ".aqrl", "", "     fence rw, rw\n")
> > +       _arch_xchg(ptr, x, ".rl", ".aqrl", "", "     fence rw, rw\n", "")

The plain string can be replaced with RISCV_FULL_BARRIER (cf. asm/fence.h)
to match the style/approach used elsewhere in this file.

  Andrea
diff mbox series

Patch

diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index ddb002ed89de..e1e564f5dc7b 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -10,7 +10,7 @@ 
 
 #include <asm/fence.h>
 
-#define __arch_xchg_masked(prepend, append, r, p, n)			\
+#define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n)		\
 ({									\
 	u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);			\
 	ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE;	\
@@ -25,7 +25,7 @@ 
 	       "0:	lr.w %0, %2\n"					\
 	       "	and  %1, %0, %z4\n"				\
 	       "	or   %1, %1, %z3\n"				\
-	       "	sc.w %1, %1, %2\n"				\
+	       "	sc.w" sc_sfx " %1, %1, %2\n"			\
 	       "	bnez %1, 0b\n"					\
 	       append							\
 	       : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b))	\
@@ -46,7 +46,7 @@ 
 		: "memory");						\
 })
 
-#define _arch_xchg(ptr, new, sfx, prepend, append)			\
+#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend, append)		\
 ({									\
 	__typeof__(ptr) __ptr = (ptr);					\
 	__typeof__(*(__ptr)) __new = (new);				\
@@ -55,15 +55,15 @@ 
 	switch (sizeof(*__ptr)) {					\
 	case 1:								\
 	case 2:								\
-		__arch_xchg_masked(prepend, append,			\
+		__arch_xchg_masked(sc_sfx, prepend, append,		\
 				   __ret, __ptr, __new);		\
 		break;							\
 	case 4:								\
-		__arch_xchg(".w" sfx, prepend, append,			\
+		__arch_xchg(".w" swap_sfx, prepend, append,		\
 			      __ret, __ptr, __new);			\
 		break;							\
 	case 8:								\
-		__arch_xchg(".d" sfx, prepend, append,			\
+		__arch_xchg(".d" swap_sfx, prepend, append,		\
 			      __ret, __ptr, __new);			\
 		break;							\
 	default:							\
@@ -73,16 +73,16 @@ 
 })
 
 #define arch_xchg_relaxed(ptr, x)					\
-	_arch_xchg(ptr, x, "", "", "")
+	_arch_xchg(ptr, x, "", "", "", "")
 
 #define arch_xchg_acquire(ptr, x)					\
-	_arch_xchg(ptr, x, "", "", RISCV_ACQUIRE_BARRIER)
+	_arch_xchg(ptr, x, "", "", "", RISCV_ACQUIRE_BARRIER)
 
 #define arch_xchg_release(ptr, x)					\
-	_arch_xchg(ptr, x, "", RISCV_RELEASE_BARRIER, "")
+	_arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "")
 
 #define arch_xchg(ptr, x)						\
-	_arch_xchg(ptr, x, ".aqrl", "", "")
+	_arch_xchg(ptr, x, ".rl", ".aqrl", "", "     fence rw, rw\n")
 
 #define xchg32(ptr, x)							\
 ({									\