[v5,10/10] arm64: atomics: Use K constraint when toolchain appears to support it
diff mbox series

Message ID 20190829154834.26547-11-will@kernel.org
State New
Headers show
Series
  • arm64: avoid out-of-line ll/sc atomics
Related show

Commit Message

Will Deacon Aug. 29, 2019, 3:48 p.m. UTC
The 'K' constraint is a documented AArch64 machine constraint supported
by GCC for matching integer constants that can be used with a 32-bit
logical instruction. Unfortunately, some released compilers erroneously
accept the immediate '4294967295' for this constraint, which is later
refused by GAS at assembly time. This had led us to avoid the use of
the 'K' constraint altogether.

Instead, detect whether the compiler is up to the job when building the
kernel and pass the 'K' constraint to our 32-bit atomic macros when it
appears to be supported.

Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/Makefile                   |  9 ++++++-
 arch/arm64/include/asm/atomic_ll_sc.h | 47 +++++++++++++++++++++++------------
 2 files changed, 39 insertions(+), 17 deletions(-)

Comments

Will Deacon Aug. 29, 2019, 4:54 p.m. UTC | #1
On Thu, Aug 29, 2019 at 04:48:34PM +0100, Will Deacon wrote:
> diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
> index 95091f72228b..7fa042f5444e 100644
> --- a/arch/arm64/include/asm/atomic_ll_sc.h
> +++ b/arch/arm64/include/asm/atomic_ll_sc.h
> @@ -23,6 +23,10 @@ asm_ops "\n"								\
>  #define __LL_SC_FALLBACK(asm_ops) asm_ops
>  #endif
>  
> +#ifndef CONFIG_CC_HAS_K_CONSTRAINT
> +#define K
> +#endif

Bah, I need to use something like __stringify when the constraint is used
in order for this to get expanded properly. Updated diff below.

Will

--->8

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 61de992bbea3..0cef056b5fb1 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -39,6 +39,12 @@ $(warning LSE atomics not supported by binutils)
   endif
 endif
 
+cc_has_k_constraint := $(call try-run,echo				\
+	'int main(void) {						\
+		asm volatile("and w0, w0, %w0" :: "K" (4294967295));	\
+		return 0;						\
+	}' | $(CC) -S -x c -o "$$TMP" -,,-DCONFIG_CC_HAS_K_CONSTRAINT=1)
+
 ifeq ($(CONFIG_ARM64), y)
 brokengasinst := $(call as-instr,1:\n.inst 0\n.rept . - 1b\n\nnop\n.endr\n,,-DCONFIG_BROKEN_GAS_INST=1)
 
@@ -63,7 +69,8 @@ ifeq ($(CONFIG_GENERIC_COMPAT_VDSO), y)
   endif
 endif
 
-KBUILD_CFLAGS	+= -mgeneral-regs-only $(lseinstr) $(brokengasinst) $(compat_vdso)
+KBUILD_CFLAGS	+= -mgeneral-regs-only $(lseinstr) $(brokengasinst)	\
+		   $(compat_vdso) $(cc_has_k_constraint)
 KBUILD_CFLAGS	+= -fno-asynchronous-unwind-tables
 KBUILD_CFLAGS	+= $(call cc-disable-warning, psabi)
 KBUILD_AFLAGS	+= $(lseinstr) $(brokengasinst) $(compat_vdso)
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index 95091f72228b..7b012148bfd6 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -10,6 +10,8 @@
 #ifndef __ASM_ATOMIC_LL_SC_H
 #define __ASM_ATOMIC_LL_SC_H
 
+#include <linux/stringify.h>
+
 #if IS_ENABLED(CONFIG_ARM64_LSE_ATOMICS) && IS_ENABLED(CONFIG_AS_LSE)
 #define __LL_SC_FALLBACK(asm_ops)					\
 "	b	3f\n"							\
@@ -23,6 +25,10 @@ asm_ops "\n"								\
 #define __LL_SC_FALLBACK(asm_ops) asm_ops
 #endif
 
+#ifndef CONFIG_CC_HAS_K_CONSTRAINT
+#define K
+#endif
+
 /*
  * AArch64 UP and SMP safe atomic ops.  We use load exclusive and
  * store exclusive to ensure that these are atomic.  We may loop
@@ -44,7 +50,7 @@ __ll_sc_atomic_##op(int i, atomic_t *v)					\
 "	stxr	%w1, %w0, %2\n"						\
 "	cbnz	%w1, 1b\n")						\
 	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)		\
-	: #constraint "r" (i));						\
+	: __stringify(constraint) "r" (i));				\
 }
 
 #define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
@@ -63,7 +69,7 @@ __ll_sc_atomic_##op##_return##name(int i, atomic_t *v)			\
 "	cbnz	%w1, 1b\n"						\
 "	" #mb )								\
 	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)		\
-	: #constraint "r" (i)						\
+	: __stringify(constraint) "r" (i)				\
 	: cl);								\
 									\
 	return result;							\
@@ -85,7 +91,7 @@ __ll_sc_atomic_fetch_##op##name(int i, atomic_t *v)			\
 "	cbnz	%w2, 1b\n"						\
 "	" #mb )								\
 	: "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter)	\
-	: #constraint "r" (i)						\
+	: __stringify(constraint) "r" (i)				\
 	: cl);								\
 									\
 	return result;							\
@@ -113,10 +119,15 @@ ATOMIC_OPS(sub, sub, J)
 	ATOMIC_FETCH_OP (_acquire,        , a,  , "memory", __VA_ARGS__)\
 	ATOMIC_FETCH_OP (_release,        ,  , l, "memory", __VA_ARGS__)
 
-ATOMIC_OPS(and, and, )
+ATOMIC_OPS(and, and, K)
+ATOMIC_OPS(or, orr, K)
+ATOMIC_OPS(xor, eor, K)
+/*
+ * GAS converts the mysterious and undocumented BIC (immediate) alias to
+ * an AND (immediate) instruction with the immediate inverted. We don't
+ * have a constraint for this, so fall back to register.
+ */
 ATOMIC_OPS(andnot, bic, )
-ATOMIC_OPS(or, orr, )
-ATOMIC_OPS(xor, eor, )
 
 #undef ATOMIC_OPS
 #undef ATOMIC_FETCH_OP
@@ -138,7 +149,7 @@ __ll_sc_atomic64_##op(s64 i, atomic64_t *v)				\
 "	stxr	%w1, %0, %2\n"						\
 "	cbnz	%w1, 1b")						\
 	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)		\
-	: #constraint "r" (i));						\
+	: __stringify(constraint) "r" (i));				\
 }
 
 #define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
@@ -157,7 +168,7 @@ __ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v)		\
 "	cbnz	%w1, 1b\n"						\
 "	" #mb )								\
 	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)		\
-	: #constraint "r" (i)						\
+	: __stringify(constraint) "r" (i)				\
 	: cl);								\
 									\
 	return result;							\
@@ -179,7 +190,7 @@ __ll_sc_atomic64_fetch_##op##name(s64 i, atomic64_t *v)		\
 "	cbnz	%w2, 1b\n"						\
 "	" #mb )								\
 	: "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter)	\
-	: #constraint "r" (i)						\
+	: __stringify(constraint) "r" (i)				\
 	: cl);								\
 									\
 	return result;							\
@@ -208,9 +219,14 @@ ATOMIC64_OPS(sub, sub, J)
 	ATOMIC64_FETCH_OP (_release,,  , l, "memory", __VA_ARGS__)
 
 ATOMIC64_OPS(and, and, L)
-ATOMIC64_OPS(andnot, bic, )
 ATOMIC64_OPS(or, orr, L)
 ATOMIC64_OPS(xor, eor, L)
+/*
+ * GAS converts the mysterious and undocumented BIC (immediate) alias to
+ * an AND (immediate) instruction with the immediate inverted. We don't
+ * have a constraint for this, so fall back to register.
+ */
+ATOMIC64_OPS(andnot, bic, )
 
 #undef ATOMIC64_OPS
 #undef ATOMIC64_FETCH_OP
@@ -269,7 +285,7 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr,			\
 	"2:")								\
 	: [tmp] "=&r" (tmp), [oldval] "=&r" (oldval),			\
 	  [v] "+Q" (*(u##sz *)ptr)					\
-	: [old] #constraint "r" (old), [new] "r" (new)			\
+	: [old] __stringify(constraint) "r" (old), [new] "r" (new)	\
 	: cl);								\
 									\
 	return oldval;							\
@@ -280,21 +296,21 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr,			\
  * handle the 'K' constraint for the value 4294967295 - thus we use no
  * constraint for 32 bit operations.
  */
-__CMPXCHG_CASE(w, b,     ,  8,        ,  ,  ,         , )
-__CMPXCHG_CASE(w, h,     , 16,        ,  ,  ,         , )
-__CMPXCHG_CASE(w,  ,     , 32,        ,  ,  ,         , )
+__CMPXCHG_CASE(w, b,     ,  8,        ,  ,  ,         , K)
+__CMPXCHG_CASE(w, h,     , 16,        ,  ,  ,         , K)
+__CMPXCHG_CASE(w,  ,     , 32,        ,  ,  ,         , K)
 __CMPXCHG_CASE( ,  ,     , 64,        ,  ,  ,         , L)
-__CMPXCHG_CASE(w, b, acq_,  8,        , a,  , "memory", )
-__CMPXCHG_CASE(w, h, acq_, 16,        , a,  , "memory", )
-__CMPXCHG_CASE(w,  , acq_, 32,        , a,  , "memory", )
+__CMPXCHG_CASE(w, b, acq_,  8,        , a,  , "memory", K)
+__CMPXCHG_CASE(w, h, acq_, 16,        , a,  , "memory", K)
+__CMPXCHG_CASE(w,  , acq_, 32,        , a,  , "memory", K)
 __CMPXCHG_CASE( ,  , acq_, 64,        , a,  , "memory", L)
-__CMPXCHG_CASE(w, b, rel_,  8,        ,  , l, "memory", )
-__CMPXCHG_CASE(w, h, rel_, 16,        ,  , l, "memory", )
-__CMPXCHG_CASE(w,  , rel_, 32,        ,  , l, "memory", )
+__CMPXCHG_CASE(w, b, rel_,  8,        ,  , l, "memory", K)
+__CMPXCHG_CASE(w, h, rel_, 16,        ,  , l, "memory", K)
+__CMPXCHG_CASE(w,  , rel_, 32,        ,  , l, "memory", K)
 __CMPXCHG_CASE( ,  , rel_, 64,        ,  , l, "memory", L)
-__CMPXCHG_CASE(w, b,  mb_,  8, dmb ish,  , l, "memory", )
-__CMPXCHG_CASE(w, h,  mb_, 16, dmb ish,  , l, "memory", )
-__CMPXCHG_CASE(w,  ,  mb_, 32, dmb ish,  , l, "memory", )
+__CMPXCHG_CASE(w, b,  mb_,  8, dmb ish,  , l, "memory", K)
+__CMPXCHG_CASE(w, h,  mb_, 16, dmb ish,  , l, "memory", K)
+__CMPXCHG_CASE(w,  ,  mb_, 32, dmb ish,  , l, "memory", K)
 __CMPXCHG_CASE( ,  ,  mb_, 64, dmb ish,  , l, "memory", L)
 
 #undef __CMPXCHG_CASE
@@ -332,5 +348,6 @@ __CMPXCHG_DBL(   ,        ,  ,         )
 __CMPXCHG_DBL(_mb, dmb ish, l, "memory")
 
 #undef __CMPXCHG_DBL
+#undef K
 
 #endif	/* __ASM_ATOMIC_LL_SC_H */
Nick Desaulniers Aug. 29, 2019, 5:45 p.m. UTC | #2
On Thu, Aug 29, 2019 at 9:55 AM Will Deacon <will@kernel.org> wrote:
>
> On Thu, Aug 29, 2019 at 04:48:34PM +0100, Will Deacon wrote:
> > diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
> > index 95091f72228b..7fa042f5444e 100644
> > --- a/arch/arm64/include/asm/atomic_ll_sc.h
> > +++ b/arch/arm64/include/asm/atomic_ll_sc.h
> > @@ -23,6 +23,10 @@ asm_ops "\n"                                                               \
> >  #define __LL_SC_FALLBACK(asm_ops) asm_ops
> >  #endif
> >
> > +#ifndef CONFIG_CC_HAS_K_CONSTRAINT
> > +#define K
> > +#endif
>
> Bah, I need to use something like __stringify when the constraint is used
> in order for this to get expanded properly. Updated diff below.
>
> Will

Hi Will, thanks for cc'ing me on the patch set.  I'd be happy to help
test w/ Clang.  Would you mind pushing this set with the below diff to
a publicly available tree+branch I can pull from?  (I haven't yet
figured out how to download multiple diff's from gmail rather than 1
by 1, and TBH I'd rather just use git).

>
> --->8
>
> diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
> index 61de992bbea3..0cef056b5fb1 100644
> --- a/arch/arm64/Makefile
> +++ b/arch/arm64/Makefile
> @@ -39,6 +39,12 @@ $(warning LSE atomics not supported by binutils)
>    endif
>  endif
>
> +cc_has_k_constraint := $(call try-run,echo                             \
> +       'int main(void) {                                               \
> +               asm volatile("and w0, w0, %w0" :: "K" (4294967295));    \
> +               return 0;                                               \
> +       }' | $(CC) -S -x c -o "$$TMP" -,,-DCONFIG_CC_HAS_K_CONSTRAINT=1)
> +
>  ifeq ($(CONFIG_ARM64), y)
>  brokengasinst := $(call as-instr,1:\n.inst 0\n.rept . - 1b\n\nnop\n.endr\n,,-DCONFIG_BROKEN_GAS_INST=1)
>
> @@ -63,7 +69,8 @@ ifeq ($(CONFIG_GENERIC_COMPAT_VDSO), y)
>    endif
>  endif
>
> -KBUILD_CFLAGS  += -mgeneral-regs-only $(lseinstr) $(brokengasinst) $(compat_vdso)
> +KBUILD_CFLAGS  += -mgeneral-regs-only $(lseinstr) $(brokengasinst)     \
> +                  $(compat_vdso) $(cc_has_k_constraint)
>  KBUILD_CFLAGS  += -fno-asynchronous-unwind-tables
>  KBUILD_CFLAGS  += $(call cc-disable-warning, psabi)
>  KBUILD_AFLAGS  += $(lseinstr) $(brokengasinst) $(compat_vdso)
> diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
> index 95091f72228b..7b012148bfd6 100644
> --- a/arch/arm64/include/asm/atomic_ll_sc.h
> +++ b/arch/arm64/include/asm/atomic_ll_sc.h
> @@ -10,6 +10,8 @@
>  #ifndef __ASM_ATOMIC_LL_SC_H
>  #define __ASM_ATOMIC_LL_SC_H
>
> +#include <linux/stringify.h>
> +
>  #if IS_ENABLED(CONFIG_ARM64_LSE_ATOMICS) && IS_ENABLED(CONFIG_AS_LSE)
>  #define __LL_SC_FALLBACK(asm_ops)                                      \
>  "      b       3f\n"                                                   \
> @@ -23,6 +25,10 @@ asm_ops "\n"                                                         \
>  #define __LL_SC_FALLBACK(asm_ops) asm_ops
>  #endif
>
> +#ifndef CONFIG_CC_HAS_K_CONSTRAINT
> +#define K
> +#endif
> +
>  /*
>   * AArch64 UP and SMP safe atomic ops.  We use load exclusive and
>   * store exclusive to ensure that these are atomic.  We may loop
> @@ -44,7 +50,7 @@ __ll_sc_atomic_##op(int i, atomic_t *v)                                       \
>  "      stxr    %w1, %w0, %2\n"                                         \
>  "      cbnz    %w1, 1b\n")                                             \
>         : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
> -       : #constraint "r" (i));                                         \
> +       : __stringify(constraint) "r" (i));                             \
>  }
>
>  #define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
> @@ -63,7 +69,7 @@ __ll_sc_atomic_##op##_return##name(int i, atomic_t *v)                        \
>  "      cbnz    %w1, 1b\n"                                              \
>  "      " #mb )                                                         \
>         : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
> -       : #constraint "r" (i)                                           \
> +       : __stringify(constraint) "r" (i)                               \
>         : cl);                                                          \
>                                                                         \
>         return result;                                                  \
> @@ -85,7 +91,7 @@ __ll_sc_atomic_fetch_##op##name(int i, atomic_t *v)                   \
>  "      cbnz    %w2, 1b\n"                                              \
>  "      " #mb )                                                         \
>         : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter)   \
> -       : #constraint "r" (i)                                           \
> +       : __stringify(constraint) "r" (i)                               \
>         : cl);                                                          \
>                                                                         \
>         return result;                                                  \
> @@ -113,10 +119,15 @@ ATOMIC_OPS(sub, sub, J)
>         ATOMIC_FETCH_OP (_acquire,        , a,  , "memory", __VA_ARGS__)\
>         ATOMIC_FETCH_OP (_release,        ,  , l, "memory", __VA_ARGS__)
>
> -ATOMIC_OPS(and, and, )
> +ATOMIC_OPS(and, and, K)
> +ATOMIC_OPS(or, orr, K)
> +ATOMIC_OPS(xor, eor, K)
> +/*
> + * GAS converts the mysterious and undocumented BIC (immediate) alias to
> + * an AND (immediate) instruction with the immediate inverted. We don't
> + * have a constraint for this, so fall back to register.
> + */
>  ATOMIC_OPS(andnot, bic, )
> -ATOMIC_OPS(or, orr, )
> -ATOMIC_OPS(xor, eor, )
>
>  #undef ATOMIC_OPS
>  #undef ATOMIC_FETCH_OP
> @@ -138,7 +149,7 @@ __ll_sc_atomic64_##op(s64 i, atomic64_t *v)                         \
>  "      stxr    %w1, %0, %2\n"                                          \
>  "      cbnz    %w1, 1b")                                               \
>         : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
> -       : #constraint "r" (i));                                         \
> +       : __stringify(constraint) "r" (i));                             \
>  }
>
>  #define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
> @@ -157,7 +168,7 @@ __ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v)          \
>  "      cbnz    %w1, 1b\n"                                              \
>  "      " #mb )                                                         \
>         : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
> -       : #constraint "r" (i)                                           \
> +       : __stringify(constraint) "r" (i)                               \
>         : cl);                                                          \
>                                                                         \
>         return result;                                                  \
> @@ -179,7 +190,7 @@ __ll_sc_atomic64_fetch_##op##name(s64 i, atomic64_t *v)             \
>  "      cbnz    %w2, 1b\n"                                              \
>  "      " #mb )                                                         \
>         : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter)   \
> -       : #constraint "r" (i)                                           \
> +       : __stringify(constraint) "r" (i)                               \
>         : cl);                                                          \
>                                                                         \
>         return result;                                                  \
> @@ -208,9 +219,14 @@ ATOMIC64_OPS(sub, sub, J)
>         ATOMIC64_FETCH_OP (_release,,  , l, "memory", __VA_ARGS__)
>
>  ATOMIC64_OPS(and, and, L)
> -ATOMIC64_OPS(andnot, bic, )
>  ATOMIC64_OPS(or, orr, L)
>  ATOMIC64_OPS(xor, eor, L)
> +/*
> + * GAS converts the mysterious and undocumented BIC (immediate) alias to
> + * an AND (immediate) instruction with the immediate inverted. We don't
> + * have a constraint for this, so fall back to register.
> + */
> +ATOMIC64_OPS(andnot, bic, )
>
>  #undef ATOMIC64_OPS
>  #undef ATOMIC64_FETCH_OP
> @@ -269,7 +285,7 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr,                        \
>         "2:")                                                           \
>         : [tmp] "=&r" (tmp), [oldval] "=&r" (oldval),                   \
>           [v] "+Q" (*(u##sz *)ptr)                                      \
> -       : [old] #constraint "r" (old), [new] "r" (new)                  \
> +       : [old] __stringify(constraint) "r" (old), [new] "r" (new)      \
>         : cl);                                                          \
>                                                                         \
>         return oldval;                                                  \
> @@ -280,21 +296,21 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr,                      \
>   * handle the 'K' constraint for the value 4294967295 - thus we use no
>   * constraint for 32 bit operations.
>   */
> -__CMPXCHG_CASE(w, b,     ,  8,        ,  ,  ,         , )
> -__CMPXCHG_CASE(w, h,     , 16,        ,  ,  ,         , )
> -__CMPXCHG_CASE(w,  ,     , 32,        ,  ,  ,         , )
> +__CMPXCHG_CASE(w, b,     ,  8,        ,  ,  ,         , K)
> +__CMPXCHG_CASE(w, h,     , 16,        ,  ,  ,         , K)
> +__CMPXCHG_CASE(w,  ,     , 32,        ,  ,  ,         , K)
>  __CMPXCHG_CASE( ,  ,     , 64,        ,  ,  ,         , L)
> -__CMPXCHG_CASE(w, b, acq_,  8,        , a,  , "memory", )
> -__CMPXCHG_CASE(w, h, acq_, 16,        , a,  , "memory", )
> -__CMPXCHG_CASE(w,  , acq_, 32,        , a,  , "memory", )
> +__CMPXCHG_CASE(w, b, acq_,  8,        , a,  , "memory", K)
> +__CMPXCHG_CASE(w, h, acq_, 16,        , a,  , "memory", K)
> +__CMPXCHG_CASE(w,  , acq_, 32,        , a,  , "memory", K)
>  __CMPXCHG_CASE( ,  , acq_, 64,        , a,  , "memory", L)
> -__CMPXCHG_CASE(w, b, rel_,  8,        ,  , l, "memory", )
> -__CMPXCHG_CASE(w, h, rel_, 16,        ,  , l, "memory", )
> -__CMPXCHG_CASE(w,  , rel_, 32,        ,  , l, "memory", )
> +__CMPXCHG_CASE(w, b, rel_,  8,        ,  , l, "memory", K)
> +__CMPXCHG_CASE(w, h, rel_, 16,        ,  , l, "memory", K)
> +__CMPXCHG_CASE(w,  , rel_, 32,        ,  , l, "memory", K)
>  __CMPXCHG_CASE( ,  , rel_, 64,        ,  , l, "memory", L)
> -__CMPXCHG_CASE(w, b,  mb_,  8, dmb ish,  , l, "memory", )
> -__CMPXCHG_CASE(w, h,  mb_, 16, dmb ish,  , l, "memory", )
> -__CMPXCHG_CASE(w,  ,  mb_, 32, dmb ish,  , l, "memory", )
> +__CMPXCHG_CASE(w, b,  mb_,  8, dmb ish,  , l, "memory", K)
> +__CMPXCHG_CASE(w, h,  mb_, 16, dmb ish,  , l, "memory", K)
> +__CMPXCHG_CASE(w,  ,  mb_, 32, dmb ish,  , l, "memory", K)
>  __CMPXCHG_CASE( ,  ,  mb_, 64, dmb ish,  , l, "memory", L)
>
>  #undef __CMPXCHG_CASE
> @@ -332,5 +348,6 @@ __CMPXCHG_DBL(   ,        ,  ,         )
>  __CMPXCHG_DBL(_mb, dmb ish, l, "memory")
>
>  #undef __CMPXCHG_DBL
> +#undef K
>
>  #endif /* __ASM_ATOMIC_LL_SC_H */
Will Deacon Aug. 29, 2019, 9:53 p.m. UTC | #3
On Thu, Aug 29, 2019 at 10:45:57AM -0700, Nick Desaulniers wrote:
> On Thu, Aug 29, 2019 at 9:55 AM Will Deacon <will@kernel.org> wrote:
> >
> > On Thu, Aug 29, 2019 at 04:48:34PM +0100, Will Deacon wrote:
> > > diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
> > > index 95091f72228b..7fa042f5444e 100644
> > > --- a/arch/arm64/include/asm/atomic_ll_sc.h
> > > +++ b/arch/arm64/include/asm/atomic_ll_sc.h
> > > @@ -23,6 +23,10 @@ asm_ops "\n"                                                               \
> > >  #define __LL_SC_FALLBACK(asm_ops) asm_ops
> > >  #endif
> > >
> > > +#ifndef CONFIG_CC_HAS_K_CONSTRAINT
> > > +#define K
> > > +#endif
> >
> > Bah, I need to use something like __stringify when the constraint is used
> > in order for this to get expanded properly. Updated diff below.
> >
> > Will
> 
> Hi Will, thanks for cc'ing me on the patch set.  I'd be happy to help
> test w/ Clang.  Would you mind pushing this set with the below diff to
> a publicly available tree+branch I can pull from?  (I haven't yet
> figured out how to download multiple diff's from gmail rather than 1
> by 1, and TBH I'd rather just use git).

Sorry, of course. I should've mentioned this in the cover letter:

https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git/log/?h=for-next/atomics

FWIW, I did test (defconfig + boot) with clang, but this does mean that LSE
atomics are disabled for that configuration when asm goto is not supported.

Will
Andrew Murray Aug. 29, 2019, 11:49 p.m. UTC | #4
On Thu, Aug 29, 2019 at 04:48:34PM +0100, Will Deacon wrote:
> The 'K' constraint is a documented AArch64 machine constraint supported
> by GCC for matching integer constants that can be used with a 32-bit
> logical instruction. Unfortunately, some released compilers erroneously
> accept the immediate '4294967295' for this constraint, which is later
> refused by GAS at assembly time. This had led us to avoid the use of
> the 'K' constraint altogether.
> 
> Instead, detect whether the compiler is up to the job when building the
> kernel and pass the 'K' constraint to our 32-bit atomic macros when it
> appears to be supported.
> 
> Signed-off-by: Will Deacon <will@kernel.org>

See my comments within this email thread, but for this patch as it is:

Reviewed-by: Andrew Murray <andrew.murray@arm.com>

> ---
>  arch/arm64/Makefile                   |  9 ++++++-
>  arch/arm64/include/asm/atomic_ll_sc.h | 47 +++++++++++++++++++++++------------
>  2 files changed, 39 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
> index 61de992bbea3..0cef056b5fb1 100644
> --- a/arch/arm64/Makefile
> +++ b/arch/arm64/Makefile
> @@ -39,6 +39,12 @@ $(warning LSE atomics not supported by binutils)
>    endif
>  endif
>  
> +cc_has_k_constraint := $(call try-run,echo				\
> +	'int main(void) {						\
> +		asm volatile("and w0, w0, %w0" :: "K" (4294967295));	\
> +		return 0;						\
> +	}' | $(CC) -S -x c -o "$$TMP" -,,-DCONFIG_CC_HAS_K_CONSTRAINT=1)
> +
>  ifeq ($(CONFIG_ARM64), y)
>  brokengasinst := $(call as-instr,1:\n.inst 0\n.rept . - 1b\n\nnop\n.endr\n,,-DCONFIG_BROKEN_GAS_INST=1)
>  
> @@ -63,7 +69,8 @@ ifeq ($(CONFIG_GENERIC_COMPAT_VDSO), y)
>    endif
>  endif
>  
> -KBUILD_CFLAGS	+= -mgeneral-regs-only $(lseinstr) $(brokengasinst) $(compat_vdso)
> +KBUILD_CFLAGS	+= -mgeneral-regs-only $(lseinstr) $(brokengasinst)	\
> +		   $(compat_vdso) $(cc_has_k_constraint)
>  KBUILD_CFLAGS	+= -fno-asynchronous-unwind-tables
>  KBUILD_CFLAGS	+= $(call cc-disable-warning, psabi)
>  KBUILD_AFLAGS	+= $(lseinstr) $(brokengasinst) $(compat_vdso)
> diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
> index 95091f72228b..7fa042f5444e 100644
> --- a/arch/arm64/include/asm/atomic_ll_sc.h
> +++ b/arch/arm64/include/asm/atomic_ll_sc.h
> @@ -23,6 +23,10 @@ asm_ops "\n"								\
>  #define __LL_SC_FALLBACK(asm_ops) asm_ops
>  #endif
>  
> +#ifndef CONFIG_CC_HAS_K_CONSTRAINT
> +#define K
> +#endif
> +
>  /*
>   * AArch64 UP and SMP safe atomic ops.  We use load exclusive and
>   * store exclusive to ensure that these are atomic.  We may loop
> @@ -113,10 +117,15 @@ ATOMIC_OPS(sub, sub, J)
>  	ATOMIC_FETCH_OP (_acquire,        , a,  , "memory", __VA_ARGS__)\
>  	ATOMIC_FETCH_OP (_release,        ,  , l, "memory", __VA_ARGS__)
>  
> -ATOMIC_OPS(and, and, )
> +ATOMIC_OPS(and, and, K)
> +ATOMIC_OPS(or, orr, K)
> +ATOMIC_OPS(xor, eor, K)
> +/*
> + * GAS converts the mysterious and undocumented BIC (immediate) alias to
> + * an AND (immediate) instruction with the immediate inverted. We don't
> + * have a constraint for this, so fall back to register.
> + */
>  ATOMIC_OPS(andnot, bic, )
> -ATOMIC_OPS(or, orr, )
> -ATOMIC_OPS(xor, eor, )
>  
>  #undef ATOMIC_OPS
>  #undef ATOMIC_FETCH_OP
> @@ -208,9 +217,14 @@ ATOMIC64_OPS(sub, sub, J)
>  	ATOMIC64_FETCH_OP (_release,,  , l, "memory", __VA_ARGS__)
>  
>  ATOMIC64_OPS(and, and, L)
> -ATOMIC64_OPS(andnot, bic, )
>  ATOMIC64_OPS(or, orr, L)
>  ATOMIC64_OPS(xor, eor, L)
> +/*
> + * GAS converts the mysterious and undocumented BIC (immediate) alias to
> + * an AND (immediate) instruction with the immediate inverted. We don't
> + * have a constraint for this, so fall back to register.
> + */
> +ATOMIC64_OPS(andnot, bic, )
>  
>  #undef ATOMIC64_OPS
>  #undef ATOMIC64_FETCH_OP
> @@ -280,21 +294,21 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr,			\
>   * handle the 'K' constraint for the value 4294967295 - thus we use no
>   * constraint for 32 bit operations.
>   */
> -__CMPXCHG_CASE(w, b,     ,  8,        ,  ,  ,         , )
> -__CMPXCHG_CASE(w, h,     , 16,        ,  ,  ,         , )
> -__CMPXCHG_CASE(w,  ,     , 32,        ,  ,  ,         , )
> +__CMPXCHG_CASE(w, b,     ,  8,        ,  ,  ,         , K)
> +__CMPXCHG_CASE(w, h,     , 16,        ,  ,  ,         , K)
> +__CMPXCHG_CASE(w,  ,     , 32,        ,  ,  ,         , K)
>  __CMPXCHG_CASE( ,  ,     , 64,        ,  ,  ,         , L)
> -__CMPXCHG_CASE(w, b, acq_,  8,        , a,  , "memory", )
> -__CMPXCHG_CASE(w, h, acq_, 16,        , a,  , "memory", )
> -__CMPXCHG_CASE(w,  , acq_, 32,        , a,  , "memory", )
> +__CMPXCHG_CASE(w, b, acq_,  8,        , a,  , "memory", K)
> +__CMPXCHG_CASE(w, h, acq_, 16,        , a,  , "memory", K)
> +__CMPXCHG_CASE(w,  , acq_, 32,        , a,  , "memory", K)
>  __CMPXCHG_CASE( ,  , acq_, 64,        , a,  , "memory", L)
> -__CMPXCHG_CASE(w, b, rel_,  8,        ,  , l, "memory", )
> -__CMPXCHG_CASE(w, h, rel_, 16,        ,  , l, "memory", )
> -__CMPXCHG_CASE(w,  , rel_, 32,        ,  , l, "memory", )
> +__CMPXCHG_CASE(w, b, rel_,  8,        ,  , l, "memory", K)
> +__CMPXCHG_CASE(w, h, rel_, 16,        ,  , l, "memory", K)
> +__CMPXCHG_CASE(w,  , rel_, 32,        ,  , l, "memory", K)
>  __CMPXCHG_CASE( ,  , rel_, 64,        ,  , l, "memory", L)
> -__CMPXCHG_CASE(w, b,  mb_,  8, dmb ish,  , l, "memory", )
> -__CMPXCHG_CASE(w, h,  mb_, 16, dmb ish,  , l, "memory", )
> -__CMPXCHG_CASE(w,  ,  mb_, 32, dmb ish,  , l, "memory", )
> +__CMPXCHG_CASE(w, b,  mb_,  8, dmb ish,  , l, "memory", K)
> +__CMPXCHG_CASE(w, h,  mb_, 16, dmb ish,  , l, "memory", K)
> +__CMPXCHG_CASE(w,  ,  mb_, 32, dmb ish,  , l, "memory", K)
>  __CMPXCHG_CASE( ,  ,  mb_, 64, dmb ish,  , l, "memory", L)
>  
>  #undef __CMPXCHG_CASE
> @@ -332,5 +346,6 @@ __CMPXCHG_DBL(   ,        ,  ,         )
>  __CMPXCHG_DBL(_mb, dmb ish, l, "memory")
>  
>  #undef __CMPXCHG_DBL
> +#undef K
>  
>  #endif	/* __ASM_ATOMIC_LL_SC_H */
> -- 
> 2.11.0
>
Andrew Murray Aug. 30, 2019, 12:08 a.m. UTC | #5
On Thu, Aug 29, 2019 at 05:54:58PM +0100, Will Deacon wrote:
> On Thu, Aug 29, 2019 at 04:48:34PM +0100, Will Deacon wrote:
> > diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
> > index 95091f72228b..7fa042f5444e 100644
> > --- a/arch/arm64/include/asm/atomic_ll_sc.h
> > +++ b/arch/arm64/include/asm/atomic_ll_sc.h
> > @@ -23,6 +23,10 @@ asm_ops "\n"								\
> >  #define __LL_SC_FALLBACK(asm_ops) asm_ops
> >  #endif
> >  
> > +#ifndef CONFIG_CC_HAS_K_CONSTRAINT
> > +#define K
> > +#endif
> 
> Bah, I need to use something like __stringify when the constraint is used
> in order for this to get expanded properly. Updated diff below.

I don't think the changes in your updated diff are required. We successfully
combine 'asm_op' with the remainder of the assembly string without using
 __stringify, and this is no different to how the original patch combined
'constraint' with "r".

You can verify this by looking at the preprocessed .i files generated with
something like:

make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- drivers/spi/spi-rockchip.i

I see no difference (with GCC 7.3.1) between the original approach and your
use of __stringify. Incidentally you end up with "K" "r" instead of "Kr" but
it seems to have the desired effect (e.g. supress/emit out of range errors).
I have a couple of macros that resolves this to "Kr" but I don't think it's
necessary.

Did you find that it didn't work without your changes? I found it hard to
reproduce the out-of-range errors until I made the following change, I could
then easily see the effect of changing the constraint:

        : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
-       : #constraint "r" (i));                                         \
+       : #constraint) "r" (4294967295));                                               \
 }


Thanks,

Andrew Murray

> 
> Will
> 
> --->8
> 
> diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
> index 61de992bbea3..0cef056b5fb1 100644
> --- a/arch/arm64/Makefile
> +++ b/arch/arm64/Makefile
> @@ -39,6 +39,12 @@ $(warning LSE atomics not supported by binutils)
>    endif
>  endif
>  
> +cc_has_k_constraint := $(call try-run,echo				\
> +	'int main(void) {						\
> +		asm volatile("and w0, w0, %w0" :: "K" (4294967295));	\
> +		return 0;						\
> +	}' | $(CC) -S -x c -o "$$TMP" -,,-DCONFIG_CC_HAS_K_CONSTRAINT=1)
> +
>  ifeq ($(CONFIG_ARM64), y)
>  brokengasinst := $(call as-instr,1:\n.inst 0\n.rept . - 1b\n\nnop\n.endr\n,,-DCONFIG_BROKEN_GAS_INST=1)
>  
> @@ -63,7 +69,8 @@ ifeq ($(CONFIG_GENERIC_COMPAT_VDSO), y)
>    endif
>  endif
>  
> -KBUILD_CFLAGS	+= -mgeneral-regs-only $(lseinstr) $(brokengasinst) $(compat_vdso)
> +KBUILD_CFLAGS	+= -mgeneral-regs-only $(lseinstr) $(brokengasinst)	\
> +		   $(compat_vdso) $(cc_has_k_constraint)
>  KBUILD_CFLAGS	+= -fno-asynchronous-unwind-tables
>  KBUILD_CFLAGS	+= $(call cc-disable-warning, psabi)
>  KBUILD_AFLAGS	+= $(lseinstr) $(brokengasinst) $(compat_vdso)
> diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
> index 95091f72228b..7b012148bfd6 100644
> --- a/arch/arm64/include/asm/atomic_ll_sc.h
> +++ b/arch/arm64/include/asm/atomic_ll_sc.h
> @@ -10,6 +10,8 @@
>  #ifndef __ASM_ATOMIC_LL_SC_H
>  #define __ASM_ATOMIC_LL_SC_H
>  
> +#include <linux/stringify.h>
> +
>  #if IS_ENABLED(CONFIG_ARM64_LSE_ATOMICS) && IS_ENABLED(CONFIG_AS_LSE)
>  #define __LL_SC_FALLBACK(asm_ops)					\
>  "	b	3f\n"							\
> @@ -23,6 +25,10 @@ asm_ops "\n"								\
>  #define __LL_SC_FALLBACK(asm_ops) asm_ops
>  #endif
>  
> +#ifndef CONFIG_CC_HAS_K_CONSTRAINT
> +#define K
> +#endif
> +
>  /*
>   * AArch64 UP and SMP safe atomic ops.  We use load exclusive and
>   * store exclusive to ensure that these are atomic.  We may loop
> @@ -44,7 +50,7 @@ __ll_sc_atomic_##op(int i, atomic_t *v)					\
>  "	stxr	%w1, %w0, %2\n"						\
>  "	cbnz	%w1, 1b\n")						\
>  	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)		\
> -	: #constraint "r" (i));						\
> +	: __stringify(constraint) "r" (i));				\
>  }
>  
>  #define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
> @@ -63,7 +69,7 @@ __ll_sc_atomic_##op##_return##name(int i, atomic_t *v)			\
>  "	cbnz	%w1, 1b\n"						\
>  "	" #mb )								\
>  	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)		\
> -	: #constraint "r" (i)						\
> +	: __stringify(constraint) "r" (i)				\
>  	: cl);								\
>  									\
>  	return result;							\
> @@ -85,7 +91,7 @@ __ll_sc_atomic_fetch_##op##name(int i, atomic_t *v)			\
>  "	cbnz	%w2, 1b\n"						\
>  "	" #mb )								\
>  	: "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter)	\
> -	: #constraint "r" (i)						\
> +	: __stringify(constraint) "r" (i)				\
>  	: cl);								\
>  									\
>  	return result;							\
> @@ -113,10 +119,15 @@ ATOMIC_OPS(sub, sub, J)
>  	ATOMIC_FETCH_OP (_acquire,        , a,  , "memory", __VA_ARGS__)\
>  	ATOMIC_FETCH_OP (_release,        ,  , l, "memory", __VA_ARGS__)
>  
> -ATOMIC_OPS(and, and, )
> +ATOMIC_OPS(and, and, K)
> +ATOMIC_OPS(or, orr, K)
> +ATOMIC_OPS(xor, eor, K)
> +/*
> + * GAS converts the mysterious and undocumented BIC (immediate) alias to
> + * an AND (immediate) instruction with the immediate inverted. We don't
> + * have a constraint for this, so fall back to register.
> + */
>  ATOMIC_OPS(andnot, bic, )
> -ATOMIC_OPS(or, orr, )
> -ATOMIC_OPS(xor, eor, )
>  
>  #undef ATOMIC_OPS
>  #undef ATOMIC_FETCH_OP
> @@ -138,7 +149,7 @@ __ll_sc_atomic64_##op(s64 i, atomic64_t *v)				\
>  "	stxr	%w1, %0, %2\n"						\
>  "	cbnz	%w1, 1b")						\
>  	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)		\
> -	: #constraint "r" (i));						\
> +	: __stringify(constraint) "r" (i));				\
>  }
>  
>  #define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
> @@ -157,7 +168,7 @@ __ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v)		\
>  "	cbnz	%w1, 1b\n"						\
>  "	" #mb )								\
>  	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)		\
> -	: #constraint "r" (i)						\
> +	: __stringify(constraint) "r" (i)				\
>  	: cl);								\
>  									\
>  	return result;							\
> @@ -179,7 +190,7 @@ __ll_sc_atomic64_fetch_##op##name(s64 i, atomic64_t *v)		\
>  "	cbnz	%w2, 1b\n"						\
>  "	" #mb )								\
>  	: "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter)	\
> -	: #constraint "r" (i)						\
> +	: __stringify(constraint) "r" (i)				\
>  	: cl);								\
>  									\
>  	return result;							\
> @@ -208,9 +219,14 @@ ATOMIC64_OPS(sub, sub, J)
>  	ATOMIC64_FETCH_OP (_release,,  , l, "memory", __VA_ARGS__)
>  
>  ATOMIC64_OPS(and, and, L)
> -ATOMIC64_OPS(andnot, bic, )
>  ATOMIC64_OPS(or, orr, L)
>  ATOMIC64_OPS(xor, eor, L)
> +/*
> + * GAS converts the mysterious and undocumented BIC (immediate) alias to
> + * an AND (immediate) instruction with the immediate inverted. We don't
> + * have a constraint for this, so fall back to register.
> + */
> +ATOMIC64_OPS(andnot, bic, )
>  
>  #undef ATOMIC64_OPS
>  #undef ATOMIC64_FETCH_OP
> @@ -269,7 +285,7 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr,			\
>  	"2:")								\
>  	: [tmp] "=&r" (tmp), [oldval] "=&r" (oldval),			\
>  	  [v] "+Q" (*(u##sz *)ptr)					\
> -	: [old] #constraint "r" (old), [new] "r" (new)			\
> +	: [old] __stringify(constraint) "r" (old), [new] "r" (new)	\
>  	: cl);								\
>  									\
>  	return oldval;							\
> @@ -280,21 +296,21 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr,			\
>   * handle the 'K' constraint for the value 4294967295 - thus we use no
>   * constraint for 32 bit operations.
>   */
> -__CMPXCHG_CASE(w, b,     ,  8,        ,  ,  ,         , )
> -__CMPXCHG_CASE(w, h,     , 16,        ,  ,  ,         , )
> -__CMPXCHG_CASE(w,  ,     , 32,        ,  ,  ,         , )
> +__CMPXCHG_CASE(w, b,     ,  8,        ,  ,  ,         , K)
> +__CMPXCHG_CASE(w, h,     , 16,        ,  ,  ,         , K)
> +__CMPXCHG_CASE(w,  ,     , 32,        ,  ,  ,         , K)
>  __CMPXCHG_CASE( ,  ,     , 64,        ,  ,  ,         , L)
> -__CMPXCHG_CASE(w, b, acq_,  8,        , a,  , "memory", )
> -__CMPXCHG_CASE(w, h, acq_, 16,        , a,  , "memory", )
> -__CMPXCHG_CASE(w,  , acq_, 32,        , a,  , "memory", )
> +__CMPXCHG_CASE(w, b, acq_,  8,        , a,  , "memory", K)
> +__CMPXCHG_CASE(w, h, acq_, 16,        , a,  , "memory", K)
> +__CMPXCHG_CASE(w,  , acq_, 32,        , a,  , "memory", K)
>  __CMPXCHG_CASE( ,  , acq_, 64,        , a,  , "memory", L)
> -__CMPXCHG_CASE(w, b, rel_,  8,        ,  , l, "memory", )
> -__CMPXCHG_CASE(w, h, rel_, 16,        ,  , l, "memory", )
> -__CMPXCHG_CASE(w,  , rel_, 32,        ,  , l, "memory", )
> +__CMPXCHG_CASE(w, b, rel_,  8,        ,  , l, "memory", K)
> +__CMPXCHG_CASE(w, h, rel_, 16,        ,  , l, "memory", K)
> +__CMPXCHG_CASE(w,  , rel_, 32,        ,  , l, "memory", K)
>  __CMPXCHG_CASE( ,  , rel_, 64,        ,  , l, "memory", L)
> -__CMPXCHG_CASE(w, b,  mb_,  8, dmb ish,  , l, "memory", )
> -__CMPXCHG_CASE(w, h,  mb_, 16, dmb ish,  , l, "memory", )
> -__CMPXCHG_CASE(w,  ,  mb_, 32, dmb ish,  , l, "memory", )
> +__CMPXCHG_CASE(w, b,  mb_,  8, dmb ish,  , l, "memory", K)
> +__CMPXCHG_CASE(w, h,  mb_, 16, dmb ish,  , l, "memory", K)
> +__CMPXCHG_CASE(w,  ,  mb_, 32, dmb ish,  , l, "memory", K)
>  __CMPXCHG_CASE( ,  ,  mb_, 64, dmb ish,  , l, "memory", L)
>  
>  #undef __CMPXCHG_CASE
> @@ -332,5 +348,6 @@ __CMPXCHG_DBL(   ,        ,  ,         )
>  __CMPXCHG_DBL(_mb, dmb ish, l, "memory")
>  
>  #undef __CMPXCHG_DBL
> +#undef K
>  
>  #endif	/* __ASM_ATOMIC_LL_SC_H */
Will Deacon Aug. 30, 2019, 7:52 a.m. UTC | #6
On Fri, Aug 30, 2019 at 01:08:03AM +0100, Andrew Murray wrote:
> On Thu, Aug 29, 2019 at 05:54:58PM +0100, Will Deacon wrote:
> > On Thu, Aug 29, 2019 at 04:48:34PM +0100, Will Deacon wrote:
> > > diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
> > > index 95091f72228b..7fa042f5444e 100644
> > > --- a/arch/arm64/include/asm/atomic_ll_sc.h
> > > +++ b/arch/arm64/include/asm/atomic_ll_sc.h
> > > @@ -23,6 +23,10 @@ asm_ops "\n"								\
> > >  #define __LL_SC_FALLBACK(asm_ops) asm_ops
> > >  #endif
> > >  
> > > +#ifndef CONFIG_CC_HAS_K_CONSTRAINT
> > > +#define K
> > > +#endif
> > 
> > Bah, I need to use something like __stringify when the constraint is used
> > in order for this to get expanded properly. Updated diff below.
> 
> I don't think the changes in your updated diff are required. We successfully
> combine 'asm_op' with the remainder of the assembly string without using
>  __stringify, and this is no different to how the original patch combined
> 'constraint' with "r".

It's a hack: __stringify expands its arguments, so I figured I may as well
use that rather than do it manually with an extra macro.

> You can verify this by looking at the preprocessed .i files generated with
> something like:
> 
> make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- drivers/spi/spi-rockchip.i
> 
> I see no difference (with GCC 7.3.1) between the original approach and your
> use of __stringify. Incidentally you end up with "K" "r" instead of "Kr" but
> it seems to have the desired effect (e.g. supress/emit out of range errors).
> I have a couple of macros that resolves this to "Kr" but I don't think it's
> necessary.
> 
> Did you find that it didn't work without your changes? I found it hard to
> reproduce the out-of-range errors until I made the following change, I could
> then easily see the effect of changing the constraint:
> 
>         : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
> -       : #constraint "r" (i));                                         \
> +       : #constraint) "r" (4294967295));                                               \
>  }

Without the __stringify I get a compilation failure when building
kernel/panic.o because it tries to cmpxchg a 32-bit variable with -1
(PANIC_CPU_INVALID). Looking at panic.s, I see that constraint parameter
isn't being expanded. For example if I do:

  #ifndef CONFIG_CC_HAS_K_CONSTRAINT
  #define INVALID_CONSTRAINT
  #else
  #define INVALID_CONSTRAINT	K
  #endif

and then pass INVALID_CONSTRAINT to the generator macros, we'll end up
with INVALID_CONSTRAINT in the .s file and gas will barf.

The reason I didn't see this initially is because my silly testcase had
a typo and was using atomic_add instead of atomic_and.

Will
Andrew Murray Aug. 30, 2019, 9:11 a.m. UTC | #7
On Fri, Aug 30, 2019 at 08:52:20AM +0100, Will Deacon wrote:
> On Fri, Aug 30, 2019 at 01:08:03AM +0100, Andrew Murray wrote:
> > On Thu, Aug 29, 2019 at 05:54:58PM +0100, Will Deacon wrote:
> > > On Thu, Aug 29, 2019 at 04:48:34PM +0100, Will Deacon wrote:
> > > > diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
> > > > index 95091f72228b..7fa042f5444e 100644
> > > > --- a/arch/arm64/include/asm/atomic_ll_sc.h
> > > > +++ b/arch/arm64/include/asm/atomic_ll_sc.h
> > > > @@ -23,6 +23,10 @@ asm_ops "\n"								\
> > > >  #define __LL_SC_FALLBACK(asm_ops) asm_ops
> > > >  #endif

I downloaded your original patches and tried them, and also got the
build error. After playing with this I think something isn't quite right...

This is your current test:

 echo 'int main(void) {asm volatile("and w0, w0, %w0" :: "K" (4294967295)); return 0; }' |  aarch64-linux-gnu-gcc -S -x c  - ; echo $?

But on my machine this returns 0, i.e. no error. If I drop the -S:

 echo 'int main(void) {asm volatile("and w0, w0, %w0" :: "K" (4294967295)); return 0; }' |  aarch64-linux-gnu-gcc -x c  - ; echo $?

Then this returns 1.

So I guess the -S flag or something similar is needed.

> > > >  
> > > > +#ifndef CONFIG_CC_HAS_K_CONSTRAINT
> > > > +#define K
> > > > +#endif

Also, isn't this the wrong way around?

It looks like when using $(call try-run,echo - it's the last argument that is
used when the condition is false. Thus at present we seem to be setting 
CONFIG_CC_HAS_K_CONSTRAINT when 'K' is broken.


> > > 
> > > Bah, I need to use something like __stringify when the constraint is used
> > > in order for this to get expanded properly. Updated diff below.
> > 
> > I don't think the changes in your updated diff are required. We successfully
> > combine 'asm_op' with the remainder of the assembly string without using
> >  __stringify, and this is no different to how the original patch combined
> > 'constraint' with "r".
> 
> It's a hack: __stringify expands its arguments, so I figured I may as well
> use that rather than do it manually with an extra macro.
> 
> > You can verify this by looking at the preprocessed .i files generated with
> > something like:
> > 
> > make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- drivers/spi/spi-rockchip.i
> > 
> > I see no difference (with GCC 7.3.1) between the original approach and your
> > use of __stringify. Incidentally you end up with "K" "r" instead of "Kr" but
> > it seems to have the desired effect (e.g. supress/emit out of range errors).
> > I have a couple of macros that resolves this to "Kr" but I don't think it's
> > necessary.
> > 
> > Did you find that it didn't work without your changes? I found it hard to
> > reproduce the out-of-range errors until I made the following change, I could
> > then easily see the effect of changing the constraint:
> > 
> >         : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
> > -       : #constraint "r" (i));                                         \
> > +       : #constraint) "r" (4294967295));                                               \
> >  }
> 
> Without the __stringify I get a compilation failure when building
> kernel/panic.o because it tries to cmpxchg a 32-bit variable with -1
> (PANIC_CPU_INVALID). Looking at panic.s, I see that constraint parameter
> isn't being expanded. For example if I do:
> 
>   #ifndef CONFIG_CC_HAS_K_CONSTRAINT
>   #define INVALID_CONSTRAINT
>   #else
>   #define INVALID_CONSTRAINT	K
>   #endif
> 
> and then pass INVALID_CONSTRAINT to the generator macros, we'll end up
> with INVALID_CONSTRAINT in the .s file and gas will barf.

This still isn't an issue for me. Your patches cause the build to fail because
it's using the K flag - if I invert the CONFIG_CC_HAS_K_CONSTRAINT test then
it builds correctly (because it expands the K to nothing).

If there is an issue with the expansion of constraint, shouldn't we also
__stringify 'asm_op'?

Thanks,

Andrew Murray

> 
> The reason I didn't see this initially is because my silly testcase had
> a typo and was using atomic_add instead of atomic_and.
> 
> Will
Will Deacon Aug. 30, 2019, 10:17 a.m. UTC | #8
On Fri, Aug 30, 2019 at 10:11:55AM +0100, Andrew Murray wrote:
> On Fri, Aug 30, 2019 at 08:52:20AM +0100, Will Deacon wrote:
> > On Fri, Aug 30, 2019 at 01:08:03AM +0100, Andrew Murray wrote:
> > > On Thu, Aug 29, 2019 at 05:54:58PM +0100, Will Deacon wrote:
> > > > On Thu, Aug 29, 2019 at 04:48:34PM +0100, Will Deacon wrote:
> > > > > diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
> > > > > index 95091f72228b..7fa042f5444e 100644
> > > > > --- a/arch/arm64/include/asm/atomic_ll_sc.h
> > > > > +++ b/arch/arm64/include/asm/atomic_ll_sc.h
> > > > > @@ -23,6 +23,10 @@ asm_ops "\n"								\
> > > > >  #define __LL_SC_FALLBACK(asm_ops) asm_ops
> > > > >  #endif
> 
> I downloaded your original patches and tried them, and also got the
> build error. After playing with this I think something isn't quite right...
> 
> This is your current test:
> 
>  echo 'int main(void) {asm volatile("and w0, w0, %w0" :: "K" (4294967295)); return 0; }' |  aarch64-linux-gnu-gcc -S -x c  - ; echo $?
> 
> But on my machine this returns 0, i.e. no error. If I drop the -S:
> 
>  echo 'int main(void) {asm volatile("and w0, w0, %w0" :: "K" (4294967295)); return 0; }' |  aarch64-linux-gnu-gcc -x c  - ; echo $?
> 
> Then this returns 1.
> 
> So I guess the -S flag or something similar is needed.

This seems correct to me, and is the reason we pass -S in the Makefile. Why
are you dropping it?

In the first case, the (broken) compiler is emitted an assembly file
containing "and w0, w0, 4294967295", and so we will not define
CONFIG_CC_HAS_K_CONSTRAINT.

In the second case, you're passing the bad assembly file to GAS, which
rejects it.

> > > > > +#ifndef CONFIG_CC_HAS_K_CONSTRAINT
> > > > > +#define K
> > > > > +#endif
> 
> Also, isn't this the wrong way around?

No. If the compiler doesn't support the K constraint, then we get:

	[old] "" "r" (old)

because we've defined K as being nothing. Otherwise, we get:

	[old] "K" "r" (old)

because K isn't defined as anything.

> It looks like when using $(call try-run,echo - it's the last argument that is
> used when the condition is false. Thus at present we seem to be setting 
> CONFIG_CC_HAS_K_CONSTRAINT when 'K' is broken.

No. We set CONFIG_CC_HAS_K_CONSTRAINT when the compiler fails to generate
an assembly file with the invalid immediate.

> > Without the __stringify I get a compilation failure when building
> > kernel/panic.o because it tries to cmpxchg a 32-bit variable with -1
> > (PANIC_CPU_INVALID). Looking at panic.s, I see that constraint parameter
> > isn't being expanded. For example if I do:
> > 
> >   #ifndef CONFIG_CC_HAS_K_CONSTRAINT
> >   #define INVALID_CONSTRAINT
> >   #else
> >   #define INVALID_CONSTRAINT	K
> >   #endif
> > 
> > and then pass INVALID_CONSTRAINT to the generator macros, we'll end up
> > with INVALID_CONSTRAINT in the .s file and gas will barf.
> 
> This still isn't an issue for me. Your patches cause the build to fail because
> it's using the K flag - if I invert the CONFIG_CC_HAS_K_CONSTRAINT test then
> it builds correctly (because it expands the K to nothing).

That doesn't make any sense :/ Is this after you've dropped the -S
parameter?

If you think there's a bug, please can you send a patch? However, inverting
the check breaks the build for me. Which toolchain are you using?

> If there is an issue with the expansion of constraint, shouldn't we also
> __stringify 'asm_op'?

It would be harmless, but there's no need because asm_op doesn't ever
require further expansion.

Will
Mark Rutland Aug. 30, 2019, 10:40 a.m. UTC | #9
On Fri, Aug 30, 2019 at 10:11:55AM +0100, Andrew Murray wrote:
> On Fri, Aug 30, 2019 at 08:52:20AM +0100, Will Deacon wrote:
> > On Fri, Aug 30, 2019 at 01:08:03AM +0100, Andrew Murray wrote:
> > > On Thu, Aug 29, 2019 at 05:54:58PM +0100, Will Deacon wrote:
> > > > On Thu, Aug 29, 2019 at 04:48:34PM +0100, Will Deacon wrote:
> > > > > diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
> > > > > index 95091f72228b..7fa042f5444e 100644
> > > > > --- a/arch/arm64/include/asm/atomic_ll_sc.h
> > > > > +++ b/arch/arm64/include/asm/atomic_ll_sc.h
> > > > > @@ -23,6 +23,10 @@ asm_ops "\n"								\
> > > > >  #define __LL_SC_FALLBACK(asm_ops) asm_ops
> > > > >  #endif
> 
> I downloaded your original patches and tried them, and also got the
> build error. After playing with this I think something isn't quite right...

Can you post the error you see?

> This is your current test:
> 
>  echo 'int main(void) {asm volatile("and w0, w0, %w0" :: "K" (4294967295)); return 0; }' |  aarch64-linux-gnu-gcc -S -x c  - ; echo $?
> 
> But on my machine this returns 0, i.e. no error. 

IIUC that's expected, as this is testing if the compiler erroneously
accepts the invalid immediate.

Note that try-run takes (option,option-ok,otherwise), so:

| cc_has_k_constraint := $(call try-run,echo                             \
|        'int main(void) {                                               \
|                asm volatile("and w0, w0, %w0" :: "K" (4294967295));    \
|                return 0;                                               \
|        }' | $(CC) -S -x c -o "$$TMP" -,,-DCONFIG_CC_HAS_K_CONSTRAINT=1)

... means we do nothing when the compile is successful (i.e. when the compiler
is broken), and we set -DCONFIG_CC_HAS_K_CONSTRAINT=1 when the compiler
correctly rejects the invalid immediate.

If we drop the -S, we'll get an error in all cases, as either:

* GCC silently accepts the immediate, GAS aborts
* GCC aborts as it can't satisfy the constraint

> > > > > +#ifndef CONFIG_CC_HAS_K_CONSTRAINT
> > > > > +#define K
> > > > > +#endif

Here we define K to nothing if the compiler accepts the broken immediate.

If the compiler rejects invalid immediates we don't define K to anything, so
it's treated as a literal later on, and gets added as a constaint.

Thanks,
Mark.
Andrew Murray Aug. 30, 2019, 11:53 a.m. UTC | #10
On Fri, Aug 30, 2019 at 11:40:53AM +0100, Mark Rutland wrote:
> On Fri, Aug 30, 2019 at 10:11:55AM +0100, Andrew Murray wrote:
> > On Fri, Aug 30, 2019 at 08:52:20AM +0100, Will Deacon wrote:
> > > On Fri, Aug 30, 2019 at 01:08:03AM +0100, Andrew Murray wrote:
> > > > On Thu, Aug 29, 2019 at 05:54:58PM +0100, Will Deacon wrote:
> > > > > On Thu, Aug 29, 2019 at 04:48:34PM +0100, Will Deacon wrote:
> > > > > > diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
> > > > > > index 95091f72228b..7fa042f5444e 100644
> > > > > > --- a/arch/arm64/include/asm/atomic_ll_sc.h
> > > > > > +++ b/arch/arm64/include/asm/atomic_ll_sc.h
> > > > > > @@ -23,6 +23,10 @@ asm_ops "\n"								\
> > > > > >  #define __LL_SC_FALLBACK(asm_ops) asm_ops
> > > > > >  #endif
> > 
> > I downloaded your original patches and tried them, and also got the
> > build error. After playing with this I think something isn't quite right...
> 
> Can you post the error you see?

Doh, it looks like I didn't apply the __stringify patches - this is why it
didn't work for me.

> 
> > This is your current test:
> > 
> >  echo 'int main(void) {asm volatile("and w0, w0, %w0" :: "K" (4294967295)); return 0; }' |  aarch64-linux-gnu-gcc -S -x c  - ; echo $?
> > 
> > But on my machine this returns 0, i.e. no error. 
> 
> IIUC that's expected, as this is testing if the compiler erroneously
> accepts the invalid immediate.
> 
> Note that try-run takes (option,option-ok,otherwise), so:
> 
> | cc_has_k_constraint := $(call try-run,echo                             \
> |        'int main(void) {                                               \
> |                asm volatile("and w0, w0, %w0" :: "K" (4294967295));    \
> |                return 0;                                               \
> |        }' | $(CC) -S -x c -o "$$TMP" -,,-DCONFIG_CC_HAS_K_CONSTRAINT=1)
> 
> ... means we do nothing when the compile is successful (i.e. when the compiler
> is broken), and we set -DCONFIG_CC_HAS_K_CONSTRAINT=1 when the compiler
> correctly rejects the invalid immediate.

Yes I see this now. I hadn't realised that the -S allows us to see what the
compiler does prior to assembling. Indeed this test verifies that the compiler
accepts an invalid value - and if so we don't permit use of the 'K' flag.

(I guess I was wrongly expecting the command to fail when we pass an invalid
value and thus expected the option-ok to be where we set the define.)

Thanks for the explanation!

Andrew Murray

> 
> If we drop the -S, we'll get an error in all cases, as either:
> 
> * GCC silently accepts the immediate, GAS aborts
> * GCC aborts as it can't satisfy the constraint
> 
> > > > > > +#ifndef CONFIG_CC_HAS_K_CONSTRAINT
> > > > > > +#define K
> > > > > > +#endif
> 
> Here we define K to nothing if the compiler accepts the broken immediate.
> 
> If the compiler rejects invalid immediates we don't define K to anything, so
> it's treated as a literal later on, and gets added as a constaint.
> 
> Thanks,
> Mark.
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Andrew Murray Aug. 30, 2019, 11:57 a.m. UTC | #11
On Fri, Aug 30, 2019 at 11:17:16AM +0100, Will Deacon wrote:
> On Fri, Aug 30, 2019 at 10:11:55AM +0100, Andrew Murray wrote:
> > On Fri, Aug 30, 2019 at 08:52:20AM +0100, Will Deacon wrote:
> > > On Fri, Aug 30, 2019 at 01:08:03AM +0100, Andrew Murray wrote:
> > > > On Thu, Aug 29, 2019 at 05:54:58PM +0100, Will Deacon wrote:
> > > > > On Thu, Aug 29, 2019 at 04:48:34PM +0100, Will Deacon wrote:
> > > > > > diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
> > > > > > index 95091f72228b..7fa042f5444e 100644
> > > > > > --- a/arch/arm64/include/asm/atomic_ll_sc.h
> > > > > > +++ b/arch/arm64/include/asm/atomic_ll_sc.h
> > > > > > @@ -23,6 +23,10 @@ asm_ops "\n"								\
> > > > > >  #define __LL_SC_FALLBACK(asm_ops) asm_ops
> > > > > >  #endif
> > 
> > I downloaded your original patches and tried them, and also got the
> > build error. After playing with this I think something isn't quite right...
> > 
> > This is your current test:
> > 
> >  echo 'int main(void) {asm volatile("and w0, w0, %w0" :: "K" (4294967295)); return 0; }' |  aarch64-linux-gnu-gcc -S -x c  - ; echo $?
> > 
> > But on my machine this returns 0, i.e. no error. If I drop the -S:
> > 
> >  echo 'int main(void) {asm volatile("and w0, w0, %w0" :: "K" (4294967295)); return 0; }' |  aarch64-linux-gnu-gcc -x c  - ; echo $?
> > 
> > Then this returns 1.
> > 
> > So I guess the -S flag or something similar is needed.
> 
> This seems correct to me, and is the reason we pass -S in the Makefile. Why
> are you dropping it?
> 
> In the first case, the (broken) compiler is emitted an assembly file
> containing "and w0, w0, 4294967295", and so we will not define
> CONFIG_CC_HAS_K_CONSTRAINT.
> 
> In the second case, you're passing the bad assembly file to GAS, which
> rejects it.
> 
> > > > > > +#ifndef CONFIG_CC_HAS_K_CONSTRAINT
> > > > > > +#define K
> > > > > > +#endif
> > 
> > Also, isn't this the wrong way around?
> 
> No. If the compiler doesn't support the K constraint, then we get:
> 
> 	[old] "" "r" (old)
> 
> because we've defined K as being nothing. Otherwise, we get:
> 
> 	[old] "K" "r" (old)
> 
> because K isn't defined as anything.
> 
> > It looks like when using $(call try-run,echo - it's the last argument that is
> > used when the condition is false. Thus at present we seem to be setting 
> > CONFIG_CC_HAS_K_CONSTRAINT when 'K' is broken.
> 
> No. We set CONFIG_CC_HAS_K_CONSTRAINT when the compiler fails to generate
> an assembly file with the invalid immediate.
> 
> > > Without the __stringify I get a compilation failure when building
> > > kernel/panic.o because it tries to cmpxchg a 32-bit variable with -1
> > > (PANIC_CPU_INVALID). Looking at panic.s, I see that constraint parameter
> > > isn't being expanded. For example if I do:
> > > 
> > >   #ifndef CONFIG_CC_HAS_K_CONSTRAINT
> > >   #define INVALID_CONSTRAINT
> > >   #else
> > >   #define INVALID_CONSTRAINT	K
> > >   #endif
> > > 
> > > and then pass INVALID_CONSTRAINT to the generator macros, we'll end up
> > > with INVALID_CONSTRAINT in the .s file and gas will barf.
> > 
> > This still isn't an issue for me. Your patches cause the build to fail because
> > it's using the K flag - if I invert the CONFIG_CC_HAS_K_CONSTRAINT test then
> > it builds correctly (because it expands the K to nothing).
> 
> That doesn't make any sense :/ Is this after you've dropped the -S
> parameter?

As discussed on IRC, all my issues were due to not applying the extra
__stringify patch of yours and getting confused about intermediates. Thanks
for your time and patience!

I'm satisfied this works (with your extra patch), so again:

Reviewed-by: Andrew Murray <andrew.murray@arm.com>


> 
> If you think there's a bug, please can you send a patch? However, inverting
> the check breaks the build for me. Which toolchain are you using?
> 
> > If there is an issue with the expansion of constraint, shouldn't we also
> > __stringify 'asm_op'?
> 
> It would be harmless, but there's no need because asm_op doesn't ever
> require further expansion.
> 
> Will
Nick Desaulniers Aug. 30, 2019, 8:57 p.m. UTC | #12
On Thu, Aug 29, 2019 at 2:53 PM Will Deacon <will@kernel.org> wrote:
>
> On Thu, Aug 29, 2019 at 10:45:57AM -0700, Nick Desaulniers wrote:
> > On Thu, Aug 29, 2019 at 9:55 AM Will Deacon <will@kernel.org> wrote:
> > >
> > > On Thu, Aug 29, 2019 at 04:48:34PM +0100, Will Deacon wrote:
> > > > diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
> > > > index 95091f72228b..7fa042f5444e 100644
> > > > --- a/arch/arm64/include/asm/atomic_ll_sc.h
> > > > +++ b/arch/arm64/include/asm/atomic_ll_sc.h
> > > > @@ -23,6 +23,10 @@ asm_ops "\n"                                                               \
> > > >  #define __LL_SC_FALLBACK(asm_ops) asm_ops
> > > >  #endif
> > > >
> > > > +#ifndef CONFIG_CC_HAS_K_CONSTRAINT
> > > > +#define K
> > > > +#endif
> > >
> > > Bah, I need to use something like __stringify when the constraint is used
> > > in order for this to get expanded properly. Updated diff below.
> > >
> > > Will
> >
> > Hi Will, thanks for cc'ing me on the patch set.  I'd be happy to help
> > test w/ Clang.  Would you mind pushing this set with the below diff to
> > a publicly available tree+branch I can pull from?  (I haven't yet
> > figured out how to download multiple diff's from gmail rather than 1
> > by 1, and TBH I'd rather just use git).
>
> Sorry, of course. I should've mentioned this in the cover letter:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git/log/?h=for-next/atomics
>
> FWIW, I did test (defconfig + boot) with clang, but this does mean that LSE
> atomics are disabled for that configuration when asm goto is not supported.
>
> Will

Thanks, just curious if you (or anyone else on the list) has the QEMU
recipe handy to test on a virtual machine that has ll/sc instructions,
and one that does not?  I'm guessing testing the default machine would
not exercise the code path where these instructions have been added?

Patch
diff mbox series

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 61de992bbea3..0cef056b5fb1 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -39,6 +39,12 @@  $(warning LSE atomics not supported by binutils)
   endif
 endif
 
+cc_has_k_constraint := $(call try-run,echo				\
+	'int main(void) {						\
+		asm volatile("and w0, w0, %w0" :: "K" (4294967295));	\
+		return 0;						\
+	}' | $(CC) -S -x c -o "$$TMP" -,,-DCONFIG_CC_HAS_K_CONSTRAINT=1)
+
 ifeq ($(CONFIG_ARM64), y)
 brokengasinst := $(call as-instr,1:\n.inst 0\n.rept . - 1b\n\nnop\n.endr\n,,-DCONFIG_BROKEN_GAS_INST=1)
 
@@ -63,7 +69,8 @@  ifeq ($(CONFIG_GENERIC_COMPAT_VDSO), y)
   endif
 endif
 
-KBUILD_CFLAGS	+= -mgeneral-regs-only $(lseinstr) $(brokengasinst) $(compat_vdso)
+KBUILD_CFLAGS	+= -mgeneral-regs-only $(lseinstr) $(brokengasinst)	\
+		   $(compat_vdso) $(cc_has_k_constraint)
 KBUILD_CFLAGS	+= -fno-asynchronous-unwind-tables
 KBUILD_CFLAGS	+= $(call cc-disable-warning, psabi)
 KBUILD_AFLAGS	+= $(lseinstr) $(brokengasinst) $(compat_vdso)
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index 95091f72228b..7fa042f5444e 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -23,6 +23,10 @@  asm_ops "\n"								\
 #define __LL_SC_FALLBACK(asm_ops) asm_ops
 #endif
 
+#ifndef CONFIG_CC_HAS_K_CONSTRAINT
+#define K
+#endif
+
 /*
  * AArch64 UP and SMP safe atomic ops.  We use load exclusive and
  * store exclusive to ensure that these are atomic.  We may loop
@@ -113,10 +117,15 @@  ATOMIC_OPS(sub, sub, J)
 	ATOMIC_FETCH_OP (_acquire,        , a,  , "memory", __VA_ARGS__)\
 	ATOMIC_FETCH_OP (_release,        ,  , l, "memory", __VA_ARGS__)
 
-ATOMIC_OPS(and, and, )
+ATOMIC_OPS(and, and, K)
+ATOMIC_OPS(or, orr, K)
+ATOMIC_OPS(xor, eor, K)
+/*
+ * GAS converts the mysterious and undocumented BIC (immediate) alias to
+ * an AND (immediate) instruction with the immediate inverted. We don't
+ * have a constraint for this, so fall back to register.
+ */
 ATOMIC_OPS(andnot, bic, )
-ATOMIC_OPS(or, orr, )
-ATOMIC_OPS(xor, eor, )
 
 #undef ATOMIC_OPS
 #undef ATOMIC_FETCH_OP
@@ -208,9 +217,14 @@  ATOMIC64_OPS(sub, sub, J)
 	ATOMIC64_FETCH_OP (_release,,  , l, "memory", __VA_ARGS__)
 
 ATOMIC64_OPS(and, and, L)
-ATOMIC64_OPS(andnot, bic, )
 ATOMIC64_OPS(or, orr, L)
 ATOMIC64_OPS(xor, eor, L)
+/*
+ * GAS converts the mysterious and undocumented BIC (immediate) alias to
+ * an AND (immediate) instruction with the immediate inverted. We don't
+ * have a constraint for this, so fall back to register.
+ */
+ATOMIC64_OPS(andnot, bic, )
 
 #undef ATOMIC64_OPS
 #undef ATOMIC64_FETCH_OP
@@ -280,21 +294,21 @@  __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr,			\
  * handle the 'K' constraint for the value 4294967295 - thus we use no
  * constraint for 32 bit operations.
  */
-__CMPXCHG_CASE(w, b,     ,  8,        ,  ,  ,         , )
-__CMPXCHG_CASE(w, h,     , 16,        ,  ,  ,         , )
-__CMPXCHG_CASE(w,  ,     , 32,        ,  ,  ,         , )
+__CMPXCHG_CASE(w, b,     ,  8,        ,  ,  ,         , K)
+__CMPXCHG_CASE(w, h,     , 16,        ,  ,  ,         , K)
+__CMPXCHG_CASE(w,  ,     , 32,        ,  ,  ,         , K)
 __CMPXCHG_CASE( ,  ,     , 64,        ,  ,  ,         , L)
-__CMPXCHG_CASE(w, b, acq_,  8,        , a,  , "memory", )
-__CMPXCHG_CASE(w, h, acq_, 16,        , a,  , "memory", )
-__CMPXCHG_CASE(w,  , acq_, 32,        , a,  , "memory", )
+__CMPXCHG_CASE(w, b, acq_,  8,        , a,  , "memory", K)
+__CMPXCHG_CASE(w, h, acq_, 16,        , a,  , "memory", K)
+__CMPXCHG_CASE(w,  , acq_, 32,        , a,  , "memory", K)
 __CMPXCHG_CASE( ,  , acq_, 64,        , a,  , "memory", L)
-__CMPXCHG_CASE(w, b, rel_,  8,        ,  , l, "memory", )
-__CMPXCHG_CASE(w, h, rel_, 16,        ,  , l, "memory", )
-__CMPXCHG_CASE(w,  , rel_, 32,        ,  , l, "memory", )
+__CMPXCHG_CASE(w, b, rel_,  8,        ,  , l, "memory", K)
+__CMPXCHG_CASE(w, h, rel_, 16,        ,  , l, "memory", K)
+__CMPXCHG_CASE(w,  , rel_, 32,        ,  , l, "memory", K)
 __CMPXCHG_CASE( ,  , rel_, 64,        ,  , l, "memory", L)
-__CMPXCHG_CASE(w, b,  mb_,  8, dmb ish,  , l, "memory", )
-__CMPXCHG_CASE(w, h,  mb_, 16, dmb ish,  , l, "memory", )
-__CMPXCHG_CASE(w,  ,  mb_, 32, dmb ish,  , l, "memory", )
+__CMPXCHG_CASE(w, b,  mb_,  8, dmb ish,  , l, "memory", K)
+__CMPXCHG_CASE(w, h,  mb_, 16, dmb ish,  , l, "memory", K)
+__CMPXCHG_CASE(w,  ,  mb_, 32, dmb ish,  , l, "memory", K)
 __CMPXCHG_CASE( ,  ,  mb_, 64, dmb ish,  , l, "memory", L)
 
 #undef __CMPXCHG_CASE
@@ -332,5 +346,6 @@  __CMPXCHG_DBL(   ,        ,  ,         )
 __CMPXCHG_DBL(_mb, dmb ish, l, "memory")
 
 #undef __CMPXCHG_DBL
+#undef K
 
 #endif	/* __ASM_ATOMIC_LL_SC_H */