@@ -26,7 +26,7 @@
* (the optimize attribute silently ignores these options).
*/
-#define ATOMIC_OP(op, asm_op) \
+#define ATOMIC_OP(op, asm_op, constraint) \
__LL_SC_INLINE void \
__LL_SC_PREFIX(arch_atomic_##op(int i, atomic_t *v)) \
{ \
@@ -40,11 +40,11 @@ __LL_SC_PREFIX(arch_atomic_##op(int i, atomic_t *v)) \
" stxr %w1, %w0, %2\n" \
" cbnz %w1, 1b" \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
- : "Ir" (i)); \
+ : #constraint "r" (i)); \
} \
__LL_SC_EXPORT(arch_atomic_##op);
-#define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \
+#define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
__LL_SC_INLINE int \
__LL_SC_PREFIX(arch_atomic_##op##_return##name(int i, atomic_t *v)) \
{ \
@@ -59,14 +59,14 @@ __LL_SC_PREFIX(arch_atomic_##op##_return##name(int i, atomic_t *v)) \
" cbnz %w1, 1b\n" \
" " #mb \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
- : "Ir" (i) \
+ : #constraint "r" (i) \
: cl); \
\
return result; \
} \
__LL_SC_EXPORT(arch_atomic_##op##_return##name);
-#define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \
+#define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint) \
__LL_SC_INLINE int \
__LL_SC_PREFIX(arch_atomic_fetch_##op##name(int i, atomic_t *v)) \
{ \
@@ -81,7 +81,7 @@ __LL_SC_PREFIX(arch_atomic_fetch_##op##name(int i, atomic_t *v)) \
" cbnz %w2, 1b\n" \
" " #mb \
: "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \
- : "Ir" (i) \
+ : #constraint "r" (i) \
: cl); \
\
return result; \
@@ -99,8 +99,8 @@ __LL_SC_EXPORT(arch_atomic_fetch_##op##name);
ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\
ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__)
-ATOMIC_OPS(add, add)
-ATOMIC_OPS(sub, sub)
+ATOMIC_OPS(add, add, I)
+ATOMIC_OPS(sub, sub, J)
#undef ATOMIC_OPS
#define ATOMIC_OPS(...) \
@@ -110,17 +110,17 @@ ATOMIC_OPS(sub, sub)
ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\
ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__)
-ATOMIC_OPS(and, and)
-ATOMIC_OPS(andnot, bic)
-ATOMIC_OPS(or, orr)
-ATOMIC_OPS(xor, eor)
+ATOMIC_OPS(and, and, K)
+ATOMIC_OPS(andnot, bic, )
+ATOMIC_OPS(or, orr, K)
+ATOMIC_OPS(xor, eor, K)
#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
-#define ATOMIC64_OP(op, asm_op) \
+#define ATOMIC64_OP(op, asm_op, constraint) \
__LL_SC_INLINE void \
__LL_SC_PREFIX(arch_atomic64_##op(s64 i, atomic64_t *v)) \
{ \
@@ -134,11 +134,11 @@ __LL_SC_PREFIX(arch_atomic64_##op(s64 i, atomic64_t *v)) \
" stxr %w1, %0, %2\n" \
" cbnz %w1, 1b" \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
- : "Ir" (i)); \
+ : #constraint "r" (i)); \
} \
__LL_SC_EXPORT(arch_atomic64_##op);
-#define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \
+#define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
__LL_SC_INLINE s64 \
__LL_SC_PREFIX(arch_atomic64_##op##_return##name(s64 i, atomic64_t *v))\
{ \
@@ -153,14 +153,14 @@ __LL_SC_PREFIX(arch_atomic64_##op##_return##name(s64 i, atomic64_t *v))\
" cbnz %w1, 1b\n" \
" " #mb \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
- : "Ir" (i) \
+ : #constraint "r" (i) \
: cl); \
\
return result; \
} \
__LL_SC_EXPORT(arch_atomic64_##op##_return##name);
-#define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \
+#define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint)\
__LL_SC_INLINE s64 \
__LL_SC_PREFIX(arch_atomic64_fetch_##op##name(s64 i, atomic64_t *v)) \
{ \
@@ -175,7 +175,7 @@ __LL_SC_PREFIX(arch_atomic64_fetch_##op##name(s64 i, atomic64_t *v)) \
" cbnz %w2, 1b\n" \
" " #mb \
: "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \
- : "Ir" (i) \
+ : #constraint "r" (i) \
: cl); \
\
return result; \
@@ -193,8 +193,8 @@ __LL_SC_EXPORT(arch_atomic64_fetch_##op##name);
ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \
ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__)
-ATOMIC64_OPS(add, add)
-ATOMIC64_OPS(sub, sub)
+ATOMIC64_OPS(add, add, I)
+ATOMIC64_OPS(sub, sub, J)
#undef ATOMIC64_OPS
#define ATOMIC64_OPS(...) \
@@ -204,10 +204,10 @@ ATOMIC64_OPS(sub, sub)
ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \
ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__)
-ATOMIC64_OPS(and, and)
-ATOMIC64_OPS(andnot, bic)
-ATOMIC64_OPS(or, orr)
-ATOMIC64_OPS(xor, eor)
+ATOMIC64_OPS(and, and, K)
+ATOMIC64_OPS(andnot, bic, )
+ATOMIC64_OPS(or, orr, K)
+ATOMIC64_OPS(xor, eor, K)
#undef ATOMIC64_OPS
#undef ATOMIC64_FETCH_OP
@@ -237,7 +237,7 @@ __LL_SC_PREFIX(arch_atomic64_dec_if_positive(atomic64_t *v))
}
__LL_SC_EXPORT(arch_atomic64_dec_if_positive);
-#define __CMPXCHG_CASE(w, sfx, name, sz, mb, acq, rel, cl) \
+#define __CMPXCHG_CASE(w, sfx, name, sz, mb, acq, rel, cl, constraint) \
__LL_SC_INLINE u##sz \
__LL_SC_PREFIX(__cmpxchg_case_##name##sz(volatile void *ptr, \
unsigned long old, \
@@ -265,29 +265,34 @@ __LL_SC_PREFIX(__cmpxchg_case_##name##sz(volatile void *ptr, \
"2:" \
: [tmp] "=&r" (tmp), [oldval] "=&r" (oldval), \
[v] "+Q" (*(u##sz *)ptr) \
- : [old] "Kr" (old), [new] "r" (new) \
+ : [old] #constraint "r" (old), [new] "r" (new) \
: cl); \
\
return oldval; \
} \
__LL_SC_EXPORT(__cmpxchg_case_##name##sz);
-__CMPXCHG_CASE(w, b, , 8, , , , )
-__CMPXCHG_CASE(w, h, , 16, , , , )
-__CMPXCHG_CASE(w, , , 32, , , , )
-__CMPXCHG_CASE( , , , 64, , , , )
-__CMPXCHG_CASE(w, b, acq_, 8, , a, , "memory")
-__CMPXCHG_CASE(w, h, acq_, 16, , a, , "memory")
-__CMPXCHG_CASE(w, , acq_, 32, , a, , "memory")
-__CMPXCHG_CASE( , , acq_, 64, , a, , "memory")
-__CMPXCHG_CASE(w, b, rel_, 8, , , l, "memory")
-__CMPXCHG_CASE(w, h, rel_, 16, , , l, "memory")
-__CMPXCHG_CASE(w, , rel_, 32, , , l, "memory")
-__CMPXCHG_CASE( , , rel_, 64, , , l, "memory")
-__CMPXCHG_CASE(w, b, mb_, 8, dmb ish, , l, "memory")
-__CMPXCHG_CASE(w, h, mb_, 16, dmb ish, , l, "memory")
-__CMPXCHG_CASE(w, , mb_, 32, dmb ish, , l, "memory")
-__CMPXCHG_CASE( , , mb_, 64, dmb ish, , l, "memory")
+/*
+ * Earlier versions of GCC (no later than 8.1.0) appear to incorrectly
+ * handle the 'K' constraint for the value 4294967295 - thus we use no
+ * constraint for 32 bit operations.
+ */
+__CMPXCHG_CASE(w, b, , 8, , , , , )
+__CMPXCHG_CASE(w, h, , 16, , , , , )
+__CMPXCHG_CASE(w, , , 32, , , , , )
+__CMPXCHG_CASE( , , , 64, , , , , L)
+__CMPXCHG_CASE(w, b, acq_, 8, , a, , "memory", )
+__CMPXCHG_CASE(w, h, acq_, 16, , a, , "memory", )
+__CMPXCHG_CASE(w, , acq_, 32, , a, , "memory", )
+__CMPXCHG_CASE( , , acq_, 64, , a, , "memory", L)
+__CMPXCHG_CASE(w, b, rel_, 8, , , l, "memory", )
+__CMPXCHG_CASE(w, h, rel_, 16, , , l, "memory", )
+__CMPXCHG_CASE(w, , rel_, 32, , , l, "memory", )
+__CMPXCHG_CASE( , , rel_, 64, , , l, "memory", L)
+__CMPXCHG_CASE(w, b, mb_, 8, dmb ish, , l, "memory", )
+__CMPXCHG_CASE(w, h, mb_, 16, dmb ish, , l, "memory", )
+__CMPXCHG_CASE(w, , mb_, 32, dmb ish, , l, "memory", )
+__CMPXCHG_CASE( , , mb_, 64, dmb ish, , l, "memory", L)
#undef __CMPXCHG_CASE
For many of the ll/sc atomic operations we use the 'I' machine constraint regardless to the instruction used - this may not be optimal. Let's add an additional parameter to the ATOMIC_xx macros that allows the caller to specify an appropriate machine constraint. Let's also improve __CMPXCHG_CASE by replacing the 'K' constraint with a caller provided constraint. Please note that whilst we would like to use the 'K' constraint on 32 bit operations, we choose not to provide any constraint to avoid a GCC bug which results in a build error. Earlier versions of GCC (no later than 8.1.0) appear to incorrectly handle the 'K' constraint for the value 4294967295. Signed-off-by: Andrew Murray <andrew.murray@arm.com> --- arch/arm64/include/asm/atomic_ll_sc.h | 89 ++++++++++++++------------- 1 file changed, 47 insertions(+), 42 deletions(-)