
[4/5] atomic: Introduce atomic_try_cmpxchg()

Message ID 20170203132737.566324209@infradead.org (mailing list archive)
State New, archived

Commit Message

Peter Zijlstra Feb. 3, 2017, 1:26 p.m. UTC
Add a new cmpxchg interface:

  bool try_cmpxchg(u{8,16,32,64} *ptr, u{8,16,32,64} *val, u{8,16,32,64} new);

Where the boolean return value indicates whether the compare succeeded,
and thus whether the exchange happened; in case of failure, the current
value of *ptr is returned in *val.

This allows simplification/improvement of loops like:

	for (;;) {
		new = val $op $imm;
		old = cmpxchg(ptr, val, new);
		if (old == val)
			break;
		val = old;
	}

into:

	for (;;) {
		new = val $op $imm;
		if (try_cmpxchg(ptr, &val, new))
			break;
	}

while also generating better code (GCC6 and onwards).
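
For illustration only (not part of this patch), a typical caller of the
proposed interface could look like the sketch below; the helper name is
made up for this example:

	/*
	 * Illustrative sketch: increment @v unless it is zero, built on
	 * atomic_try_cmpxchg(). On a failed compare, @val is refreshed
	 * with the value currently in @v, so no explicit reload is
	 * needed before retrying.
	 */
	static __always_inline bool inc_not_zero_sketch(atomic_t *v)
	{
		int val = atomic_read(v);

		for (;;) {
			if (!val)
				return false;
			if (atomic_try_cmpxchg(v, &val, val + 1))
				return true;
		}
	}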

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---

Comments

Boqun Feng Feb. 6, 2017, 4:24 a.m. UTC | #1
On Fri, Feb 03, 2017 at 02:26:02PM +0100, Peter Zijlstra wrote:
> Add a new cmpxchg interface:
> 
>   bool try_cmpxchg(u{8,16,32,64} *ptr, u{8,16,32,64} *val, u{8,16,32,64} new);
> 
> Where the boolean returns the result of the compare; and thus if the
> exchange happened; and in case of failure, the new value of *ptr is
> returned in *val.
> 
> This allows simplification/improvement of loops like:
> 
> 	for (;;) {
> 		new = val $op $imm;
> 		old = cmpxchg(ptr, val, new);
> 		if (old == val)
> 			break;
> 		val = old;
> 	}
> 
> into:
> 
> 	for (;;) {
> 		new = val $op $imm;
> 		if (try_cmpxchg(ptr, &val, new))
> 			break;
> 	}
> 
> while also generating better code (GCC6 and onwards).
> 

But switching to try_cmpxchg() will make @val a memory location, which
cannot be kept in a register. And this will generate unnecessary memory
accesses on archs that have enough registers (e.g. PPC).

> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
> --- a/arch/x86/include/asm/atomic.h
> +++ b/arch/x86/include/asm/atomic.h
> @@ -186,6 +186,12 @@ static __always_inline int atomic_cmpxch
>  	return cmpxchg(&v->counter, old, new);
>  }
>  
> +#define atomic_try_cmpxchg atomic_try_cmpxchg
> +static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, int new)
> +{
> +	return try_cmpxchg(&v->counter, old, new);
> +}
> +
>  static inline int atomic_xchg(atomic_t *v, int new)
>  {
>  	return xchg(&v->counter, new);
> --- a/arch/x86/include/asm/cmpxchg.h
> +++ b/arch/x86/include/asm/cmpxchg.h
> @@ -153,6 +153,75 @@ extern void __add_wrong_size(void)
>  #define cmpxchg_local(ptr, old, new)					\
>  	__cmpxchg_local(ptr, old, new, sizeof(*(ptr)))
>  
> +
> +#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock)		\
> +({									\
> +	bool success;							\
> +	__typeof__(_ptr) _old = (_pold);				\
> +	__typeof__(*(_ptr)) __old = *_old;				\
> +	__typeof__(*(_ptr)) __new = (_new);				\
> +	switch (size) {							\
> +	case __X86_CASE_B:						\
> +	{								\
> +		volatile u8 *__ptr = (volatile u8 *)(_ptr);		\
> +		asm volatile(lock "cmpxchgb %[new], %[ptr]"		\
> +			     CC_SET(z)					\
> +			     : CC_OUT(z) (success),			\
> +			       [ptr] "+m" (*__ptr),			\
> +			       [old] "+a" (__old)			\
> +			     : [new] "q" (__new)			\
> +			     : "memory");				\
> +		break;							\
> +	}								\
> +	case __X86_CASE_W:						\
> +	{								\
> +		volatile u16 *__ptr = (volatile u16 *)(_ptr);		\
> +		asm volatile(lock "cmpxchgw %[new], %[ptr]"		\
> +			     CC_SET(z)					\
> +			     : CC_OUT(z) (success),			\
> +			       [ptr] "+m" (*__ptr),			\
> +			       [old] "+a" (__old)			\
> +			     : [new] "r" (__new)			\
> +			     : "memory");				\
> +		break;							\
> +	}								\
> +	case __X86_CASE_L:						\
> +	{								\
> +		volatile u32 *__ptr = (volatile u32 *)(_ptr);		\
> +		asm volatile(lock "cmpxchgl %[new], %[ptr]"		\
> +			     CC_SET(z)					\
> +			     : CC_OUT(z) (success),			\
> +			       [ptr] "+m" (*__ptr),			\
> +			       [old] "+a" (__old)			\
> +			     : [new] "r" (__new)			\
> +			     : "memory");				\
> +		break;							\
> +	}								\
> +	case __X86_CASE_Q:						\
> +	{								\
> +		volatile u64 *__ptr = (volatile u64 *)(_ptr);		\
> +		asm volatile(lock "cmpxchgq %[new], %[ptr]"		\
> +			     CC_SET(z)					\
> +			     : CC_OUT(z) (success),			\
> +			       [ptr] "+m" (*__ptr),			\
> +			       [old] "+a" (__old)			\
> +			     : [new] "r" (__new)			\
> +			     : "memory");				\
> +		break;							\
> +	}								\
> +	default:							\
> +		__cmpxchg_wrong_size();					\
> +	}								\
> +	*_old = __old;							\
> +	success;							\
> +})
> +
> +#define __try_cmpxchg(ptr, pold, new, size)				\
> +	__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
> +
> +#define try_cmpxchg(ptr, pold, new)					\
> +	__try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
> +
>  /*
>   * xadd() adds "inc" to "*ptr" and atomically returns the previous
>   * value of "*ptr".
> --- a/include/linux/atomic.h
> +++ b/include/linux/atomic.h
> @@ -423,6 +423,28 @@
>  #endif
>  #endif /* atomic_cmpxchg_relaxed */
>  
> +#ifndef atomic_try_cmpxchg
> +
> +#define __atomic_try_cmpxchg(type, _p, _po, _n)				\
> +({									\
> +	typeof(_po) __po = (_po);					\
> +	typeof(*(_po)) __o = *__po;					\
> +	bool success = (atomic_cmpxchg##type((_p), __o, (_n)) == __o);	\
> +	*__po = __o;							\

Besides, is this part correct? atomic_cmpxchg_*() wouldn't change the
value of __o, so *__po wouldn't be changed. IOW, in case of failure,
the current value wouldn't be reported back through *__po.

Maybe this should be:

	bool success;
	*__po = atomic_cmpxchg##type((_p), __o, (_n));
	sucess = (*__po == _o);

, right?

Regards,
Boqun

> +	success;							\
> +})
> +
> +#define atomic_try_cmpxchg(_p, _po, _n)		__atomic_try_cmpxchg(, _p, _po, _n)
> +#define atomic_try_cmpxchg_relaxed(_p, _po, _n)	__atomic_try_cmpxchg(_relaxed, _p, _po, _n)
> +#define atomic_try_cmpxchg_acquire(_p, _po, _n)	__atomic_try_cmpxchg(_acquire, _p, _po, _n)
> +#define atomic_try_cmpxchg_release(_p, _po, _n)	__atomic_try_cmpxchg(_release, _p, _po, _n)
> +
> +#else /* atomic_try_cmpxchg */
> +#define atomic_try_cmpxchg_relaxed	atomic_try_cmpxchg
> +#define atomic_try_cmpxchg_acquire	atomic_try_cmpxchg
> +#define atomic_try_cmpxchg_release	atomic_try_cmpxchg
> +#endif /* atomic_try_cmpxchg */
> +
>  /* cmpxchg_relaxed */
>  #ifndef cmpxchg_relaxed
>  #define  cmpxchg_relaxed		cmpxchg
> 
>
Boqun Feng Feb. 6, 2017, 6:32 a.m. UTC | #2
On Mon, Feb 06, 2017 at 12:24:28PM +0800, Boqun Feng wrote:
> On Fri, Feb 03, 2017 at 02:26:02PM +0100, Peter Zijlstra wrote:
> > Add a new cmpxchg interface:
> > 
> >   bool try_cmpxchg(u{8,16,32,64} *ptr, u{8,16,32,64} *val, u{8,16,32,64} new);
> > 
> > Where the boolean returns the result of the compare; and thus if the
> > exchange happened; and in case of failure, the new value of *ptr is
> > returned in *val.
> > 
> > This allows simplification/improvement of loops like:
> > 
> > 	for (;;) {
> > 		new = val $op $imm;
> > 		old = cmpxchg(ptr, val, new);
> > 		if (old == val)
> > 			break;
> > 		val = old;
> > 	}
> > 
> > into:
> > 
> > 	for (;;) {
> > 		new = val $op $imm;
> > 		if (try_cmpxchg(ptr, &val, new))
> > 			break;
> > 	}
> > 
> > while also generating better code (GCC6 and onwards).
> > 
> 
> But switching to try_cmpxchg() will make @val a memory location, which
> could not be put in a register. And this will generate unnecessary
> memory accesses on archs having enough registers(PPC, e.g.).
> 

Hmm... it turns out that compilers can figure out that @val fits in a
register (maybe in the escape analysis step), so they don't treat it as
a memory location. This is at least true for GCC 5.4.0 on PPC. So I
think we can rely on this?

> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > ---
> > --- a/arch/x86/include/asm/atomic.h
> > +++ b/arch/x86/include/asm/atomic.h
> > @@ -186,6 +186,12 @@ static __always_inline int atomic_cmpxch
> >  	return cmpxchg(&v->counter, old, new);
> >  }
> >  
> > +#define atomic_try_cmpxchg atomic_try_cmpxchg
> > +static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, int new)
> > +{
> > +	return try_cmpxchg(&v->counter, old, new);
> > +}
> > +
> >  static inline int atomic_xchg(atomic_t *v, int new)
> >  {
> >  	return xchg(&v->counter, new);
> > --- a/arch/x86/include/asm/cmpxchg.h
> > +++ b/arch/x86/include/asm/cmpxchg.h
> > @@ -153,6 +153,75 @@ extern void __add_wrong_size(void)
> >  #define cmpxchg_local(ptr, old, new)					\
> >  	__cmpxchg_local(ptr, old, new, sizeof(*(ptr)))
> >  
> > +
> > +#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock)		\
> > +({									\
> > +	bool success;							\
> > +	__typeof__(_ptr) _old = (_pold);				\
> > +	__typeof__(*(_ptr)) __old = *_old;				\
> > +	__typeof__(*(_ptr)) __new = (_new);				\
> > +	switch (size) {							\
> > +	case __X86_CASE_B:						\
> > +	{								\
> > +		volatile u8 *__ptr = (volatile u8 *)(_ptr);		\
> > +		asm volatile(lock "cmpxchgb %[new], %[ptr]"		\
> > +			     CC_SET(z)					\
> > +			     : CC_OUT(z) (success),			\
> > +			       [ptr] "+m" (*__ptr),			\
> > +			       [old] "+a" (__old)			\
> > +			     : [new] "q" (__new)			\
> > +			     : "memory");				\
> > +		break;							\
> > +	}								\
> > +	case __X86_CASE_W:						\
> > +	{								\
> > +		volatile u16 *__ptr = (volatile u16 *)(_ptr);		\
> > +		asm volatile(lock "cmpxchgw %[new], %[ptr]"		\
> > +			     CC_SET(z)					\
> > +			     : CC_OUT(z) (success),			\
> > +			       [ptr] "+m" (*__ptr),			\
> > +			       [old] "+a" (__old)			\
> > +			     : [new] "r" (__new)			\
> > +			     : "memory");				\
> > +		break;							\
> > +	}								\
> > +	case __X86_CASE_L:						\
> > +	{								\
> > +		volatile u32 *__ptr = (volatile u32 *)(_ptr);		\
> > +		asm volatile(lock "cmpxchgl %[new], %[ptr]"		\
> > +			     CC_SET(z)					\
> > +			     : CC_OUT(z) (success),			\
> > +			       [ptr] "+m" (*__ptr),			\
> > +			       [old] "+a" (__old)			\
> > +			     : [new] "r" (__new)			\
> > +			     : "memory");				\
> > +		break;							\
> > +	}								\
> > +	case __X86_CASE_Q:						\
> > +	{								\
> > +		volatile u64 *__ptr = (volatile u64 *)(_ptr);		\
> > +		asm volatile(lock "cmpxchgq %[new], %[ptr]"		\
> > +			     CC_SET(z)					\
> > +			     : CC_OUT(z) (success),			\
> > +			       [ptr] "+m" (*__ptr),			\
> > +			       [old] "+a" (__old)			\
> > +			     : [new] "r" (__new)			\
> > +			     : "memory");				\
> > +		break;							\
> > +	}								\
> > +	default:							\
> > +		__cmpxchg_wrong_size();					\
> > +	}								\
> > +	*_old = __old;							\
> > +	success;							\
> > +})
> > +
> > +#define __try_cmpxchg(ptr, pold, new, size)				\
> > +	__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
> > +
> > +#define try_cmpxchg(ptr, pold, new)					\
> > +	__try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
> > +
> >  /*
> >   * xadd() adds "inc" to "*ptr" and atomically returns the previous
> >   * value of "*ptr".
> > --- a/include/linux/atomic.h
> > +++ b/include/linux/atomic.h
> > @@ -423,6 +423,28 @@
> >  #endif
> >  #endif /* atomic_cmpxchg_relaxed */
> >  
> > +#ifndef atomic_try_cmpxchg
> > +
> > +#define __atomic_try_cmpxchg(type, _p, _po, _n)				\
> > +({									\
> > +	typeof(_po) __po = (_po);					\
> > +	typeof(*(_po)) __o = *__po;					\
> > +	bool success = (atomic_cmpxchg##type((_p), __o, (_n)) == __o);	\
> > +	*__po = __o;							\
> 
> Besides, is this part correct? atomic_cmpxchg_*() wouldn't change the
> value of __o, so *__po wouldn't be changed.. IOW, in case of failure,
> *ptr wouldn't be updated to a new value.
> 
> Maybe this should be:
> 
> 	bool success;
> 	*__po = atomic_cmpxchg##type((_p), __o, (_n));
> 	sucess = (*__po == _o);

typo... should be

	success = (*__po == __o);

Regards,
Boqun

> 
> , right?
> 
> Regards,
> Boqun
> 
> > +	success;							\
> > +})
> > +
> > +#define atomic_try_cmpxchg(_p, _po, _n)		__atomic_try_cmpxchg(, _p, _po, _n)
> > +#define atomic_try_cmpxchg_relaxed(_p, _po, _n)	__atomic_try_cmpxchg(_relaxed, _p, _po, _n)
> > +#define atomic_try_cmpxchg_acquire(_p, _po, _n)	__atomic_try_cmpxchg(_acquire, _p, _po, _n)
> > +#define atomic_try_cmpxchg_release(_p, _po, _n)	__atomic_try_cmpxchg(_release, _p, _po, _n)
> > +
> > +#else /* atomic_try_cmpxchg */
> > +#define atomic_try_cmpxchg_relaxed	atomic_try_cmpxchg
> > +#define atomic_try_cmpxchg_acquire	atomic_try_cmpxchg
> > +#define atomic_try_cmpxchg_release	atomic_try_cmpxchg
> > +#endif /* atomic_try_cmpxchg */
> > +
> >  /* cmpxchg_relaxed */
> >  #ifndef cmpxchg_relaxed
> >  #define  cmpxchg_relaxed		cmpxchg
> > 
> >
Peter Zijlstra Feb. 6, 2017, 8:12 a.m. UTC | #3
On Mon, Feb 06, 2017 at 12:24:28PM +0800, Boqun Feng wrote:
> On Fri, Feb 03, 2017 at 02:26:02PM +0100, Peter Zijlstra wrote:

> > 
> > 	for (;;) {
> > 		new = val $op $imm;
> > 		if (try_cmpxchg(ptr, &val, new))
> > 			break;
> > 	}
> > 
> > while also generating better code (GCC6 and onwards).
> > 
> 
> But switching to try_cmpxchg() will make @val a memory location, which
> could not be put in a register. And this will generate unnecessary
> memory accesses on archs having enough registers(PPC, e.g.).

GCC was perfectly capable of making @val a register in the code I was
looking at.

> > +#ifndef atomic_try_cmpxchg
> > +
> > +#define __atomic_try_cmpxchg(type, _p, _po, _n)				\
> > +({									\
> > +	typeof(_po) __po = (_po);					\
> > +	typeof(*(_po)) __o = *__po;					\
> > +	bool success = (atomic_cmpxchg##type((_p), __o, (_n)) == __o);	\
> > +	*__po = __o;							\
> 
> Besides, is this part correct? atomic_cmpxchg_*() wouldn't change the
> value of __o, so *__po wouldn't be changed.. IOW, in case of failure,
> *ptr wouldn't be updated to a new value.
> 
> Maybe this should be:
> 
> 	bool success;
> 	*__po = atomic_cmpxchg##type((_p), __o, (_n));
> 	sucess = (*__po == _o);
> 
> , right?

Yes, botched that. Don't think I even compiled it to be honest :/
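
For reference, a minimal sketch of the generic fallback with that fix
folded in (illustrative only; the version eventually merged may differ):

/*
 * Sketch: write back the value observed by atomic_cmpxchg##type()
 * through __po, so that on failure the caller sees the current value
 * of the atomic variable.
 */
#define __atomic_try_cmpxchg(type, _p, _po, _n)				\
({									\
	typeof(_po) __po = (_po);					\
	typeof(*(_po)) __o = *__po;					\
	typeof(*(_po)) __r = atomic_cmpxchg##type((_p), __o, (_n));	\
	bool success = (__r == __o);					\
	*__po = __r;							\
	success;							\
})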

Patch

--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -186,6 +186,12 @@  static __always_inline int atomic_cmpxch
 	return cmpxchg(&v->counter, old, new);
 }
 
+#define atomic_try_cmpxchg atomic_try_cmpxchg
+static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, int new)
+{
+	return try_cmpxchg(&v->counter, old, new);
+}
+
 static inline int atomic_xchg(atomic_t *v, int new)
 {
 	return xchg(&v->counter, new);
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -153,6 +153,75 @@  extern void __add_wrong_size(void)
 #define cmpxchg_local(ptr, old, new)					\
 	__cmpxchg_local(ptr, old, new, sizeof(*(ptr)))
 
+
+#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock)		\
+({									\
+	bool success;							\
+	__typeof__(_ptr) _old = (_pold);				\
+	__typeof__(*(_ptr)) __old = *_old;				\
+	__typeof__(*(_ptr)) __new = (_new);				\
+	switch (size) {							\
+	case __X86_CASE_B:						\
+	{								\
+		volatile u8 *__ptr = (volatile u8 *)(_ptr);		\
+		asm volatile(lock "cmpxchgb %[new], %[ptr]"		\
+			     CC_SET(z)					\
+			     : CC_OUT(z) (success),			\
+			       [ptr] "+m" (*__ptr),			\
+			       [old] "+a" (__old)			\
+			     : [new] "q" (__new)			\
+			     : "memory");				\
+		break;							\
+	}								\
+	case __X86_CASE_W:						\
+	{								\
+		volatile u16 *__ptr = (volatile u16 *)(_ptr);		\
+		asm volatile(lock "cmpxchgw %[new], %[ptr]"		\
+			     CC_SET(z)					\
+			     : CC_OUT(z) (success),			\
+			       [ptr] "+m" (*__ptr),			\
+			       [old] "+a" (__old)			\
+			     : [new] "r" (__new)			\
+			     : "memory");				\
+		break;							\
+	}								\
+	case __X86_CASE_L:						\
+	{								\
+		volatile u32 *__ptr = (volatile u32 *)(_ptr);		\
+		asm volatile(lock "cmpxchgl %[new], %[ptr]"		\
+			     CC_SET(z)					\
+			     : CC_OUT(z) (success),			\
+			       [ptr] "+m" (*__ptr),			\
+			       [old] "+a" (__old)			\
+			     : [new] "r" (__new)			\
+			     : "memory");				\
+		break;							\
+	}								\
+	case __X86_CASE_Q:						\
+	{								\
+		volatile u64 *__ptr = (volatile u64 *)(_ptr);		\
+		asm volatile(lock "cmpxchgq %[new], %[ptr]"		\
+			     CC_SET(z)					\
+			     : CC_OUT(z) (success),			\
+			       [ptr] "+m" (*__ptr),			\
+			       [old] "+a" (__old)			\
+			     : [new] "r" (__new)			\
+			     : "memory");				\
+		break;							\
+	}								\
+	default:							\
+		__cmpxchg_wrong_size();					\
+	}								\
+	*_old = __old;							\
+	success;							\
+})
+
+#define __try_cmpxchg(ptr, pold, new, size)				\
+	__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
+
+#define try_cmpxchg(ptr, pold, new)					\
+	__try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
+
 /*
  * xadd() adds "inc" to "*ptr" and atomically returns the previous
  * value of "*ptr".
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -423,6 +423,28 @@ 
 #endif
 #endif /* atomic_cmpxchg_relaxed */
 
+#ifndef atomic_try_cmpxchg
+
+#define __atomic_try_cmpxchg(type, _p, _po, _n)				\
+({									\
+	typeof(_po) __po = (_po);					\
+	typeof(*(_po)) __o = *__po;					\
+	bool success = (atomic_cmpxchg##type((_p), __o, (_n)) == __o);	\
+	*__po = __o;							\
+	success;							\
+})
+
+#define atomic_try_cmpxchg(_p, _po, _n)		__atomic_try_cmpxchg(, _p, _po, _n)
+#define atomic_try_cmpxchg_relaxed(_p, _po, _n)	__atomic_try_cmpxchg(_relaxed, _p, _po, _n)
+#define atomic_try_cmpxchg_acquire(_p, _po, _n)	__atomic_try_cmpxchg(_acquire, _p, _po, _n)
+#define atomic_try_cmpxchg_release(_p, _po, _n)	__atomic_try_cmpxchg(_release, _p, _po, _n)
+
+#else /* atomic_try_cmpxchg */
+#define atomic_try_cmpxchg_relaxed	atomic_try_cmpxchg
+#define atomic_try_cmpxchg_acquire	atomic_try_cmpxchg
+#define atomic_try_cmpxchg_release	atomic_try_cmpxchg
+#endif /* atomic_try_cmpxchg */
+
 /* cmpxchg_relaxed */
 #ifndef cmpxchg_relaxed
 #define  cmpxchg_relaxed		cmpxchg