diff mbox series

[v2,2/2] xen: Introduce cmpxchg64() and guest_cmpxchg64()

Message ID 20200911160622.19721-3-julien@xen.org
State New
Headers show
Series Introduce cmpxchg64() and guest_cmpxchg64() | expand

Commit Message

Julien Grall Sept. 11, 2020, 4:06 p.m. UTC
From: Julien Grall <jgrall@amazon.com>

The IOREQ code is using cmpxchg() with 64-bit value. At the moment, this
is x86 code, but there is plan to make it common.

To cater 32-bit arch, introduce two new helpers to deal with 64-bit
cmpxchg().

The Arm 32-bit implementation of cmpxchg64() is based on the __cmpxchg64
in Linux v5.8 (arch/arm/include/asm/cmpxchg.h).

Note that only guest_cmpxchg64() is introduced on x86 so far.

Signed-off-by: Julien Grall <jgrall@amazon.com>

---

I have looked at whether we can extend cmpxchg() to support 64-bit on
arm32 bit. However the resulting code is much worse as the compiler will
use the stack more often to spill. Therefore the introduction of
cmpxchg64() is better option.

    Changes in v2:
        - Remove extra teq in the arm32 cmpxchg implementation
        - Don't define cmpxchg64() on x86
        - Drop _mb from the helpers name
        - Add missing barrier in the arm32 implementation
---
 xen/include/asm-arm/arm32/cmpxchg.h | 68 +++++++++++++++++++++++++++++
 xen/include/asm-arm/arm64/cmpxchg.h |  5 +++
 xen/include/asm-arm/guest_atomics.h | 22 ++++++++++
 xen/include/asm-x86/guest_atomics.h |  1 +
 4 files changed, 96 insertions(+)

Comments

Jan Beulich Sept. 14, 2020, 8:48 a.m. UTC | #1
On 11.09.2020 18:06, Julien Grall wrote:
> --- a/xen/include/asm-x86/guest_atomics.h
> +++ b/xen/include/asm-x86/guest_atomics.h
> @@ -20,6 +20,7 @@
>      ((void)(d), test_and_change_bit(nr, p))
>  
>  #define guest_cmpxchg(d, ptr, o, n) ((void)(d), cmpxchg(ptr, o, n))
> +#define guest_cmpxchg64(d, ptr, o, n) ((void)(d), cmpxchg(ptr, o, n))

While them sitting side by side there's perhaps little risk of
them going out of sync with one another, I still find it a
little odd to open-code guest_cmpxchg() instead of using it,
preferably even like

#define guest_cmpxchg64 guest_cmpxchg

(i.e. not using the function-like macro form). Unless of course
there's a particular reason you went this route. Preferably with
it adjusted this minor x86 part
Acked-by: Jan Beulich <jbeulich@suse.com>

Jan
Julien Grall Sept. 14, 2020, 9:04 a.m. UTC | #2
Hi Jan,

On 14/09/2020 09:48, Jan Beulich wrote:
> On 11.09.2020 18:06, Julien Grall wrote:
>> --- a/xen/include/asm-x86/guest_atomics.h
>> +++ b/xen/include/asm-x86/guest_atomics.h
>> @@ -20,6 +20,7 @@
>>       ((void)(d), test_and_change_bit(nr, p))
>>   
>>   #define guest_cmpxchg(d, ptr, o, n) ((void)(d), cmpxchg(ptr, o, n))
>> +#define guest_cmpxchg64(d, ptr, o, n) ((void)(d), cmpxchg(ptr, o, n))
> 
> While them sitting side by side there's perhaps little risk of
> them going out of sync with one another, I still find it a
> little odd to open-code guest_cmpxchg() instead of using it,

It depends on how you view it... The implementation is indeed the same 
but they are meant to be used in different places.

Anyway... I can use:

#define guest_cmpxchg64 guest_cmpxchg

Cheers,
Stefano Stabellini Sept. 15, 2020, midnight UTC | #3
On Fri, 11 Sep 2020, Julien Grall wrote:
> From: Julien Grall <jgrall@amazon.com>
> 
> The IOREQ code is using cmpxchg() with 64-bit value. At the moment, this
> is x86 code, but there is plan to make it common.
> 
> To cater 32-bit arch, introduce two new helpers to deal with 64-bit
> cmpxchg().
> 
> The Arm 32-bit implementation of cmpxchg64() is based on the __cmpxchg64
> in Linux v5.8 (arch/arm/include/asm/cmpxchg.h).
> 
> Note that only guest_cmpxchg64() is introduced on x86 so far.
> 
> Signed-off-by: Julien Grall <jgrall@amazon.com>

Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>


> ---
> 
> I have looked at whether we can extend cmpxchg() to support 64-bit on
> arm32 bit. However the resulting code is much worse as the compiler will
> use the stack more often to spill. Therefore the introduction of
> cmpxchg64() is better option.
> 
>     Changes in v2:
>         - Remove extra teq in the arm32 cmpxchg implementation
>         - Don't define cmpxchg64() on x86
>         - Drop _mb from the helpers name
>         - Add missing barrier in the arm32 implementation
> ---
>  xen/include/asm-arm/arm32/cmpxchg.h | 68 +++++++++++++++++++++++++++++
>  xen/include/asm-arm/arm64/cmpxchg.h |  5 +++
>  xen/include/asm-arm/guest_atomics.h | 22 ++++++++++
>  xen/include/asm-x86/guest_atomics.h |  1 +
>  4 files changed, 96 insertions(+)
> 
> diff --git a/xen/include/asm-arm/arm32/cmpxchg.h b/xen/include/asm-arm/arm32/cmpxchg.h
> index 3ef1e5c63276..b0bd1d8b685e 100644
> --- a/xen/include/asm-arm/arm32/cmpxchg.h
> +++ b/xen/include/asm-arm/arm32/cmpxchg.h
> @@ -87,6 +87,37 @@ __CMPXCHG_CASE(b, 1)
>  __CMPXCHG_CASE(h, 2)
>  __CMPXCHG_CASE( , 4)
>  
> +static inline bool __cmpxchg_case_8(volatile uint64_t *ptr,
> +			 	    uint64_t *old,
> +			 	    uint64_t new,
> +			 	    bool timeout,
> +				    unsigned int max_try)
> +{
> +	uint64_t oldval;
> +	uint64_t res;
> +
> +	do {
> +		asm volatile(
> +		"	ldrexd		%1, %H1, [%3]\n"
> +		"	teq		%1, %4\n"
> +		"	teqeq		%H1, %H4\n"
> +		"	movne		%0, #0\n"
> +		"	movne		%H0, #0\n"
> +		"	bne		2f\n"
> +		"	strexd		%0, %5, %H5, [%3]\n"
> +		"2:"
> +		: "=&r" (res), "=&r" (oldval), "+Qo" (*ptr)
> +		: "r" (ptr), "r" (*old), "r" (new)
> +		: "memory", "cc");
> +		if (!res)
> +			break;
> +	} while (!timeout || ((--max_try) > 0));
> +
> +	*old = oldval;
> +
> +	return !res;
> +}
> +
>  static always_inline bool __int_cmpxchg(volatile void *ptr, unsigned long *old,
>  					unsigned long new, int size,
>  					bool timeout, unsigned int max_try)
> @@ -145,11 +176,48 @@ static always_inline bool __cmpxchg_timeout(volatile void *ptr,
>  	return ret;
>  }
>  
> +/*
> + * The helper may fail to update the memory if the action takes too long.
> + *
> + * @old: On call the value pointed contains the expected old value. It will be
> + * updated to the actual old value.
> + * @max_try: Maximum number of iterations
> + *
> + * The helper will return true when the update has succeeded (i.e no
> + * timeout) and false if the update has failed.
> + */
> +static always_inline bool __cmpxchg64_timeout(volatile uint64_t *ptr,
> +					      uint64_t *old,
> +					      uint64_t new,
> +					      unsigned int max_try)
> +{
> +	bool ret;
> +
> +	smp_mb();
> +	ret = __cmpxchg_case_8(ptr, old, new, true, max_try);
> +	smp_mb();
> +
> +	return ret;
> +}
> +
>  #define cmpxchg(ptr,o,n)						\
>  	((__typeof__(*(ptr)))__cmpxchg((ptr),				\
>  				       (unsigned long)(o),		\
>  				       (unsigned long)(n),		\
>  				       sizeof(*(ptr))))
> +
> +static inline uint64_t cmpxchg64(volatile uint64_t *ptr,
> +				 uint64_t old,
> +				 uint64_t new)
> +{
> +	smp_mb();
> +	if (!__cmpxchg_case_8(ptr, &old, new, false, 0))
> +		ASSERT_UNREACHABLE();
> +	smp_mb();
> +
> +	return old;
> +}
> +
>  #endif
>  /*
>   * Local variables:
> diff --git a/xen/include/asm-arm/arm64/cmpxchg.h b/xen/include/asm-arm/arm64/cmpxchg.h
> index f4a8c1ccba80..10e4edc022b7 100644
> --- a/xen/include/asm-arm/arm64/cmpxchg.h
> +++ b/xen/include/asm-arm/arm64/cmpxchg.h
> @@ -167,6 +167,11 @@ static always_inline bool __cmpxchg_timeout(volatile void *ptr,
>  	__ret; \
>  })
>  
> +#define cmpxchg64(ptr, o, n) cmpxchg(ptr, o, n)
> +
> +#define __cmpxchg64_timeout(ptr, old, new, max_try)	\
> +	__cmpxchg_timeout(ptr, old, new, 8, max_try)
> +
>  #endif
>  /*
>   * Local variables:
> diff --git a/xen/include/asm-arm/guest_atomics.h b/xen/include/asm-arm/guest_atomics.h
> index 20347849e56c..9e2e96d4ff72 100644
> --- a/xen/include/asm-arm/guest_atomics.h
> +++ b/xen/include/asm-arm/guest_atomics.h
> @@ -115,6 +115,28 @@ static inline unsigned long __guest_cmpxchg(struct domain *d,
>                                           (unsigned long)(n),\
>                                           sizeof (*(ptr))))
>  
> +static inline uint64_t guest_cmpxchg64(struct domain *d,
> +                                       volatile uint64_t *ptr,
> +                                       uint64_t old,
> +                                       uint64_t new)
> +{
> +    uint64_t oldval = old;
> +
> +    perfc_incr(atomics_guest);
> +
> +    if ( __cmpxchg64_timeout(ptr, &oldval, new,
> +                             this_cpu(guest_safe_atomic_max)) )
> +        return oldval;
> +
> +    perfc_incr(atomics_guest_paused);
> +
> +    domain_pause_nosync(d);
> +    oldval = cmpxchg64(ptr, old, new);
> +    domain_unpause(d);
> +
> +    return oldval;
> +}
> +
>  #endif /* _ARM_GUEST_ATOMICS_H */
>  /*
>   * Local variables:
> diff --git a/xen/include/asm-x86/guest_atomics.h b/xen/include/asm-x86/guest_atomics.h
> index 029417c8ffc1..87e7075b7623 100644
> --- a/xen/include/asm-x86/guest_atomics.h
> +++ b/xen/include/asm-x86/guest_atomics.h
> @@ -20,6 +20,7 @@
>      ((void)(d), test_and_change_bit(nr, p))
>  
>  #define guest_cmpxchg(d, ptr, o, n) ((void)(d), cmpxchg(ptr, o, n))
> +#define guest_cmpxchg64(d, ptr, o, n) ((void)(d), cmpxchg(ptr, o, n))
>  
>  #endif /* _X86_GUEST_ATOMICS_H */
>  /*
> -- 
> 2.17.1
>
diff mbox series

Patch

diff --git a/xen/include/asm-arm/arm32/cmpxchg.h b/xen/include/asm-arm/arm32/cmpxchg.h
index 3ef1e5c63276..b0bd1d8b685e 100644
--- a/xen/include/asm-arm/arm32/cmpxchg.h
+++ b/xen/include/asm-arm/arm32/cmpxchg.h
@@ -87,6 +87,37 @@  __CMPXCHG_CASE(b, 1)
 __CMPXCHG_CASE(h, 2)
 __CMPXCHG_CASE( , 4)
 
+static inline bool __cmpxchg_case_8(volatile uint64_t *ptr,
+			 	    uint64_t *old,
+			 	    uint64_t new,
+			 	    bool timeout,
+				    unsigned int max_try)
+{
+	uint64_t oldval;
+	uint64_t res;
+
+	do {
+		asm volatile(
+		"	ldrexd		%1, %H1, [%3]\n"
+		"	teq		%1, %4\n"
+		"	teqeq		%H1, %H4\n"
+		"	movne		%0, #0\n"
+		"	movne		%H0, #0\n"
+		"	bne		2f\n"
+		"	strexd		%0, %5, %H5, [%3]\n"
+		"2:"
+		: "=&r" (res), "=&r" (oldval), "+Qo" (*ptr)
+		: "r" (ptr), "r" (*old), "r" (new)
+		: "memory", "cc");
+		if (!res)
+			break;
+	} while (!timeout || ((--max_try) > 0));
+
+	*old = oldval;
+
+	return !res;
+}
+
 static always_inline bool __int_cmpxchg(volatile void *ptr, unsigned long *old,
 					unsigned long new, int size,
 					bool timeout, unsigned int max_try)
@@ -145,11 +176,48 @@  static always_inline bool __cmpxchg_timeout(volatile void *ptr,
 	return ret;
 }
 
+/*
+ * The helper may fail to update the memory if the action takes too long.
+ *
+ * @old: On call the value pointed contains the expected old value. It will be
+ * updated to the actual old value.
+ * @max_try: Maximum number of iterations
+ *
+ * The helper will return true when the update has succeeded (i.e no
+ * timeout) and false if the update has failed.
+ */
+static always_inline bool __cmpxchg64_timeout(volatile uint64_t *ptr,
+					      uint64_t *old,
+					      uint64_t new,
+					      unsigned int max_try)
+{
+	bool ret;
+
+	smp_mb();
+	ret = __cmpxchg_case_8(ptr, old, new, true, max_try);
+	smp_mb();
+
+	return ret;
+}
+
 #define cmpxchg(ptr,o,n)						\
 	((__typeof__(*(ptr)))__cmpxchg((ptr),				\
 				       (unsigned long)(o),		\
 				       (unsigned long)(n),		\
 				       sizeof(*(ptr))))
+
+static inline uint64_t cmpxchg64(volatile uint64_t *ptr,
+				 uint64_t old,
+				 uint64_t new)
+{
+	smp_mb();
+	if (!__cmpxchg_case_8(ptr, &old, new, false, 0))
+		ASSERT_UNREACHABLE();
+	smp_mb();
+
+	return old;
+}
+
 #endif
 /*
  * Local variables:
diff --git a/xen/include/asm-arm/arm64/cmpxchg.h b/xen/include/asm-arm/arm64/cmpxchg.h
index f4a8c1ccba80..10e4edc022b7 100644
--- a/xen/include/asm-arm/arm64/cmpxchg.h
+++ b/xen/include/asm-arm/arm64/cmpxchg.h
@@ -167,6 +167,11 @@  static always_inline bool __cmpxchg_timeout(volatile void *ptr,
 	__ret; \
 })
 
+#define cmpxchg64(ptr, o, n) cmpxchg(ptr, o, n)
+
+#define __cmpxchg64_timeout(ptr, old, new, max_try)	\
+	__cmpxchg_timeout(ptr, old, new, 8, max_try)
+
 #endif
 /*
  * Local variables:
diff --git a/xen/include/asm-arm/guest_atomics.h b/xen/include/asm-arm/guest_atomics.h
index 20347849e56c..9e2e96d4ff72 100644
--- a/xen/include/asm-arm/guest_atomics.h
+++ b/xen/include/asm-arm/guest_atomics.h
@@ -115,6 +115,28 @@  static inline unsigned long __guest_cmpxchg(struct domain *d,
                                          (unsigned long)(n),\
                                          sizeof (*(ptr))))
 
+static inline uint64_t guest_cmpxchg64(struct domain *d,
+                                       volatile uint64_t *ptr,
+                                       uint64_t old,
+                                       uint64_t new)
+{
+    uint64_t oldval = old;
+
+    perfc_incr(atomics_guest);
+
+    if ( __cmpxchg64_timeout(ptr, &oldval, new,
+                             this_cpu(guest_safe_atomic_max)) )
+        return oldval;
+
+    perfc_incr(atomics_guest_paused);
+
+    domain_pause_nosync(d);
+    oldval = cmpxchg64(ptr, old, new);
+    domain_unpause(d);
+
+    return oldval;
+}
+
 #endif /* _ARM_GUEST_ATOMICS_H */
 /*
  * Local variables:
diff --git a/xen/include/asm-x86/guest_atomics.h b/xen/include/asm-x86/guest_atomics.h
index 029417c8ffc1..87e7075b7623 100644
--- a/xen/include/asm-x86/guest_atomics.h
+++ b/xen/include/asm-x86/guest_atomics.h
@@ -20,6 +20,7 @@ 
     ((void)(d), test_and_change_bit(nr, p))
 
 #define guest_cmpxchg(d, ptr, o, n) ((void)(d), cmpxchg(ptr, o, n))
+#define guest_cmpxchg64(d, ptr, o, n) ((void)(d), cmpxchg(ptr, o, n))
 
 #endif /* _X86_GUEST_ATOMICS_H */
 /*