diff mbox series

[03/72] qemu/host-utils: Add wrappers for carry builtins

Message ID 20210508014802.892561-4-richard.henderson@linaro.org (mailing list archive)
State New, archived
Headers show
Series Convert floatx80 and float128 to FloatParts | expand

Commit Message

Richard Henderson May 8, 2021, 1:46 a.m. UTC
These builtins came in clang 3.8, but are not present in gcc through
version 11.  Even in clang the optimization is not ideal except for
x86_64, but no worse than the hand-coding that we currently do.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/qemu/host-utils.h | 50 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

Comments

Alex Bennée May 10, 2021, 12:57 p.m. UTC | #1
Richard Henderson <richard.henderson@linaro.org> writes:

> These builtins came in clang 3.8, but are not present in gcc through
> version 11.  Even in clang the optimization is not ideal except for
> x86_64, but no worse than the hand-coding that we currently do.

Given this statement....

<snip>
> +/**
> + * uadd64_carry - addition with carry-in and carry-out
> + * @x, @y: addends
> + * @pcarry: in-out carry value
> + *
> + * Computes @x + @y + *@pcarry, placing the carry-out back
> + * into *@pcarry and returning the 64-bit sum.
> + */
> +static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)
> +{
> +#if __has_builtin(__builtin_addcll)
> +    unsigned long long c = *pcarry;
> +    x = __builtin_addcll(x, y, c, &c);

what happens when unsigned long long isn't the same as uint64_t? Doesn't
C99 only specify a minimum?

> +    *pcarry = c & 1;

Why do we need to clamp it here? Shouldn't the compiler automatically do
that due to the bool?

> +    return x;
> +#else
> +    bool c = *pcarry;
> +    /* This is clang's internal expansion of __builtin_addc. */
> +    c = uadd64_overflow(x, c, &x);
> +    c |= uadd64_overflow(x, y, &x);
> +    *pcarry = c;
> +    return x;
> +#endif

Either way if you aren't super happy with the compilers builtin and you
get equivalent code with the unambigious hand coded version then what is
the point of having a builtin leg?

> +}
> +
> +/**
> + * usub64_borrow - subtraction with borrow-in and borrow-out
> + * @x, @y: addends
> + * @pborrow: in-out borrow value
> + *
> + * Computes @x - @y - *@pborrow, placing the borrow-out back
> + * into *@pborrow and returning the 64-bit sum.
> + */
> +static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow)
> +{
> +#if __has_builtin(__builtin_subcll)
> +    unsigned long long b = *pborrow;
> +    x = __builtin_subcll(x, y, b, &b);
> +    *pborrow = b & 1;
> +    return x;
> +#else
> +    bool b = *pborrow;
> +    b = usub64_overflow(x, b, &x);
> +    b |= usub64_overflow(x, y, &x);
> +    *pborrow = b;
> +    return x;
> +#endif
> +}
> +
>  /* Host type specific sizes of these routines.  */
>  
>  #if ULONG_MAX == UINT32_MAX
Richard Henderson May 11, 2021, 8:10 p.m. UTC | #2
On 5/10/21 7:57 AM, Alex Bennée wrote:
> 
> Richard Henderson <richard.henderson@linaro.org> writes:
> 
>> These builtins came in clang 3.8, but are not present in gcc through
>> version 11.  Even in clang the optimization is not ideal except for
>> x86_64, but no worse than the hand-coding that we currently do.
> 
> Given this statement....

I think you mis-read the "except for x86_64" part?

Anyway, these are simply bugs to be filed against clang, so that hopefully 
clang-12 will do a good job with the builtin.  And as I said, while the 
generated code is not ideal, it's no worse.

>> +static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)
>> +{
>> +#if __has_builtin(__builtin_addcll)
>> +    unsigned long long c = *pcarry;
>> +    x = __builtin_addcll(x, y, c, &c);
> 
> what happens when unsigned long long isn't the same as uint64_t? Doesn't
> C99 only specify a minimum?

If you only look at C99, sure.  But looking at the set of supported hosts, 
unsigned long long is always a 64-bit type.

>> +    *pcarry = c & 1;
> 
> Why do we need to clamp it here? Shouldn't the compiler automatically do
> that due to the bool?

This produces a single AND insn, instead of CMP + SETcc.


r~
Alex Bennée May 12, 2021, 11:17 a.m. UTC | #3
Richard Henderson <richard.henderson@linaro.org> writes:

> On 5/10/21 7:57 AM, Alex Bennée wrote:
>> Richard Henderson <richard.henderson@linaro.org> writes:
>> 
>>> These builtins came in clang 3.8, but are not present in gcc through
>>> version 11.  Even in clang the optimization is not ideal except for
>>> x86_64, but no worse than the hand-coding that we currently do.
>> Given this statement....
>
> I think you mis-read the "except for x86_64" part?
>
> Anyway, these are simply bugs to be filed against clang, so that
> hopefully clang-12 will do a good job with the builtin.  And as I
> said, while the generated code is not ideal, it's no worse.
>
>>> +static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)
>>> +{
>>> +#if __has_builtin(__builtin_addcll)
>>> +    unsigned long long c = *pcarry;
>>> +    x = __builtin_addcll(x, y, c, &c);
>> what happens when unsigned long long isn't the same as uint64_t?
>> Doesn't
>> C99 only specify a minimum?
>
> If you only look at C99, sure.  But looking at the set of supported
> hosts, unsigned long long is always a 64-bit type.

I guess I'm worrying about a theoretical future - but we don't worry
about it for other ll builtins so no biggy.

>
>>> +    *pcarry = c & 1;
>> Why do we need to clamp it here? Shouldn't the compiler
>> automatically do
>> that due to the bool?
>
> This produces a single AND insn, instead of CMP + SETcc.

Might be worth mentioning that in the commit message. 

Anyway:

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
diff mbox series

Patch

diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index fd76f0cbd3..2ea8b3000b 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -26,6 +26,7 @@ 
 #ifndef HOST_UTILS_H
 #define HOST_UTILS_H
 
+#include "qemu/compiler.h"
 #include "qemu/bswap.h"
 
 #ifdef CONFIG_INT128
@@ -581,6 +582,55 @@  static inline bool umul64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
 #endif
 }
 
+/**
+ * uadd64_carry - addition with carry-in and carry-out
+ * @x, @y: addends
+ * @pcarry: in-out carry value
+ *
+ * Computes @x + @y + *@pcarry, placing the carry-out back
+ * into *@pcarry and returning the 64-bit sum.
+ */
+static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)
+{
+#if __has_builtin(__builtin_addcll)
+    unsigned long long c = *pcarry;
+    x = __builtin_addcll(x, y, c, &c);
+    *pcarry = c & 1;
+    return x;
+#else
+    bool c = *pcarry;
+    /* This is clang's internal expansion of __builtin_addc. */
+    c = uadd64_overflow(x, c, &x);
+    c |= uadd64_overflow(x, y, &x);
+    *pcarry = c;
+    return x;
+#endif
+}
+
+/**
+ * usub64_borrow - subtraction with borrow-in and borrow-out
+ * @x, @y: addends
+ * @pborrow: in-out borrow value
+ *
+ * Computes @x - @y - *@pborrow, placing the borrow-out back
+ * into *@pborrow and returning the 64-bit sum.
+ */
+static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow)
+{
+#if __has_builtin(__builtin_subcll)
+    unsigned long long b = *pborrow;
+    x = __builtin_subcll(x, y, b, &b);
+    *pborrow = b & 1;
+    return x;
+#else
+    bool b = *pborrow;
+    b = usub64_overflow(x, b, &x);
+    b |= usub64_overflow(x, y, &x);
+    *pborrow = b;
+    return x;
+#endif
+}
+
 /* Host type specific sizes of these routines.  */
 
 #if ULONG_MAX == UINT32_MAX