diff mbox

[v1,08/14] hostfloat: support float32/64 addition and subtraction

Message ID 1521663109-32262-9-git-send-email-cota@braap.org (mailing list archive)
State New, archived
Headers show

Commit Message

Emilio Cota March 21, 2018, 8:11 p.m. UTC
Performance results (single and double precision) for
fp-bench run under aarch64-linux-user on an Intel(R)
Core(TM) i7-4790K CPU @ 4.00GHz host:

- before:
add-single: 86.74 MFlops
add-double: 86.46 MFlops
sub-single: 83.33 MFlops
sub-double: 84.57 MFlops

- after:
add-single: 188.26 MFlops
add-double: 186.60 MFlops
sub-single: 186.19 MFlops
sub-double: 187.77 MFlops

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 include/fpu/hostfloat.h |  6 ++++++
 include/fpu/softfloat.h |  8 ++++----
 fpu/hostfloat.c         | 50 +++++++++++++++++++++++++++++++++++++++++++++++++
 fpu/softfloat.c         | 16 ++++++++--------
 4 files changed, 68 insertions(+), 12 deletions(-)

Comments

Richard Henderson March 22, 2018, 5:05 a.m. UTC | #1
On 03/22/2018 04:11 AM, Emilio G. Cota wrote:
> +#define GEN_FPU_ADDSUB(add_name, sub_name, soft_t, host_t,              \
> +                       host_abs_func, min_normal)                       \
> +    static inline __attribute__((always_inline)) soft_t                 \
> +    fpu_ ## soft_t ## _addsub(soft_t a, soft_t b, bool subtract,        \
> +                              float_status *s)                          \
> +    {                                                                   \
> +        soft_t ## _input_flush2(&a, &b, s);                             \
> +        if (likely((soft_t ## _is_normal(a) || soft_t ## _is_zero(a)) && \
> +                   (soft_t ## _is_normal(b) || soft_t ## _is_zero(b)) && \
> +                   s->float_exception_flags & float_flag_inexact &&     \
> +                   s->float_rounding_mode == float_round_nearest_even)) { \
> +            host_t ha = soft_t ## _to_ ## host_t(a);                    \
> +            host_t hb = soft_t ## _to_ ## host_t(b);                    \
> +            host_t hr;                                                  \
> +            soft_t r;                                                   \
> +                                                                        \
> +            if (subtract) {                                             \
> +                hb = -hb;                                               \
> +            }                                                           \
> +            hr = ha + hb;                                               \
> +            r = host_t ## _to_ ## soft_t(hr);                           \
> +            if (unlikely(soft_t ## _is_infinity(r))) {                  \
> +                s->float_exception_flags |= float_flag_overflow;        \
> +            } else if (unlikely(host_abs_func(hr) <= min_normal)) {     \
> +                goto soft;                                              \
> +            }                                                           \
> +            return r;                                                   \
> +        }                                                               \
> +    soft:                                                               \

Is there any especially good reason you want to not put this code into the
normal softfloat function?  Does it really make any measurable difference at
all to force this code to be inlined into a helper?


r~
diff mbox

Patch

diff --git a/include/fpu/hostfloat.h b/include/fpu/hostfloat.h
index b01291b..db49efa 100644
--- a/include/fpu/hostfloat.h
+++ b/include/fpu/hostfloat.h
@@ -11,4 +11,10 @@ 
 #error fpu/hostfloat.h must only be included from softfloat.h
 #endif
 
+float32 float32_add(float32 a, float32 b, float_status *status);
+float32 float32_sub(float32 a, float32 b, float_status *status);
+
+float64 float64_add(float64 a, float64 b, float_status *status);
+float64 float64_sub(float64 a, float64 b, float_status *status);
+
 #endif /* HOSTFLOAT_H */
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 8963b68..eb7e9bc 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -342,8 +342,8 @@  float128 float32_to_float128(float32, float_status *status);
 | Software IEC/IEEE single-precision operations.
 *----------------------------------------------------------------------------*/
 float32 float32_round_to_int(float32, float_status *status);
-float32 float32_add(float32, float32, float_status *status);
-float32 float32_sub(float32, float32, float_status *status);
+float32 soft_float32_add(float32, float32, float_status *status);
+float32 soft_float32_sub(float32, float32, float_status *status);
 float32 float32_mul(float32, float32, float_status *status);
 float32 float32_div(float32, float32, float_status *status);
 float32 float32_rem(float32, float32, float_status *status);
@@ -482,8 +482,8 @@  float128 float64_to_float128(float64, float_status *status);
 *----------------------------------------------------------------------------*/
 float64 float64_round_to_int(float64, float_status *status);
 float64 float64_trunc_to_int(float64, float_status *status);
-float64 float64_add(float64, float64, float_status *status);
-float64 float64_sub(float64, float64, float_status *status);
+float64 soft_float64_add(float64, float64, float_status *status);
+float64 soft_float64_sub(float64, float64, float_status *status);
 float64 float64_mul(float64, float64, float_status *status);
 float64 float64_div(float64, float64, float_status *status);
 float64 float64_rem(float64, float64, float_status *status);
diff --git a/fpu/hostfloat.c b/fpu/hostfloat.c
index cab0341..502552b 100644
--- a/fpu/hostfloat.c
+++ b/fpu/hostfloat.c
@@ -94,3 +94,53 @@  GEN_TYPE_CONV(double_to_float64, float64, double)
 GEN_INPUT_FLUSH(float32)
 GEN_INPUT_FLUSH(float64)
 #undef GEN_INPUT_FLUSH
+
+#define GEN_FPU_ADDSUB(add_name, sub_name, soft_t, host_t,              \
+                       host_abs_func, min_normal)                       \
+    static inline __attribute__((always_inline)) soft_t                 \
+    fpu_ ## soft_t ## _addsub(soft_t a, soft_t b, bool subtract,        \
+                              float_status *s)                          \
+    {                                                                   \
+        soft_t ## _input_flush2(&a, &b, s);                             \
+        if (likely((soft_t ## _is_normal(a) || soft_t ## _is_zero(a)) && \
+                   (soft_t ## _is_normal(b) || soft_t ## _is_zero(b)) && \
+                   s->float_exception_flags & float_flag_inexact &&     \
+                   s->float_rounding_mode == float_round_nearest_even)) { \
+            host_t ha = soft_t ## _to_ ## host_t(a);                    \
+            host_t hb = soft_t ## _to_ ## host_t(b);                    \
+            host_t hr;                                                  \
+            soft_t r;                                                   \
+                                                                        \
+            if (subtract) {                                             \
+                hb = -hb;                                               \
+            }                                                           \
+            hr = ha + hb;                                               \
+            r = host_t ## _to_ ## soft_t(hr);                           \
+            if (unlikely(soft_t ## _is_infinity(r))) {                  \
+                s->float_exception_flags |= float_flag_overflow;        \
+            } else if (unlikely(host_abs_func(hr) <= min_normal)) {     \
+                goto soft;                                              \
+            }                                                           \
+            return r;                                                   \
+        }                                                               \
+    soft:                                                               \
+        if (subtract) {                                                 \
+            return soft_ ## soft_t ## _sub(a, b, s);                    \
+        } else {                                                        \
+            return soft_ ## soft_t ## _add(a, b, s);                    \
+        }                                                               \
+    }                                                                   \
+                                                                        \
+    soft_t add_name(soft_t a, soft_t b, float_status *status)           \
+    {                                                                   \
+        return fpu_ ## soft_t ## _addsub(a, b, false, status);          \
+    }                                                                   \
+                                                                        \
+    soft_t sub_name(soft_t a, soft_t b, float_status *status)           \
+    {                                                                   \
+        return fpu_ ## soft_t ## _addsub(a, b, true, status);           \
+    }                                                                   \
+
+GEN_FPU_ADDSUB(float32_add, float32_sub, float32, float, fabsf, FLT_MIN)
+GEN_FPU_ADDSUB(float64_add, float64_sub, float64, double, fabs, DBL_MIN)
+#undef GEN_FPU_ADDSUB
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index ee615a9..bd82adf 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -738,8 +738,8 @@  float16  __attribute__((flatten)) float16_add(float16 a, float16 b,
     return float16_round_pack_canonical(pr, status);
 }
 
-float32 __attribute__((flatten)) float32_add(float32 a, float32 b,
-                                             float_status *status)
+float32 __attribute__((flatten)) soft_float32_add(float32 a, float32 b,
+                                                  float_status *status)
 {
     FloatParts pa = float32_unpack_canonical(a, status);
     FloatParts pb = float32_unpack_canonical(b, status);
@@ -748,8 +748,8 @@  float32 __attribute__((flatten)) float32_add(float32 a, float32 b,
     return float32_round_pack_canonical(pr, status);
 }
 
-float64 __attribute__((flatten)) float64_add(float64 a, float64 b,
-                                             float_status *status)
+float64 __attribute__((flatten)) soft_float64_add(float64 a, float64 b,
+                                                  float_status *status)
 {
     FloatParts pa = float64_unpack_canonical(a, status);
     FloatParts pb = float64_unpack_canonical(b, status);
@@ -768,8 +768,8 @@  float16 __attribute__((flatten)) float16_sub(float16 a, float16 b,
     return float16_round_pack_canonical(pr, status);
 }
 
-float32 __attribute__((flatten)) float32_sub(float32 a, float32 b,
-                                             float_status *status)
+float32 __attribute__((flatten)) soft_float32_sub(float32 a, float32 b,
+                                                  float_status *status)
 {
     FloatParts pa = float32_unpack_canonical(a, status);
     FloatParts pb = float32_unpack_canonical(b, status);
@@ -778,8 +778,8 @@  float32 __attribute__((flatten)) float32_sub(float32 a, float32 b,
     return float32_round_pack_canonical(pr, status);
 }
 
-float64 __attribute__((flatten)) float64_sub(float64 a, float64 b,
-                                             float_status *status)
+float64 __attribute__((flatten)) soft_float64_sub(float64 a, float64 b,
+                                                  float_status *status)
 {
     FloatParts pa = float64_unpack_canonical(a, status);
     FloatParts pb = float64_unpack_canonical(b, status);