
[v2,2/5] target/ppc: change xs[n]madd[am]sp to use float64r32_muladd

Message ID 20220303172041.1915037-3-matheus.ferst@eldorado.org.br (mailing list archive)
State New, archived
Series tests/tcg/ppc64le: fix the build of TCG tests with Clang

Commit Message

Matheus K. Ferst March 3, 2022, 5:20 p.m. UTC
From: Matheus Ferst <matheus.ferst@eldorado.org.br>

Change VSX Scalar Multiply-Add/Subtract Type-A/M Single Precision
helpers to use float64r32_muladd. This method should correctly handle
all rounding modes, so the workaround for float_round_nearest_even can
be dropped.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
---
 target/ppc/fpu_helper.c | 93 ++++++++++++++++-------------------------
 1 file changed, 35 insertions(+), 58 deletions(-)
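
For illustration only (a sketch, not code from the patch): once VSX_MADD is expanded for the scalar single-precision case, the helper body boils down to a single softfloat call. The sketch below assumes QEMU's fpu/softfloat.h API; xsmaddsp_core is a hypothetical name, and FPRF/exception bookkeeping is omitted.

#include "qemu/osdep.h"
#include "fpu/softfloat.h"

/*
 * PowerPC multiply-add computes s1 * s3 + s2; maddflgs selects the
 * madd/msub/nmadd/nmsub variant, as in the real VSX_MADD macro.
 */
static float64 xsmaddsp_core(float64 s1, float64 s2, float64 s3,
                             int maddflgs, float_status *status)
{
    /*
     * Fused multiply-add rounded exactly once, directly to
     * single-precision range and precision, under the current
     * rounding mode in *status.
     */
    return float64r32_muladd(s1, s3, s2, maddflgs, status);
}

Because float64r32_muladd rounds only once, the removed workaround in the diff below (forcing round-to-zero, OR-ing the inexact flag into the low bit to emulate round-to-odd, then rounding to single precision with do_frsp) is no longer needed for any rounding mode.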

Comments

Richard Henderson March 3, 2022, 6:49 p.m. UTC | #1
On 3/3/22 07:20, matheus.ferst@eldorado.org.br wrote:
> From: Matheus Ferst <matheus.ferst@eldorado.org.br>
> 
> Change VSX Scalar Multiply-Add/Subtract Type-A/M Single Precision
> helpers to use float64r32_muladd. This method should correctly handle
> all rounding modes, so the workaround for float_round_nearest_even can
> be dropped.
> 
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
> ---
>   target/ppc/fpu_helper.c | 93 ++++++++++++++++-------------------------
>   1 file changed, 35 insertions(+), 58 deletions(-)
> 
> diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
> index 8f970288f5..c973968ed6 100644
> --- a/target/ppc/fpu_helper.c
> +++ b/target/ppc/fpu_helper.c
> @@ -1916,22 +1916,19 @@ void helper_xsdivqp(CPUPPCState *env, uint32_t opcode,
>    *   fld   - vsr_t field (VsrD(*) or VsrW(*))
>    *   sfprf - set FPRF
>    */
> -#define VSX_RE(op, nels, tp, fld, sfprf, r2sp)                                \
> +#define VSX_RE(op, nels, tp, op_tp, fld, sfprf)                               \
>   void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb)              \
>   {                                                                             \
>       ppc_vsr_t t = { };                                                        \
> -    int i;                                                                    \
> +    int i, flags;                                                             \
>                                                                                 \
>       helper_reset_fpstatus(env);                                               \
>                                                                                 \
>       for (i = 0; i < nels; i++) {                                              \
> -        if (unlikely(tp##_is_signaling_nan(xb->fld, &env->fp_status))) {      \
> +        t.fld = op_tp##_div(tp##_one, xb->fld, &env->fp_status);              \
> +        flags = get_float_exception_flags(&env->fp_status);                   \
> +        if (unlikely(flags & float_flag_invalid_snan)) {   

You seem to have squashed the change to recip-estimate into the same patch as muladd.


> -#define VSX_RSQRTE(op, nels, tp, fld, sfprf, r2sp)                           \
> +#define VSX_RSQRTE(op, nels, tp, op_tp, fld, sfprf)                          \

And recip-sqrt-estimate.

I guess it's ok to squash, since it's all related, but you should update the patch 
description if you leave it this way.


r~
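
For context, the recip-estimate change being pointed at here replaces a pre-division signaling-NaN test on the operand with a post-division check of the accumulated invalid_snan exception flag, and lets float64r32_div round straight to single precision. A sketch of the xsresp body with VSX_RE expanded by hand (xsresp_core is a hypothetical name; FPRF handling and do_float_check_status are omitted):

static float64 xsresp_core(CPUPPCState *env, float64 xb, uintptr_t retaddr)
{
    /* 1.0 / xb, rounded to single-precision range and precision. */
    float64 t = float64r32_div(float64_one, xb, &env->fp_status);
    int flags = get_float_exception_flags(&env->fp_status);

    if (unlikely(flags & float_flag_invalid_snan)) {
        /* Replaces the old float64_is_signaling_nan() pre-check on xb. */
        float_invalid_op_vxsnan(env, retaddr);
    }
    return t;
}
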
Matheus K. Ferst March 3, 2022, 8:53 p.m. UTC | #2
On 03/03/2022 15:49, Richard Henderson wrote:
> On 3/3/22 07:20, matheus.ferst@eldorado.org.br wrote:
>> From: Matheus Ferst <matheus.ferst@eldorado.org.br>
>>
>> Change VSX Scalar Multiply-Add/Subtract Type-A/M Single Precision
>> helpers to use float64r32_muladd. This method should correctly handle
>> all rounding modes, so the workaround for float_round_nearest_even can
>> be dropped.
>>
>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>> Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
>> ---
>>   target/ppc/fpu_helper.c | 93 ++++++++++++++++-------------------------
>>   1 file changed, 35 insertions(+), 58 deletions(-)
>>
>> diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
>> index 8f970288f5..c973968ed6 100644
>> --- a/target/ppc/fpu_helper.c
>> +++ b/target/ppc/fpu_helper.c
>> @@ -1916,22 +1916,19 @@ void helper_xsdivqp(CPUPPCState *env, uint32_t opcode,
>>    *   fld   - vsr_t field (VsrD(*) or VsrW(*))
>>    *   sfprf - set FPRF
>>    */
>> -#define VSX_RE(op, nels, tp, fld, sfprf, r2sp)                                \
>> +#define VSX_RE(op, nels, tp, op_tp, fld, sfprf)                               \
>>   void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb)              \
>>   {                                                                             \
>>       ppc_vsr_t t = { };                                                        \
>> -    int i;                                                                    \
>> +    int i, flags;                                                             \
>>                                                                                 \
>>       helper_reset_fpstatus(env);                                               \
>>                                                                                 \
>>       for (i = 0; i < nels; i++) {                                              \
>> -        if (unlikely(tp##_is_signaling_nan(xb->fld, &env->fp_status))) {      \
>> +        t.fld = op_tp##_div(tp##_one, xb->fld, &env->fp_status);              \
>> +        flags = get_float_exception_flags(&env->fp_status);                   \
>> +        if (unlikely(flags & float_flag_invalid_snan)) {
> 
> You seem to have squashed the change to recip-estimate into the same 
> patch as muladd.
> 
> 
>> -#define VSX_RSQRTE(op, nels, tp, fld, sfprf, r2sp)                           \
>> +#define VSX_RSQRTE(op, nels, tp, op_tp, fld, sfprf)                          \
> 
> And recip-sqrt-estimate.
> 
> I guess it's ok to squash, since it's all related, but you should update the
> patch description if you leave it this way.
> 

Sorry, I cherry-picked the wrong branch. This patch should just be a 
rebase of v1. I'll send the changes to 
VSX_{ADD_SUB,MUL,DIV,RE,SQRT,RSQRTE} in a separate patch series since 
it's not test-related.

Thanks,
Matheus K. Ferst
Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/>
Software Analyst

Patch

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 8f970288f5..c973968ed6 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -1916,22 +1916,19 @@  void helper_xsdivqp(CPUPPCState *env, uint32_t opcode,
  *   fld   - vsr_t field (VsrD(*) or VsrW(*))
  *   sfprf - set FPRF
  */
-#define VSX_RE(op, nels, tp, fld, sfprf, r2sp)                                \
+#define VSX_RE(op, nels, tp, op_tp, fld, sfprf)                               \
 void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb)              \
 {                                                                             \
     ppc_vsr_t t = { };                                                        \
-    int i;                                                                    \
+    int i, flags;                                                             \
                                                                               \
     helper_reset_fpstatus(env);                                               \
                                                                               \
     for (i = 0; i < nels; i++) {                                              \
-        if (unlikely(tp##_is_signaling_nan(xb->fld, &env->fp_status))) {      \
+        t.fld = op_tp##_div(tp##_one, xb->fld, &env->fp_status);              \
+        flags = get_float_exception_flags(&env->fp_status);                   \
+        if (unlikely(flags & float_flag_invalid_snan)) {                      \
             float_invalid_op_vxsnan(env, GETPC());                            \
-        }                                                                     \
-        t.fld = tp##_div(tp##_one, xb->fld, &env->fp_status);                 \
-                                                                              \
-        if (r2sp) {                                                           \
-            t.fld = do_frsp(env, t.fld, GETPC());                             \
         }                                                                     \
                                                                               \
         if (sfprf) {                                                          \
@@ -1943,10 +1940,10 @@  void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb)              \
     do_float_check_status(env, GETPC());                                      \
 }
 
-VSX_RE(xsredp, 1, float64, VsrD(0), 1, 0)
-VSX_RE(xsresp, 1, float64, VsrD(0), 1, 1)
-VSX_RE(xvredp, 2, float64, VsrD(i), 0, 0)
-VSX_RE(xvresp, 4, float32, VsrW(i), 0, 0)
+VSX_RE(xsredp, 1, float64, float64, VsrD(0), 1)
+VSX_RE(xsresp, 1, float64, float64r32, VsrD(0), 1)
+VSX_RE(xvredp, 2, float64, float64, VsrD(i), 0)
+VSX_RE(xvresp, 4, float32, float32, VsrW(i), 0)
 
 /*
  * VSX_SQRT - VSX floating point square root
@@ -1998,10 +1995,11 @@  VSX_SQRT(xvsqrtsp, 4, float32, VsrW(i), 0, 0)
  *   op    - instruction mnemonic
  *   nels  - number of elements (1, 2 or 4)
  *   tp    - type (float32 or float64)
+ *   op_tp - operation type
  *   fld   - vsr_t field (VsrD(*) or VsrW(*))
  *   sfprf - set FPRF
  */
-#define VSX_RSQRTE(op, nels, tp, fld, sfprf, r2sp)                           \
+#define VSX_RSQRTE(op, nels, tp, op_tp, fld, sfprf)                          \
 void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb)             \
 {                                                                            \
     ppc_vsr_t t = { };                                                       \
@@ -2012,15 +2010,12 @@  void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb)             \
     for (i = 0; i < nels; i++) {                                             \
         float_status tstat = env->fp_status;                                 \
         set_float_exception_flags(0, &tstat);                                \
-        t.fld = tp##_sqrt(xb->fld, &tstat);                                  \
-        t.fld = tp##_div(tp##_one, t.fld, &tstat);                           \
+        t.fld = op_tp##_sqrt(xb->fld, &tstat);                               \
+        t.fld = op_tp##_div(tp##_one, t.fld, &tstat);                        \
         env->fp_status.float_exception_flags |= tstat.float_exception_flags; \
         if (unlikely(tstat.float_exception_flags & float_flag_invalid)) {    \
             float_invalid_op_sqrt(env, tstat.float_exception_flags,          \
                                   sfprf, GETPC());                           \
-        }                                                                    \
-        if (r2sp) {                                                          \
-            t.fld = do_frsp(env, t.fld, GETPC());                            \
         }                                                                    \
                                                                              \
         if (sfprf) {                                                         \
@@ -2032,10 +2027,10 @@  void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb)             \
     do_float_check_status(env, GETPC());                                     \
 }
 
-VSX_RSQRTE(xsrsqrtedp, 1, float64, VsrD(0), 1, 0)
-VSX_RSQRTE(xsrsqrtesp, 1, float64, VsrD(0), 1, 1)
-VSX_RSQRTE(xvrsqrtedp, 2, float64, VsrD(i), 0, 0)
-VSX_RSQRTE(xvrsqrtesp, 4, float32, VsrW(i), 0, 0)
+VSX_RSQRTE(xsrsqrtedp, 1, float64, float64, VsrD(0), 1)
+VSX_RSQRTE(xsrsqrtesp, 1, float64, float64r32, VsrD(0), 1)
+VSX_RSQRTE(xvrsqrtedp, 2, float64, float64, VsrD(i), 0)
+VSX_RSQRTE(xvrsqrtesp, 4, float32, float32, VsrW(i), 0)
 
 /*
  * VSX_TDIV - VSX floating point test for divide
@@ -2156,9 +2151,8 @@  VSX_TSQRT(xvtsqrtsp, 4, float32, VsrW(i), -126, 23)
  *   maddflgs - flags for the float*muladd routine that control the
  *           various forms (madd, msub, nmadd, nmsub)
  *   sfprf - set FPRF
- *   r2sp  - round intermediate double precision result to single precision
  */
-#define VSX_MADD(op, nels, tp, fld, maddflgs, sfprf, r2sp)                    \
+#define VSX_MADD(op, nels, tp, fld, maddflgs, sfprf)                          \
 void helper_##op(CPUPPCState *env, ppc_vsr_t *xt,                             \
                  ppc_vsr_t *s1, ppc_vsr_t *s2, ppc_vsr_t *s3)                 \
 {                                                                             \
@@ -2170,20 +2164,7 @@  void helper_##op(CPUPPCState *env, ppc_vsr_t *xt,                             \
     for (i = 0; i < nels; i++) {                                              \
         float_status tstat = env->fp_status;                                  \
         set_float_exception_flags(0, &tstat);                                 \
-        if (r2sp && (tstat.float_rounding_mode == float_round_nearest_even)) {\
-            /*                                                                \
-             * Avoid double rounding errors by rounding the intermediate      \
-             * result to odd.                                                 \
-             */                                                               \
-            set_float_rounding_mode(float_round_to_zero, &tstat);             \
-            t.fld = tp##_muladd(s1->fld, s3->fld, s2->fld,                    \
-                                maddflgs, &tstat);                            \
-            t.fld |= (get_float_exception_flags(&tstat) &                     \
-                      float_flag_inexact) != 0;                               \
-        } else {                                                              \
-            t.fld = tp##_muladd(s1->fld, s3->fld, s2->fld,                    \
-                                maddflgs, &tstat);                            \
-        }                                                                     \
+        t.fld = tp##_muladd(s1->fld, s3->fld, s2->fld, maddflgs, &tstat);     \
         env->fp_status.float_exception_flags |= tstat.float_exception_flags;  \
                                                                               \
         if (unlikely(tstat.float_exception_flags & float_flag_invalid)) {     \
@@ -2191,10 +2172,6 @@  void helper_##op(CPUPPCState *env, ppc_vsr_t *xt,                             \
                                   sfprf, GETPC());                            \
         }                                                                     \
                                                                               \
-        if (r2sp) {                                                           \
-            t.fld = do_frsp(env, t.fld, GETPC());                             \
-        }                                                                     \
-                                                                              \
         if (sfprf) {                                                          \
             helper_compute_fprf_float64(env, t.fld);                          \
         }                                                                     \
@@ -2203,24 +2180,24 @@  void helper_##op(CPUPPCState *env, ppc_vsr_t *xt,                             \
     do_float_check_status(env, GETPC());                                      \
 }
 
-VSX_MADD(XSMADDDP, 1, float64, VsrD(0), MADD_FLGS, 1, 0)
-VSX_MADD(XSMSUBDP, 1, float64, VsrD(0), MSUB_FLGS, 1, 0)
-VSX_MADD(XSNMADDDP, 1, float64, VsrD(0), NMADD_FLGS, 1, 0)
-VSX_MADD(XSNMSUBDP, 1, float64, VsrD(0), NMSUB_FLGS, 1, 0)
-VSX_MADD(XSMADDSP, 1, float64, VsrD(0), MADD_FLGS, 1, 1)
-VSX_MADD(XSMSUBSP, 1, float64, VsrD(0), MSUB_FLGS, 1, 1)
-VSX_MADD(XSNMADDSP, 1, float64, VsrD(0), NMADD_FLGS, 1, 1)
-VSX_MADD(XSNMSUBSP, 1, float64, VsrD(0), NMSUB_FLGS, 1, 1)
+VSX_MADD(XSMADDDP, 1, float64, VsrD(0), MADD_FLGS, 1)
+VSX_MADD(XSMSUBDP, 1, float64, VsrD(0), MSUB_FLGS, 1)
+VSX_MADD(XSNMADDDP, 1, float64, VsrD(0), NMADD_FLGS, 1)
+VSX_MADD(XSNMSUBDP, 1, float64, VsrD(0), NMSUB_FLGS, 1)
+VSX_MADD(XSMADDSP, 1, float64r32, VsrD(0), MADD_FLGS, 1)
+VSX_MADD(XSMSUBSP, 1, float64r32, VsrD(0), MSUB_FLGS, 1)
+VSX_MADD(XSNMADDSP, 1, float64r32, VsrD(0), NMADD_FLGS, 1)
+VSX_MADD(XSNMSUBSP, 1, float64r32, VsrD(0), NMSUB_FLGS, 1)
 
-VSX_MADD(xvmadddp, 2, float64, VsrD(i), MADD_FLGS, 0, 0)
-VSX_MADD(xvmsubdp, 2, float64, VsrD(i), MSUB_FLGS, 0, 0)
-VSX_MADD(xvnmadddp, 2, float64, VsrD(i), NMADD_FLGS, 0, 0)
-VSX_MADD(xvnmsubdp, 2, float64, VsrD(i), NMSUB_FLGS, 0, 0)
+VSX_MADD(xvmadddp, 2, float64, VsrD(i), MADD_FLGS, 0)
+VSX_MADD(xvmsubdp, 2, float64, VsrD(i), MSUB_FLGS, 0)
+VSX_MADD(xvnmadddp, 2, float64, VsrD(i), NMADD_FLGS, 0)
+VSX_MADD(xvnmsubdp, 2, float64, VsrD(i), NMSUB_FLGS, 0)
 
-VSX_MADD(xvmaddsp, 4, float32, VsrW(i), MADD_FLGS, 0, 0)
-VSX_MADD(xvmsubsp, 4, float32, VsrW(i), MSUB_FLGS, 0, 0)
-VSX_MADD(xvnmaddsp, 4, float32, VsrW(i), NMADD_FLGS, 0, 0)
-VSX_MADD(xvnmsubsp, 4, float32, VsrW(i), NMSUB_FLGS, 0, 0)
+VSX_MADD(xvmaddsp, 4, float32, VsrW(i), MADD_FLGS, 0)
+VSX_MADD(xvmsubsp, 4, float32, VsrW(i), MSUB_FLGS, 0)
+VSX_MADD(xvnmaddsp, 4, float32, VsrW(i), NMADD_FLGS, 0)
+VSX_MADD(xvnmsubsp, 4, float32, VsrW(i), NMSUB_FLGS, 0)
 
 /*
  * VSX_MADDQ - VSX floating point quad-precision multiply/add