diff mbox series

[03/10] target/ppc: Implemented vector divide instructions

Message ID 20220330202515.66554-4-lucas.araujo@eldorado.org.br (mailing list archive)
State New, archived
Headers show
Series VDIV/VMOD Implementation | expand

Commit Message

Lucas Mateus Martins Araujo e Castro March 30, 2022, 8:25 p.m. UTC
From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>

Implement the following PowerISA v3.1 instructions:
vdivsw: Vector Divide Signed Word
vdivuw: Vector Divide Unsigned Word 
vdivsd: Vector Divide Signed Doubleword
vdivud: Vector Divide Unsigned Doubleword

Hardware behavior based on mambo

Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
---
 target/ppc/insn32.decode            |  7 +++++
 target/ppc/translate/vmx-impl.c.inc | 49 +++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)

Comments

Richard Henderson March 30, 2022, 9:06 p.m. UTC | #1
On 3/30/22 14:25, Lucas Mateus Castro(alqotel) wrote:
> +#define TRANS_VDIV_VMOD(FLAGS, NAME, VECE, FNI4_FUNC, FNI8_FUNC)        \
> +static bool trans_##NAME(DisasContext *ctx, arg_VX *a)                  \
> +{                                                                       \
> +    static const GVecGen3 op[2] = {                                     \
> +        {                                                               \
> +            .fni4 = FNI4_FUNC,                                          \
> +            .fni8 = FNI8_FUNC,                                          \
> +            .vece = MO_32                                               \
> +        },                                                              \
> +        {                                                               \
> +            .fni4 = FNI4_FUNC,                                          \
> +            .fni8 = FNI8_FUNC,                                          \
> +            .vece = MO_64                                               \
> +        },                                                              \
> +    };                                                                  \

There is zero point in having a two element array here:
(1) VECE is a constant
(2) The unused array element is actively wrong.

> +#define DIV_VEC(NAME, SZ, DIV)                                          \
> +static void do_vx_##NAME(TCGv_##SZ t, TCGv_##SZ a, TCGv_##SZ b)         \
> +{                                                                       \
> +    TCGv_##SZ zero = tcg_constant_##SZ(0), one = tcg_constant_##SZ(1);  \
> +    /*                                                                  \
> +     *  If N/0 the instruction used by the backend might deliver        \
> +     *  a signal to the process and the hardware returns 0 when         \
> +     *  N/0, so if b = 0 return 0/1                                     \
> +     */                                                                 \
> +    tcg_gen_movcond_##SZ(TCG_COND_EQ, a, b, zero, zero, a);             \
> +    tcg_gen_movcond_##SZ(TCG_COND_EQ, b, b, zero, one, b);              \
> +    DIV(t, a, b);                                                       \
> +}

The manual says N/0 = undefined.  I don't think it's important to require 0.

The signed versions still need to check for int_min / -1, which will fault on x86. 
Compare vs gen_op_arith_div{w,d}.


r~
Lucas Mateus Martins Araujo e Castro March 31, 2022, 6:28 p.m. UTC | #2
On 30/03/2022 18:06, Richard Henderson wrote:
>
> On 3/30/22 14:25, Lucas Mateus Castro(alqotel) wrote:
>> +#define TRANS_VDIV_VMOD(FLAGS, NAME, VECE, FNI4_FUNC, 
>> FNI8_FUNC)        \
>> +static bool trans_##NAME(DisasContext *ctx, arg_VX 
>> *a)                  \
>> +{ \
>> +    static const GVecGen3 op[2] = 
>> {                                     \
>> + { \
>> +            .fni4 = 
>> FNI4_FUNC,                                          \
>> +            .fni8 = 
>> FNI8_FUNC,                                          \
>> +            .vece = 
>> MO_32                                               \
>> + }, \
>> + { \
>> +            .fni4 = 
>> FNI4_FUNC,                                          \
>> +            .fni8 = 
>> FNI8_FUNC,                                          \
>> +            .vece = 
>> MO_64                                               \
>> + }, \
>> + }; \
>
> There is zero point in having a two element array here:
> (1) VECE is a constant
> (2) The unused array element is actively wrong.
Ok, I'll set VECE based on which function is NULL
>
>> +#define DIV_VEC(NAME, SZ, 
>> DIV)                                          \
>> +static void do_vx_##NAME(TCGv_##SZ t, TCGv_##SZ a, TCGv_##SZ 
>> b)         \
>> +{ \
>> +    TCGv_##SZ zero = tcg_constant_##SZ(0), one = 
>> tcg_constant_##SZ(1);  \
>> + /* \
>> +     *  If N/0 the instruction used by the backend might 
>> deliver        \
>> +     *  a signal to the process and the hardware returns 0 
>> when         \
>> +     *  N/0, so if b = 0 return 
>> 0/1                                     \
>> + */ \
>> +    tcg_gen_movcond_##SZ(TCG_COND_EQ, a, b, zero, zero, 
>> a);             \
>> +    tcg_gen_movcond_##SZ(TCG_COND_EQ, b, b, zero, one, 
>> b);              \
>> +    DIV(t, a, 
>> b);                                                       \
>> +}
>
> The manual says N/0 = undefined.  I don't think it's important to 
> require 0.
My idea here was mostly to mimic the hardware behavior, testing on a 
Power9 both divw and divd result in 0 when N/0 and mambo results in 0 in 
vdiv* and vmod* when N/0, but yeah the PowerISA just said that it's 
undefined. I'll just set b = 1 if N/0 or int_min/-1 in v2 then.
>
> The signed versions still need to check for int_min / -1, which will 
> fault on x86.
> Compare vs gen_op_arith_div{w,d}.
My mistake, I'll add this check in v2
>
>
> r~
diff mbox series

Patch

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index ac2d3da9a7..597768558b 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -703,3 +703,10 @@  XVTLSBB         111100 ... -- 00010 ..... 111011011 . - @XX2_bf_xb
 &XL_s           s:uint8_t
 @XL_s           ......-------------- s:1 .......... -   &XL_s
 RFEBB           010011-------------- .   0010010010 -   @XL_s
+
+## Vector Division Instructions
+
+VDIVSW          000100 ..... ..... ..... 00110001011    @VX
+VDIVUW          000100 ..... ..... ..... 00010001011    @VX
+VDIVSD          000100 ..... ..... ..... 00111001011    @VX
+VDIVUD          000100 ..... ..... ..... 00011001011    @VX
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index 6101bca3fd..d96e804abb 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -3236,6 +3236,55 @@  TRANS(VMULHSD, do_vx_mulh, true , do_vx_vmulhd_i64)
 TRANS(VMULHUW, do_vx_mulh, false, do_vx_vmulhw_i64)
 TRANS(VMULHUD, do_vx_mulh, false, do_vx_vmulhd_i64)
 
+#define TRANS_VDIV_VMOD(FLAGS, NAME, VECE, FNI4_FUNC, FNI8_FUNC)        \
+static bool trans_##NAME(DisasContext *ctx, arg_VX *a)                  \
+{                                                                       \
+    static const GVecGen3 op[2] = {                                     \
+        {                                                               \
+            .fni4 = FNI4_FUNC,                                          \
+            .fni8 = FNI8_FUNC,                                          \
+            .vece = MO_32                                               \
+        },                                                              \
+        {                                                               \
+            .fni4 = FNI4_FUNC,                                          \
+            .fni8 = FNI8_FUNC,                                          \
+            .vece = MO_64                                               \
+        },                                                              \
+    };                                                                  \
+                                                                        \
+    REQUIRE_VECTOR(ctx);                                                \
+    REQUIRE_INSNS_FLAGS2(ctx, FLAGS);                                   \
+                                                                        \
+    tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra),    \
+                   avr_full_offset(a->vrb), 16, 16, &op[VECE - MO_32]); \
+                                                                        \
+    return true;                                                        \
+}
+
+#define DIV_VEC(NAME, SZ, DIV)                                          \
+static void do_vx_##NAME(TCGv_##SZ t, TCGv_##SZ a, TCGv_##SZ b)         \
+{                                                                       \
+    TCGv_##SZ zero = tcg_constant_##SZ(0), one = tcg_constant_##SZ(1);  \
+    /*                                                                  \
+     *  If N/0 the instruction used by the backend might deliver        \
+     *  a signal to the process and the hardware returns 0 when         \
+     *  N/0, so if b = 0 return 0/1                                     \
+     */                                                                 \
+    tcg_gen_movcond_##SZ(TCG_COND_EQ, a, b, zero, zero, a);             \
+    tcg_gen_movcond_##SZ(TCG_COND_EQ, b, b, zero, one, b);              \
+    DIV(t, a, b);                                                       \
+}
+
+DIV_VEC(div_i32 , i32, tcg_gen_div_i32)
+DIV_VEC(divu_i32, i32, tcg_gen_divu_i32)
+DIV_VEC(div_i64 , i64, tcg_gen_div_i64)
+DIV_VEC(divu_i64, i64, tcg_gen_divu_i64)
+
+TRANS_VDIV_VMOD(ISA310, VDIVSW, MO_32, do_vx_div_i32 , NULL)
+TRANS_VDIV_VMOD(ISA310, VDIVUW, MO_32, do_vx_divu_i32, NULL)
+TRANS_VDIV_VMOD(ISA310, VDIVSD, MO_64, NULL, do_vx_div_i64)
+TRANS_VDIV_VMOD(ISA310, VDIVUD, MO_64, NULL, do_vx_divu_i64)
+
 #undef GEN_VR_LDX
 #undef GEN_VR_STX
 #undef GEN_VR_LVE