diff mbox series

[05/10] target/ppc: Implemented vector divide extended word

Message ID 20220330202515.66554-6-lucas.araujo@eldorado.org.br (mailing list archive)
State New, archived
Headers show
Series VDIV/VMOD Implementation | expand

Commit Message

Lucas Mateus Martins Araujo e Castro March 30, 2022, 8:25 p.m. UTC
From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>

Implement the following PowerISA v3.1 instructions:
vdivesw: Vector Divide Extended Signed Word
vdiveuw: Vector Divide Extended Unsigned Word
Results for the cases the ISA leaves undefined are based on mambo's behavior.

Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
---
 target/ppc/insn32.decode            |  3 ++
 target/ppc/translate/vmx-impl.c.inc | 65 +++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+)

Comments

Richard Henderson March 30, 2022, 9:24 p.m. UTC | #1
On 3/30/22 14:25, Lucas Mateus Castro(alqotel) wrote:
> +static void do_vx_dives_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
> +{
> +    TCGv_i64 res, val1, val2;
> +    TCGv_i64 zero = tcg_constant_i64(0);
> +    TCGv_i64 one =  tcg_constant_i64(1);
> +
> +    res = tcg_temp_new_i64();
> +    val1 = tcg_temp_new_i64();
> +    val2 = tcg_temp_new_i64();
> +
> +    tcg_gen_ext_i32_i64(val1, a);
> +    tcg_gen_ext_i32_i64(val2, b);
> +
> +    /* return 0 if b = 0, so make b = 1 so the result doesn't fit in 32 bits*/
> +    tcg_gen_movcond_i64(TCG_COND_EQ, val2, val2, zero, one, val2);

Need int_min / -1 check.

> +    /* (a << 32)/b */
> +    tcg_gen_shli_i64(val1, val1, 32);
> +    tcg_gen_div_i64(res, val1, val2);
> +
> +    tcg_gen_ext32s_i64(val1, res);
> +    /* if result is undefined (quotient doesn't fit in 32 bits) return 0 */
> +    tcg_gen_movcond_i64(TCG_COND_EQ, res, res, val1, res, zero);

Again, I don't see the point in producing 0 for undefined.

> +    tcg_gen_ext_i32_i64(val1, a);
> +    tcg_gen_extu_i32_i64(val2, b);

Better with extu for val1, just because a is logically unsigned.


r~
diff mbox series

Patch

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 3a88a0b5bc..8c115c9c60 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -712,3 +712,6 @@  VDIVSD          000100 ..... ..... ..... 00111001011    @VX
 VDIVUD          000100 ..... ..... ..... 00011001011    @VX
 VDIVSQ          000100 ..... ..... ..... 00100001011    @VX
 VDIVUQ          000100 ..... ..... ..... 00000001011    @VX
+
+VDIVESW         000100 ..... ..... ..... 01110001011    @VX
+VDIVEUW         000100 ..... ..... ..... 01010001011    @VX
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index 949e47be1c..752f3af659 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -3287,6 +3287,71 @@  TRANS_VDIV_VMOD(ISA310, VDIVUD, MO_64, NULL, do_vx_divu_i64)
 TRANS_FLAGS2(ISA310, VDIVSQ, do_vx_helper, gen_helper_VDIVSQ)
 TRANS_FLAGS2(ISA310, VDIVUQ, do_vx_helper, gen_helper_VDIVUQ)
 
+/*
+ * Emit TCG ops for one word element of vdivesw (Vector Divide Extended
+ * Signed Word): t = ((int64_t)a << 32) / b, using signed division.
+ *
+ * @t: destination element
+ * @a: dividend element (sign-extended, then shifted into the upper half)
+ * @b: divisor element (sign-extended; not modified)
+ *
+ * The element is set to 0 in every case where the quotient cannot be
+ * represented as a signed 32-bit value, which includes b == 0 and
+ * (a << 32) == INT64_MIN with b == -1.
+ */
+static void do_vx_dives_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i64 res, val1, val2, bad;
+    TCGv_i64 zero = tcg_constant_i64(0);
+    TCGv_i64 one =  tcg_constant_i64(1);
+
+    res = tcg_temp_new_i64();
+    val1 = tcg_temp_new_i64();
+    val2 = tcg_temp_new_i64();
+    bad = tcg_temp_new_i64();
+
+    tcg_gen_ext_i32_i64(val1, a);
+    tcg_gen_ext_i32_i64(val2, b);
+
+    /* The real dividend is (a << 32) */
+    tcg_gen_shli_i64(val1, val1, 32);
+
+    /*
+     * A signed 64-bit division must never see a zero divisor or the
+     * INT64_MIN / -1 overflow: TCG leaves both undefined and the host
+     * may fault.  bad = (b == 0) | ((a << 32) == INT64_MIN & b == -1).
+     * res is free to use as scratch until the division below.
+     */
+    tcg_gen_setcondi_i64(TCG_COND_EQ, bad, val1, INT64_MIN);
+    tcg_gen_setcondi_i64(TCG_COND_EQ, res, val2, -1);
+    tcg_gen_and_i64(bad, bad, res);
+    tcg_gen_setcondi_i64(TCG_COND_EQ, res, val2, 0);
+    tcg_gen_or_i64(bad, bad, res);
+
+    /*
+     * In the bad cases divide by 1 instead: the quotient is then (a << 32),
+     * which either is 0 or does not fit in 32 bits, so the range check
+     * below yields 0 for all of them.
+     */
+    tcg_gen_movcond_i64(TCG_COND_NE, val2, bad, zero, one, val2);
+
+    /* (a << 32)/b */
+    tcg_gen_div_i64(res, val1, val2);
+
+    /*
+     * The quotient fits iff sign-extending its low 32 bits reproduces it;
+     * if result is undefined (quotient doesn't fit in 32 bits) return 0
+     */
+    tcg_gen_ext32s_i64(val1, res);
+    tcg_gen_movcond_i64(TCG_COND_EQ, res, res, val1, res, zero);
+    tcg_gen_extrl_i64_i32(t, res);
+
+    tcg_temp_free_i64(res);
+    tcg_temp_free_i64(val1);
+    tcg_temp_free_i64(val2);
+    tcg_temp_free_i64(bad);
+}
+
+/*
+ * Emit TCG ops for one word element of vdiveuw (Vector Divide Extended
+ * Unsigned Word): t = ((uint64_t)a << 32) / b, using unsigned division.
+ *
+ * @t: destination element
+ * @a: dividend element (zero-extended, then shifted into the upper half)
+ * @b: divisor element (zero-extended; not modified)
+ *
+ * The element is set to 0 when b == 0 or when the quotient does not fit
+ * in 32 bits.
+ */
+static void do_vx_diveu_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i64 val1, val2;
+    TCGv_i32 h, l;
+    TCGv_i32 zero = tcg_constant_i32(0);
+    TCGv_i64 zero64 = tcg_constant_i64(0);
+    TCGv_i64 one64 = tcg_constant_i64(1);
+
+    val1 = tcg_temp_new_i64();
+    val2 = tcg_temp_new_i64();
+    h = tcg_temp_new_i32();
+    l = tcg_temp_new_i32();
+
+    /* Both operands are unsigned, so widen them with zero extension */
+    tcg_gen_extu_i32_i64(val1, a);
+    tcg_gen_extu_i32_i64(val2, b);
+
+    /*
+     * return 0 if b = 0, so make b = 1 so the result doesn't fit in 32 bits.
+     * The substitution is done on the local 64-bit copy so the caller's
+     * b operand is left untouched.
+     */
+    tcg_gen_movcond_i64(TCG_COND_EQ, val2, val2, zero64, one64, val2);
+
+    /* (a << 32)/b */
+    tcg_gen_shli_i64(val1, val1, 32);
+    tcg_gen_divu_i64(val1, val1, val2);
+
+    /* The quotient fits in 32 bits iff its upper half is zero */
+    tcg_gen_extrh_i64_i32(h, val1);
+    tcg_gen_extrl_i64_i32(l, val1);
+    /* if result is undefined (quotient doesn't fit in 32 bits) return 0 */
+    tcg_gen_movcond_i32(TCG_COND_EQ, t, h, zero, l, zero);
+    tcg_temp_free_i32(h);
+    tcg_temp_free_i32(l);
+    tcg_temp_free_i64(val1);
+    tcg_temp_free_i64(val2);
+}
+
+/*
+ * Translators for vdivesw / vdiveuw (Vector Divide Extended Signed /
+ * Unsigned Word).  Each passes its per-element i32 expander and NULL as
+ * the last argument (presumably the i64 expander slot, since VDIVUD passes
+ * do_vx_divu_i64 there -- confirm against the macro definition).
+ */
+TRANS_VDIV_VMOD(ISA310, VDIVESW, MO_32, do_vx_dives_i32, NULL)
+TRANS_VDIV_VMOD(ISA310, VDIVEUW, MO_32, do_vx_diveu_i32, NULL)
+
 #undef GEN_VR_LDX
 #undef GEN_VR_STX
 #undef GEN_VR_LVE