diff mbox series

[v2,4/9] target/ppc: Implemented vector divide extended word

Message ID 20220405195558.66144-5-lucas.araujo@eldorado.org.br (mailing list archive)
State New, archived
Headers show
Series VDIV/VMOD Implementation | expand

Commit Message

Lucas Mateus Martins Araujo e Castro April 5, 2022, 7:55 p.m. UTC
From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>

Implement the following PowerISA v3.1 instructions:
vdivesw: Vector Divide Extended Signed Word
vdiveuw: Vector Divide Extended Unsigned Word

Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
---
 target/ppc/insn32.decode            |  3 ++
 target/ppc/translate/vmx-impl.c.inc | 48 +++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)

Comments

Richard Henderson April 12, 2022, 1:56 a.m. UTC | #1
On 4/5/22 12:55, Lucas Mateus Castro(alqotel) wrote:
> From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>
> 
> Implement the following PowerISA v3.1 instructions:
> vdivesw: Vector Divide Extended Signed Word
> vdiveuw: Vector Divide Extended Unsigned Word
> 
> Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
> ---
>   target/ppc/insn32.decode            |  3 ++
>   target/ppc/translate/vmx-impl.c.inc | 48 +++++++++++++++++++++++++++++
>   2 files changed, 51 insertions(+)
> 
> diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
> index 3a88a0b5bc..8c115c9c60 100644
> --- a/target/ppc/insn32.decode
> +++ b/target/ppc/insn32.decode
> @@ -712,3 +712,6 @@ VDIVSD          000100 ..... ..... ..... 00111001011    @VX
>   VDIVUD          000100 ..... ..... ..... 00011001011    @VX
>   VDIVSQ          000100 ..... ..... ..... 00100001011    @VX
>   VDIVUQ          000100 ..... ..... ..... 00000001011    @VX
> +
> +VDIVESW         000100 ..... ..... ..... 01110001011    @VX
> +VDIVEUW         000100 ..... ..... ..... 01010001011    @VX
> diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
> index bac0db7128..8799e945bd 100644
> --- a/target/ppc/translate/vmx-impl.c.inc
> +++ b/target/ppc/translate/vmx-impl.c.inc
> @@ -3295,6 +3295,54 @@ TRANS_VDIV_VMOD(ISA310, VDIVUD, MO_64, NULL, do_divud)
>   TRANS_FLAGS2(ISA310, VDIVSQ, do_vx_helper, gen_helper_VDIVSQ)
>   TRANS_FLAGS2(ISA310, VDIVUQ, do_vx_helper, gen_helper_VDIVUQ)
>   
> +static void do_dives_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
> +{
> +    TCGv_i64 val1, val2;
> +
> +    val1 = tcg_temp_new_i64();
> +    val2 = tcg_temp_new_i64();
> +
> +    tcg_gen_ext_i32_i64(val1, a);
> +    tcg_gen_ext_i32_i64(val2, b);
> +
> +    /* (a << 32)/b */
> +    tcg_gen_shli_i64(val1, val1, 32);
> +    tcg_gen_div_i64(val1, val1, val2);
> +
> +    /* if quotient doesn't fit in 32 bits the result is undefined */
> +    tcg_gen_extrl_i64_i32(t, val1);
> +
> +    tcg_temp_free_i64(val1);
> +    tcg_temp_free_i64(val2);
> +}
> +
> +static void do_diveu_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
> +{
> +    TCGv_i64 val1, val2;
> +
> +    val1 = tcg_temp_new_i64();
> +    val2 = tcg_temp_new_i64();
> +
> +    tcg_gen_extu_i32_i64(val1, a);
> +    tcg_gen_extu_i32_i64(val2, b);
> +
> +    /* (a << 32)/b */
> +    tcg_gen_shli_i64(val1, val1, 32);
> +    tcg_gen_divu_i64(val1, val1, val2);
> +
> +    /* if quotient doesn't fit in 32 bits the result is undefined */
> +    tcg_gen_extrl_i64_i32(t, val1);
> +
> +    tcg_temp_free_i64(val1);
> +    tcg_temp_free_i64(val2);
> +}
> +
> +DO_VDIV_VMOD(do_divesw, 32, do_dives_i32, true)
> +DO_VDIV_VMOD(do_diveuw, 32, do_diveu_i32, false)

Oh, I see, you do have one more use for the full min/-1 treatment.
It would still be nice if the macro were minimal and you used a callback.


r~
diff mbox series

Patch

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 3a88a0b5bc..8c115c9c60 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -712,3 +712,6 @@  VDIVSD          000100 ..... ..... ..... 00111001011    @VX
 VDIVUD          000100 ..... ..... ..... 00011001011    @VX
 VDIVSQ          000100 ..... ..... ..... 00100001011    @VX
 VDIVUQ          000100 ..... ..... ..... 00000001011    @VX
+
+VDIVESW         000100 ..... ..... ..... 01110001011    @VX
+VDIVEUW         000100 ..... ..... ..... 01010001011    @VX
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index bac0db7128..8799e945bd 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -3295,6 +3295,54 @@  TRANS_VDIV_VMOD(ISA310, VDIVUD, MO_64, NULL, do_divud)
 TRANS_FLAGS2(ISA310, VDIVSQ, do_vx_helper, gen_helper_VDIVSQ)
 TRANS_FLAGS2(ISA310, VDIVUQ, do_vx_helper, gen_helper_VDIVUQ)
 
+static void do_dives_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i64 val1, val2;
+
+    val1 = tcg_temp_new_i64();
+    val2 = tcg_temp_new_i64();
+
+    tcg_gen_ext_i32_i64(val1, a);
+    tcg_gen_ext_i32_i64(val2, b);
+
+    /* signed extended divide: (a << 32) / b, computed in 64 bits */
+    tcg_gen_shli_i64(val1, val1, 32);
+    tcg_gen_div_i64(val1, val1, val2);
+
+    /* keep the low word; a quotient wider than 32 bits is undefined */
+    tcg_gen_extrl_i64_i32(t, val1);
+
+    tcg_temp_free_i64(val1);
+    tcg_temp_free_i64(val2);
+}
+
+static void do_diveu_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i64 val1, val2;
+
+    val1 = tcg_temp_new_i64();
+    val2 = tcg_temp_new_i64();
+
+    tcg_gen_extu_i32_i64(val1, a);
+    tcg_gen_extu_i32_i64(val2, b);
+
+    /* unsigned extended divide: (a << 32) / b, computed in 64 bits */
+    tcg_gen_shli_i64(val1, val1, 32);
+    tcg_gen_divu_i64(val1, val1, val2);
+
+    /* keep the low word; a quotient wider than 32 bits is undefined */
+    tcg_gen_extrl_i64_i32(t, val1);
+
+    tcg_temp_free_i64(val1);
+    tcg_temp_free_i64(val2);
+}
+
+DO_VDIV_VMOD(do_divesw, 32, do_dives_i32, true)
+DO_VDIV_VMOD(do_diveuw, 32, do_diveu_i32, false)
+
+TRANS_VDIV_VMOD(ISA310, VDIVESW, MO_32, do_divesw, NULL)
+TRANS_VDIV_VMOD(ISA310, VDIVEUW, MO_32, do_diveuw, NULL)
+
 #undef DO_VDIV_VMOD
 
 #undef GEN_VR_LDX