Message ID | 20220405195558.66144-5-lucas.araujo@eldorado.org.br (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | VDIV/VMOD Implementation | expand |
On 4/5/22 12:55, Lucas Mateus Castro(alqotel) wrote: > From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br> > > Implement the following PowerISA v3.1 instructions: > vdivesw: Vector Divide Extended Signed Word > vdiveuw: Vector Divide Extended Unsigned Word > > Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br> > --- > target/ppc/insn32.decode | 3 ++ > target/ppc/translate/vmx-impl.c.inc | 48 +++++++++++++++++++++++++++++ > 2 files changed, 51 insertions(+) > > diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode > index 3a88a0b5bc..8c115c9c60 100644 > --- a/target/ppc/insn32.decode > +++ b/target/ppc/insn32.decode > @@ -712,3 +712,6 @@ VDIVSD 000100 ..... ..... ..... 00111001011 @VX > VDIVUD 000100 ..... ..... ..... 00011001011 @VX > VDIVSQ 000100 ..... ..... ..... 00100001011 @VX > VDIVUQ 000100 ..... ..... ..... 00000001011 @VX > + > +VDIVESW 000100 ..... ..... ..... 01110001011 @VX > +VDIVEUW 000100 ..... ..... ..... 01010001011 @VX > diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc > index bac0db7128..8799e945bd 100644 > --- a/target/ppc/translate/vmx-impl.c.inc > +++ b/target/ppc/translate/vmx-impl.c.inc > @@ -3295,6 +3295,54 @@ TRANS_VDIV_VMOD(ISA310, VDIVUD, MO_64, NULL, do_divud) > TRANS_FLAGS2(ISA310, VDIVSQ, do_vx_helper, gen_helper_VDIVSQ) > TRANS_FLAGS2(ISA310, VDIVUQ, do_vx_helper, gen_helper_VDIVUQ) > > +static void do_dives_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) > +{ > + TCGv_i64 val1, val2; > + > + val1 = tcg_temp_new_i64(); > + val2 = tcg_temp_new_i64(); > + > + tcg_gen_ext_i32_i64(val1, a); > + tcg_gen_ext_i32_i64(val2, b); > + > + /* (a << 32)/b */ > + tcg_gen_shli_i64(val1, val1, 32); > + tcg_gen_div_i64(val1, val1, val2); > + > + /* if quotient doesn't fit in 32 bits the result is undefined */ > + tcg_gen_extrl_i64_i32(t, val1); > + > + tcg_temp_free_i64(val1); > + tcg_temp_free_i64(val2); > +} > + > +static void do_diveu_i32(TCGv_i32 t, TCGv_i32 a, 
TCGv_i32 b) > +{ > + TCGv_i64 val1, val2; > + > + val1 = tcg_temp_new_i64(); > + val2 = tcg_temp_new_i64(); > + > + tcg_gen_extu_i32_i64(val1, a); > + tcg_gen_extu_i32_i64(val2, b); > + > + /* (a << 32)/b */ > + tcg_gen_shli_i64(val1, val1, 32); > + tcg_gen_divu_i64(val1, val1, val2); > + > + /* if quotient doesn't fit in 32 bits the result is undefined */ > + tcg_gen_extrl_i64_i32(t, val1); > + > + tcg_temp_free_i64(val1); > + tcg_temp_free_i64(val2); > +} > + > +DO_VDIV_VMOD(do_divesw, 32, do_dives_i32, true) > +DO_VDIV_VMOD(do_diveuw, 32, do_diveu_i32, false) Oh, I see, you do have one more use for the full min/-1 treatment. It would still be nice if the macro were minimal and you used a callback. r~
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 3a88a0b5bc..8c115c9c60 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -712,3 +712,6 @@ VDIVSD 000100 ..... ..... ..... 00111001011 @VX VDIVUD 000100 ..... ..... ..... 00011001011 @VX VDIVSQ 000100 ..... ..... ..... 00100001011 @VX VDIVUQ 000100 ..... ..... ..... 00000001011 @VX + +VDIVESW 000100 ..... ..... ..... 01110001011 @VX +VDIVEUW 000100 ..... ..... ..... 01010001011 @VX diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index bac0db7128..8799e945bd 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -3295,6 +3295,54 @@ TRANS_VDIV_VMOD(ISA310, VDIVUD, MO_64, NULL, do_divud) TRANS_FLAGS2(ISA310, VDIVSQ, do_vx_helper, gen_helper_VDIVSQ) TRANS_FLAGS2(ISA310, VDIVUQ, do_vx_helper, gen_helper_VDIVUQ) +static void do_dives_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i64 val1, val2; + + val1 = tcg_temp_new_i64(); + val2 = tcg_temp_new_i64(); + + tcg_gen_ext_i32_i64(val1, a); + tcg_gen_ext_i32_i64(val2, b); + + /* (a << 32)/b */ + tcg_gen_shli_i64(val1, val1, 32); + tcg_gen_div_i64(val1, val1, val2); + + /* if quotient doesn't fit in 32 bits the result is undefined */ + tcg_gen_extrl_i64_i32(t, val1); + + tcg_temp_free_i64(val1); + tcg_temp_free_i64(val2); +} + +static void do_diveu_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i64 val1, val2; + + val1 = tcg_temp_new_i64(); + val2 = tcg_temp_new_i64(); + + tcg_gen_extu_i32_i64(val1, a); + tcg_gen_extu_i32_i64(val2, b); + + /* (a << 32)/b */ + tcg_gen_shli_i64(val1, val1, 32); + tcg_gen_divu_i64(val1, val1, val2); + + /* if quotient doesn't fit in 32 bits the result is undefined */ + tcg_gen_extrl_i64_i32(t, val1); + + tcg_temp_free_i64(val1); + tcg_temp_free_i64(val2); +} + +DO_VDIV_VMOD(do_divesw, 32, do_dives_i32, true) +DO_VDIV_VMOD(do_diveuw, 32, do_diveu_i32, false) + +TRANS_VDIV_VMOD(ISA310, VDIVESW, MO_32, do_divesw, NULL) 
+TRANS_VDIV_VMOD(ISA310, VDIVEUW, MO_32, do_diveuw, NULL) + #undef DO_VDIV_VMOD #undef GEN_VR_LDX