Message ID | 20220405195558.66144-3-lucas.araujo@eldorado.org.br (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | VDIV/VMOD Implementation | expand |
On 4/5/22 12:55, Lucas Mateus Castro(alqotel) wrote: > From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br> > > Implement the following PowerISA v3.1 instructions: > vdivsw: Vector Divide Signed Word > vdivuw: Vector Divide Unsigned Word > vdivsd: Vector Divide Signed Doubleword > vdivud: Vector Divide Unsigned Doubleword > > Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br> > --- > target/ppc/insn32.decode | 7 ++++ > target/ppc/translate/vmx-impl.c.inc | 59 +++++++++++++++++++++++++++++ > 2 files changed, 66 insertions(+) > > diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode > index ac2d3da9a7..597768558b 100644 > --- a/target/ppc/insn32.decode > +++ b/target/ppc/insn32.decode > @@ -703,3 +703,10 @@ XVTLSBB 111100 ... -- 00010 ..... 111011011 . - @XX2_bf_xb > &XL_s s:uint8_t > @XL_s ......-------------- s:1 .......... - &XL_s > RFEBB 010011-------------- . 0010010010 - @XL_s > + > +## Vector Division Instructions > + > +VDIVSW 000100 ..... ..... ..... 00110001011 @VX > +VDIVUW 000100 ..... ..... ..... 00010001011 @VX > +VDIVSD 000100 ..... ..... ..... 00111001011 @VX > +VDIVUD 000100 ..... ..... ..... 00011001011 @VX > diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc > index 6101bca3fd..be35d6fdf3 100644 > --- a/target/ppc/translate/vmx-impl.c.inc > +++ b/target/ppc/translate/vmx-impl.c.inc > @@ -3236,6 +3236,65 @@ TRANS(VMULHSD, do_vx_mulh, true , do_vx_vmulhd_i64) > TRANS(VMULHUW, do_vx_mulh, false, do_vx_vmulhw_i64) > TRANS(VMULHUD, do_vx_mulh, false, do_vx_vmulhd_i64) > > +#define TRANS_VDIV_VMOD(FLAGS, NAME, VECE, FNI4_FUNC, FNI8_FUNC) \ > +static bool trans_##NAME(DisasContext *ctx, arg_VX *a) \ > +{ \ > + static const GVecGen3 op = { \ > + .fni4 = FNI4_FUNC, \ > + .fni8 = FNI8_FUNC, \ > + .vece = VECE \ > + }; \ > + \ > + REQUIRE_VECTOR(ctx); \ > + REQUIRE_INSNS_FLAGS2(ctx, FLAGS); \ > + \ > + tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra), \ > + avr_full_offset(a->vrb), 16, 16, &op); \ > + \ > + return true; \ > +} Better to use a standalone helper and TRANS() -- the op structure doesn't *need* to be static const. > + > +#define DO_VDIV_VMOD(NAME, SZ, DIV, SIGNED) \ > +static void NAME(TCGv_i##SZ t, TCGv_i##SZ a, TCGv_i##SZ b) \ > +{ \ > + /* \ > + * If N/0 the instruction used by the backend might deliver \ > + * an invalid division signal to the process, so if b = 0 return \ > + * N/1 and if signed instruction, the same for a = int_min, b = -1 \ > + */ \ > + if (SIGNED) { \ > + TCGv_i##SZ t0 = tcg_temp_new_i##SZ(); \ > + TCGv_i##SZ t1 = tcg_temp_new_i##SZ(); \ > + tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t0, a, INT##SZ##_MIN); \ > + tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t1, b, -1); \ > + tcg_gen_and_i##SZ(t0, t0, t1); \ > + tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t1, b, 0); \ > + tcg_gen_or_i##SZ(t0, t0, t1); \ > + tcg_gen_movi_i##SZ(t1, 0); \ > + tcg_gen_movcond_i##SZ(TCG_COND_NE, b, t0, t1, t0, b); \ > + DIV(t, a, b); \ > + tcg_temp_free_i##SZ(t0); \ > + tcg_temp_free_i##SZ(t1); \ > + } else { \ > + TCGv_i##SZ zero = tcg_constant_i##SZ(0); \ > + TCGv_i##SZ one = tcg_constant_i##SZ(1); \ > + tcg_gen_movcond_i##SZ(TCG_COND_EQ, b, b, zero, one, b); \ > + DIV(t, a, b); \ > + } \ > +} This is overkill. Even if you keep some macros, passing in SIGNED and using it in the outermost if is a sign you should split the macro in two. However, only tcg_gen_div_i64 really requires the full signed treatment; tcg_gen_div_i32 can be better handled by extending to i64, because INT32_MIN / -1ULL does not trap. I think this would be much easier to read as 4 separate functions. r~
On 11/04/2022 22:51, Richard Henderson wrote: > > On 4/5/22 12:55, Lucas Mateus Castro(alqotel) wrote: > >> + >> +#define DO_VDIV_VMOD(NAME, SZ, DIV, >> SIGNED) \ >> +static void NAME(TCGv_i##SZ t, TCGv_i##SZ a, TCGv_i##SZ >> b) \ >> +{ \ >> + /* \ >> + * If N/0 the instruction used by the backend might >> deliver \ >> + * an invalid division signal to the process, so if b = 0 >> return \ >> + * N/1 and if signed instruction, the same for a = int_min, b = >> -1 \ >> + */ \ >> + if (SIGNED) >> { \ >> + TCGv_i##SZ t0 = >> tcg_temp_new_i##SZ(); \ >> + TCGv_i##SZ t1 = >> tcg_temp_new_i##SZ(); \ >> + tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t0, a, >> INT##SZ##_MIN); \ >> + tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t1, b, >> -1); \ >> + tcg_gen_and_i##SZ(t0, t0, >> t1); \ >> + tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t1, b, >> 0); \ >> + tcg_gen_or_i##SZ(t0, t0, >> t1); \ >> + tcg_gen_movi_i##SZ(t1, >> 0); \ >> + tcg_gen_movcond_i##SZ(TCG_COND_NE, b, t0, t1, t0, >> b); \ >> + DIV(t, a, >> b); \ >> + tcg_temp_free_i##SZ(t0); \ >> + tcg_temp_free_i##SZ(t1); \ >> + } else >> { \ >> + TCGv_i##SZ zero = >> tcg_constant_i##SZ(0); \ >> + TCGv_i##SZ one = >> tcg_constant_i##SZ(1); \ >> + tcg_gen_movcond_i##SZ(TCG_COND_EQ, b, b, zero, one, >> b); \ >> + DIV(t, a, >> b); \ >> + } \ >> +} > > This is overkill. Even if you keep some macros, passing in SIGNED and > using it in the > outermost if is a sign you should split the macro in two. > > However, only tcg_gen_div_i64 really requires the full signed > treatment; tcg_gen_div_i32 > can be better handled by extending to i64, because INT32_MIN / -1ULL > does not trap. > > I think this would be much easier to read as 4 separate functions. > > Ok, I'll change it to 4 different macros, move clz128 to int128.h and turn TRANS_VDIV_VMOD into do_vdiv_vmod function and call it with TRANS() in v3 > r~
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index ac2d3da9a7..597768558b 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -703,3 +703,10 @@ XVTLSBB 111100 ... -- 00010 ..... 111011011 . - @XX2_bf_xb &XL_s s:uint8_t @XL_s ......-------------- s:1 .......... - &XL_s RFEBB 010011-------------- . 0010010010 - @XL_s + +## Vector Division Instructions + +VDIVSW 000100 ..... ..... ..... 00110001011 @VX +VDIVUW 000100 ..... ..... ..... 00010001011 @VX +VDIVSD 000100 ..... ..... ..... 00111001011 @VX +VDIVUD 000100 ..... ..... ..... 00011001011 @VX diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index 6101bca3fd..be35d6fdf3 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -3236,6 +3236,65 @@ TRANS(VMULHSD, do_vx_mulh, true , do_vx_vmulhd_i64) TRANS(VMULHUW, do_vx_mulh, false, do_vx_vmulhw_i64) TRANS(VMULHUD, do_vx_mulh, false, do_vx_vmulhd_i64) +#define TRANS_VDIV_VMOD(FLAGS, NAME, VECE, FNI4_FUNC, FNI8_FUNC) \ +static bool trans_##NAME(DisasContext *ctx, arg_VX *a) \ +{ \ + static const GVecGen3 op = { \ + .fni4 = FNI4_FUNC, \ + .fni8 = FNI8_FUNC, \ + .vece = VECE \ + }; \ + \ + REQUIRE_VECTOR(ctx); \ + REQUIRE_INSNS_FLAGS2(ctx, FLAGS); \ + \ + tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra), \ + avr_full_offset(a->vrb), 16, 16, &op); \ + \ + return true; \ +} + +#define DO_VDIV_VMOD(NAME, SZ, DIV, SIGNED) \ +static void NAME(TCGv_i##SZ t, TCGv_i##SZ a, TCGv_i##SZ b) \ +{ \ + /* \ + * If N/0 the instruction used by the backend might deliver \ + * an invalid division signal to the process, so if b = 0 return \ + * N/1 and if signed instruction, the same for a = int_min, b = -1 \ + */ \ + if (SIGNED) { \ + TCGv_i##SZ t0 = tcg_temp_new_i##SZ(); \ + TCGv_i##SZ t1 = tcg_temp_new_i##SZ(); \ + tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t0, a, INT##SZ##_MIN); \ + tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t1, b, -1); \ + tcg_gen_and_i##SZ(t0, t0, t1); \ + tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t1, b, 0); \ + tcg_gen_or_i##SZ(t0, t0, t1); \ + tcg_gen_movi_i##SZ(t1, 0); \ + tcg_gen_movcond_i##SZ(TCG_COND_NE, b, t0, t1, t0, b); \ + DIV(t, a, b); \ + tcg_temp_free_i##SZ(t0); \ + tcg_temp_free_i##SZ(t1); \ + } else { \ + TCGv_i##SZ zero = tcg_constant_i##SZ(0); \ + TCGv_i##SZ one = tcg_constant_i##SZ(1); \ + tcg_gen_movcond_i##SZ(TCG_COND_EQ, b, b, zero, one, b); \ + DIV(t, a, b); \ + } \ +} + +DO_VDIV_VMOD(do_divsw, 32, tcg_gen_div_i32, true) +DO_VDIV_VMOD(do_divuw, 32, tcg_gen_divu_i32, false) +DO_VDIV_VMOD(do_divsd, 64, tcg_gen_div_i64, true) +DO_VDIV_VMOD(do_divud, 64, tcg_gen_divu_i64, false) + +TRANS_VDIV_VMOD(ISA310, VDIVSW, MO_32, do_divsw, NULL) +TRANS_VDIV_VMOD(ISA310, VDIVUW, MO_32, do_divuw, NULL) +TRANS_VDIV_VMOD(ISA310, VDIVSD, MO_64, NULL, do_divsd) +TRANS_VDIV_VMOD(ISA310, VDIVUD, MO_64, NULL, do_divud) + +#undef DO_VDIV_VMOD + #undef GEN_VR_LDX #undef GEN_VR_STX #undef GEN_VR_LVE