Message ID | 20220330202515.66554-4-lucas.araujo@eldorado.org.br (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | VDIV/VMOD Implementation | expand |
On 3/30/22 14:25, Lucas Mateus Castro(alqotel) wrote: > +#define TRANS_VDIV_VMOD(FLAGS, NAME, VECE, FNI4_FUNC, FNI8_FUNC) \ > +static bool trans_##NAME(DisasContext *ctx, arg_VX *a) \ > +{ \ > + static const GVecGen3 op[2] = { \ > + { \ > + .fni4 = FNI4_FUNC, \ > + .fni8 = FNI8_FUNC, \ > + .vece = MO_32 \ > + }, \ > + { \ > + .fni4 = FNI4_FUNC, \ > + .fni8 = FNI8_FUNC, \ > + .vece = MO_64 \ > + }, \ > + }; \ There is zero point in having a two element array here: (1) VECE is a constant (2) The unused array element is actively wrong. > +#define DIV_VEC(NAME, SZ, DIV) \ > +static void do_vx_##NAME(TCGv_##SZ t, TCGv_##SZ a, TCGv_##SZ b) \ > +{ \ > + TCGv_##SZ zero = tcg_constant_##SZ(0), one = tcg_constant_##SZ(1); \ > + /* \ > + * If N/0 the instruction used by the backend might deliver \ > + * a signal to the process and the hardware returns 0 when \ > + * N/0, so if b = 0 return 0/1 \ > + */ \ > + tcg_gen_movcond_##SZ(TCG_COND_EQ, a, b, zero, zero, a); \ > + tcg_gen_movcond_##SZ(TCG_COND_EQ, b, b, zero, one, b); \ > + DIV(t, a, b); \ > +} The manual says N/0 = undefined. I don't think it's important to require 0. The signed versions still need to check for int_min / -1, which will fault on x86. Compare vs gen_op_arith_div{w,d}. r~
On 30/03/2022 18:06, Richard Henderson wrote: > > On 3/30/22 14:25, Lucas Mateus Castro(alqotel) wrote: >> +#define TRANS_VDIV_VMOD(FLAGS, NAME, VECE, FNI4_FUNC, >> FNI8_FUNC) \ >> +static bool trans_##NAME(DisasContext *ctx, arg_VX >> *a) \ >> +{ \ >> + static const GVecGen3 op[2] = >> { \ >> + { \ >> + .fni4 = >> FNI4_FUNC, \ >> + .fni8 = >> FNI8_FUNC, \ >> + .vece = >> MO_32 \ >> + }, \ >> + { \ >> + .fni4 = >> FNI4_FUNC, \ >> + .fni8 = >> FNI8_FUNC, \ >> + .vece = >> MO_64 \ >> + }, \ >> + }; \ > > There is zero point in having a two element array here: > (1) VECE is a constant > (2) The unused array element is actively wrong. Ok, I'll set VECE based on which function is NULL > >> +#define DIV_VEC(NAME, SZ, >> DIV) \ >> +static void do_vx_##NAME(TCGv_##SZ t, TCGv_##SZ a, TCGv_##SZ >> b) \ >> +{ \ >> + TCGv_##SZ zero = tcg_constant_##SZ(0), one = >> tcg_constant_##SZ(1); \ >> + /* \ >> + * If N/0 the instruction used by the backend might >> deliver \ >> + * a signal to the process and the hardware returns 0 >> when \ >> + * N/0, so if b = 0 return >> 0/1 \ >> + */ \ >> + tcg_gen_movcond_##SZ(TCG_COND_EQ, a, b, zero, zero, >> a); \ >> + tcg_gen_movcond_##SZ(TCG_COND_EQ, b, b, zero, one, >> b); \ >> + DIV(t, a, >> b); \ >> +} > > The manual says N/0 = undefined. I don't think it's important to > require 0. My idea here was mostly to mimic the hardware behavior, testing on a Power9 both divw and divd result in 0 when N/0 and mambo results in 0 in vdiv* and vmod* when N/0, but yeah the PowerISA just said that it's undefined. I'll just set b = 1 if N/0 or int_min/-1 in v2 then. > > The signed versions still need to check for int_min / -1, which will > fault on x86. > Compare vs gen_op_arith_div{w,d}. My mistake, I'll add this check in v2 > > > r~
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index ac2d3da9a7..597768558b 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -703,3 +703,10 @@ XVTLSBB 111100 ... -- 00010 ..... 111011011 . - @XX2_bf_xb &XL_s s:uint8_t @XL_s ......-------------- s:1 .......... - &XL_s RFEBB 010011-------------- . 0010010010 - @XL_s + +## Vector Division Instructions + +VDIVSW 000100 ..... ..... ..... 00110001011 @VX +VDIVUW 000100 ..... ..... ..... 00010001011 @VX +VDIVSD 000100 ..... ..... ..... 00111001011 @VX +VDIVUD 000100 ..... ..... ..... 00011001011 @VX diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index 6101bca3fd..d96e804abb 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -3236,6 +3236,55 @@ TRANS(VMULHSD, do_vx_mulh, true , do_vx_vmulhd_i64) TRANS(VMULHUW, do_vx_mulh, false, do_vx_vmulhw_i64) TRANS(VMULHUD, do_vx_mulh, false, do_vx_vmulhd_i64) +#define TRANS_VDIV_VMOD(FLAGS, NAME, VECE, FNI4_FUNC, FNI8_FUNC) \ +static bool trans_##NAME(DisasContext *ctx, arg_VX *a) \ +{ \ + static const GVecGen3 op[2] = { \ + { \ + .fni4 = FNI4_FUNC, \ + .fni8 = FNI8_FUNC, \ + .vece = MO_32 \ + }, \ + { \ + .fni4 = FNI4_FUNC, \ + .fni8 = FNI8_FUNC, \ + .vece = MO_64 \ + }, \ + }; \ + \ + REQUIRE_VECTOR(ctx); \ + REQUIRE_INSNS_FLAGS2(ctx, FLAGS); \ + \ + tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra), \ + avr_full_offset(a->vrb), 16, 16, &op[VECE - MO_32]); \ + \ + return true; \ +} + +#define DIV_VEC(NAME, SZ, DIV) \ +static void do_vx_##NAME(TCGv_##SZ t, TCGv_##SZ a, TCGv_##SZ b) \ +{ \ + TCGv_##SZ zero = tcg_constant_##SZ(0), one = tcg_constant_##SZ(1); \ + /* \ + * If N/0 the instruction used by the backend might deliver \ + * a signal to the process and the hardware returns 0 when \ + * N/0, so if b = 0 return 0/1 \ + */ \ + tcg_gen_movcond_##SZ(TCG_COND_EQ, a, b, zero, zero, a); \ + tcg_gen_movcond_##SZ(TCG_COND_EQ, b, b, zero, one, b); \ + DIV(t, a, b); \ +} + +DIV_VEC(div_i32 , i32, tcg_gen_div_i32) +DIV_VEC(divu_i32, i32, tcg_gen_divu_i32) +DIV_VEC(div_i64 , i64, tcg_gen_div_i64) +DIV_VEC(divu_i64, i64, tcg_gen_divu_i64) + +TRANS_VDIV_VMOD(ISA310, VDIVSW, MO_32, do_vx_div_i32 , NULL) +TRANS_VDIV_VMOD(ISA310, VDIVUW, MO_32, do_vx_divu_i32, NULL) +TRANS_VDIV_VMOD(ISA310, VDIVSD, MO_64, NULL, do_vx_div_i64) +TRANS_VDIV_VMOD(ISA310, VDIVUD, MO_64, NULL, do_vx_divu_i64) + #undef GEN_VR_LDX #undef GEN_VR_STX #undef GEN_VR_LVE