@@ -1825,6 +1825,23 @@ INSN_LASX(xvaddwod_w_hu_h, xxx)
INSN_LASX(xvaddwod_d_wu_w, xxx)
INSN_LASX(xvaddwod_q_du_d, xxx)
+/*
+ * XVAVG / XVAVGR entries: one INSN_LASX line per element-width suffix
+ * (b/h/w/d) and per plain / "u"-suffixed variant.  Every entry uses the
+ * xxx operand form.
+ */
+INSN_LASX(xvavg_b, xxx)
+INSN_LASX(xvavg_h, xxx)
+INSN_LASX(xvavg_w, xxx)
+INSN_LASX(xvavg_d, xxx)
+INSN_LASX(xvavg_bu, xxx)
+INSN_LASX(xvavg_hu, xxx)
+INSN_LASX(xvavg_wu, xxx)
+INSN_LASX(xvavg_du, xxx)
+INSN_LASX(xvavgr_b, xxx)
+INSN_LASX(xvavgr_h, xxx)
+INSN_LASX(xvavgr_w, xxx)
+INSN_LASX(xvavgr_d, xxx)
+INSN_LASX(xvavgr_bu, xxx)
+INSN_LASX(xvavgr_hu, xxx)
+INSN_LASX(xvavgr_wu, xxx)
+INSN_LASX(xvavgr_du, xxx)
+
INSN_LASX(xvreplgr2vr_b, xr)
INSN_LASX(xvreplgr2vr_h, xr)
INSN_LASX(xvreplgr2vr_w, xr)
@@ -759,3 +759,21 @@ DEF_HELPER_FLAGS_4(xvaddwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(xvaddwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(xvaddwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(xvaddwod_q_du_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+/*
+ * Helper declarations for XVAVG.* (first group) and XVAVGR.* (second
+ * group).  All sixteen share the flags TCG_CALL_NO_RWG, a void return and
+ * the argument list (ptr, ptr, ptr, i32); the names match the
+ * gen_helper_xvavg* / gen_helper_xvavgr* functions used by the translator.
+ */
+DEF_HELPER_FLAGS_4(xvavg_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(xvavg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(xvavg_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(xvavg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(xvavg_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(xvavg_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(xvavg_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(xvavg_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(xvavgr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(xvavgr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(xvavgr_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(xvavgr_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(xvavgr_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(xvavgr_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(xvavgr_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(xvavgr_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -588,6 +588,168 @@ TRANS(xvaddwod_w_hu_h, gvec_xxx, MO_16, do_xvaddwod_u_s)
TRANS(xvaddwod_d_wu_w, gvec_xxx, MO_32, do_xvaddwod_u_s)
TRANS(xvaddwod_q_du_d, gvec_xxx, MO_64, do_xvaddwod_u_s)
+/*
+ * Emit the gvec expansion for xvavg_b/h/w/d.
+ *
+ * @vece selects one of four GVecGen3 descriptors; the table is laid out in
+ * the order MO_8, MO_16, MO_32, MO_64, so indexing by @vece relies on those
+ * constants being 0..3.  The offsets and sizes are handed to
+ * tcg_gen_gvec_3() unchanged.
+ */
+static void do_xvavg_s(unsigned vece, uint32_t xd_ofs, uint32_t xj_ofs,
+                       uint32_t xk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+    /* Vector opcodes advertised through .opt_opc; the list ends with 0. */
+    static const TCGOpcode required_ops[] = {
+        INDEX_op_sari_vec,
+        INDEX_op_add_vec,
+        0
+    };
+    static const GVecGen3 by_esz[4] = {
+        { .vece = MO_8,  .opt_opc = required_ops,
+          .fniv = gen_vavg_s, .fno = gen_helper_xvavg_b },
+        { .vece = MO_16, .opt_opc = required_ops,
+          .fniv = gen_vavg_s, .fno = gen_helper_xvavg_h },
+        { .vece = MO_32, .opt_opc = required_ops,
+          .fniv = gen_vavg_s, .fno = gen_helper_xvavg_w },
+        { .vece = MO_64, .opt_opc = required_ops,
+          .fniv = gen_vavg_s, .fno = gen_helper_xvavg_d },
+    };
+    const GVecGen3 *desc = &by_esz[vece];
+
+    tcg_gen_gvec_3(xd_ofs, xj_ofs, xk_ofs, oprsz, maxsz, desc);
+}
+
+/*
+ * Emit the gvec expansion for xvavg_bu/hu/wu/du.
+ *
+ * @vece selects one of four GVecGen3 descriptors; the table is laid out in
+ * the order MO_8, MO_16, MO_32, MO_64, so indexing by @vece relies on those
+ * constants being 0..3.  The offsets and sizes are handed to
+ * tcg_gen_gvec_3() unchanged.
+ */
+static void do_xvavg_u(unsigned vece, uint32_t xd_ofs, uint32_t xj_ofs,
+                       uint32_t xk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+    /* Vector opcodes advertised through .opt_opc; the list ends with 0. */
+    static const TCGOpcode required_ops[] = {
+        INDEX_op_shri_vec,
+        INDEX_op_add_vec,
+        0
+    };
+    static const GVecGen3 by_esz[4] = {
+        { .vece = MO_8,  .opt_opc = required_ops,
+          .fniv = gen_vavg_u, .fno = gen_helper_xvavg_bu },
+        { .vece = MO_16, .opt_opc = required_ops,
+          .fniv = gen_vavg_u, .fno = gen_helper_xvavg_hu },
+        { .vece = MO_32, .opt_opc = required_ops,
+          .fniv = gen_vavg_u, .fno = gen_helper_xvavg_wu },
+        { .vece = MO_64, .opt_opc = required_ops,
+          .fniv = gen_vavg_u, .fno = gen_helper_xvavg_du },
+    };
+    const GVecGen3 *desc = &by_esz[vece];
+
+    tcg_gen_gvec_3(xd_ofs, xj_ofs, xk_ofs, oprsz, maxsz, desc);
+}
+
+/*
+ * XVAVG translator entries: each instruction is bound to gvec_xxx together
+ * with its element size (MO_8..MO_64).  The plain names use do_xvavg_s and
+ * the "u"-suffixed names use do_xvavg_u.
+ */
+TRANS(xvavg_b, gvec_xxx, MO_8, do_xvavg_s)
+TRANS(xvavg_h, gvec_xxx, MO_16, do_xvavg_s)
+TRANS(xvavg_w, gvec_xxx, MO_32, do_xvavg_s)
+TRANS(xvavg_d, gvec_xxx, MO_64, do_xvavg_s)
+TRANS(xvavg_bu, gvec_xxx, MO_8, do_xvavg_u)
+TRANS(xvavg_hu, gvec_xxx, MO_16, do_xvavg_u)
+TRANS(xvavg_wu, gvec_xxx, MO_32, do_xvavg_u)
+TRANS(xvavg_du, gvec_xxx, MO_64, do_xvavg_u)
+
+/*
+ * Emit the gvec expansion for xvavgr_b/h/w/d.
+ *
+ * @vece selects one of four GVecGen3 descriptors; the table is laid out in
+ * the order MO_8, MO_16, MO_32, MO_64, so indexing by @vece relies on those
+ * constants being 0..3.  The offsets and sizes are handed to
+ * tcg_gen_gvec_3() unchanged.
+ */
+static void do_xvavgr_s(unsigned vece, uint32_t xd_ofs, uint32_t xj_ofs,
+                        uint32_t xk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+    /* Vector opcodes advertised through .opt_opc; the list ends with 0. */
+    static const TCGOpcode required_ops[] = {
+        INDEX_op_sari_vec,
+        INDEX_op_add_vec,
+        0
+    };
+    static const GVecGen3 by_esz[4] = {
+        { .vece = MO_8,  .opt_opc = required_ops,
+          .fniv = gen_vavgr_s, .fno = gen_helper_xvavgr_b },
+        { .vece = MO_16, .opt_opc = required_ops,
+          .fniv = gen_vavgr_s, .fno = gen_helper_xvavgr_h },
+        { .vece = MO_32, .opt_opc = required_ops,
+          .fniv = gen_vavgr_s, .fno = gen_helper_xvavgr_w },
+        { .vece = MO_64, .opt_opc = required_ops,
+          .fniv = gen_vavgr_s, .fno = gen_helper_xvavgr_d },
+    };
+    const GVecGen3 *desc = &by_esz[vece];
+
+    tcg_gen_gvec_3(xd_ofs, xj_ofs, xk_ofs, oprsz, maxsz, desc);
+}
+
+/*
+ * Emit the gvec expansion for xvavgr_bu/hu/wu/du.
+ *
+ * @vece selects one of four GVecGen3 descriptors; the table is laid out in
+ * the order MO_8, MO_16, MO_32, MO_64, so indexing by @vece relies on those
+ * constants being 0..3.  The offsets and sizes are handed to
+ * tcg_gen_gvec_3() unchanged.
+ */
+static void do_xvavgr_u(unsigned vece, uint32_t xd_ofs, uint32_t xj_ofs,
+                        uint32_t xk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+    /* Vector opcodes advertised through .opt_opc; the list ends with 0. */
+    static const TCGOpcode required_ops[] = {
+        INDEX_op_shri_vec,
+        INDEX_op_add_vec,
+        0
+    };
+    static const GVecGen3 by_esz[4] = {
+        { .vece = MO_8,  .opt_opc = required_ops,
+          .fniv = gen_vavgr_u, .fno = gen_helper_xvavgr_bu },
+        { .vece = MO_16, .opt_opc = required_ops,
+          .fniv = gen_vavgr_u, .fno = gen_helper_xvavgr_hu },
+        { .vece = MO_32, .opt_opc = required_ops,
+          .fniv = gen_vavgr_u, .fno = gen_helper_xvavgr_wu },
+        { .vece = MO_64, .opt_opc = required_ops,
+          .fniv = gen_vavgr_u, .fno = gen_helper_xvavgr_du },
+    };
+    const GVecGen3 *desc = &by_esz[vece];
+
+    tcg_gen_gvec_3(xd_ofs, xj_ofs, xk_ofs, oprsz, maxsz, desc);
+}
+
+/*
+ * XVAVGR translator entries: each instruction is bound to gvec_xxx together
+ * with its element size (MO_8..MO_64).  The plain names use do_xvavgr_s and
+ * the "u"-suffixed names use do_xvavgr_u.
+ */
+TRANS(xvavgr_b, gvec_xxx, MO_8, do_xvavgr_s)
+TRANS(xvavgr_h, gvec_xxx, MO_16, do_xvavgr_s)
+TRANS(xvavgr_w, gvec_xxx, MO_32, do_xvavgr_s)
+TRANS(xvavgr_d, gvec_xxx, MO_64, do_xvavgr_s)
+TRANS(xvavgr_bu, gvec_xxx, MO_8, do_xvavgr_u)
+TRANS(xvavgr_hu, gvec_xxx, MO_16, do_xvavgr_u)
+TRANS(xvavgr_wu, gvec_xxx, MO_32, do_xvavgr_u)
+TRANS(xvavgr_du, gvec_xxx, MO_64, do_xvavgr_u)
+
static bool gvec_dupx(DisasContext *ctx, arg_xr *a, MemOp mop)
{
TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
@@ -1421,6 +1421,23 @@ xvaddwod_w_hu_h 0111 01000100 00001 ..... ..... ..... @xxx
xvaddwod_d_wu_w 0111 01000100 00010 ..... ..... ..... @xxx
xvaddwod_q_du_d 0111 01000100 00011 ..... ..... ..... @xxx
+# XVAVG.* / XVAVGR.*: all sixteen patterns share the fixed prefix
+# 0111 01000110 and the @xxx format; the next five bits select the
+# instruction (01000..01111 for xvavg_*, 10000..10111 for xvavgr_*).
+xvavg_b 0111 01000110 01000 ..... ..... ..... @xxx
+xvavg_h 0111 01000110 01001 ..... ..... ..... @xxx
+xvavg_w 0111 01000110 01010 ..... ..... ..... @xxx
+xvavg_d 0111 01000110 01011 ..... ..... ..... @xxx
+xvavg_bu 0111 01000110 01100 ..... ..... ..... @xxx
+xvavg_hu 0111 01000110 01101 ..... ..... ..... @xxx
+xvavg_wu 0111 01000110 01110 ..... ..... ..... @xxx
+xvavg_du 0111 01000110 01111 ..... ..... ..... @xxx
+xvavgr_b 0111 01000110 10000 ..... ..... ..... @xxx
+xvavgr_h 0111 01000110 10001 ..... ..... ..... @xxx
+xvavgr_w 0111 01000110 10010 ..... ..... ..... @xxx
+xvavgr_d 0111 01000110 10011 ..... ..... ..... @xxx
+xvavgr_bu 0111 01000110 10100 ..... ..... ..... @xxx
+xvavgr_hu 0111 01000110 10101 ..... ..... ..... @xxx
+xvavgr_wu 0111 01000110 10110 ..... ..... ..... @xxx
+xvavgr_du 0111 01000110 10111 ..... ..... ..... @xxx
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @xr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @xr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @xr
@@ -308,3 +308,32 @@ void HELPER(xvaddwod_q_du_d)(void *xd, void *xj, void *xk, uint32_t v)
XDO_ODD_U_S(xvaddwod_h_bu_b, 16, XH, UXH, XB, UXB, DO_ADD)
XDO_ODD_U_S(xvaddwod_w_hu_h, 32, XW, UXW, XH, UXH, DO_ADD)
XDO_ODD_U_S(xvaddwod_d_wu_w, 64, XD, UXD, XW, UXW, DO_ADD)
+
+/*
+ * XDO_3OP - define HELPER(NAME), an element-wise three-operand helper.
+ *
+ * The generated function views xd, xj and xk as XReg pointers and, for each
+ * of the LASX_LEN / BIT elements reached through accessor E, stores
+ * DO_OP(xj element, xk element) into the matching xd element.  The trailing
+ * uint32_t argument is not used by the generated body.
+ */
+#define XDO_3OP(NAME, BIT, E, DO_OP)                            \
+void HELPER(NAME)(void *xd, void *xj, void *xk, uint32_t v)    \
+{                                                               \
+    XReg *dst = xd;                                             \
+    XReg *lhs = xj;                                             \
+    XReg *rhs = xk;                                             \
+    int idx;                                                    \
+                                                                \
+    for (idx = 0; idx < LASX_LEN / BIT; idx++) {                \
+        dst->E(idx) = DO_OP(lhs->E(idx), rhs->E(idx));          \
+    }                                                           \
+}
+
+/*
+ * Instantiate the XVAVG (DO_VAVG) and XVAVGR (DO_VAVGR) helpers.  The
+ * second argument is the element width in bits and fixes the loop count
+ * (LASX_LEN / BIT).  The plain names use the XB/XH/XW/XD accessors and the
+ * "u"-suffixed names use UXB/UXH/UXW/UXD -- presumably signed vs. unsigned
+ * element views; confirm against the XReg definition.
+ */
+XDO_3OP(xvavg_b, 8, XB, DO_VAVG)
+XDO_3OP(xvavg_h, 16, XH, DO_VAVG)
+XDO_3OP(xvavg_w, 32, XW, DO_VAVG)
+XDO_3OP(xvavg_d, 64, XD, DO_VAVG)
+XDO_3OP(xvavgr_b, 8, XB, DO_VAVGR)
+XDO_3OP(xvavgr_h, 16, XH, DO_VAVGR)
+XDO_3OP(xvavgr_w, 32, XW, DO_VAVGR)
+XDO_3OP(xvavgr_d, 64, XD, DO_VAVGR)
+XDO_3OP(xvavg_bu, 8, UXB, DO_VAVG)
+XDO_3OP(xvavg_hu, 16, UXH, DO_VAVG)
+XDO_3OP(xvavg_wu, 32, UXW, DO_VAVG)
+XDO_3OP(xvavg_du, 64, UXD, DO_VAVG)
+XDO_3OP(xvavgr_bu, 8, UXB, DO_VAVGR)
+XDO_3OP(xvavgr_hu, 16, UXH, DO_VAVGR)
+XDO_3OP(xvavgr_wu, 32, UXW, DO_VAVGR)
+XDO_3OP(xvavgr_du, 64, UXD, DO_VAVGR)
@@ -51,4 +51,7 @@
#define DO_ADD(a, b) (a + b)
#define DO_SUB(a, b) (a - b)
+/*
+ * Averages that never form a + b: each operand is halved first, then the
+ * bit dropped by the two shifts is added back.  DO_VAVG adds 1 only when
+ * both low bits are set; DO_VAVGR adds 1 when either low bit is set.
+ * Every use of a and b is parenthesized so that compound arguments such as
+ * (x | y) keep their meaning next to >> and &.  Each argument is evaluated
+ * more than once, so pass side-effect-free expressions only.
+ */
+#define DO_VAVG(a, b)  (((a) >> 1) + ((b) >> 1) + ((a) & (b) & 1))
+#define DO_VAVGR(a, b) (((a) >> 1) + ((b) >> 1) + (((a) | (b)) & 1))
+
#endif /* LOONGARCH_VEC_H */
This patch includes: - XVAVG.{B/H/W/D}[U]; - XVAVGR.{B/H/W/D}[U]. Signed-off-by: Song Gao <gaosong@loongson.cn> --- target/loongarch/disas.c | 17 ++ target/loongarch/helper.h | 18 +++ target/loongarch/insn_trans/trans_lasx.c.inc | 162 +++++++++++++++++++ target/loongarch/insns.decode | 17 ++ target/loongarch/lasx_helper.c | 29 ++++ target/loongarch/vec.h | 3 + 6 files changed, 246 insertions(+)