@@ -1703,6 +1703,11 @@ static bool trans_##insn(DisasContext *ctx, arg_##type * a) \
return true; \
}
+static void output_cx(DisasContext *ctx, arg_cx *a, const char *mnemonic)
+{
+ output(ctx, mnemonic, "fcc%d, x%d", a->cd, a->xj); /* operands: fcc dest, X-reg source */
+}
+
static void output_x_i(DisasContext *ctx, arg_x_i *a, const char *mnemonic)
{
output(ctx, mnemonic, "x%d, 0x%x", a->xd, a->imm);
@@ -2478,6 +2483,20 @@ static bool trans_xvfcmp_cond_##suffix(DisasContext *ctx, \
LASX_FCMP_INSN(s)
LASX_FCMP_INSN(d)
+INSN_LASX(xvbitsel_v, xxxx)
+INSN_LASX(xvbitseli_b, xx_i)
+
+INSN_LASX(xvseteqz_v, cx)
+INSN_LASX(xvsetnez_v, cx)
+INSN_LASX(xvsetanyeqz_b, cx)
+INSN_LASX(xvsetanyeqz_h, cx)
+INSN_LASX(xvsetanyeqz_w, cx)
+INSN_LASX(xvsetanyeqz_d, cx)
+INSN_LASX(xvsetallnez_b, cx)
+INSN_LASX(xvsetallnez_h, cx)
+INSN_LASX(xvsetallnez_w, cx)
+INSN_LASX(xvsetallnez_d, cx)
+
INSN_LASX(xvreplgr2vr_b, xr)
INSN_LASX(xvreplgr2vr_h, xr)
INSN_LASX(xvreplgr2vr_w, xr)
@@ -1221,3 +1221,14 @@ DEF_HELPER_5(xvfcmp_c_s, void, env, i32, i32, i32, i32)
DEF_HELPER_5(xvfcmp_s_s, void, env, i32, i32, i32, i32)
DEF_HELPER_5(xvfcmp_c_d, void, env, i32, i32, i32, i32)
DEF_HELPER_5(xvfcmp_s_d, void, env, i32, i32, i32, i32)
+
+DEF_HELPER_FLAGS_4(xvbitseli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_3(xvsetanyeqz_b, void, env, i32, i32)
+DEF_HELPER_3(xvsetanyeqz_h, void, env, i32, i32)
+DEF_HELPER_3(xvsetanyeqz_w, void, env, i32, i32)
+DEF_HELPER_3(xvsetanyeqz_d, void, env, i32, i32)
+DEF_HELPER_3(xvsetallnez_b, void, env, i32, i32)
+DEF_HELPER_3(xvsetallnez_h, void, env, i32, i32)
+DEF_HELPER_3(xvsetallnez_w, void, env, i32, i32)
+DEF_HELPER_3(xvsetallnez_d, void, env, i32, i32)
@@ -65,6 +65,17 @@ static bool gen_xx_i(DisasContext *ctx, arg_xx_i *a,
return true;
}
+static bool gen_cx(DisasContext *ctx, arg_cx *a,
+ void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32)) /* helper(env, cd, xj) */
+{
+ TCGv_i32 xj = tcg_constant_i32(a->xj); /* source X-register index */
+ TCGv_i32 cd = tcg_constant_i32(a->cd); /* destination fcc index */
+
+ CHECK_ASXE; /* bail out if LASX is not available */
+ func(cpu_env, cd, xj); /* predicate evaluated at run time by the helper */
+ return true;
+}
+
static bool gvec_xxx(DisasContext *ctx, arg_xxx *a, MemOp mop,
void (*func)(unsigned, uint32_t, uint32_t,
uint32_t, uint32_t, uint32_t))
@@ -2706,6 +2717,71 @@ static bool trans_xvfcmp_cond_d(DisasContext *ctx, arg_xxx_fcond *a)
return true;
}
+static bool trans_xvbitsel_v(DisasContext *ctx, arg_xxxx *a)
+{
+ CHECK_ASXE;
+
+ tcg_gen_gvec_bitsel(MO_64, vec_full_offset(a->xd), vec_full_offset(a->xa), /* xa = per-bit selector */
+ vec_full_offset(a->xk), vec_full_offset(a->xj), /* 1 -> xk, 0 -> xj */
+ 32, ctx->vl / 8); /* oprsz 32 bytes = 256-bit LASX */
+ return true;
+}
+
+static bool trans_xvbitseli_b(DisasContext *ctx, arg_xx_i *a)
+{
+ static const GVecGen2i op = {
+ .fniv = gen_vbitseli, /* vector path shared with LSX */
+ .fnoi = gen_helper_xvbitseli_b, /* out-of-line fallback */
+ .vece = MO_8,
+ .load_dest = true /* xd is both selector input and destination */
+ };
+
+ CHECK_ASXE;
+
+ tcg_gen_gvec_2i(vec_full_offset(a->xd), vec_full_offset(a->xj),
+ 32, ctx->vl / 8, a->imm, &op);
+ return true;
+}
+
+/* XVSET{EQZ/NEZ}.V: set fcc[cd] from an OR-reduction of the whole X reg. */
+#define XVSET(NAME, COND) \
+static bool trans_## NAME(DisasContext *ctx, arg_cx * a) \
+{ \
+    TCGv_i64 t1, t2, d[4]; \
+ \
+    CHECK_ASXE; /* check availability before emitting any ops */ \
+    d[0] = tcg_temp_new_i64(); \
+    d[1] = tcg_temp_new_i64(); \
+    d[2] = tcg_temp_new_i64(); \
+    d[3] = tcg_temp_new_i64(); \
+    t1 = tcg_temp_new_i64(); \
+    t2 = tcg_temp_new_i64(); \
+ \
+    get_xreg64(d[0], a->xj, 0); \
+    get_xreg64(d[1], a->xj, 1); \
+    get_xreg64(d[2], a->xj, 2); \
+    get_xreg64(d[3], a->xj, 3); \
+ \
+    tcg_gen_or_i64(t1, d[0], d[1]); /* fold the four 64-bit lanes */ \
+    tcg_gen_or_i64(t2, d[2], d[3]); \
+    tcg_gen_or_i64(t1, t2, t1); \
+    tcg_gen_setcondi_i64(COND, t1, t1, 0); /* EQ: all-zero, NE: any bit set */ \
+    tcg_gen_st8_tl(t1, cpu_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \
+ \
+    return true; \
+}
+
+XVSET(xvseteqz_v, TCG_COND_EQ)
+XVSET(xvsetnez_v, TCG_COND_NE)
+
+TRANS(xvsetanyeqz_b, gen_cx, gen_helper_xvsetanyeqz_b)
+TRANS(xvsetanyeqz_h, gen_cx, gen_helper_xvsetanyeqz_h)
+TRANS(xvsetanyeqz_w, gen_cx, gen_helper_xvsetanyeqz_w)
+TRANS(xvsetanyeqz_d, gen_cx, gen_helper_xvsetanyeqz_d)
+TRANS(xvsetallnez_b, gen_cx, gen_helper_xvsetallnez_b)
+TRANS(xvsetallnez_h, gen_cx, gen_helper_xvsetallnez_h)
+TRANS(xvsetallnez_w, gen_cx, gen_helper_xvsetallnez_w)
+TRANS(xvsetallnez_d, gen_cx, gen_helper_xvsetallnez_d)
+
static bool gvec_dupx(DisasContext *ctx, arg_xr *a, MemOp mop)
{
TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
@@ -1308,6 +1308,7 @@ vstelm_b 0011 000110 .... ........ ..... ..... @vr_i8i4
&x_i xd imm
&xxxx xd xj xk xa
&xxx_fcond xd xj xk fcond
+&cx cd xj
#
# LASX Formats
@@ -1326,6 +1327,7 @@ vstelm_b 0011 000110 .... ........ ..... ..... @vr_i8i4
@xx_ui8 .... ........ .. imm:8 xj:5 xd:5 &xx_i
@xxxx .... ........ xa:5 xk:5 xj:5 xd:5 &xxxx
@xxx_fcond .... ........ fcond:5 xk:5 xj:5 xd:5 &xxx_fcond
+@cx .... ........ ..... ..... xj:5 .. cd:3 &cx
xvadd_b 0111 01000000 10100 ..... ..... ..... @xxx
xvadd_h 0111 01000000 10101 ..... ..... ..... @xxx
@@ -1988,6 +1990,21 @@ xvslti_du 0111 01101000 10011 ..... ..... ..... @xx_ui5
xvfcmp_cond_s 0000 11001001 ..... ..... ..... ..... @xxx_fcond
xvfcmp_cond_d 0000 11001010 ..... ..... ..... ..... @xxx_fcond
+xvbitsel_v 0000 11010010 ..... ..... ..... ..... @xxxx
+
+xvbitseli_b 0111 01111100 01 ........ ..... ..... @xx_ui8
+
+xvseteqz_v 0111 01101001 11001 00110 ..... 00 ... @cx
+xvsetnez_v 0111 01101001 11001 00111 ..... 00 ... @cx
+xvsetanyeqz_b 0111 01101001 11001 01000 ..... 00 ... @cx
+xvsetanyeqz_h 0111 01101001 11001 01001 ..... 00 ... @cx
+xvsetanyeqz_w 0111 01101001 11001 01010 ..... 00 ... @cx
+xvsetanyeqz_d 0111 01101001 11001 01011 ..... 00 ... @cx
+xvsetallnez_b 0111 01101001 11001 01100 ..... 00 ... @cx
+xvsetallnez_h 0111 01101001 11001 01101 ..... 00 ... @cx
+xvsetallnez_w 0111 01101001 11001 01110 ..... 00 ... @cx
+xvsetallnez_d 0111 01101001 11001 01111 ..... 00 ... @cx
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @xr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @xr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @xr
@@ -2782,3 +2782,40 @@ XVFCMP(xvfcmp_c_s, 32, UXW, float32_compare_quiet)
XVFCMP(xvfcmp_s_s, 32, UXW, float32_compare)
XVFCMP(xvfcmp_c_d, 64, UXD, float64_compare_quiet)
XVFCMP(xvfcmp_s_d, 64, UXD, float64_compare)
+
+void HELPER(xvbitseli_b)(void *xd, void *xj, uint64_t imm, uint32_t v) /* v: gvec desc, unused here */
+{
+ int i;
+ XReg *Xd = (XReg *)xd;
+ XReg *Xj = (XReg *)xj;
+
+ for (i = 0; i < LASX_LEN / 8; i++) {
+ Xd->XB(i) = (~Xd->XB(i) & Xj->XB(i)) | (Xd->XB(i) & imm); /* per bit: Xd ? imm : Xj */
+ }
+}
+
+#define XSETANYEQZ(NAME, MO) \
+void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t xj) \
+{ \
+ XReg *Xj = &(env->fpr[xj].xreg); \
+ \
+ env->cf[cd & 0x7] = do_match2(0, Xj->XD(0), Xj->XD(1), MO) || /* zero elt in low 128 bits... */ \
+ do_match2(0, Xj->XD(2), Xj->XD(3), MO); /* ...or in high 128 bits */ \
+}
+XSETANYEQZ(xvsetanyeqz_b, MO_8)
+XSETANYEQZ(xvsetanyeqz_h, MO_16)
+XSETANYEQZ(xvsetanyeqz_w, MO_32)
+XSETANYEQZ(xvsetanyeqz_d, MO_64)
+
+#define XSETALLNEZ(NAME, MO) \
+void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t xj) \
+{ \
+ XReg *Xj = &(env->fpr[xj].xreg); \
+ \
+ env->cf[cd & 0x7] = !do_match2(0, Xj->XD(0), Xj->XD(1), MO) && /* no zero elt in low 128... */ \
+ !do_match2(0, Xj->XD(2), Xj->XD(3), MO); /* ...nor in high 128 */ \
+}
+XSETALLNEZ(xvsetallnez_b, MO_8)
+XSETALLNEZ(xvsetallnez_h, MO_16)
+XSETALLNEZ(xvsetallnez_w, MO_32)
+XSETALLNEZ(xvsetallnez_d, MO_64)
@@ -2688,7 +2688,7 @@ void HELPER(vbitseli_b)(void *vd, void *vj, uint64_t imm, uint32_t v)
}
/* Copy from target/arm/tcg/sve_helper.c */
-static inline bool do_match2(uint64_t n, uint64_t m0, uint64_t m1, int esz)
+bool do_match2(uint64_t n, uint64_t m0, uint64_t m1, int esz)
{
uint64_t bits = 8 << esz;
uint64_t ones = dup_const(esz, 1);
@@ -118,4 +118,6 @@ uint64_t do_frsqrt_64(CPULoongArchState *env, uint64_t fj);
uint64_t vfcmp_common(CPULoongArchState *env,
FloatRelation cmp, uint32_t flags);
+bool do_match2(uint64_t n, uint64_t m0, uint64_t m1, int esz);
+
#endif /* LOONGARCH_VEC_H */
This patch includes:
- XVBITSEL.V;
- XVBITSELI.B;
- XVSET{EQZ/NEZ}.V;
- XVSETANYEQZ.{B/H/W/D};
- XVSETALLNEZ.{B/H/W/D}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
---
 target/loongarch/disas.c                     | 19 +++++
 target/loongarch/helper.h                    | 11 +++
 target/loongarch/insn_trans/trans_lasx.c.inc | 76 ++++++++++++++++++++
 target/loongarch/insns.decode                | 17 +++++
 target/loongarch/lasx_helper.c               | 37 ++++++++++
 target/loongarch/lsx_helper.c                |  2 +-
 target/loongarch/vec.h                       |  2 +
 7 files changed, 163 insertions(+), 1 deletion(-)