diff mbox series

[v5,09/60] target/riscv: vector single-width integer add and subtract

Message ID 20200312145900.2054-10-zhiwei_liu@c-sky.com (mailing list archive)
State New, archived
Headers show
Series target/riscv: support vector extension v0.7.1 | expand

Commit Message

LIU Zhiwei March 12, 2020, 2:58 p.m. UTC
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 target/riscv/helper.h                   |  21 +++
 target/riscv/insn32.decode              |  10 ++
 target/riscv/insn_trans/trans_rvv.inc.c | 220 ++++++++++++++++++++++++
 target/riscv/vector_helper.c            | 122 +++++++++++++
 4 files changed, 373 insertions(+)

Comments

Richard Henderson March 14, 2020, 5:25 a.m. UTC | #1
On 3/12/20 7:58 AM, LIU Zhiwei wrote:
> +    if (a->vm && s->vl_eq_vlmax) {                                 \
> +        tcg_gen_gvec_##GVSUF(8 << s->sew, vreg_ofs(s, a->rd),      \
> +            vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1),              \
> +            MAXSZ(s), MAXSZ(s));                                   \

The first argument here should be just s->sew.
You should have seen the assert fire:

    tcg_debug_assert(vece <= MO_64);

It would be nice to pull out the bulk of GEN_OPIVV_GVEC_TRANS as a function,
and pass in tcg_gen_gvec_* as a function pointer, and fns as a pointer.

In general, I prefer the functions that are generated by macros like this to
have exactly one executable statement -- the call to the helper that does all
of the work using the arguments provided.  That way a maximum number of lines
are available for stepping with the debugger.

> +        data = FIELD_DP32(data, VDATA, MLEN, s->mlen);                        \
> +        data = FIELD_DP32(data, VDATA, VM, a->vm);                            \
> +        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);                        \

Why are these replicated in each trans_* function, and not done in opiv?_trans,
where the rest of the descriptor is created?

> +/* OPIVX without GVEC IR */
> +#define GEN_OPIVX_TRANS(NAME, CHECK)                                     \
> +static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                   \
> +{                                                                        \
> +    if (CHECK(s, a)) {                                                   \
> +        uint32_t data = 0;                                               \
> +        static gen_helper_opivx const fns[4] = {                         \
> +            gen_helper_##NAME##_b, gen_helper_##NAME##_h,                \
> +            gen_helper_##NAME##_w, gen_helper_##NAME##_d,                \
> +        };                                                               \
> +                                                                         \
> +        data = FIELD_DP32(data, VDATA, MLEN, s->mlen);                   \
> +        data = FIELD_DP32(data, VDATA, VM, a->vm);                       \
> +        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);                   \
> +        return opivx_trans(a->rd, a->rs1, a->rs2, data, fns[s->sew], s); \
> +    }                                                                    \
> +    return false;                                                        \
> +}
> +
> +GEN_OPIVX_TRANS(vrsub_vx, opivx_check)

Note that you *can* generate vector code for this,
you just have to write your own helpers.

E.g.

/*
 * Reverse-subtract expander for 8-bit lanes packed in a 64-bit TCG value.
 *
 * Forwards to tcg_gen_vec_sub8_i64() with the two source operands swapped,
 * so the value produced in d is whatever that function computes for
 * (b, a) -- presumably b - a per lane; confirm against its operand order.
 *
 * The third parameter must be named b: declaring a twice is a redefinition
 * error and left the b used in the body undeclared.
 *
 * NOTE(review): the rsub_op table refers to this hook as
 * tcg_gen_vec_rsub8_i64; the two spellings need to be reconciled when this
 * sketch is turned into real code.
 */
static void gen_vec_rsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_vec_sub8_i64(d, b, a);
}
// etc, reversing the arguments and passing on to sub.

/*
 * Expansion descriptors for the reverse-subtract operation, one entry per
 * element size.  The entries are ordered MO_8, MO_16, MO_32, MO_64 and the
 * table is indexed directly by vece in gen_gvec_rsubs()/gen_gvec_rsubi().
 *
 * NOTE(review): none of the hooks named here (tcg_gen_vec_rsub*_i64,
 * tcg_gen_rsub_i32/_i64, tcg_gen_rsub_vec, gen_helper_gvec_rsubs*) nor
 * vecop_list_sub are defined in this excerpt; they have to be supplied
 * along the lines of the argument-swapping example given for the 8-bit case.
 */
static const GVecGen2s rsub_op[4] = {
    /* 8-bit elements */
    { .fni8 = tcg_gen_vec_rsub8_i64,
      .fniv = tcg_gen_rsub_vec,
      .fno = gen_helper_gvec_rsubs8,
      .opt_opc = vecop_list_sub,
      .vece = MO_8 },
    /* 16-bit elements */
    { .fni8 = tcg_gen_vec_rsub16_i64,
      .fniv = tcg_gen_rsub_vec,
      .fno = gen_helper_gvec_rsubs16,
      .opt_opc = vecop_list_sub,
      .vece = MO_16 },
    /* 32-bit elements: the only entry that sets .fni4 instead of .fni8 */
    { .fni4 = tcg_gen_rsub_i32,
      .fniv = tcg_gen_rsub_vec,
      .fno = gen_helper_gvec_rsubs32,
      .opt_opc = vecop_list_sub,
      .vece = MO_32 },
    /* 64-bit elements: .prefer_i64 is set only when TCG_TARGET_REG_BITS is 64 */
    { .fni8 = tcg_gen_rsub_i64,
      .fniv = tcg_gen_rsub_vec,
      .fno = gen_helper_gvec_rsubs64,
      .opt_opc = vecop_list_sub,
      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
      .vece = MO_64 },
};

/*
 * Expand a vector reverse-subtract whose scalar operand lives in a TCG
 * 64-bit value.
 *
 * vece selects the rsub_op descriptor and must not exceed MO_64 (checked by
 * a debug assertion before the table is indexed).  dofs/aofs, oprsz/maxsz
 * and c are handed unchanged to tcg_gen_gvec_2s().
 */
static void gen_gvec_rsubs(unsigned vece, uint32_t dofs,
    uint32_t aofs, TCGv_i64 c,
    uint32_t oprsz, uint32_t maxsz)
{
    const GVecGen2s *expander;

    tcg_debug_assert(vece <= MO_64);
    expander = &rsub_op[vece];
    tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, expander);
}

/*
 * Expand a vector reverse-subtract with an immediate operand.
 *
 * rsub_op is a table of GVecGen2s descriptors, i.e. its hooks take the
 * scalar as a TCG value.  tcg_gen_gvec_2i() expects a GVecGen2i descriptor
 * (hooks taking an int64_t immediate), so passing &rsub_op[vece] to it is a
 * type mismatch.  Instead, materialise the immediate as a TCG constant and
 * reuse the scalar expander, which also performs the vece <= MO_64 check.
 *
 * The temporary is released once the expansion has been emitted.
 */
static void gen_gvec_rsubi(unsigned vece, uint32_t dofs,
    uint32_t aofs, int64_t c,
    uint32_t oprsz, uint32_t maxsz)
{
    TCGv_i64 scalar = tcg_const_i64(c);

    gen_gvec_rsubs(vece, dofs, aofs, scalar, oprsz, maxsz);
    tcg_temp_free_i64(scalar);
}

> +/* generate the helpers for OPIVV */
> +#define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN)             \
> +void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
> +        void *vs2, CPURISCVState *env, uint32_t desc)     \
> +{                                                         \
> +    uint32_t vlmax = vext_maxsz(desc) / ESZ;              \
> +    uint32_t mlen = vext_mlen(desc);                      \
> +    uint32_t vm = vext_vm(desc);                          \
> +    uint32_t vl = env->vl;                                \
> +    uint32_t i;                                           \
> +    for (i = 0; i < vl; i++) {                            \
> +        if (!vm && !vext_elem_mask(v0, mlen, i)) {        \
> +            continue;                                     \
> +        }                                                 \
> +        do_##NAME(vd, vs1, vs2, i);                       \
> +    }                                                     \
> +    if (i != 0) {                                         \
> +        CLEAR_FN(vd, vl, vl * DSZ,  vlmax * DSZ);         \
> +    }                                                     \
> +}
> +
> +GEN_VEXT_VV(vadd_vv_b, 1, 1, clearb)
> +GEN_VEXT_VV(vadd_vv_h, 2, 2, clearh)
> +GEN_VEXT_VV(vadd_vv_w, 4, 4, clearl)
> +GEN_VEXT_VV(vadd_vv_d, 8, 8, clearq)
> +GEN_VEXT_VV(vsub_vv_b, 1, 1, clearb)
> +GEN_VEXT_VV(vsub_vv_h, 2, 2, clearh)
> +GEN_VEXT_VV(vsub_vv_w, 4, 4, clearl)
> +GEN_VEXT_VV(vsub_vv_d, 8, 8, clearq)

The body of GEN_VEXT_VV can be an inline function, calling the helper functions
that you generated above.

> +/*
> + * If XLEN < SEW, the value from the x register is sign-extended to SEW bits.
> + * So (target_long)s1 is needed. (T1)(target_long)s1 gives the real operand type.
> + * (TX1)(T1)(target_long)s1 expands the operand type for widening
> + * or narrowing operations.
> + */
> +#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
> +static void do_##NAME(void *vd, target_ulong s1, void *vs2, int i)  \
> +{                                                                   \
> +    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
> +    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)(target_long)s1);         \
> +}

Why not just make the type of s1 be target_long in the parameter?

> +/* generate the helpers for instructions with one vector and one scalar */
> +#define GEN_VEXT_VX(NAME, ESZ, DSZ, CLEAR_FN)             \
> +void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
> +        void *vs2, CPURISCVState *env, uint32_t desc)     \
> +{                                                         \
> +    uint32_t vlmax = vext_maxsz(desc) / ESZ;              \
> +    uint32_t mlen = vext_mlen(desc);                      \
> +    uint32_t vm = vext_vm(desc);                          \
> +    uint32_t vl = env->vl;                                \
> +    uint32_t i;                                           \
> +                                                          \
> +    for (i = 0; i < vl; i++) {                            \
> +        if (!vm && !vext_elem_mask(v0, mlen, i)) {        \
> +            continue;                                     \
> +        }                                                 \
> +        do_##NAME(vd, s1, vs2, i);                        \
> +    }                                                     \
> +    if (i != 0) {                                         \
> +        CLEAR_FN(vd, vl, vl * DSZ,  vlmax * DSZ);         \
> +    }                                                     \
> +}

Likewise an inline function.


r~
LIU Zhiwei March 14, 2020, 8:11 a.m. UTC | #2
On 2020/3/14 13:25, Richard Henderson wrote:
> On 3/12/20 7:58 AM, LIU Zhiwei wrote:
>> +    if (a->vm && s->vl_eq_vlmax) {                                 \
>> +        tcg_gen_gvec_##GVSUF(8 << s->sew, vreg_ofs(s, a->rd),      \
>> +            vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1),              \
>> +            MAXSZ(s), MAXSZ(s));                                   \
> The first argument here should be just s->sew.
> You should have see the assert fire:
>
>      tcg_debug_assert(vece <= MO_64);
Oh, sorry, I did not see this. I must have missed testing this path.
> It would be nice to pull out the bulk of GEN_OPIVV_GVEC_TRANS as a function,
> and pass in tcg_gen_gvec_* as a function pointer, and fns as a pointer.
>
> In general, I prefer the functions that are generated by macros like this to
> have exactly one executable statement -- the call to the helper that does all
> of the work using the arguments provided.  That way a maximum number of lines
> are available for stepping with the debugger.
I couldn't agree more. When I debug the test cases, I also find it hard
to debug the
generated code. The macros that generate code should be as short as possible.

I accept your advice to pull out the bulk of GEN_OPIVV_GVEC_TRANS into a
function.
>
>> +        data = FIELD_DP32(data, VDATA, MLEN, s->mlen);                        \
>> +        data = FIELD_DP32(data, VDATA, VM, a->vm);                            \
>> +        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);                        \
> Why are these replicated in each trans_* function, and not done in opiv?_trans,
> where the rest of the descriptor is created?
Yes, opiv?_trans is a better place.
>
>> +/* OPIVX without GVEC IR */
>> +#define GEN_OPIVX_TRANS(NAME, CHECK)                                     \
>> +static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                   \
>> +{                                                                        \
>> +    if (CHECK(s, a)) {                                                   \
>> +        uint32_t data = 0;                                               \
>> +        static gen_helper_opivx const fns[4] = {                         \
>> +            gen_helper_##NAME##_b, gen_helper_##NAME##_h,                \
>> +            gen_helper_##NAME##_w, gen_helper_##NAME##_d,                \
>> +        };                                                               \
>> +                                                                         \
>> +        data = FIELD_DP32(data, VDATA, MLEN, s->mlen);                   \
>> +        data = FIELD_DP32(data, VDATA, VM, a->vm);                       \
>> +        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);                   \
>> +        return opivx_trans(a->rd, a->rs1, a->rs2, data, fns[s->sew], s); \
>> +    }                                                                    \
>> +    return false;                                                        \
>> +}
>> +
>> +GEN_OPIVX_TRANS(vrsub_vx, opivx_check)
> Note that you *can* generate vector code for this,
> you just have to write your own helpers.
>
> E.g.
>
> static void gen_vec_rsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 a)
> {
>      tcg_gen_vec_sub8_i64(d, b, a);
> }
> // etc, reversing the arguments and passing on to sub.
>
> static const GVecGen2s rsub_op[4] = {
>      { .fni8 = tcg_gen_vec_rsub8_i64,
>        .fniv = tcg_gen_rsub_vec,
>        .fno = gen_helper_gvec_rsubs8,
>        .opt_opc = vecop_list_sub,
>        .vece = MO_8 },
>      { .fni8 = tcg_gen_vec_rsub16_i64,
>        .fniv = tcg_gen_rsub_vec,
>        .fno = gen_helper_gvec_rsubs16,
>        .opt_opc = vecop_list_sub,
>        .vece = MO_16 },
>      { .fni4 = tcg_gen_rsub_i32,
>        .fniv = tcg_gen_rsub_vec,
>        .fno = gen_helper_gvec_rsubs32,
>        .opt_opc = vecop_list_sub,
>        .vece = MO_32 },
>      { .fni8 = tcg_gen_rsub_i64,
>        .fniv = tcg_gen_rsub_vec,
>        .fno = gen_helper_gvec_rsubs64,
>        .opt_opc = vecop_list_sub,
>        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
>        .vece = MO_64 },
> };
> static void gen_gvec_rsubs(unsigned vece, uint32_t dofs,
>      uint32_t aofs, TCGv_i64 c,
>      uint32_t oprsz, uint32_t maxsz)
> {
>      tcg_debug_assert(vece <= MO_64);
>      tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]);
> }
>
> static void gen_gvec_rsubi(unsigned vece, uint32_t dofs,
>      uint32_t aofs, int64_t c,
>      uint32_t oprsz, uint32_t maxsz)
> {
>      tcg_debug_assert(vece <= MO_64);
>      tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]);
> }
Good idea. I will try to add these GVEC IRs.
>> +/* generate the helpers for OPIVV */
>> +#define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN)             \
>> +void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
>> +        void *vs2, CPURISCVState *env, uint32_t desc)     \
>> +{                                                         \
>> +    uint32_t vlmax = vext_maxsz(desc) / ESZ;              \
>> +    uint32_t mlen = vext_mlen(desc);                      \
>> +    uint32_t vm = vext_vm(desc);                          \
>> +    uint32_t vl = env->vl;                                \
>> +    uint32_t i;                                           \
>> +    for (i = 0; i < vl; i++) {                            \
>> +        if (!vm && !vext_elem_mask(v0, mlen, i)) {        \
>> +            continue;                                     \
>> +        }                                                 \
>> +        do_##NAME(vd, vs1, vs2, i);                       \
>> +    }                                                     \
>> +    if (i != 0) {                                         \
>> +        CLEAR_FN(vd, vl, vl * DSZ,  vlmax * DSZ);         \
>> +    }                                                     \
>> +}
>> +
>> +GEN_VEXT_VV(vadd_vv_b, 1, 1, clearb)
>> +GEN_VEXT_VV(vadd_vv_h, 2, 2, clearh)
>> +GEN_VEXT_VV(vadd_vv_w, 4, 4, clearl)
>> +GEN_VEXT_VV(vadd_vv_d, 8, 8, clearq)
>> +GEN_VEXT_VV(vsub_vv_b, 1, 1, clearb)
>> +GEN_VEXT_VV(vsub_vv_h, 2, 2, clearh)
>> +GEN_VEXT_VV(vsub_vv_w, 4, 4, clearl)
>> +GEN_VEXT_VV(vsub_vv_d, 8, 8, clearq)
> The body of GEN_VEXT_VV can be an inline function, calling the helper functions
> that you generated above.
Yes, I will.
>> +/*
>> + * If XLEN < SEW, the value from the x register is sign-extended to SEW bits.
>> + * So (target_long)s1 is need. (T1)(target_long)s1 gives the real operator type.
>> + * (TX1)(T1)(target_long)s1 expands the operator type of widen operations
>> + * or narrow operations
>> + */
>> +#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
>> +static void do_##NAME(void *vd, target_ulong s1, void *vs2, int i)  \
>> +{                                                                   \
>> +    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
>> +    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)(target_long)s1);         \
>> +}
> Why not just make the type of s1 be target_long in the parameter?
Yes, I should.
>
>> +/* generate the helpers for instructions with one vector and one sclar */
>> +#define GEN_VEXT_VX(NAME, ESZ, DSZ, CLEAR_FN)             \
>> +void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
>> +        void *vs2, CPURISCVState *env, uint32_t desc)     \
>> +{                                                         \
>> +    uint32_t vlmax = vext_maxsz(desc) / ESZ;              \
>> +    uint32_t mlen = vext_mlen(desc);                      \
>> +    uint32_t vm = vext_vm(desc);                          \
>> +    uint32_t vl = env->vl;                                \
>> +    uint32_t i;                                           \
>> +                                                          \
>> +    for (i = 0; i < vl; i++) {                            \
>> +        if (!vm && !vext_elem_mask(v0, mlen, i)) {        \
>> +            continue;                                     \
>> +        }                                                 \
>> +        do_##NAME(vd, s1, vs2, i);                        \
>> +    }                                                     \
>> +    if (i != 0) {                                         \
>> +        CLEAR_FN(vd, vl, vl * DSZ,  vlmax * DSZ);         \
>> +    }                                                     \
>> +}
> Likewise an inline function.
Yes, I will.

Very informative comments. I will try to address them in the next patch set
soon.

Thanks very much.

Zhiwei
>
>
> r~
LIU Zhiwei March 23, 2020, 8:10 a.m. UTC | #3
On 2020/3/14 13:25, Richard Henderson wrote:
> On 3/12/20 7:58 AM, LIU Zhiwei wrote:
>> +    if (a->vm && s->vl_eq_vlmax) {                                 \
>> +        tcg_gen_gvec_##GVSUF(8 << s->sew, vreg_ofs(s, a->rd),      \
>> +            vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1),              \
>> +            MAXSZ(s), MAXSZ(s));                                   \
> The first argument here should be just s->sew.
> You should have see the assert fire:
>
>      tcg_debug_assert(vece <= MO_64);
>
> It would be nice to pull out the bulk of GEN_OPIVV_GVEC_TRANS as a function,
> and pass in tcg_gen_gvec_* as a function pointer, and fns as a pointer.
>
> In general, I prefer the functions that are generated by macros like this to
> have exactly one executable statement -- the call to the helper that does all
> of the work using the arguments provided.  That way a maximum number of lines
> are available for stepping with the debugger.
>
>> +        data = FIELD_DP32(data, VDATA, MLEN, s->mlen);                        \
>> +        data = FIELD_DP32(data, VDATA, VM, a->vm);                            \
>> +        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);                        \
> Why are these replicated in each trans_* function, and not done in opiv?_trans,
> where the rest of the descriptor is created?
>
>> +/* OPIVX without GVEC IR */
>> +#define GEN_OPIVX_TRANS(NAME, CHECK)                                     \
>> +static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                   \
>> +{                                                                        \
>> +    if (CHECK(s, a)) {                                                   \
>> +        uint32_t data = 0;                                               \
>> +        static gen_helper_opivx const fns[4] = {                         \
>> +            gen_helper_##NAME##_b, gen_helper_##NAME##_h,                \
>> +            gen_helper_##NAME##_w, gen_helper_##NAME##_d,                \
>> +        };                                                               \
>> +                                                                         \
>> +        data = FIELD_DP32(data, VDATA, MLEN, s->mlen);                   \
>> +        data = FIELD_DP32(data, VDATA, VM, a->vm);                       \
>> +        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);                   \
>> +        return opivx_trans(a->rd, a->rs1, a->rs2, data, fns[s->sew], s); \
>> +    }                                                                    \
>> +    return false;                                                        \
>> +}
>> +
>> +GEN_OPIVX_TRANS(vrsub_vx, opivx_check)
> Note that you *can* generate vector code for this,
> you just have to write your own helpers.
>
> E.g.
>
> static void gen_vec_rsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 a)
> {
>      tcg_gen_vec_sub8_i64(d, b, a);
> }
> // etc, reversing the arguments and passing on to sub.
>
> static const GVecGen2s rsub_op[4] = {
>      { .fni8 = tcg_gen_vec_rsub8_i64,
>        .fniv = tcg_gen_rsub_vec,
>        .fno = gen_helper_gvec_rsubs8,
>        .opt_opc = vecop_list_sub,
>        .vece = MO_8 },
>      { .fni8 = tcg_gen_vec_rsub16_i64,
>        .fniv = tcg_gen_rsub_vec,
>        .fno = gen_helper_gvec_rsubs16,
>        .opt_opc = vecop_list_sub,
>        .vece = MO_16 },
>      { .fni4 = tcg_gen_rsub_i32,
>        .fniv = tcg_gen_rsub_vec,
>        .fno = gen_helper_gvec_rsubs32,
>        .opt_opc = vecop_list_sub,
>        .vece = MO_32 },
>      { .fni8 = tcg_gen_rsub_i64,
>        .fniv = tcg_gen_rsub_vec,
>        .fno = gen_helper_gvec_rsubs64,
>        .opt_opc = vecop_list_sub,
>        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
>        .vece = MO_64 },
> };
>
> static void gen_gvec_rsubs(unsigned vece, uint32_t dofs,
>      uint32_t aofs, TCGv_i64 c,
>      uint32_t oprsz, uint32_t maxsz)
> {
>      tcg_debug_assert(vece <= MO_64);
>      tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]);
> }
>
> static void gen_gvec_rsubi(unsigned vece, uint32_t dofs,
>      uint32_t aofs, int64_t c,
>      uint32_t oprsz, uint32_t maxsz)
> {
>      tcg_debug_assert(vece <= MO_64);
>      tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]);
> }
Hi Richard,

When I tried to add the GVEC IR rsubs, I found it somewhat difficult to keep it
separate from tcg-runtime-gvec.c.

The .fno functions, e.g.,  gen_helper_gvec_rsubs8 need to be defined like

    /*
     * Out-of-line helper sketch: for each vec8-sized chunk of the first
     * oprsz bytes, stores (vecb - a[i]) into d[i], where vecb is built from
     * the scalar b via DUP16, then calls clear_high() on d.
     *
     * NOTE(review): the operand order is scalar minus vector, i.e. the
     * reverse subtract that the surrounding text calls gen_helper_gvec_rsubs8,
     * but the helper is spelled gvec_subs8 here -- confirm the intended name.
     */
    void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)

    {

         intptr_t oprsz = simd_oprsz(desc);

         vec8 vecb = (vec8)DUP16(b);

         intptr_t i;

         for (i = 0; i < oprsz; i += sizeof(vec8)) {

             *(vec8 *)(d + i) = vecb - *(vec8 *)(a + i);

         }

         clear_high(d, oprsz, desc);

    }


The vec8 and DUP are defined in tcg-runtime-gvec.c.

Should I declare them somewhere else, or just put HELPER(gvec_subs8)
into tcg-runtime-gvec.c?

Zhiwei

>> +/* generate the helpers for OPIVV */
>> +#define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN)             \
>> +void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
>> +        void *vs2, CPURISCVState *env, uint32_t desc)     \
>> +{                                                         \
>> +    uint32_t vlmax = vext_maxsz(desc) / ESZ;              \
>> +    uint32_t mlen = vext_mlen(desc);                      \
>> +    uint32_t vm = vext_vm(desc);                          \
>> +    uint32_t vl = env->vl;                                \
>> +    uint32_t i;                                           \
>> +    for (i = 0; i < vl; i++) {                            \
>> +        if (!vm && !vext_elem_mask(v0, mlen, i)) {        \
>> +            continue;                                     \
>> +        }                                                 \
>> +        do_##NAME(vd, vs1, vs2, i);                       \
>> +    }                                                     \
>> +    if (i != 0) {                                         \
>> +        CLEAR_FN(vd, vl, vl * DSZ,  vlmax * DSZ);         \
>> +    }                                                     \
>> +}
>> +
>> +GEN_VEXT_VV(vadd_vv_b, 1, 1, clearb)
>> +GEN_VEXT_VV(vadd_vv_h, 2, 2, clearh)
>> +GEN_VEXT_VV(vadd_vv_w, 4, 4, clearl)
>> +GEN_VEXT_VV(vadd_vv_d, 8, 8, clearq)
>> +GEN_VEXT_VV(vsub_vv_b, 1, 1, clearb)
>> +GEN_VEXT_VV(vsub_vv_h, 2, 2, clearh)
>> +GEN_VEXT_VV(vsub_vv_w, 4, 4, clearl)
>> +GEN_VEXT_VV(vsub_vv_d, 8, 8, clearq)
> The body of GEN_VEXT_VV can be an inline function, calling the helper functions
> that you generated above.
>
>> +/*
>> + * If XLEN < SEW, the value from the x register is sign-extended to SEW bits.
>> + * So (target_long)s1 is need. (T1)(target_long)s1 gives the real operator type.
>> + * (TX1)(T1)(target_long)s1 expands the operator type of widen operations
>> + * or narrow operations
>> + */
>> +#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
>> +static void do_##NAME(void *vd, target_ulong s1, void *vs2, int i)  \
>> +{                                                                   \
>> +    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
>> +    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)(target_long)s1);         \
>> +}
> Why not just make the type of s1 be target_long in the parameter?
>
>> +/* generate the helpers for instructions with one vector and one sclar */
>> +#define GEN_VEXT_VX(NAME, ESZ, DSZ, CLEAR_FN)             \
>> +void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
>> +        void *vs2, CPURISCVState *env, uint32_t desc)     \
>> +{                                                         \
>> +    uint32_t vlmax = vext_maxsz(desc) / ESZ;              \
>> +    uint32_t mlen = vext_mlen(desc);                      \
>> +    uint32_t vm = vext_vm(desc);                          \
>> +    uint32_t vl = env->vl;                                \
>> +    uint32_t i;                                           \
>> +                                                          \
>> +    for (i = 0; i < vl; i++) {                            \
>> +        if (!vm && !vext_elem_mask(v0, mlen, i)) {        \
>> +            continue;                                     \
>> +        }                                                 \
>> +        do_##NAME(vd, s1, vs2, i);                        \
>> +    }                                                     \
>> +    if (i != 0) {                                         \
>> +        CLEAR_FN(vd, vl, vl * DSZ,  vlmax * DSZ);         \
>> +    }                                                     \
>> +}
> Likewise an inline function.
>
>
> r~
Richard Henderson March 23, 2020, 5:46 p.m. UTC | #4
On 3/23/20 1:10 AM, LIU Zhiwei wrote:
>> static void gen_gvec_rsubi(unsigned vece, uint32_t dofs,
>>     uint32_t aofs, int64_t c,
>>     uint32_t oprsz, uint32_t maxsz)
>> {
>>     tcg_debug_assert(vece <= MO_64);
>>     tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]);
>> }
> Hi Richard,
> 
> When I try to add GVEC IR rsubs,I find it is some difficult to keep it
> separate from tcg-runtime-gvec.c.
> 
> The .fno functions, e.g.,  gen_helper_gvec_rsubs8  need to be defined like
> 
>     void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
> 
>     {
> 
>         intptr_t oprsz = simd_oprsz(desc);
> 
>         vec8 vecb = (vec8)DUP16(b);
> 
>         intptr_t i;
> 
>         for (i = 0; i < oprsz; i += sizeof(vec8)) {
> 
>             *(vec8 *)(d + i) = vecb - *(vec8 *)(a + i);
> 
>         }
> 
>         clear_high(d, oprsz, desc);
> 
>     }
> 
>    
> The vec8 and DUP are defined in tcg-runtime-gvec.c. 

Update your branch -- they're gone since commit 0a83e43a9ee6.
Just use normal integer types.


r~
diff mbox series

Patch

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 70a4b05f75..e73701d4bb 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -269,3 +269,24 @@  DEF_HELPER_6(vamominw_v_w,  void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vamomaxw_v_w,  void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vamominuw_v_w, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vamomaxuw_v_w, void, ptr, ptr, tl, ptr, env, i32)
+
+DEF_HELPER_6(vadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vsub_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vadd_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vadd_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vadd_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vadd_vx_d, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vsub_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vsub_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vsub_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vsub_vx_d, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vrsub_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vrsub_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vrsub_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vrsub_vx_d, void, ptr, ptr, tl, ptr, env, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 1330703720..d1034a0e61 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -44,6 +44,7 @@ 
 &u    imm rd
 &shift     shamt rs1 rd
 &atomic    aq rl rs2 rs1 rd
+&rmrr      vm rd rs1 rs2
 &rwdvm     vm wd rd rs1 rs2
 &r2nfvm    vm rd rs1 nf
 &rnfvm     vm rd rs1 rs2 nf
@@ -68,6 +69,7 @@ 
 @r2      .......   ..... ..... ... ..... ....... %rs1 %rd
 @r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd
 @r_nfvm  ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd
+@r_vm    ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd
 @r_wdvm  ..... wd:1 vm:1 ..... ..... ... ..... ....... &rwdvm %rs2 %rs1 %rd
 @r2_zimm . zimm:11  ..... ... ..... ....... %rs1 %rd
 
@@ -275,5 +277,13 @@  vamominuw_v     11000 . . ..... ..... 110 ..... 0101111 @r_wdvm
 vamomaxuw_v     11100 . . ..... ..... 110 ..... 0101111 @r_wdvm
 
 # *** new major opcode OP-V ***
+vadd_vv         000000 . ..... ..... 000 ..... 1010111 @r_vm
+vadd_vx         000000 . ..... ..... 100 ..... 1010111 @r_vm
+vadd_vi         000000 . ..... ..... 011 ..... 1010111 @r_vm
+vsub_vv         000010 . ..... ..... 000 ..... 1010111 @r_vm
+vsub_vx         000010 . ..... ..... 100 ..... 1010111 @r_vm
+vrsub_vx        000011 . ..... ..... 100 ..... 1010111 @r_vm
+vrsub_vi        000011 . ..... ..... 011 ..... 1010111 @r_vm
+
 vsetvli         0 ........... ..... 111 ..... 1010111  @r2_zimm
 vsetvl          1000000 ..... ..... 111 ..... 1010111  @r
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
index 3c677160c5..00c7ec976f 100644
--- a/target/riscv/insn_trans/trans_rvv.inc.c
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
@@ -730,3 +730,223 @@  GEN_VEXT_TRANS(vamomaxd_v, 15, rwdvm, amo_op, amo_check)
 GEN_VEXT_TRANS(vamominud_v, 16, rwdvm, amo_op, amo_check)
 GEN_VEXT_TRANS(vamomaxud_v, 17, rwdvm, amo_op, amo_check)
 #endif
+
+/*
+ *** Vector Integer Arithmetic Instructions
+ */
+/* Size operand passed as both oprsz and maxsz to the tcg_gen_gvec_* calls. */
+#define MAXSZ(s) ((s)->vlen >> (3 - (s)->lmul))
+
+/*
+ * Translate-time checks shared by the OPIVV forms.  Every check must pass;
+ * evaluation stops at the first one that fails, in the order written.
+ */
+static bool opivv_check(DisasContext *s, arg_rmrr *a)
+{
+    if (!vext_check_isa_ill(s, RVV)) {
+        return false;
+    }
+    if (!vext_check_overlap_mask(s, a->rd, a->vm, false)) {
+        return false;
+    }
+    if (!vext_check_reg(s, a->rd, false) ||
+        !vext_check_reg(s, a->rs2, false)) {
+        return false;
+    }
+    return vext_check_reg(s, a->rs1, false);
+}
+
+/*
+ * OPIVV with GVEC IR
+ *
+ * Generates trans_NAME() for a vector-vector integer operation.
+ *   NAME  - selects the out-of-line helpers gen_helper_NAME_{b,h,w,d},
+ *           indexed by s->sew.
+ *   GVSUF - selects the inline expander tcg_gen_gvec_GVSUF.
+ *
+ * The inline expander is used only when a->vm is set (the helpers ignore
+ * the mask in that case) and vl equals vlmax; otherwise the helper is
+ * called with MLEN/VM/LMUL packed into the descriptor data.
+ * Returns false when opivv_check() rejects the instruction.
+ */
+#define GEN_OPIVV_GVEC_TRANS(NAME, GVSUF)                          \
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
+{                                                                  \
+    if (!opivv_check(s, a)) {                                      \
+        return false;                                              \
+    }                                                              \
+                                                                   \
+    if (a->vm && s->vl_eq_vlmax) {                                 \
+        /* vece is a MemOp (MO_8..MO_64), not a bit width */       \
+        tcg_gen_gvec_##GVSUF(s->sew, vreg_ofs(s, a->rd),           \
+            vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1),              \
+            MAXSZ(s), MAXSZ(s));                                   \
+    } else {                                                       \
+        uint32_t data = 0;                                         \
+        static gen_helper_gvec_4_ptr * const fns[4] = {            \
+            gen_helper_##NAME##_b, gen_helper_##NAME##_h,          \
+            gen_helper_##NAME##_w, gen_helper_##NAME##_d,          \
+        };                                                         \
+                                                                   \
+        data = FIELD_DP32(data, VDATA, MLEN, s->mlen);             \
+        data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
+        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
+        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
+            vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),              \
+            cpu_env, 0, s->vlen / 8, data, fns[s->sew]);           \
+    }                                                              \
+    return true;                                                   \
+}
+GEN_OPIVV_GVEC_TRANS(vadd_vv, add)
+GEN_OPIVV_GVEC_TRANS(vsub_vv, sub)
+
+/* Signature of the vector-scalar helpers: (vd, v0, scalar, vs2, env, desc). */
+typedef void (*gen_helper_opivx)(TCGv_ptr, TCGv_ptr, TCGv, TCGv_ptr,
+        TCGv_env, TCGv_i32);
+
+/*
+ * Emit a call to the vector-scalar helper @fn.
+ *
+ * @vd, @vs2: vector register numbers of destination and vector source
+ * @rs1:      GPR number holding the scalar operand
+ * @data:     value placed in the data field of the simd descriptor
+ *
+ * Register v0 is always passed as the mask pointer.  Always returns true.
+ */
+static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
+        uint32_t data, gen_helper_opivx fn, DisasContext *s)
+{
+    TCGv_ptr vd_ptr = tcg_temp_new_ptr();
+    TCGv_ptr v0_ptr = tcg_temp_new_ptr();
+    TCGv_ptr vs2_ptr = tcg_temp_new_ptr();
+    TCGv scalar = tcg_temp_new();
+    TCGv_i32 desc;
+
+    gen_get_gpr(scalar, rs1);
+    desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data));
+
+    /* Turn register-file offsets into host pointers relative to cpu_env. */
+    tcg_gen_addi_ptr(vd_ptr, cpu_env, vreg_ofs(s, vd));
+    tcg_gen_addi_ptr(vs2_ptr, cpu_env, vreg_ofs(s, vs2));
+    tcg_gen_addi_ptr(v0_ptr, cpu_env, vreg_ofs(s, 0));
+
+    fn(vd_ptr, v0_ptr, scalar, vs2_ptr, cpu_env, desc);
+
+    tcg_temp_free_ptr(vd_ptr);
+    tcg_temp_free_ptr(v0_ptr);
+    tcg_temp_free_ptr(vs2_ptr);
+    tcg_temp_free(scalar);
+    tcg_temp_free_i32(desc);
+    return true;
+}
+
+/*
+ * Translate-time checks shared by the OPIVX and OPIVI forms.  Same as
+ * opivv_check() without the rs1 register check, since rs1 is not a vector
+ * register in these forms.
+ */
+static bool opivx_check(DisasContext *s, arg_rmrr *a)
+{
+    if (!vext_check_isa_ill(s, RVV)) {
+        return false;
+    }
+    if (!vext_check_overlap_mask(s, a->rd, a->vm, false)) {
+        return false;
+    }
+    if (!vext_check_reg(s, a->rd, false)) {
+        return false;
+    }
+    return vext_check_reg(s, a->rs2, false);
+}
+/*
+ * OPIVX with GVEC IR
+ *
+ * Generates trans_NAME() for a vector-scalar integer operation.
+ *   NAME  - selects the out-of-line helpers gen_helper_NAME_{b,h,w,d},
+ *           indexed by s->sew.
+ *   GVSUF - selects the inline expander tcg_gen_gvec_GVSUF, which takes the
+ *           scalar as a TCGv_i64.
+ *
+ * The inline expander is used only when a->vm is set and vl equals vlmax;
+ * otherwise opivx_trans() emits the helper call.
+ * Returns false when opivx_check() rejects the instruction.
+ */
+#define GEN_OPIVX_GVEC_TRANS(NAME, GVSUF)                                     \
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                        \
+{                                                                             \
+    if (!opivx_check(s, a)) {                                                 \
+        return false;                                                         \
+    }                                                                         \
+                                                                              \
+    if (a->vm && s->vl_eq_vlmax) {                                            \
+        TCGv_i64 src1 = tcg_temp_new_i64();                                   \
+        TCGv tmp = tcg_temp_new();                                            \
+        gen_get_gpr(tmp, a->rs1);                                             \
+        tcg_gen_ext_tl_i64(src1, tmp);                                        \
+        /* vece is a MemOp (MO_8..MO_64), not a bit width */                  \
+        tcg_gen_gvec_##GVSUF(s->sew, vreg_ofs(s, a->rd),                      \
+            vreg_ofs(s, a->rs2), src1, MAXSZ(s), MAXSZ(s));                   \
+        tcg_temp_free_i64(src1);                                              \
+        tcg_temp_free(tmp);                                                   \
+        return true;                                                          \
+    } else {                                                                  \
+        uint32_t data = 0;                                                    \
+        static gen_helper_opivx const fns[4] = {                              \
+            gen_helper_##NAME##_b, gen_helper_##NAME##_h,                     \
+            gen_helper_##NAME##_w, gen_helper_##NAME##_d,                     \
+        };                                                                    \
+                                                                              \
+        data = FIELD_DP32(data, VDATA, MLEN, s->mlen);                        \
+        data = FIELD_DP32(data, VDATA, VM, a->vm);                            \
+        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);                        \
+        return opivx_trans(a->rd, a->rs1, a->rs2, data, fns[s->sew], s);      \
+    }                                                                         \
+}
+GEN_OPIVX_GVEC_TRANS(vadd_vx, adds)
+GEN_OPIVX_GVEC_TRANS(vsub_vx, subs)
+
+/*
+ * OPIVX without GVEC IR
+ *
+ * Generates trans_NAME(), which always goes through the out-of-line helper
+ * gen_helper_NAME_{b,h,w,d} selected by s->sew.  CHECK(s, a) is the
+ * translate-time legality test; when it fails the function returns false.
+ */
+#define GEN_OPIVX_TRANS(NAME, CHECK)                                     \
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                   \
+{                                                                        \
+    static gen_helper_opivx const fns[4] = {                             \
+        gen_helper_##NAME##_b, gen_helper_##NAME##_h,                    \
+        gen_helper_##NAME##_w, gen_helper_##NAME##_d,                    \
+    };                                                                   \
+    uint32_t vdata = 0;                                                  \
+                                                                         \
+    if (!CHECK(s, a)) {                                                  \
+        return false;                                                    \
+    }                                                                    \
+    vdata = FIELD_DP32(vdata, VDATA, MLEN, s->mlen);                     \
+    vdata = FIELD_DP32(vdata, VDATA, VM, a->vm);                         \
+    vdata = FIELD_DP32(vdata, VDATA, LMUL, s->lmul);                     \
+    return opivx_trans(a->rd, a->rs1, a->rs2, vdata, fns[s->sew], s);    \
+}
+
+GEN_OPIVX_TRANS(vrsub_vx, opivx_check)
+
+/*
+ * Emit a call to the vector-scalar helper @fn with an immediate operand.
+ *
+ * @vd, @vs2: vector register numbers of destination and vector source
+ * @imm:      immediate taken from the instruction
+ * @data:     value placed in the data field of the simd descriptor
+ * @zx:       non-zero passes @imm unchanged; zero sign-extends its low
+ *            5 bits first
+ *
+ * Register v0 is always passed as the mask pointer.  Always returns true.
+ */
+static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2,
+        uint32_t data, gen_helper_opivx fn, DisasContext *s, int zx)
+{
+    TCGv_ptr vd_ptr = tcg_temp_new_ptr();
+    TCGv_ptr v0_ptr = tcg_temp_new_ptr();
+    TCGv_ptr vs2_ptr = tcg_temp_new_ptr();
+    TCGv scalar = tcg_const_tl(zx ? imm : sextract64(imm, 0, 5));
+    TCGv_i32 desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data));
+
+    /* Turn register-file offsets into host pointers relative to cpu_env. */
+    tcg_gen_addi_ptr(vd_ptr, cpu_env, vreg_ofs(s, vd));
+    tcg_gen_addi_ptr(vs2_ptr, cpu_env, vreg_ofs(s, vs2));
+    tcg_gen_addi_ptr(v0_ptr, cpu_env, vreg_ofs(s, 0));
+
+    fn(vd_ptr, v0_ptr, scalar, vs2_ptr, cpu_env, desc);
+
+    tcg_temp_free_ptr(vd_ptr);
+    tcg_temp_free_ptr(v0_ptr);
+    tcg_temp_free_ptr(vs2_ptr);
+    tcg_temp_free(scalar);
+    tcg_temp_free_i32(desc);
+    return true;
+}
+
+/*
+ * OPIVI with GVEC IR
+ *
+ * Generates trans_NAME() for a vector-immediate integer operation.  The
+ * immediate is carried in a->rs1.
+ *   ZX    - non-zero: use the immediate as decoded; zero: sign-extend its
+ *           low 5 bits.  Applied on both the inline and the helper path.
+ *   OPIVX - name of the vector-scalar helper family reused for the
+ *           out-of-line path (gen_helper_OPIVX_{b,h,w,d}).
+ *   GVSUF - selects the inline expander tcg_gen_gvec_GVSUF, which takes the
+ *           immediate as a 64-bit constant.
+ *
+ * The inline expander is used only when a->vm is set and vl equals vlmax.
+ * Returns false when opivx_check() rejects the instruction.
+ */
+#define GEN_OPIVI_GVEC_TRANS(NAME, ZX, OPIVX, GVSUF)                 \
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a)               \
+{                                                                    \
+    if (!opivx_check(s, a)) {                                        \
+        return false;                                                \
+    }                                                                \
+                                                                     \
+    if (a->vm && s->vl_eq_vlmax) {                                   \
+        /* vece is a MemOp (MO_8..MO_64), not a bit width */         \
+        tcg_gen_gvec_##GVSUF(s->sew, vreg_ofs(s, a->rd),             \
+            vreg_ofs(s, a->rs2),                                     \
+            (ZX) ? (int64_t)a->rs1 : sextract64(a->rs1, 0, 5),       \
+            MAXSZ(s), MAXSZ(s));                                     \
+        return true;                                                 \
+    } else {                                                         \
+        uint32_t data = 0;                                           \
+        static gen_helper_opivx const fns[4] = {                     \
+            gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h,          \
+            gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d,          \
+        };                                                           \
+                                                                     \
+        data = FIELD_DP32(data, VDATA, MLEN, s->mlen);               \
+        data = FIELD_DP32(data, VDATA, VM, a->vm);                   \
+        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);               \
+        return opivi_trans(a->rd, a->rs1, a->rs2, data,              \
+                fns[s->sew], s, ZX);                                 \
+    }                                                                \
+}
+GEN_OPIVI_GVEC_TRANS(vadd_vi, 0, vadd_vx, addi)
+
+/*
+ * OPIVI without GVEC IR
+ *
+ * Generates trans_NAME(), which always calls the vector-scalar helper
+ * gen_helper_OPIVX_{b,h,w,d} selected by s->sew, passing the immediate held
+ * in a->rs1 through opivi_trans() with the given ZX flag.  CHECK(s, a) is
+ * the translate-time legality test; when it fails the function returns
+ * false.
+ */
+#define GEN_OPIVI_TRANS(NAME, ZX, OPIVX, CHECK)                          \
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                   \
+{                                                                        \
+    static gen_helper_opivx const fns[4] = {                             \
+        gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h,                  \
+        gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d,                  \
+    };                                                                   \
+    uint32_t vdata = 0;                                                  \
+                                                                         \
+    if (!CHECK(s, a)) {                                                  \
+        return false;                                                    \
+    }                                                                    \
+    vdata = FIELD_DP32(vdata, VDATA, MLEN, s->mlen);                     \
+    vdata = FIELD_DP32(vdata, VDATA, VM, a->vm);                         \
+    vdata = FIELD_DP32(vdata, VDATA, LMUL, s->lmul);                     \
+    return opivi_trans(a->rd, a->rs1, a->rs2, vdata,                     \
+            fns[s->sew], s, ZX);                                         \
+}
+GEN_OPIVI_TRANS(vrsub_vi, 0, vrsub_vx, opivx_check)
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index f9b409b169..abdf3b82a8 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -828,3 +828,125 @@  GEN_VEXT_AMO(vamominw_v_w,  int32_t,  int32_t,  idx_w, clearl)
 GEN_VEXT_AMO(vamomaxw_v_w,  int32_t,  int32_t,  idx_w, clearl)
 GEN_VEXT_AMO(vamominuw_v_w, uint32_t, uint32_t, idx_w, clearl)
 GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl)
+
+/*
+ *** Vector Integer Arithmetic Instructions
+ */
+
+/*
+ * Extra level of indirection: arguments that are themselves macros (such as
+ * the OP_SSS_* type lists) are expanded into their comma-separated parts
+ * before `macro` is invoked, so each part becomes a separate parameter.
+ */
+#define RVVCALL(macro, ...)  macro(__VA_ARGS__)
+
+/*
+ * Type lists in the order (TD, T1, T2, TX1, TX2), as consumed by OPIVV2 and
+ * OPIVX2: TD is the destination element type, T1/T2 the element types the
+ * sources are read as, and TX1/TX2 the types the operands are converted to
+ * before the operation.  In the OP_SSS_* lists all five are the same signed
+ * type.
+ */
+#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
+#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
+#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
+#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
+
+/*
+ * Define do_NAME(), the per-element worker for a two-source vector-vector
+ * operation: element i of vs1 (read as T1, converted to TX1) and of vs2
+ * (read as T2, converted to TX2) are combined as OP(vs2 elem, vs1 elem) and
+ * the result is stored to element i of vd as TD.  HD/HS1/HS2 map the
+ * element index to the index actually used for each register.
+ */
+#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
+static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
+{                                                               \
+    TX1 opnd1 = ((T1 *)vs1)[HS1(i)];                            \
+    TX2 opnd2 = ((T2 *)vs2)[HS2(i)];                            \
+    ((TD *)vd)[HD(i)] = OP(opnd2, opnd1);                       \
+}
+/*
+ * Element operations, invoked by OPIVV2/OPIVX2 as OP(s2, s1).  Arguments
+ * are parenthesized so that expression arguments keep their grouping.
+ */
+#define DO_SUB(N, M) ((N) - (M))
+#define DO_RSUB(N, M) ((M) - (N))
+
+RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
+RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
+RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
+RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
+RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
+RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
+RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
+RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
+
+/*
+ * Generate the out-of-line helper for an OPIVV instruction.
+ *
+ *   NAME     - helper name; the per-element worker is do_NAME()
+ *   ESZ      - divisor turning vext_maxsz(desc) into the element count vlmax
+ *   DSZ      - per-element size used to scale the CLEAR_FN range arguments
+ *   CLEAR_FN - called after the loop, only when vl is non-zero, as
+ *              CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ)
+ *
+ * Elements [0, vl) are visited.  When vm is clear, an element is processed
+ * only if vext_elem_mask(v0, mlen, i) is non-zero; skipped elements are
+ * not written.
+ */
+#define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN)             \
+void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
+        void *vs2, CPURISCVState *env, uint32_t desc)     \
+{                                                         \
+    uint32_t vlmax = vext_maxsz(desc) / ESZ;              \
+    uint32_t mlen = vext_mlen(desc);                      \
+    uint32_t vm = vext_vm(desc);                          \
+    uint32_t vl = env->vl;                                \
+    uint32_t idx = 0;                                     \
+                                                          \
+    while (idx < vl) {                                    \
+        if (vm || vext_elem_mask(v0, mlen, idx)) {        \
+            do_##NAME(vd, vs1, vs2, idx);                 \
+        }                                                 \
+        idx++;                                            \
+    }                                                     \
+    if (vl != 0) {                                        \
+        CLEAR_FN(vd, vl, vl * DSZ,  vlmax * DSZ);         \
+    }                                                     \
+}
+
+GEN_VEXT_VV(vadd_vv_b, 1, 1, clearb)
+GEN_VEXT_VV(vadd_vv_h, 2, 2, clearh)
+GEN_VEXT_VV(vadd_vv_w, 4, 4, clearl)
+GEN_VEXT_VV(vadd_vv_d, 8, 8, clearq)
+GEN_VEXT_VV(vsub_vv_b, 1, 1, clearb)
+GEN_VEXT_VV(vsub_vv_h, 2, 2, clearh)
+GEN_VEXT_VV(vsub_vv_w, 4, 4, clearl)
+GEN_VEXT_VV(vsub_vv_d, 8, 8, clearq)
+
+/*
+ * Define do_NAME(), the per-element worker for a vector-scalar operation.
+ *
+ * The scalar s1 arrives as target_ulong and goes through a cast chain:
+ *   (target_long)  if XLEN < SEW, the value from the x register must be
+ *                  sign-extended to SEW bits, so it is made signed first;
+ *   (T1)           converts it to the real operand type;
+ *   (TX1)          converts it to the type the operation is carried out
+ *                  in, for widening or narrowing operations.
+ * The result of OP(vs2 elem, scalar) is stored to element i of vd as TD.
+ */
+#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
+static void do_##NAME(void *vd, target_ulong s1, void *vs2, int i)  \
+{                                                                   \
+    TX1 scalar = (TX1)(T1)(target_long)s1;                          \
+    TX2 opnd2 = ((T2 *)vs2)[HS2(i)];                                \
+    ((TD *)vd)[HD(i)] = OP(opnd2, scalar);                          \
+}
+RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
+RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
+RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
+RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
+RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
+RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
+RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
+RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
+RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
+RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
+RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
+RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
+
+/*
+ * Generate the out-of-line helper for an instruction with one vector and
+ * one scalar source.
+ *
+ *   NAME     - helper name; the per-element worker is do_NAME()
+ *   ESZ      - divisor turning vext_maxsz(desc) into the element count vlmax
+ *   DSZ      - per-element size used to scale the CLEAR_FN range arguments
+ *   CLEAR_FN - called after the loop, only when vl is non-zero, as
+ *              CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ)
+ *
+ * Elements [0, vl) are visited.  When vm is clear, an element is processed
+ * only if vext_elem_mask(v0, mlen, i) is non-zero; skipped elements are
+ * not written.
+ */
+#define GEN_VEXT_VX(NAME, ESZ, DSZ, CLEAR_FN)             \
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
+        void *vs2, CPURISCVState *env, uint32_t desc)     \
+{                                                         \
+    uint32_t vlmax = vext_maxsz(desc) / ESZ;              \
+    uint32_t mlen = vext_mlen(desc);                      \
+    uint32_t vm = vext_vm(desc);                          \
+    uint32_t vl = env->vl;                                \
+    uint32_t elem;                                        \
+                                                          \
+    for (elem = 0; elem < vl; elem++) {                   \
+        if (vm || vext_elem_mask(v0, mlen, elem)) {       \
+            do_##NAME(vd, s1, vs2, elem);                 \
+        }                                                 \
+    }                                                     \
+    if (vl != 0) {                                        \
+        CLEAR_FN(vd, vl, vl * DSZ,  vlmax * DSZ);         \
+    }                                                     \
+}
+GEN_VEXT_VX(vadd_vx_b, 1, 1, clearb)
+GEN_VEXT_VX(vadd_vx_h, 2, 2, clearh)
+GEN_VEXT_VX(vadd_vx_w, 4, 4, clearl)
+GEN_VEXT_VX(vadd_vx_d, 8, 8, clearq)
+GEN_VEXT_VX(vsub_vx_b, 1, 1, clearb)
+GEN_VEXT_VX(vsub_vx_h, 2, 2, clearh)
+GEN_VEXT_VX(vsub_vx_w, 4, 4, clearl)
+GEN_VEXT_VX(vsub_vx_d, 8, 8, clearq)
+GEN_VEXT_VX(vrsub_vx_b, 1, 1, clearb)
+GEN_VEXT_VX(vrsub_vx_h, 2, 2, clearh)
+GEN_VEXT_VX(vrsub_vx_w, 4, 4, clearl)
+GEN_VEXT_VX(vrsub_vx_d, 8, 8, clearq)