diff mbox series

[v2,16/17] RISC-V: add vector extension mask instructions

Message ID 1568183141-67641-17-git-send-email-zhiwei_liu@c-sky.com (mailing list archive)
State New, archived
Headers show
Series RISC-V: support vector extension | expand

Commit Message

LIU Zhiwei Sept. 11, 2019, 6:25 a.m. UTC
From: LIU Zhiwei <zhiwei_liu@c-sky.com>

Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 target/riscv/helper.h                   |  16 +
 target/riscv/insn32.decode              |  17 +
 target/riscv/insn_trans/trans_rvv.inc.c |  27 ++
 target/riscv/vector_helper.c            | 635 ++++++++++++++++++++++++++++++++
 4 files changed, 695 insertions(+)

Comments

Richard Henderson Sept. 12, 2019, 5:07 p.m. UTC | #1
On 9/11/19 2:25 AM, liuzhiwei wrote:
> +    for (i = 0; i < vlmax; i++) {
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            tmp = ~vector_mask_reg(env, rs1, width, lmul, i) &
> +                    vector_mask_reg(env, rs2, width, lmul, i);
> +            vector_mask_result(env, rd, width, lmul, i, tmp);
> +        } else {
> +            vector_mask_result(env, rd, width, lmul, i, 0);
> +        }
> +    }

These can be processed in uint64_t units, with a mask based on width:

   8: 0xffffffffffffffff
  16: 0x5555555555555555
  32: 0x1111111111111111
  64: 0x0101010101010101

  dest = ~in1 & in2 & mask;

with an additional final mask to handle vl not being a multiple of 64.

Again, I urge you not to bother with impossible vstart -- instructions like
this cannot be interrupted, and the spec allows you to not handle values of
vstart that cannot be produced by the implementation.


r~
diff mbox series

Patch

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index d36bd00..337ac2e 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -401,5 +401,21 @@  DEF_HELPER_5(vector_vwredsum_vs, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(vector_vfwredsum_vs, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(vector_vfwredosum_vs, void, env, i32, i32, i32, i32)
 
+/* Mask-register logical ops; helper arguments are (env, rs1, rs2, rd). */
+DEF_HELPER_4(vector_vmandnot_mm, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmand_mm, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmor_mm, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmxor_mm, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmornot_mm, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmnand_mm, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmnor_mm, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmxnor_mm, void, env, i32, i32, i32)
+/* Set-before/only/including-first and iota; arguments are (env, vm, rs2, rd). */
+DEF_HELPER_4(vector_vmsbf_m, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmsof_m, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmsif_m, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_viota_m, void, env, i32, i32, i32)
+/* vid.v has no source register; arguments are (env, vm, rd). */
+DEF_HELPER_3(vector_vid_v, void, env, i32, i32)
+/* Results go to integer register rd; arguments are (env, vm, rs2, rd). */
+DEF_HELPER_4(vector_vmpopc_m, void, env, i32, i32, i32)
+DEF_HELPER_4(vector_vmfirst_m, void, env, i32, i32, i32)
+
 DEF_HELPER_4(vector_vsetvli, void, env, i32, i32, i32)
 DEF_HELPER_4(vector_vsetvl, void, env, i32, i32, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 3f63bc1..1de776b 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -68,6 +68,7 @@ 
 @r_nfvm  nf:3 ... vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd
 @r2_nfvm nf:3 ... vm:1 ..... ..... ... ..... ....... %rs1 %rd
 @r2_vm   ...... vm:1 ..... ..... ... ..... ....... %rs2 %rd
+# Format carrying only the vm bit and a destination register (used by vid_v).
+@r1_vm   ...... vm:1 ..... ..... ... ..... ....... %rd
 @r2_zimm . zimm:11  ..... ... ..... ....... %rs1 %rd
 
 @sfence_vma ....... ..... .....   ... ..... ....... %rs2 %rs1
@@ -541,5 +542,21 @@  vfredmax_vs     000111 . ..... ..... 001 ..... 1010111 @r_vm
 vfwredsum_vs    110001 . ..... ..... 001 ..... 1010111 @r_vm
 vfwredosum_vs   110011 . ..... ..... 001 ..... 1010111 @r_vm
 
+# Mask-register logical instructions; the vm bit position is ignored ('-').
+vmand_mm        011001 - ..... ..... 010 ..... 1010111 @r
+vmnand_mm       011101 - ..... ..... 010 ..... 1010111 @r
+vmandnot_mm     011000 - ..... ..... 010 ..... 1010111 @r
+vmor_mm         011010 - ..... ..... 010 ..... 1010111 @r
+vmxor_mm        011011 - ..... ..... 010 ..... 1010111 @r
+vmnor_mm        011110 - ..... ..... 010 ..... 1010111 @r
+vmornot_mm      011100 - ..... ..... 010 ..... 1010111 @r
+vmxnor_mm       011111 - ..... ..... 010 ..... 1010111 @r
+# Mask population count / find-first; the rs1 field is ignored ('-----').
+vmpopc_m        010100 . ..... ----- 010 ..... 1010111 @r2_vm
+vmfirst_m       010101 . ..... ----- 010 ..... 1010111 @r2_vm
+# Instructions sharing major opcode bits 010110, selected by the rs1 field.
+vmsbf_m         010110 . ..... 00001 010 ..... 1010111 @r2_vm
+vmsof_m         010110 . ..... 00010 010 ..... 1010111 @r2_vm
+vmsif_m         010110 . ..... 00011 010 ..... 1010111 @r2_vm
+viota_m         010110 . ..... 10000 010 ..... 1010111 @r2_vm
+vid_v           010110 . 00000 10001 010 ..... 1010111 @r1_vm
+
 vsetvli         0 ........... ..... 111 ..... 1010111  @r2_zimm
 vsetvl          1000000 ..... ..... 111 ..... 1010111  @r
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
index 9a3d31b..85e435a 100644
--- a/target/riscv/insn_trans/trans_rvv.inc.c
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
@@ -77,6 +77,17 @@  static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
     return true;                                       \
 }
 
+/*
+ * Emit trans_<INSN>() for instructions that carry only a vm bit and a
+ * destination register: both fields are materialised as i32 constants and
+ * handed to gen_helper_vector_<INSN>() as (cpu_env, vm, rd).
+ */
+#define GEN_VECTOR_R1_VM(INSN) \
+static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
+{                                                           \
+    TCGv_i32 dest_reg = tcg_const_i32(a->rd);               \
+    TCGv_i32 mask_bit = tcg_const_i32(a->vm);               \
+    gen_helper_vector_##INSN(cpu_env, mask_bit, dest_reg);  \
+    tcg_temp_free_i32(dest_reg);                            \
+    tcg_temp_free_i32(mask_bit);                            \
+    return true;                                            \
+}
+
 #define GEN_VECTOR_R_VM(INSN) \
 static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \
 {                                                      \
@@ -444,5 +455,21 @@  GEN_VECTOR_R_VM(vfredmax_vs)
 GEN_VECTOR_R_VM(vfwredsum_vs)
 GEN_VECTOR_R_VM(vfwredosum_vs)
 
+/* Mask-register logical ops: two mask sources and a mask destination. */
+GEN_VECTOR_R(vmandnot_mm)
+GEN_VECTOR_R(vmand_mm)
+GEN_VECTOR_R(vmor_mm)
+GEN_VECTOR_R(vmxor_mm)
+GEN_VECTOR_R(vmornot_mm)
+GEN_VECTOR_R(vmnand_mm)
+GEN_VECTOR_R(vmnor_mm)
+GEN_VECTOR_R(vmxnor_mm)
+/* Mask ops with a vm bit, one source register and a destination. */
+GEN_VECTOR_R2_VM(vmpopc_m)
+GEN_VECTOR_R2_VM(vmfirst_m)
+GEN_VECTOR_R2_VM(vmsbf_m)
+GEN_VECTOR_R2_VM(vmsof_m)
+GEN_VECTOR_R2_VM(vmsif_m)
+GEN_VECTOR_R2_VM(viota_m)
+/* vid.v has no source register, only vm and rd. */
+GEN_VECTOR_R1_VM(vid_v)
+
 GEN_VECTOR_R2_ZIMM(vsetvli)
 GEN_VECTOR_R(vsetvl)
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 4a9083b..9e15df9 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -1232,6 +1232,15 @@  static inline int vector_get_carry(CPURISCVState *env, int width, int lmul,
     return (env->vfp.vreg[0].u8[idx] >> pos) & 0x1;
 }
 
+/*
+ * Return (as 0 or 1) the mask bit that vector register @reg holds for
+ * element @index.  Each element owns width / lmul bits of the register and
+ * the bit returned is the lowest of those.
+ */
+static inline int vector_mask_reg(CPURISCVState *env, uint32_t reg, int width,
+    int lmul, int index)
+{
+    int bitpos = index * (width / lmul);
+
+    return (env->vfp.vreg[reg].u8[bitpos / 8] >> (bitpos % 8)) & 0x1;
+}
+
 static inline void vector_mask_result(CPURISCVState *env, uint32_t reg,
         int width, int lmul, int index, uint32_t result)
 {
@@ -23996,3 +24005,629 @@  void VECTOR_HELPER(vfwredosum_vs)(CPURISCVState *env, uint32_t vm,
     env->vfp.vstart = 0;
     return;
 }
+
+/*
+ * vmandnot.mm vd, vs2, vs1 # vd = vs2 & ~vs1
+ *
+ * For each element i in [vstart, vl) the result bit is (rs2 bit AND NOT
+ * rs1 bit), stored through vector_mask_result(); for i in [vl, vlmax) a 0
+ * result is stored.  Elements below vstart are skipped.
+ * An illegal vtype raises RISCV_EXCP_ILLEGAL_INST.
+ */
+void VECTOR_HELPER(vmandnot_mm)(CPURISCVState *env, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, i, vlmax;
+    uint32_t tmp;
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    vl = env->vfp.vl;
+    if (env->vfp.vstart >= vl) {
+        /* No element is processed, but vstart must still be reset. */
+        env->vfp.vstart = 0;
+        return;
+    }
+
+    for (i = 0; i < vlmax; i++) {
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            /* The rs2 bit is 0 or 1, so the AND keeps tmp to one bit. */
+            tmp = ~vector_mask_reg(env, rs1, width, lmul, i) &
+                    vector_mask_reg(env, rs2, width, lmul, i);
+            vector_mask_result(env, rd, width, lmul, i, tmp);
+        } else {
+            vector_mask_result(env, rd, width, lmul, i, 0);
+        }
+    }
+
+    env->vfp.vstart = 0;
+}
+/*
+ * vmand.mm vd, vs2, vs1 # vd = vs2 & vs1
+ *
+ * For each element i in [vstart, vl) the result bit is the AND of the rs1
+ * and rs2 mask bits, stored through vector_mask_result(); for i in
+ * [vl, vlmax) a 0 result is stored.  Elements below vstart are skipped.
+ * An illegal vtype raises RISCV_EXCP_ILLEGAL_INST.
+ */
+void VECTOR_HELPER(vmand_mm)(CPURISCVState *env, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, i, vlmax;
+    uint32_t tmp;
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    vl = env->vfp.vl;
+    if (env->vfp.vstart >= vl) {
+        /* No element is processed, but vstart must still be reset. */
+        env->vfp.vstart = 0;
+        return;
+    }
+
+    for (i = 0; i < vlmax; i++) {
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            tmp = vector_mask_reg(env, rs1, width, lmul, i) &
+                    vector_mask_reg(env, rs2, width, lmul, i);
+            vector_mask_result(env, rd, width, lmul, i, tmp);
+        } else {
+            vector_mask_result(env, rd, width, lmul, i, 0);
+        }
+    }
+
+    env->vfp.vstart = 0;
+}
+/*
+ * vmor.mm vd, vs2, vs1 # vd = vs2 | vs1
+ *
+ * For each element i in [vstart, vl) the result bit is the OR of the rs1
+ * and rs2 mask bits, stored through vector_mask_result(); for i in
+ * [vl, vlmax) a 0 result is stored.  Elements below vstart are skipped.
+ * An illegal vtype raises RISCV_EXCP_ILLEGAL_INST.
+ */
+void VECTOR_HELPER(vmor_mm)(CPURISCVState *env, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, i, vlmax;
+    uint32_t tmp;
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    vl = env->vfp.vl;
+    if (env->vfp.vstart >= vl) {
+        /* No element is processed, but vstart must still be reset. */
+        env->vfp.vstart = 0;
+        return;
+    }
+
+    for (i = 0; i < vlmax; i++) {
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            tmp = vector_mask_reg(env, rs1, width, lmul, i) |
+                    vector_mask_reg(env, rs2, width, lmul, i);
+            vector_mask_result(env, rd, width, lmul, i, tmp & 0x1);
+        } else {
+            vector_mask_result(env, rd, width, lmul, i, 0);
+        }
+    }
+
+    env->vfp.vstart = 0;
+}
+/*
+ * vmxor.mm vd, vs2, vs1 # vd = vs2 ^ vs1
+ *
+ * For each element i in [vstart, vl) the result bit is the XOR of the rs1
+ * and rs2 mask bits, stored through vector_mask_result(); for i in
+ * [vl, vlmax) a 0 result is stored.  Elements below vstart are skipped.
+ * An illegal vtype raises RISCV_EXCP_ILLEGAL_INST.
+ */
+void VECTOR_HELPER(vmxor_mm)(CPURISCVState *env, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, i, vlmax;
+    uint32_t tmp;
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    vl = env->vfp.vl;
+    if (env->vfp.vstart >= vl) {
+        /* No element is processed, but vstart must still be reset. */
+        env->vfp.vstart = 0;
+        return;
+    }
+
+    for (i = 0; i < vlmax; i++) {
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            tmp = vector_mask_reg(env, rs1, width, lmul, i) ^
+                    vector_mask_reg(env, rs2, width, lmul, i);
+            vector_mask_result(env, rd, width, lmul, i, tmp & 0x1);
+        } else {
+            vector_mask_result(env, rd, width, lmul, i, 0);
+        }
+    }
+
+    env->vfp.vstart = 0;
+}
+/*
+ * vmornot.mm vd, vs2, vs1 # vd = vs2 | ~vs1
+ *
+ * For each element i in [vstart, vl) the result bit is (rs2 bit OR NOT
+ * rs1 bit), stored through vector_mask_result(); for i in [vl, vlmax) a 0
+ * result is stored.  Elements below vstart are skipped.
+ * An illegal vtype raises RISCV_EXCP_ILLEGAL_INST.
+ */
+void VECTOR_HELPER(vmornot_mm)(CPURISCVState *env, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, i, vlmax;
+    uint32_t tmp;
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    vl = env->vfp.vl;
+    if (env->vfp.vstart >= vl) {
+        /* No element is processed, but vstart must still be reset. */
+        env->vfp.vstart = 0;
+        return;
+    }
+
+    for (i = 0; i < vlmax; i++) {
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            /* The complement sets every upper bit; "& 0x1" drops them. */
+            tmp = ~vector_mask_reg(env, rs1, width, lmul, i) |
+                    vector_mask_reg(env, rs2, width, lmul, i);
+            vector_mask_result(env, rd, width, lmul, i, tmp & 0x1);
+        } else {
+            vector_mask_result(env, rd, width, lmul, i, 0);
+        }
+    }
+
+    env->vfp.vstart = 0;
+}
+/*
+ * vmnand.mm vd, vs2, vs1 # vd = ~(vs2 & vs1)
+ *
+ * For each element i in [vstart, vl) the result bit is the complement of
+ * (rs1 bit AND rs2 bit), stored through vector_mask_result(); for i in
+ * [vl, vlmax) a 0 result is stored.  Elements below vstart are skipped.
+ * An illegal vtype raises RISCV_EXCP_ILLEGAL_INST.
+ */
+void VECTOR_HELPER(vmnand_mm)(CPURISCVState *env, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, i, vlmax;
+    uint32_t tmp;
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    vl = env->vfp.vl;
+    if (env->vfp.vstart >= vl) {
+        /* No element is processed, but vstart must still be reset. */
+        env->vfp.vstart = 0;
+        return;
+    }
+
+    for (i = 0; i < vlmax; i++) {
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            tmp = vector_mask_reg(env, rs1, width, lmul, i) &
+                    vector_mask_reg(env, rs2, width, lmul, i);
+            /* Only the low bit of the complement is meaningful. */
+            vector_mask_result(env, rd, width, lmul, i, (~tmp & 0x1));
+        } else {
+            vector_mask_result(env, rd, width, lmul, i, 0);
+        }
+    }
+
+    env->vfp.vstart = 0;
+}
+/*
+ * vmnor.mm vd, vs2, vs1 # vd = ~(vs2 | vs1)
+ *
+ * For each element i in [vstart, vl) the result bit is the complement of
+ * (rs1 bit OR rs2 bit), stored through vector_mask_result(); for i in
+ * [vl, vlmax) a 0 result is stored.  Elements below vstart are skipped.
+ * An illegal vtype raises RISCV_EXCP_ILLEGAL_INST.
+ */
+void VECTOR_HELPER(vmnor_mm)(CPURISCVState *env, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, i, vlmax;
+    uint32_t tmp;
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    vl = env->vfp.vl;
+    if (env->vfp.vstart >= vl) {
+        /* No element is processed, but vstart must still be reset. */
+        env->vfp.vstart = 0;
+        return;
+    }
+
+    for (i = 0; i < vlmax; i++) {
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            tmp = vector_mask_reg(env, rs1, width, lmul, i) |
+                    vector_mask_reg(env, rs2, width, lmul, i);
+            /* Only the low bit of the complement is meaningful. */
+            vector_mask_result(env, rd, width, lmul, i, ~tmp & 0x1);
+        } else {
+            vector_mask_result(env, rd, width, lmul, i, 0);
+        }
+    }
+
+    env->vfp.vstart = 0;
+}
+
+/*
+ * vmxnor.mm vd, vs2, vs1 # vd = ~(vs2 ^ vs1)
+ *
+ * For each element i in [vstart, vl) the result bit is the complement of
+ * (rs1 bit XOR rs2 bit), stored through vector_mask_result(); for i in
+ * [vl, vlmax) a 0 result is stored.  Elements below vstart are skipped.
+ * An illegal vtype raises RISCV_EXCP_ILLEGAL_INST.
+ */
+void VECTOR_HELPER(vmxnor_mm)(CPURISCVState *env, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, i, vlmax;
+    uint32_t tmp;
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    vl = env->vfp.vl;
+    if (env->vfp.vstart >= vl) {
+        /* No element is processed, but vstart must still be reset. */
+        env->vfp.vstart = 0;
+        return;
+    }
+
+    for (i = 0; i < vlmax; i++) {
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            tmp = vector_mask_reg(env, rs1, width, lmul, i) ^
+                    vector_mask_reg(env, rs2, width, lmul, i);
+            /* Only the low bit of the complement is meaningful. */
+            vector_mask_result(env, rd, width, lmul, i, ~tmp & 0x1);
+        } else {
+            vector_mask_result(env, rd, width, lmul, i, 0);
+        }
+    }
+
+    env->vfp.vstart = 0;
+}
+
+/*
+ * vmpopc.m rd, vs2, v0.t # x[rd] = sum_i ( vs2[i].LSB && v0[i].LSB )
+ *
+ * Counts the elements below vl (and below vlmax) whose rs2 mask bit is set
+ * and which vector_elem_mask() reports as enabled, then stores the count
+ * in integer register rd.
+ * An illegal vtype or a non-zero vstart raises RISCV_EXCP_ILLEGAL_INST.
+ */
+void VECTOR_HELPER(vmpopc_m)(CPURISCVState *env, uint32_t vm,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, vlmax, limit;
+    int i;
+    uint32_t count = 0;
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    lmul = vector_get_lmul(env);
+    vl = env->vfp.vl;
+
+    if (env->vfp.vstart != 0) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    /* Only elements below both vl and vlmax take part in the count. */
+    limit = (vl < vlmax) ? vl : vlmax;
+    for (i = 0; i < limit; i++) {
+        if (vector_mask_reg(env, rs2, width, lmul, i) &&
+            vector_elem_mask(env, vm, width, lmul, i)) {
+            count++;
+        }
+    }
+
+    /* x0 is hard-wired to zero, so its backing slot must not be written. */
+    if (rd != 0) {
+        env->gpr[rd] = count;
+    }
+    env->vfp.vstart = 0;
+}
+
+/*
+ * vmfirst.m rd, vs2, vm
+ *
+ * Stores in integer register rd the index of the lowest element below vl
+ * whose rs2 mask bit is set and which vector_elem_mask() reports as
+ * enabled, or -1 when there is no such element.
+ * An illegal vtype or a non-zero vstart raises RISCV_EXCP_ILLEGAL_INST.
+ */
+void VECTOR_HELPER(vmfirst_m)(CPURISCVState *env, uint32_t vm,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, vlmax, limit;
+    int i;
+    int first = -1;
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    lmul = vector_get_lmul(env);
+    vl = env->vfp.vl;
+
+    if (env->vfp.vstart != 0) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    /*
+     * Start from "not found" so that -1 is reported even when vl equals
+     * vlmax, i.e. when there are no tail elements to iterate over.
+     */
+    limit = (vl < vlmax) ? vl : vlmax;
+    for (i = 0; i < limit; i++) {
+        if (vector_mask_reg(env, rs2, width, lmul, i) &&
+            vector_elem_mask(env, vm, width, lmul, i)) {
+            first = i;
+            break;
+        }
+    }
+
+    /* x0 is hard-wired to zero, so its backing slot must not be written. */
+    if (rd != 0) {
+        env->gpr[rd] = first;
+    }
+    env->vfp.vstart = 0;
+}
+
+/*
+ * vmsbf.m vd, vs2, vm # set-before-first mask bit
+ *
+ * Walks the elements in index order.  Among the elements that
+ * vector_elem_mask() reports as enabled, those ahead of the first one
+ * whose rs2 mask bit is set get result 1; that first element and every
+ * later enabled element get result 0.  Elements that are not enabled are
+ * skipped, and indices in [vl, vlmax) get result 0.
+ * An illegal vtype or a non-zero vstart raises RISCV_EXCP_ILLEGAL_INST.
+ */
+void VECTOR_HELPER(vmsbf_m)(CPURISCVState *env, uint32_t vm,
+    uint32_t rs2, uint32_t rd)
+{
+    bool found = false;
+    int sew, group, active_len, total;
+    int idx;
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    group = vector_get_lmul(env);
+    active_len = env->vfp.vl;
+
+    if (env->vfp.vstart != 0) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    sew = vector_get_width(env);
+    total = vector_get_vlmax(env);
+
+    for (idx = 0; idx < total; idx++) {
+        uint32_t bit;
+
+        if (idx >= active_len) {
+            vector_mask_result(env, rd, sew, group, idx, 0);
+            continue;
+        }
+        if (!vector_elem_mask(env, vm, sew, group, idx)) {
+            continue;
+        }
+        if (found) {
+            bit = 0;
+        } else {
+            /* The first set source bit itself is excluded from the run. */
+            found = vector_mask_reg(env, rs2, sew, group, idx);
+            bit = !found;
+        }
+        vector_mask_result(env, rd, sew, group, idx, bit);
+    }
+    env->vfp.vstart = 0;
+}
+
+/*
+ * vmsif.m vd, vs2, vm # set-including-first mask bit
+ *
+ * Walks the elements in index order.  Among the elements that
+ * vector_elem_mask() reports as enabled, those up to and including the
+ * first one whose rs2 mask bit is set get result 1; every later enabled
+ * element gets result 0.  Elements that are not enabled are skipped, and
+ * indices in [vl, vlmax) get result 0.
+ * An illegal vtype or a non-zero vstart raises RISCV_EXCP_ILLEGAL_INST.
+ */
+void VECTOR_HELPER(vmsif_m)(CPURISCVState *env, uint32_t vm,
+    uint32_t rs2, uint32_t rd)
+{
+    bool found = false;
+    int sew, group, active_len, total;
+    int idx;
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    group = vector_get_lmul(env);
+    active_len = env->vfp.vl;
+
+    if (env->vfp.vstart != 0) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    sew = vector_get_width(env);
+    total = vector_get_vlmax(env);
+
+    for (idx = 0; idx < total; idx++) {
+        uint32_t bit;
+
+        if (idx >= active_len) {
+            vector_mask_result(env, rd, sew, group, idx, 0);
+            continue;
+        }
+        if (!vector_elem_mask(env, vm, sew, group, idx)) {
+            continue;
+        }
+        if (found) {
+            bit = 0;
+        } else {
+            /* The first set source bit still belongs to the run of ones. */
+            found = vector_mask_reg(env, rs2, sew, group, idx);
+            bit = 1;
+        }
+        vector_mask_result(env, rd, sew, group, idx, bit);
+    }
+    env->vfp.vstart = 0;
+}
+
+/*
+ * vmsof.m vd, vs2, vm # set-only-first mask bit
+ *
+ * Walks the elements in index order.  Among the elements that
+ * vector_elem_mask() reports as enabled, only the first one whose rs2
+ * mask bit is set gets result 1; every other enabled element gets
+ * result 0.  Elements that are not enabled are skipped, and indices in
+ * [vl, vlmax) get result 0.
+ * An illegal vtype or a non-zero vstart raises RISCV_EXCP_ILLEGAL_INST.
+ */
+void VECTOR_HELPER(vmsof_m)(CPURISCVState *env, uint32_t vm,
+    uint32_t rs2, uint32_t rd)
+{
+    bool found = false;
+    int sew, group, active_len, total;
+    int idx;
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    group = vector_get_lmul(env);
+    active_len = env->vfp.vl;
+
+    if (env->vfp.vstart != 0) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    sew = vector_get_width(env);
+    total = vector_get_vlmax(env);
+
+    for (idx = 0; idx < total; idx++) {
+        uint32_t bit;
+
+        if (idx >= active_len) {
+            vector_mask_result(env, rd, sew, group, idx, 0);
+            continue;
+        }
+        if (!vector_elem_mask(env, vm, sew, group, idx)) {
+            continue;
+        }
+        if (found) {
+            bit = 0;
+        } else {
+            /* Result mirrors the source bit until the first hit is seen. */
+            found = vector_mask_reg(env, rs2, sew, group, idx);
+            bit = found;
+        }
+        vector_mask_result(env, rd, sew, group, idx, bit);
+    }
+    env->vfp.vstart = 0;
+}
+
+/*
+ * viota.m v4, v2, v0.t
+ *
+ * Keeps a running count of set rs2 mask bits.  Every element below vl that
+ * vector_elem_mask() reports as enabled receives the count accumulated
+ * before it, after which the count grows by one if that element's rs2
+ * mask bit is set.  Elements that are not enabled are skipped; indices in
+ * [vl, vlmax) are handed to vector_tail_common().
+ * An illegal vtype, a register overlap reported by the overlap helpers, a
+ * non-zero vstart or an unsupported element width raises
+ * RISCV_EXCP_ILLEGAL_INST.
+ */
+void VECTOR_HELPER(viota_m)(CPURISCVState *env, uint32_t vm, uint32_t rs2,
+    uint32_t rd)
+{
+    uint32_t running = 0;
+    int sew, group, active_len, total;
+    int idx, slot, vreg;
+
+    group = vector_get_lmul(env);
+    active_len = env->vfp.vl;
+    if (vector_vtype_ill(env)
+        || vector_overlap_vm_force(vm, rd)
+        || vector_overlap_dstgp_srcgp(rd, group, rs2, 1)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    if (env->vfp.vstart != 0) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    sew = vector_get_width(env);
+    total = vector_get_vlmax(env);
+
+    for (idx = 0; idx < total; idx++) {
+        bool enabled = false;
+
+        /* Locate the destination register and the slot inside it. */
+        vreg = rd + (idx / (VLEN / sew));
+        slot = idx % (VLEN / sew);
+
+        if (idx >= active_len) {
+            vector_tail_common(env, vreg, slot, sew);
+            continue;
+        }
+
+        /* Reject unsupported widths before looking at the mask. */
+        switch (sew) {
+        case 8:
+        case 16:
+        case 32:
+        case 64:
+            enabled = vector_elem_mask(env, vm, sew, group, idx);
+            break;
+        default:
+            riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+            return;
+        }
+        if (!enabled) {
+            continue;
+        }
+
+        /* Store the count first, then account for this element's bit. */
+        if (sew == 8) {
+            env->vfp.vreg[vreg].u8[slot] = running;
+        } else if (sew == 16) {
+            env->vfp.vreg[vreg].u16[slot] = running;
+        } else if (sew == 32) {
+            env->vfp.vreg[vreg].u32[slot] = running;
+        } else {
+            env->vfp.vreg[vreg].u64[slot] = running;
+        }
+        if (vector_mask_reg(env, rs2, sew, group, idx)) {
+            running++;
+        }
+    }
+    env->vfp.vstart = 0;
+}
+
+/*
+ * vid.v vd, vm # Write element ID to destination.
+ *
+ * Every element in [vstart, vl) that vector_elem_mask() reports as enabled
+ * receives its own index.  Elements below vstart and elements that are
+ * not enabled are skipped; indices in [vl, vlmax) are handed to
+ * vector_tail_common().
+ * An illegal vtype, an overlap reported by vector_overlap_vm_common() or
+ * an unsupported element width raises RISCV_EXCP_ILLEGAL_INST.
+ */
+void VECTOR_HELPER(vid_v)(CPURISCVState *env, uint32_t vm, uint32_t rd)
+{
+    int sew, group, active_len, total;
+    int idx, slot, vreg;
+
+    group = vector_get_lmul(env);
+    active_len = env->vfp.vl;
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(group, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, group, rd, false);
+    sew = vector_get_width(env);
+    total = vector_get_vlmax(env);
+
+    for (idx = 0; idx < total; idx++) {
+        bool enabled = false;
+
+        /* Locate the destination register and the slot inside it. */
+        vreg = rd + (idx / (VLEN / sew));
+        slot = idx % (VLEN / sew);
+
+        if (idx < env->vfp.vstart) {
+            continue;
+        }
+        if (idx >= active_len) {
+            vector_tail_common(env, vreg, slot, sew);
+            continue;
+        }
+
+        /* Reject unsupported widths before looking at the mask. */
+        switch (sew) {
+        case 8:
+        case 16:
+        case 32:
+        case 64:
+            enabled = vector_elem_mask(env, vm, sew, group, idx);
+            break;
+        default:
+            riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+            return;
+        }
+        if (!enabled) {
+            continue;
+        }
+
+        if (sew == 8) {
+            env->vfp.vreg[vreg].u8[slot] = idx;
+        } else if (sew == 16) {
+            env->vfp.vreg[vreg].u16[slot] = idx;
+        } else if (sew == 32) {
+            env->vfp.vreg[vreg].u32[slot] = idx;
+        } else {
+            env->vfp.vreg[vreg].u64[slot] = idx;
+        }
+    }
+    env->vfp.vstart = 0;
+}