@@ -307,5 +307,42 @@ DEF_HELPER_5(vector_vnclip_vv, void, env, i32, i32, i32, i32)
DEF_HELPER_5(vector_vnclip_vx, void, env, i32, i32, i32, i32)
DEF_HELPER_5(vector_vnclip_vi, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfadd_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfadd_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfsub_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfsub_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfrsub_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwadd_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwadd_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwadd_wv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwadd_wf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwsub_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwsub_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwsub_wv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwsub_wf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmul_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmul_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfdiv_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfdiv_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfrdiv_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwmul_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfwmul_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmacc_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmacc_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfnmacc_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfnmacc_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmsac_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmsac_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfnmsac_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfnmsac_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmadd_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmadd_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfnmadd_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfnmadd_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmsub_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfmsub_vf, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfnmsub_vv, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vfnmsub_vf, void, env, i32, i32, i32, i32)
+
DEF_HELPER_4(vector_vsetvli, void, env, i32, i32, i32)
DEF_HELPER_4(vector_vsetvl, void, env, i32, i32, i32)
@@ -447,5 +447,42 @@ vnclip_vv 101111 . ..... ..... 000 ..... 1010111 @r_vm
vnclip_vx 101111 . ..... ..... 100 ..... 1010111 @r_vm
vnclip_vi 101111 . ..... ..... 011 ..... 1010111 @r_vm
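+# Vector single-width and widening floating-point arithmetic:
+# funct3 001 selects OPFVV (vector-vector), 101 selects OPFVF (vector-scalar).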
+vfadd_vv 000000 . ..... ..... 001 ..... 1010111 @r_vm
+vfadd_vf 000000 . ..... ..... 101 ..... 1010111 @r_vm
+vfsub_vv 000010 . ..... ..... 001 ..... 1010111 @r_vm
+vfsub_vf 000010 . ..... ..... 101 ..... 1010111 @r_vm
+vfrsub_vf 100111 . ..... ..... 101 ..... 1010111 @r_vm
+vfwadd_vv 110000 . ..... ..... 001 ..... 1010111 @r_vm
+vfwadd_vf 110000 . ..... ..... 101 ..... 1010111 @r_vm
+vfwadd_wv 110100 . ..... ..... 001 ..... 1010111 @r_vm
+vfwadd_wf 110100 . ..... ..... 101 ..... 1010111 @r_vm
+vfwsub_vv 110010 . ..... ..... 001 ..... 1010111 @r_vm
+vfwsub_vf 110010 . ..... ..... 101 ..... 1010111 @r_vm
+vfwsub_wv 110110 . ..... ..... 001 ..... 1010111 @r_vm
+vfwsub_wf 110110 . ..... ..... 101 ..... 1010111 @r_vm
+vfmul_vv 100100 . ..... ..... 001 ..... 1010111 @r_vm
+vfmul_vf 100100 . ..... ..... 101 ..... 1010111 @r_vm
+vfdiv_vv 100000 . ..... ..... 001 ..... 1010111 @r_vm
+vfdiv_vf 100000 . ..... ..... 101 ..... 1010111 @r_vm
+vfrdiv_vf 100001 . ..... ..... 101 ..... 1010111 @r_vm
+vfwmul_vv 111000 . ..... ..... 001 ..... 1010111 @r_vm
+vfwmul_vf 111000 . ..... ..... 101 ..... 1010111 @r_vm
+vfmacc_vv 101100 . ..... ..... 001 ..... 1010111 @r_vm
+vfmacc_vf 101100 . ..... ..... 101 ..... 1010111 @r_vm
+vfnmacc_vv 101101 . ..... ..... 001 ..... 1010111 @r_vm
+vfnmacc_vf 101101 . ..... ..... 101 ..... 1010111 @r_vm
+vfmsac_vv 101110 . ..... ..... 001 ..... 1010111 @r_vm
+vfmsac_vf 101110 . ..... ..... 101 ..... 1010111 @r_vm
+vfnmsac_vv 101111 . ..... ..... 001 ..... 1010111 @r_vm
+vfnmsac_vf 101111 . ..... ..... 101 ..... 1010111 @r_vm
+vfmadd_vv 101000 . ..... ..... 001 ..... 1010111 @r_vm
+vfmadd_vf 101000 . ..... ..... 101 ..... 1010111 @r_vm
+vfnmadd_vv 101001 . ..... ..... 001 ..... 1010111 @r_vm
+vfnmadd_vf 101001 . ..... ..... 101 ..... 1010111 @r_vm
+vfmsub_vv 101010 . ..... ..... 001 ..... 1010111 @r_vm
+vfmsub_vf 101010 . ..... ..... 101 ..... 1010111 @r_vm
+vfnmsub_vv 101011 . ..... ..... 001 ..... 1010111 @r_vm
+vfnmsub_vf 101011 . ..... ..... 101 ..... 1010111 @r_vm
+
vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
@@ -336,5 +336,42 @@ GEN_VECTOR_R_VM(vnclip_vv)
GEN_VECTOR_R_VM(vnclip_vx)
GEN_VECTOR_R_VM(vnclip_vi)
+GEN_VECTOR_R_VM(vfadd_vv)
+GEN_VECTOR_R_VM(vfadd_vf)
+GEN_VECTOR_R_VM(vfsub_vv)
+GEN_VECTOR_R_VM(vfsub_vf)
+GEN_VECTOR_R_VM(vfrsub_vf)
+GEN_VECTOR_R_VM(vfwadd_vv)
+GEN_VECTOR_R_VM(vfwadd_vf)
+GEN_VECTOR_R_VM(vfwadd_wv)
+GEN_VECTOR_R_VM(vfwadd_wf)
+GEN_VECTOR_R_VM(vfwsub_vv)
+GEN_VECTOR_R_VM(vfwsub_vf)
+GEN_VECTOR_R_VM(vfwsub_wv)
+GEN_VECTOR_R_VM(vfwsub_wf)
+GEN_VECTOR_R_VM(vfmul_vv)
+GEN_VECTOR_R_VM(vfmul_vf)
+GEN_VECTOR_R_VM(vfdiv_vv)
+GEN_VECTOR_R_VM(vfdiv_vf)
+GEN_VECTOR_R_VM(vfrdiv_vf)
+GEN_VECTOR_R_VM(vfwmul_vv)
+GEN_VECTOR_R_VM(vfwmul_vf)
+GEN_VECTOR_R_VM(vfmacc_vv)
+GEN_VECTOR_R_VM(vfmacc_vf)
+GEN_VECTOR_R_VM(vfnmacc_vv)
+GEN_VECTOR_R_VM(vfnmacc_vf)
+GEN_VECTOR_R_VM(vfmsac_vv)
+GEN_VECTOR_R_VM(vfmsac_vf)
+GEN_VECTOR_R_VM(vfnmsac_vv)
+GEN_VECTOR_R_VM(vfnmsac_vf)
+GEN_VECTOR_R_VM(vfmadd_vv)
+GEN_VECTOR_R_VM(vfmadd_vf)
+GEN_VECTOR_R_VM(vfnmadd_vv)
+GEN_VECTOR_R_VM(vfnmadd_vf)
+GEN_VECTOR_R_VM(vfmsub_vv)
+GEN_VECTOR_R_VM(vfmsub_vf)
+GEN_VECTOR_R_VM(vfnmsub_vv)
+GEN_VECTOR_R_VM(vfnmsub_vf)
+
GEN_VECTOR_R2_ZIMM(vsetvli)
GEN_VECTOR_R(vsetvl)
@@ -21,6 +21,7 @@
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "exec/cpu_ldst.h"
+#include "fpu/softfloat.h"
#include <math.h>
#define VECTOR_HELPER(name) HELPER(glue(vector_, name))
@@ -1125,6 +1126,41 @@ static void vector_tail_narrow(CPURISCVState *env, int vreg, int index,
}
}
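+
+/*
+ * Zero one tail element of a vector register. The fcommon variant
+ * handles standard 16/32/64-bit SEW elements; the fwiden variant
+ * below handles the double-width destination elements written by
+ * widening instructions (SEW 16->32 and 32->64).
+ */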
+static void vector_tail_fcommon(CPURISCVState *env, int vreg, int index,
+ int width)
+{
+ switch (width) {
+ case 16:
+ env->vfp.vreg[vreg].u16[index] = 0;
+ break;
+ case 32:
+ env->vfp.vreg[vreg].u32[index] = 0;
+ break;
+ case 64:
+ env->vfp.vreg[vreg].u64[index] = 0;
+ break;
+ default:
+ helper_raise_exception(env, RISCV_EXCP_ILLEGAL_INST);
+ return;
+ }
+}
+
+static void vector_tail_fwiden(CPURISCVState *env, int vreg, int index,
+ int width)
+{
+ switch (width) {
+ case 16:
+ env->vfp.vreg[vreg].u32[index] = 0;
+ break;
+ case 32:
+ env->vfp.vreg[vreg].u64[index] = 0;
+ break;
+ default:
+ helper_raise_exception(env, RISCV_EXCP_ILLEGAL_INST);
+ return;
+ }
+}
+
static inline int vector_get_carry(CPURISCVState *env, int width, int lmul,
int index)
{
@@ -17114,3 +17150,2612 @@ void VECTOR_HELPER(vnclip_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
env->vfp.vstart = 0;
return;
}
+
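+/*
+ * Common structure of the floating-point helpers below: elements
+ * before vstart are left untouched, active elements below vl are
+ * computed when unmasked, and tail elements past vl are zeroed.
+ * dest/src1/src2 select the physical register within the LMUL group
+ * and j is the element offset inside that register.
+ */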
+/* vfadd.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vfadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_add(
+ env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_add(
+ env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_add(
+ env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfadd.vf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vfadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_add(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_add(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_add(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfsub.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vfsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_sub(
+ env->vfp.vreg[src2].f16[j],
+ env->vfp.vreg[src1].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_sub(
+ env->vfp.vreg[src2].f32[j],
+ env->vfp.vreg[src1].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_sub(
+ env->vfp.vreg[src2].f64[j],
+ env->vfp.vreg[src1].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfsub.vf vd, vs2, rs1, vm # Vector-scalar vd[i] = vs2[i] - f[rs1] */
+void VECTOR_HELPER(vfsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_sub(
+ env->vfp.vreg[src2].f16[j],
+ env->fpr[rs1],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_sub(
+ env->vfp.vreg[src2].f32[j],
+ env->fpr[rs1],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_sub(
+ env->vfp.vreg[src2].f64[j],
+ env->fpr[rs1],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfrsub.vf vd, vs2, rs1, vm # Scalar-vector vd[i] = f[rs1] - vs2[i] */
+void VECTOR_HELPER(vfrsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_sub(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_sub(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_sub(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfwadd.vv vd, vs2, vs1, vm # vector-vector */
+void VECTOR_HELPER(vfwadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
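+    /* j indexes the single-width sources, k the double-width result */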
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_add(
+ float16_to_float32(env->vfp.vreg[src2].f16[j], true,
+ &env->fp_status),
+ float16_to_float32(env->vfp.vreg[src1].f16[j], true,
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_add(
+ float32_to_float64(env->vfp.vreg[src2].f32[j],
+ &env->fp_status),
+ float32_to_float64(env->vfp.vreg[src1].f32[j],
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fwiden(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfwadd.vf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vfwadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_add(
+ float16_to_float32(env->vfp.vreg[src2].f16[j], true,
+ &env->fp_status),
+ float16_to_float32(env->fpr[rs1], true,
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_add(
+ float32_to_float64(env->vfp.vreg[src2].f32[j],
+ &env->fp_status),
+ float32_to_float64(env->fpr[rs1], &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fwiden(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
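+/*
+ * In the .wv and .wf forms, vs2 already holds double-width (2*SEW)
+ * elements, so only the vs1 vector or the f[rs1] scalar operand is
+ * converted up before the operation.
+ */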
+/* vfwadd.wv vd, vs2, vs1, vm # vector-vector */
+void VECTOR_HELPER(vfwadd_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_add(
+ env->vfp.vreg[src2].f32[k],
+ float16_to_float32(env->vfp.vreg[src1].f16[j], true,
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_add(
+ env->vfp.vreg[src2].f64[k],
+ float32_to_float64(env->vfp.vreg[src1].f32[j],
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+            vector_tail_fwiden(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfwadd.wf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vfwadd_wf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_add(
+ env->vfp.vreg[src2].f32[k],
+ float16_to_float32(env->fpr[rs1], true,
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_add(
+ env->vfp.vreg[src2].f64[k],
+ float32_to_float64(env->fpr[rs1], &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+            vector_tail_fwiden(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfwsub.vv vd, vs2, vs1, vm # vector-vector */
+void VECTOR_HELPER(vfwsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_sub(
+ float16_to_float32(env->vfp.vreg[src2].f16[j], true,
+ &env->fp_status),
+ float16_to_float32(env->vfp.vreg[src1].f16[j], true,
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_sub(
+ float32_to_float64(env->vfp.vreg[src2].f32[j],
+ &env->fp_status),
+ float32_to_float64(env->vfp.vreg[src1].f32[j],
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fwiden(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfwsub.vf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vfwsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_sub(
+ float16_to_float32(env->vfp.vreg[src2].f16[j], true,
+ &env->fp_status),
+ float16_to_float32(env->fpr[rs1], true,
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_sub(
+ float32_to_float64(env->vfp.vreg[src2].f32[j],
+ &env->fp_status),
+ float32_to_float64(env->fpr[rs1], &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fwiden(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfwsub.wv vd, vs2, vs1, vm # vector-vector */
+void VECTOR_HELPER(vfwsub_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_sub(
+ env->vfp.vreg[src2].f32[k],
+ float16_to_float32(env->vfp.vreg[src1].f16[j], true,
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_sub(
+ env->vfp.vreg[src2].f64[k],
+ float32_to_float64(env->vfp.vreg[src1].f32[j],
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fwiden(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfwsub.wf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vfwsub_wf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, true);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / (2 * width)));
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_sub(
+ env->vfp.vreg[src2].f32[k],
+ float16_to_float32(env->fpr[rs1], true,
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_sub(
+ env->vfp.vreg[src2].f64[k],
+ float32_to_float64(env->fpr[rs1], &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fwiden(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfmul.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vfmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_mul(
+ env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_mul(
+ env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_mul(
+ env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfmul.vf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vfmul_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_mul(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_mul(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_mul(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfdiv.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vfdiv_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_div(
+ env->vfp.vreg[src2].f16[j],
+ env->vfp.vreg[src1].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_div(
+ env->vfp.vreg[src2].f32[j],
+ env->vfp.vreg[src1].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_div(
+ env->vfp.vreg[src2].f64[j],
+ env->vfp.vreg[src1].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfdiv.vf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vfdiv_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_div(
+ env->vfp.vreg[src2].f16[j],
+ env->fpr[rs1],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_div(
+ env->vfp.vreg[src2].f32[j],
+ env->fpr[rs1],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_div(
+ env->vfp.vreg[src2].f64[j],
+ env->fpr[rs1],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfrdiv.vf vd, vs2, rs1, vm # scalar-vector, vd[i] = f[rs1]/vs2[i] */
+void VECTOR_HELPER(vfrdiv_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_div(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f16[j],
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_div(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f32[j],
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_div(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f64[j],
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfwmul.vv vd, vs2, vs1, vm # vector-vector */
+void VECTOR_HELPER(vfwmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_mul(
+ float16_to_float32(env->vfp.vreg[src2].f16[j], true,
+ &env->fp_status),
+ float16_to_float32(env->vfp.vreg[src1].f16[j], true,
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_mul(
+ float32_to_float64(env->vfp.vreg[src2].f32[j],
+ &env->fp_status),
+ float32_to_float64(env->vfp.vreg[src1].f32[j],
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fwiden(env, dest, k, width);
+ }
+ }
+    env->vfp.vstart = 0;
+    return;
+}
+
+/* vfwmul.vf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vfwmul_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, k, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env)
+ || vector_overlap_vm_force(vm, rd)
+ || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, true);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / (2 * width)));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ k = i % (VLEN / (2 * width));
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[k] = float32_mul(
+ float16_to_float32(env->vfp.vreg[src2].f16[j], true,
+ &env->fp_status),
+ float16_to_float32(env->fpr[rs1], true,
+ &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[k] = float64_mul(
+ float32_to_float64(env->vfp.vreg[src2].f32[j],
+ &env->fp_status),
+ float32_to_float64(env->fpr[rs1], &env->fp_status),
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fwiden(env, dest, k, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
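+/*
+ * The fused multiply-add helpers below all map onto softfloat's
+ * float*_muladd(): vfmacc passes no flags, vfmsac negates the addend
+ * (float_muladd_negate_c), vfnmsac negates the product
+ * (float_muladd_negate_product), and vfnmacc negates both. The
+ * vfmadd/vfmsub/vfnmadd/vfnmsub forms use the same flags but multiply
+ * by vd and take the addend from vs2.
+ */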
+/* vfmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i] */
+void VECTOR_HELPER(vfmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ env->vfp.vreg[dest].f16[j],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ env->vfp.vreg[dest].f32[j],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ env->vfp.vreg[dest].f64[j],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfmacc.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) + vd[i] */
+void VECTOR_HELPER(vfmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f16[j],
+ env->vfp.vreg[dest].f16[j],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f32[j],
+ env->vfp.vreg[dest].f32[j],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f64[j],
+ env->vfp.vreg[dest].f64[j],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfnmacc.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) - vd[i] */
+void VECTOR_HELPER(vfnmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ env->vfp.vreg[dest].f16[j],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ env->vfp.vreg[dest].f32[j],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ env->vfp.vreg[dest].f64[j],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfnmacc.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) - vd[i] */
+void VECTOR_HELPER(vfnmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f16[j],
+ env->vfp.vreg[dest].f16[j],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f32[j],
+ env->vfp.vreg[dest].f32[j],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f64[j],
+ env->vfp.vreg[dest].f64[j],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfmsac.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) - vd[i] */
+void VECTOR_HELPER(vfmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ env->vfp.vreg[dest].f16[j],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ env->vfp.vreg[dest].f32[j],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ env->vfp.vreg[dest].f64[j],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfmsac.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) - vd[i] */
+void VECTOR_HELPER(vfmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f16[j],
+ env->vfp.vreg[dest].f16[j],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f32[j],
+ env->vfp.vreg[dest].f32[j],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f64[j],
+ env->vfp.vreg[dest].f64[j],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+    env->vfp.vstart = 0;
+    return;
+}
+
+/* vfnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) + vd[i] */
+void VECTOR_HELPER(vfnmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ env->vfp.vreg[dest].f16[j],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ env->vfp.vreg[dest].f32[j],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ env->vfp.vreg[dest].f64[j],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfnmsac.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) + vd[i] */
+void VECTOR_HELPER(vfnmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f16[j],
+ env->vfp.vreg[dest].f16[j],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f32[j],
+ env->vfp.vreg[dest].f32[j],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[src2].f64[j],
+ env->vfp.vreg[dest].f64[j],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfmadd.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vd[i]) + vs2[i] */
+void VECTOR_HELPER(vfmadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[dest].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[dest].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[dest].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfmadd.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vd[i]) + vs2[i] */
+void VECTOR_HELPER(vfmadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
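+ /* The scalar operand f[rs1] is applied to every active element. */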
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[dest].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[dest].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[dest].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ 0,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfnmadd.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vd[i]) - vs2[i] */
+void VECTOR_HELPER(vfnmadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
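+ /* negate_c | negate_product computes -(a * b) - c. */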
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[dest].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[dest].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[dest].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfnmadd.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vd[i]) - vs2[i] */
+void VECTOR_HELPER(vfnmadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[dest].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[dest].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[dest].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ float_muladd_negate_c |
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfmsub.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vd[i]) - vs2[i] */
+void VECTOR_HELPER(vfmsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
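+ /* float_muladd_negate_c negates only the addend: (a * b) - c. */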
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[dest].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[dest].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[dest].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfmsub.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vd[i]) - vs2[i] */
+void VECTOR_HELPER(vfmsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[dest].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[dest].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[dest].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ float_muladd_negate_c,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfnmsub.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vd[i]) + vs2[i] */
+void VECTOR_HELPER(vfnmsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src1, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs1, false);
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ src1 = rs1 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->vfp.vreg[src1].f16[j],
+ env->vfp.vreg[dest].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->vfp.vreg[src1].f32[j],
+ env->vfp.vreg[dest].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->vfp.vreg[src1].f64[j],
+ env->vfp.vreg[dest].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+
+/* vfnmsub.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vd[i]) + vs2[i] */
+void VECTOR_HELPER(vfnmsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+ uint32_t rs2, uint32_t rd)
+{
+ int width, lmul, vl, vlmax;
+ int i, j, dest, src2;
+
+ lmul = vector_get_lmul(env);
+ vl = env->vfp.vl;
+
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+
+ vector_lmul_check_reg(env, lmul, rs2, false);
+ vector_lmul_check_reg(env, lmul, rd, false);
+
+ if (env->vfp.vstart >= vl) {
+ return;
+ }
+
+ width = vector_get_width(env);
+ vlmax = vector_get_vlmax(env);
+
+ for (i = 0; i < vlmax; i++) {
+ dest = rd + (i / (VLEN / width));
+ src2 = rs2 + (i / (VLEN / width));
+ j = i % (VLEN / width);
+ if (i < env->vfp.vstart) {
+ continue;
+ } else if (i < vl) {
+ switch (width) {
+ case 16:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f16[j] = float16_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[dest].f16[j],
+ env->vfp.vreg[src2].f16[j],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 32:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[dest].f32[j],
+ env->vfp.vreg[src2].f32[j],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ case 64:
+ if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f64[j] = float64_muladd(
+ env->fpr[rs1],
+ env->vfp.vreg[dest].f64[j],
+ env->vfp.vreg[src2].f64[j],
+ float_muladd_negate_product,
+ &env->fp_status);
+ }
+ break;
+ default:
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ return;
+ }
+ } else {
+ vector_tail_fcommon(env, dest, j, width);
+ }
+ }
+ env->vfp.vstart = 0;
+ return;
+}
+