@@ -63,6 +63,7 @@ struct CPUMIPSFPUContext {
uint32_t fcr31_rw_bitmask;
uint32_t fcr31;
#define FCR31_FS 24
+#define FCR31_MAC2008 20
#define FCR31_ABS2008 19
#define FCR31_NAN2008 18
#define SET_FP_COND(num, env) do { ((env).fcr31) |= \
@@ -1357,7 +1357,7 @@ FLOAT_MINMAX(mina_d, 64, minnummag)
} \
}
-/* FMA based operations */
+/* FMA based operations (both unfused and fused) */
#define FLOAT_FMA(name, type) \
uint64_t helper_float_ ## name ## _d(CPUMIPSState *env, \
uint64_t fdt0, uint64_t fdt1, \
@@ -1392,33 +1392,52 @@ uint64_t helper_float_ ## name ## _ps(CPUMIPSState *env, \
UNFUSED_FMA(float32, fsth0, fsth1, fsth2, type); \
update_fcr31(env, GETPC()); \
return ((uint64_t)fsth0 << 32) | fst0; \
+} \
+uint64_t helper_float_ ## name ## f_d(CPUMIPSState *env, \
+ uint64_t fdt0, uint64_t fdt1, \
+ uint64_t fdt2) \
+{ \
+ fdt0 = float64_muladd(fdt0, fdt1, fdt2, type, \
+ &env->active_fpu.fp_status); \
+ update_fcr31(env, GETPC()); \
+ return fdt0; \
+} \
+ \
+uint32_t helper_float_ ## name ## f_s(CPUMIPSState *env, \
+ uint32_t fst0, uint32_t fst1, \
+ uint32_t fst2) \
+{ \
+ fst0 = float32_muladd(fst0, fst1, fst2, type, \
+ &env->active_fpu.fp_status); \
+ update_fcr31(env, GETPC()); \
+ return fst0; \
+} \
+ \
+uint64_t helper_float_ ## name ## f_ps(CPUMIPSState *env, \
+ uint64_t fdt0, uint64_t fdt1, \
+ uint64_t fdt2) \
+{ \
+ uint32_t fst0 = fdt0 & 0XFFFFFFFF; \
+ uint32_t fsth0 = fdt0 >> 32; \
+ uint32_t fst1 = fdt1 & 0XFFFFFFFF; \
+ uint32_t fsth1 = fdt1 >> 32; \
+ uint32_t fst2 = fdt2 & 0XFFFFFFFF; \
+ uint32_t fsth2 = fdt2 >> 32; \
+ \
+ fst0 = float32_muladd(fst0, fst1, fst2, type, \
+ &env->active_fpu.fp_status); \
+ fsth0 = float32_muladd(fsth0, fsth1, fsth2, type, \
+ &env->active_fpu.fp_status); \
+ update_fcr31(env, GETPC()); \
+ return ((uint64_t)fsth0 << 32) | fst0; \
}
+
FLOAT_FMA(madd, 0)
FLOAT_FMA(msub, float_muladd_negate_c)
FLOAT_FMA(nmadd, float_muladd_negate_result)
FLOAT_FMA(nmsub, float_muladd_negate_result | float_muladd_negate_c)
#undef FLOAT_FMA
-#define FLOAT_FMADDSUB(name, bits, muladd_arg) \
-uint ## bits ## _t helper_float_ ## name(CPUMIPSState *env, \
- uint ## bits ## _t fs, \
- uint ## bits ## _t ft, \
- uint ## bits ## _t fd) \
-{ \
- uint ## bits ## _t fdret; \
- \
- fdret = float ## bits ## _muladd(fs, ft, fd, muladd_arg, \
- &env->active_fpu.fp_status); \
- update_fcr31(env, GETPC()); \
- return fdret; \
-}
-
-FLOAT_FMADDSUB(maddf_s, 32, 0)
-FLOAT_FMADDSUB(maddf_d, 64, 0)
-FLOAT_FMADDSUB(msubf_s, 32, float_muladd_negate_product)
-FLOAT_FMADDSUB(msubf_d, 64, float_muladd_negate_product)
-#undef FLOAT_FMADDSUB
-
/* compare operations */
#define FOP_COND_D(op, cond) \
void helper_cmp_d_ ## op(CPUMIPSState *env, uint64_t fdt0, \
@@ -232,13 +232,6 @@ DEF_HELPER_3(float_mulr_ps, i64, env, i64, i64)
DEF_HELPER_FLAGS_2(float_class_s, TCG_CALL_NO_RWG_SE, i32, env, i32)
DEF_HELPER_FLAGS_2(float_class_d, TCG_CALL_NO_RWG_SE, i64, env, i64)
-#define FOP_PROTO(op) \
-DEF_HELPER_4(float_ ## op ## _s, i32, env, i32, i32, i32) \
-DEF_HELPER_4(float_ ## op ## _d, i64, env, i64, i64, i64)
-FOP_PROTO(maddf)
-FOP_PROTO(msubf)
-#undef FOP_PROTO
-
#define FOP_PROTO(op) \
DEF_HELPER_3(float_ ## op ## _s, i32, env, i32, i32) \
DEF_HELPER_3(float_ ## op ## _d, i64, env, i64, i64)
@@ -305,7 +298,10 @@ FOP_PROTO(rsqrt2)
#define FOP_PROTO(op) \
DEF_HELPER_4(float_ ## op ## _s, i32, env, i32, i32, i32) \
DEF_HELPER_4(float_ ## op ## _d, i64, env, i64, i64, i64) \
-DEF_HELPER_4(float_ ## op ## _ps, i64, env, i64, i64, i64)
+DEF_HELPER_4(float_ ## op ## _ps, i64, env, i64, i64, i64) \
+DEF_HELPER_4(float_ ## op ## f_s, i32, env, i32, i32, i32) \
+DEF_HELPER_4(float_ ## op ## f_d, i64, env, i64, i64, i64) \
+DEF_HELPER_4(float_ ## op ## f_ps, i64, env, i64, i64, i64)
FOP_PROTO(madd)
FOP_PROTO(msub)
FOP_PROTO(nmadd)
@@ -2547,6 +2547,7 @@ typedef struct DisasContext {
bool mrp;
bool nan2008;
bool abs2008;
+ bool mac2008;
bool saar;
bool mi;
int gi;
@@ -12776,7 +12777,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
gen_load_fpr32(ctx, fp0, fs);
gen_load_fpr32(ctx, fp1, ft);
gen_load_fpr32(ctx, fp2, fr);
- gen_helper_float_madd_s(fp2, cpu_env, fp0, fp1, fp2);
+ if (ctx->mac2008) { /* FCSR.MAC2008 set: use the fused helper */
+ gen_helper_float_maddf_s(fp2, cpu_env, fp0, fp1, fp2);
+ } else { /* MAC2008 clear: keep the unfused helper */
+ gen_helper_float_madd_s(fp2, cpu_env, fp0, fp1, fp2);
+ }
tcg_temp_free_i32(fp0);
tcg_temp_free_i32(fp1);
gen_store_fpr32(ctx, fp2, fd);
@@ -12794,7 +12799,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
gen_load_fpr64(ctx, fp0, fs);
gen_load_fpr64(ctx, fp1, ft);
gen_load_fpr64(ctx, fp2, fr);
- gen_helper_float_madd_d(fp2, cpu_env, fp0, fp1, fp2);
+ if (ctx->mac2008) { /* FCSR.MAC2008 set: use the fused helper */
+ gen_helper_float_maddf_d(fp2, cpu_env, fp0, fp1, fp2);
+ } else { /* MAC2008 clear: keep the unfused helper */
+ gen_helper_float_madd_d(fp2, cpu_env, fp0, fp1, fp2);
+ }
tcg_temp_free_i64(fp0);
tcg_temp_free_i64(fp1);
gen_store_fpr64(ctx, fp2, fd);
@@ -12811,7 +12820,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
gen_load_fpr64(ctx, fp0, fs);
gen_load_fpr64(ctx, fp1, ft);
gen_load_fpr64(ctx, fp2, fr);
- gen_helper_float_madd_ps(fp2, cpu_env, fp0, fp1, fp2);
+ if (ctx->mac2008) { /* FCSR.MAC2008 set: use the fused helper */
+ gen_helper_float_maddf_ps(fp2, cpu_env, fp0, fp1, fp2);
+ } else { /* MAC2008 clear: keep the unfused helper */
+ gen_helper_float_madd_ps(fp2, cpu_env, fp0, fp1, fp2);
+ }
tcg_temp_free_i64(fp0);
tcg_temp_free_i64(fp1);
gen_store_fpr64(ctx, fp2, fd);
@@ -12828,7 +12841,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
gen_load_fpr32(ctx, fp0, fs);
gen_load_fpr32(ctx, fp1, ft);
gen_load_fpr32(ctx, fp2, fr);
- gen_helper_float_msub_s(fp2, cpu_env, fp0, fp1, fp2);
+ if (ctx->mac2008) { /* FCSR.MAC2008 set: use the fused helper */
+ gen_helper_float_msubf_s(fp2, cpu_env, fp0, fp1, fp2);
+ } else { /* MAC2008 clear: keep the unfused helper */
+ gen_helper_float_msub_s(fp2, cpu_env, fp0, fp1, fp2);
+ }
tcg_temp_free_i32(fp0);
tcg_temp_free_i32(fp1);
gen_store_fpr32(ctx, fp2, fd);
@@ -12846,7 +12863,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
gen_load_fpr64(ctx, fp0, fs);
gen_load_fpr64(ctx, fp1, ft);
gen_load_fpr64(ctx, fp2, fr);
- gen_helper_float_msub_d(fp2, cpu_env, fp0, fp1, fp2);
+ if (ctx->mac2008) { /* FCSR.MAC2008 set: use the fused helper */
+ gen_helper_float_msubf_d(fp2, cpu_env, fp0, fp1, fp2);
+ } else { /* MAC2008 clear: keep the unfused helper */
+ gen_helper_float_msub_d(fp2, cpu_env, fp0, fp1, fp2);
+ }
tcg_temp_free_i64(fp0);
tcg_temp_free_i64(fp1);
gen_store_fpr64(ctx, fp2, fd);
@@ -12863,7 +12884,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
gen_load_fpr64(ctx, fp0, fs);
gen_load_fpr64(ctx, fp1, ft);
gen_load_fpr64(ctx, fp2, fr);
- gen_helper_float_msub_ps(fp2, cpu_env, fp0, fp1, fp2);
+ if (ctx->mac2008) { /* FCSR.MAC2008 set: use the fused helper */
+ gen_helper_float_msubf_ps(fp2, cpu_env, fp0, fp1, fp2);
+ } else { /* MAC2008 clear: keep the unfused helper */
+ gen_helper_float_msub_ps(fp2, cpu_env, fp0, fp1, fp2);
+ }
tcg_temp_free_i64(fp0);
tcg_temp_free_i64(fp1);
gen_store_fpr64(ctx, fp2, fd);
@@ -12880,7 +12905,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
gen_load_fpr32(ctx, fp0, fs);
gen_load_fpr32(ctx, fp1, ft);
gen_load_fpr32(ctx, fp2, fr);
- gen_helper_float_nmadd_s(fp2, cpu_env, fp0, fp1, fp2);
+ if (ctx->mac2008) { /* FCSR.MAC2008 set: use the fused helper */
+ gen_helper_float_nmaddf_s(fp2, cpu_env, fp0, fp1, fp2);
+ } else { /* MAC2008 clear: keep the unfused helper */
+ gen_helper_float_nmadd_s(fp2, cpu_env, fp0, fp1, fp2);
+ }
tcg_temp_free_i32(fp0);
tcg_temp_free_i32(fp1);
gen_store_fpr32(ctx, fp2, fd);
@@ -12898,7 +12927,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
gen_load_fpr64(ctx, fp0, fs);
gen_load_fpr64(ctx, fp1, ft);
gen_load_fpr64(ctx, fp2, fr);
- gen_helper_float_nmadd_d(fp2, cpu_env, fp0, fp1, fp2);
+ if (ctx->mac2008) { /* FCSR.MAC2008 set: use the fused helper */
+ gen_helper_float_nmaddf_d(fp2, cpu_env, fp0, fp1, fp2);
+ } else { /* MAC2008 clear: keep the unfused helper */
+ gen_helper_float_nmadd_d(fp2, cpu_env, fp0, fp1, fp2);
+ }
tcg_temp_free_i64(fp0);
tcg_temp_free_i64(fp1);
gen_store_fpr64(ctx, fp2, fd);
@@ -12915,7 +12948,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
gen_load_fpr64(ctx, fp0, fs);
gen_load_fpr64(ctx, fp1, ft);
gen_load_fpr64(ctx, fp2, fr);
- gen_helper_float_nmadd_ps(fp2, cpu_env, fp0, fp1, fp2);
+ if (ctx->mac2008) { /* FCSR.MAC2008 set: use the fused helper */
+ gen_helper_float_nmaddf_ps(fp2, cpu_env, fp0, fp1, fp2);
+ } else { /* MAC2008 clear: keep the unfused helper */
+ gen_helper_float_nmadd_ps(fp2, cpu_env, fp0, fp1, fp2);
+ }
tcg_temp_free_i64(fp0);
tcg_temp_free_i64(fp1);
gen_store_fpr64(ctx, fp2, fd);
@@ -12932,7 +12969,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
gen_load_fpr32(ctx, fp0, fs);
gen_load_fpr32(ctx, fp1, ft);
gen_load_fpr32(ctx, fp2, fr);
- gen_helper_float_nmsub_s(fp2, cpu_env, fp0, fp1, fp2);
+ if (ctx->mac2008) { /* FCSR.MAC2008 set: use the fused helper */
+ gen_helper_float_nmsubf_s(fp2, cpu_env, fp0, fp1, fp2);
+ } else { /* MAC2008 clear: keep the unfused helper */
+ gen_helper_float_nmsub_s(fp2, cpu_env, fp0, fp1, fp2);
+ }
tcg_temp_free_i32(fp0);
tcg_temp_free_i32(fp1);
gen_store_fpr32(ctx, fp2, fd);
@@ -12950,7 +12991,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
gen_load_fpr64(ctx, fp0, fs);
gen_load_fpr64(ctx, fp1, ft);
gen_load_fpr64(ctx, fp2, fr);
- gen_helper_float_nmsub_d(fp2, cpu_env, fp0, fp1, fp2);
+ if (ctx->mac2008) { /* FCSR.MAC2008 set: use the fused helper */
+ gen_helper_float_nmsubf_d(fp2, cpu_env, fp0, fp1, fp2);
+ } else { /* MAC2008 clear: keep the unfused helper */
+ gen_helper_float_nmsub_d(fp2, cpu_env, fp0, fp1, fp2);
+ }
tcg_temp_free_i64(fp0);
tcg_temp_free_i64(fp1);
gen_store_fpr64(ctx, fp2, fd);
@@ -12967,7 +13012,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
gen_load_fpr64(ctx, fp0, fs);
gen_load_fpr64(ctx, fp1, ft);
gen_load_fpr64(ctx, fp2, fr);
- gen_helper_float_nmsub_ps(fp2, cpu_env, fp0, fp1, fp2);
+ if (ctx->mac2008) { /* FCSR.MAC2008 set: use the fused helper */
+ gen_helper_float_nmsubf_ps(fp2, cpu_env, fp0, fp1, fp2);
+ } else { /* MAC2008 clear: keep the unfused helper */
+ gen_helper_float_nmsub_ps(fp2, cpu_env, fp0, fp1, fp2);
+ }
tcg_temp_free_i64(fp0);
tcg_temp_free_i64(fp1);
gen_store_fpr64(ctx, fp2, fd);
@@ -30807,6 +30856,7 @@ static void mips_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
ctx->mrp = (env->CP0_Config5 >> CP0C5_MRP) & 1;
ctx->nan2008 = (env->active_fpu.fcr31 >> FCR31_NAN2008) & 1;
ctx->abs2008 = (env->active_fpu.fcr31 >> FCR31_ABS2008) & 1;
+ ctx->mac2008 = (env->active_fpu.fcr31 >> FCR31_MAC2008) & 1;
ctx->mi = (env->CP0_Config5 >> CP0C5_MI) & 1;
ctx->gi = (env->CP0_Config5 >> CP0C5_GI) & 3;
restore_cpu_state(env, ctx);
MAC2008 was introduced in MIPS Release 3 but removed in MIPS Release 5. However, some processors implement this feature: some Ingenic MCUs can configure the MAC2008 status at runtime, while the whole Loongson-64 family is MAC2008-only. The FCSR.MAC2008 bit indicates that the FMA family of instructions on these processors has fused behavior, similar to FMA in Release 6, so we can reuse the helpers for them. Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com> --- target/mips/cpu.h | 1 + target/mips/fpu_helper.c | 61 +++++++++++++++++++++------------ target/mips/helper.h | 12 +++---- target/mips/translate.c | 74 +++++++++++++++++++++++++++++++++------- 4 files changed, 107 insertions(+), 41 deletions(-)