[33/52] target-m68k: inline divu/divs
diff mbox

Message ID 1462396135-20925-1-git-send-email-laurent@vivier.eu
State New
Headers show

Commit Message

Laurent Vivier May 4, 2016, 9:08 p.m. UTC
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
---
 linux-user/main.c       |   7 ++
 target-m68k/cpu.h       |   4 -
 target-m68k/helper.h    |   2 -
 target-m68k/op_helper.c |  49 ------------
 target-m68k/qregs.def   |   2 -
 target-m68k/translate.c | 198 +++++++++++++++++++++++++++++++++++++++---------
 6 files changed, 169 insertions(+), 93 deletions(-)

Comments

Richard Henderson May 6, 2016, 7:44 p.m. UTC | #1
On 05/04/2016 11:08 AM, Laurent Vivier wrote:
> -void HELPER(divu)(CPUM68KState *env, uint32_t word)
> -{
> -    uint32_t num;
> -    uint32_t den;
> -    uint32_t quot;
> -    uint32_t rem;
> -
> -    num = env->div1;
> -    den = env->div2;
> -    /* ??? This needs to make sure the throwing location is accurate.  */
> -    if (den == 0) {
> -        raise_exception(env, EXCP_DIV0);
> -    }

Because of the exception, and required branching, I question taking the 
division operations back inline.  The throwing location is easily fixed; create 
a version of raise_exception that has an argument for GETPC and uses 
cpu_loop_exit_restore.

> @@ -1179,64 +1187,182 @@ DISAS_INSN(mulw)
>
>  DISAS_INSN(divw)
>  {
> +    TCGLabel *l1;
> +    TCGv t0, src;
> +    TCGv quot, rem;
>      int sign;
>
>      sign = (insn & 0x100) != 0;
> +
> +    tcg_gen_movi_i32(QREG_CC_C, 0); /* C is always cleared, use as 0 */
> +
> +    /* dest.l / src.w */
> +
> +    SRC_EA(env, t0, OS_WORD, sign, NULL);
> +
> +    src = tcg_temp_local_new_i32();
> +    tcg_gen_mov_i32(src, t0);
> +    l1 = gen_new_label();
> +    tcg_gen_brcondi_i32(TCG_COND_NE, src, 0, l1);
> +    tcg_gen_movi_i32(QREG_PC, s->insn_pc);
> +    gen_raise_exception(EXCP_DIV0);
> +    gen_set_label(l1);
> +
> +    tcg_gen_movi_i32(QREG_CC_C, 0); /* C is always cleared, use as 0 */

Redundant store to CC_C?  Is that simply so that the 0 is forward propagated 
within the second block by the tcg optimizer?  If so, the comments could be 
clearer.

> +    quot = tcg_temp_new();
> +    rem = tcg_temp_new();
>      if (sign) {
> -        gen_helper_divs(cpu_env, tcg_const_i32(1));
> +        tcg_gen_div_i32(quot, DREG(insn, 9), src);
> +        tcg_gen_rem_i32(rem, DREG(insn, 9), src);
> +        tcg_gen_ext16s_i32(QREG_CC_V, quot);
> +        tcg_gen_movi_i32(src, -1);
> +        tcg_gen_movcond_i32(TCG_COND_EQ, QREG_CC_V,
> +                            QREG_CC_V, quot,
> +                            QREG_CC_C /* 0 */, src /* -1 */);

setcond + neg is better than movcond here.

> +        tcg_gen_movcond_i32(TCG_COND_EQ, QREG_CC_V,
> +                            QREG_CC_V, QREG_CC_C /* 0 */,
> +                            QREG_CC_V /* 0 */, src /* -1 */);

Likewise.

> +    /* result rem:quot */
> +
> +    tcg_gen_ext16u_i32(quot, quot);
> +    tcg_gen_deposit_i32(quot, quot, rem, 16, 16);

The ext16u is redundant with the deposit.

> +    tcg_temp_free(rem);
> +
> +    /* on overflow, operands and flags are unaffected */
> +
> +    tcg_gen_movcond_i32(TCG_COND_EQ, DREG(insn, 9),
> +                        QREG_CC_V, QREG_CC_C /* zero */,
> +                        quot, DREG(insn, 9));
> +    tcg_gen_ext16s_i32(quot, quot);
> +    tcg_gen_movcond_i32(TCG_COND_EQ, QREG_CC_Z,
> +                        QREG_CC_V, QREG_CC_C /* zero */,
> +                        quot, QREG_CC_Z);
> +    tcg_gen_movcond_i32(TCG_COND_EQ, QREG_CC_N,
> +                        QREG_CC_V, QREG_CC_C /* zero */,
> +                        quot, QREG_CC_N);
> +    tcg_temp_free(quot);

Interesting.  The manual says "may", as in "may or may not".  Was the existing 
behaviour that didn't have early check for overflow a bug, or simply a bug for 
exactly emulating specific cpu models?

Anyway, this multiplicity of movcond is another reason to consider leaving the 
code out of line in a helper.

>      set_cc_op(s, CC_OP_FLAGS);

This needs to happen before the branch, along with an update_cc_op, surely. 
Otherwise the store to CC_C doesn't necessarily have an effect on the computed 
flags.

> +    if (sign) {
> +        tcg_gen_ext32s_i64(quot64, quot64);
> +        tcg_gen_extrh_i64_i32(rem, quot64);

This is a long way around to simply extract the same sign bit out of quot, i.e.

     tcg_gen_sari_i32(rem, quot, 31);

> +        tcg_gen_movcond_i32(TCG_COND_EQ, QREG_CC_V,
> +                            QREG_CC_V, rem,
> +                            QREG_CC_C /* 0 */, minusone /* -1 */);

Again with setcond + neg vs movcond.


r~

Patch
diff mbox

diff --git a/linux-user/main.c b/linux-user/main.c
index 5f3ec97..74b02c7 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -3034,6 +3034,13 @@  void cpu_loop(CPUM68KState *env)
             info._sifields._sigfault._addr = env->pc;
             queue_signal(env, info.si_signo, &info);
             break;
+        case EXCP_DIV0:
+            info.si_signo = TARGET_SIGFPE;
+            info.si_errno = 0;
+            info.si_code = TARGET_FPE_INTDIV;
+            info._sifields._sigfault._addr = env->pc;
+            queue_signal(env, info.si_signo, &info);
+            break;
         case EXCP_TRAP0:
             {
                 ts->sim_syscalls = 0;
diff --git a/target-m68k/cpu.h b/target-m68k/cpu.h
index 9415d2b..b300a92 100644
--- a/target-m68k/cpu.h
+++ b/target-m68k/cpu.h
@@ -94,10 +94,6 @@  typedef struct CPUM68KState {
     uint32_t macsr;
     uint32_t mac_mask;
 
-    /* Temporary storage for DIV helpers.  */
-    uint32_t div1;
-    uint32_t div2;
-
     /* MMU status.  */
     struct {
         uint32_t ar;
diff --git a/target-m68k/helper.h b/target-m68k/helper.h
index 0b819cb..80ee4f8 100644
--- a/target-m68k/helper.h
+++ b/target-m68k/helper.h
@@ -2,8 +2,6 @@  DEF_HELPER_1(bitrev, i32, i32)
 DEF_HELPER_1(ff1, i32, i32)
 DEF_HELPER_2(bfffo, i32, i32, i32)
 DEF_HELPER_FLAGS_2(sats, TCG_CALL_NO_RWG_SE, i32, i32, i32)
-DEF_HELPER_2(divu, void, env, i32)
-DEF_HELPER_2(divs, void, env, i32)
 DEF_HELPER_2(set_sr, void, env, i32)
 DEF_HELPER_3(movec, void, env, i32, i32)
 
diff --git a/target-m68k/op_helper.c b/target-m68k/op_helper.c
index 71caba9..bf3c813 100644
--- a/target-m68k/op_helper.c
+++ b/target-m68k/op_helper.c
@@ -245,52 +245,3 @@  void HELPER(bitfield_store)(CPUM68KState *env, uint32_t addr, uint32_t offset,
         break;
     }
 }
-
-void HELPER(divu)(CPUM68KState *env, uint32_t word)
-{
-    uint32_t num;
-    uint32_t den;
-    uint32_t quot;
-    uint32_t rem;
-
-    num = env->div1;
-    den = env->div2;
-    /* ??? This needs to make sure the throwing location is accurate.  */
-    if (den == 0) {
-        raise_exception(env, EXCP_DIV0);
-    }
-    quot = num / den;
-    rem = num % den;
-
-    env->cc_v = (word && quot > 0xffff ? -1 : 0);
-    env->cc_z = quot;
-    env->cc_n = quot;
-    env->cc_c = 0;
-
-    env->div1 = quot;
-    env->div2 = rem;
-}
-
-void HELPER(divs)(CPUM68KState *env, uint32_t word)
-{
-    int32_t num;
-    int32_t den;
-    int32_t quot;
-    int32_t rem;
-
-    num = env->div1;
-    den = env->div2;
-    if (den == 0) {
-        raise_exception(env, EXCP_DIV0);
-    }
-    quot = num / den;
-    rem = num % den;
-
-    env->cc_v = (word && quot != (int16_t)quot ? -1 : 0);
-    env->cc_z = quot;
-    env->cc_n = quot;
-    env->cc_c = 0;
-
-    env->div1 = quot;
-    env->div2 = rem;
-}
diff --git a/target-m68k/qregs.def b/target-m68k/qregs.def
index 156c0f5..51ff43b 100644
--- a/target-m68k/qregs.def
+++ b/target-m68k/qregs.def
@@ -7,7 +7,5 @@  DEFO32(CC_C, cc_c)
 DEFO32(CC_N, cc_n)
 DEFO32(CC_V, cc_v)
 DEFO32(CC_Z, cc_z)
-DEFO32(DIV1, div1)
-DEFO32(DIV2, div2)
 DEFO32(MACSR, macsr)
 DEFO32(MAC_MASK, mac_mask)
diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index 00fd2f1..b47f9c1 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -1047,11 +1047,19 @@  static void gen_jmp(DisasContext *s, TCGv dest)
     s->is_jmp = DISAS_JUMP;
 }
 
+static inline void gen_raise_exception(int nr)
+{
+    TCGv_i32 tmp = tcg_const_i32(nr);
+
+    gen_helper_raise_exception(cpu_env, tmp);
+    tcg_temp_free_i32(tmp);
+}
+
 static void gen_exception(DisasContext *s, uint32_t where, int nr)
 {
     update_cc_op(s);
     gen_jmp_im(s, where);
-    gen_helper_raise_exception(cpu_env, tcg_const_i32(nr));
+    gen_raise_exception(nr);
 }
 
 static inline void gen_addr_fault(DisasContext *s)
@@ -1179,64 +1187,182 @@  DISAS_INSN(mulw)
 
 DISAS_INSN(divw)
 {
-    TCGv reg;
-    TCGv tmp;
-    TCGv src;
+    TCGLabel *l1;
+    TCGv t0, src;
+    TCGv quot, rem;
     int sign;
 
     sign = (insn & 0x100) != 0;
-    reg = DREG(insn, 9);
-    if (sign) {
-        tcg_gen_ext16s_i32(QREG_DIV1, reg);
-    } else {
-        tcg_gen_ext16u_i32(QREG_DIV1, reg);
-    }
-    SRC_EA(env, src, OS_WORD, sign, NULL);
-    tcg_gen_mov_i32(QREG_DIV2, src);
+
+    tcg_gen_movi_i32(QREG_CC_C, 0); /* C is always cleared, use as 0 */
+
+    /* dest.l / src.w */
+
+    SRC_EA(env, t0, OS_WORD, sign, NULL);
+
+    src = tcg_temp_local_new_i32();
+    tcg_gen_mov_i32(src, t0);
+    l1 = gen_new_label();
+    tcg_gen_brcondi_i32(TCG_COND_NE, src, 0, l1);
+    tcg_gen_movi_i32(QREG_PC, s->insn_pc);
+    gen_raise_exception(EXCP_DIV0);
+    gen_set_label(l1);
+
+    tcg_gen_movi_i32(QREG_CC_C, 0); /* C is always cleared, use as 0 */
+
+    quot = tcg_temp_new();
+    rem = tcg_temp_new();
     if (sign) {
-        gen_helper_divs(cpu_env, tcg_const_i32(1));
+        tcg_gen_div_i32(quot, DREG(insn, 9), src);
+        tcg_gen_rem_i32(rem, DREG(insn, 9), src);
+        tcg_gen_ext16s_i32(QREG_CC_V, quot);
+        tcg_gen_movi_i32(src, -1);
+        tcg_gen_movcond_i32(TCG_COND_EQ, QREG_CC_V,
+                            QREG_CC_V, quot,
+                            QREG_CC_C /* 0 */, src /* -1 */);
     } else {
-        gen_helper_divu(cpu_env, tcg_const_i32(1));
+        tcg_gen_divu_i32(quot, DREG(insn, 9), src);
+        tcg_gen_remu_i32(rem, DREG(insn, 9), src);
+        tcg_gen_shri_i32(QREG_CC_V, quot, 16);
+        tcg_gen_movi_i32(src, -1);
+        tcg_gen_movcond_i32(TCG_COND_EQ, QREG_CC_V,
+                            QREG_CC_V, QREG_CC_C /* 0 */,
+                            QREG_CC_V /* 0 */, src /* -1 */);
     }
+    tcg_temp_free(src);
 
-    tmp = tcg_temp_new();
-    src = tcg_temp_new();
-    tcg_gen_ext16u_i32(tmp, QREG_DIV1);
-    tcg_gen_shli_i32(src, QREG_DIV2, 16);
-    tcg_gen_or_i32(reg, tmp, src);
+    /* result rem:quot */
+
+    tcg_gen_ext16u_i32(quot, quot);
+    tcg_gen_deposit_i32(quot, quot, rem, 16, 16);
+    tcg_temp_free(rem);
+
+    /* on overflow, operands and flags are unaffected */
+
+    tcg_gen_movcond_i32(TCG_COND_EQ, DREG(insn, 9),
+                        QREG_CC_V, QREG_CC_C /* zero */,
+                        quot, DREG(insn, 9));
+    tcg_gen_ext16s_i32(quot, quot);
+    tcg_gen_movcond_i32(TCG_COND_EQ, QREG_CC_Z,
+                        QREG_CC_V, QREG_CC_C /* zero */,
+                        quot, QREG_CC_Z);
+    tcg_gen_movcond_i32(TCG_COND_EQ, QREG_CC_N,
+                        QREG_CC_V, QREG_CC_C /* zero */,
+                        quot, QREG_CC_N);
+    tcg_temp_free(quot);
 
     set_cc_op(s, CC_OP_FLAGS);
 }
 
 DISAS_INSN(divl)
 {
-    TCGv num;
-    TCGv den;
-    TCGv reg;
+    TCGLabel *l1;
+    TCGv t0, den, minusone, quot, rem;
+    TCGv_i64 num64, den64, quot64, rem64;
     uint16_t ext;
+    int sign;
 
     ext = read_im16(env, s);
-    if (ext & 0x87f8) {
+
+    if ((ext & 0x400) && !m68k_feature(s->env, M68K_FEATURE_QUAD_MULDIV)) {
         gen_exception(s, s->pc - 4, EXCP_UNSUPPORTED);
         return;
     }
-    num = DREG(ext, 12);
-    reg = DREG(ext, 0);
-    tcg_gen_mov_i32(QREG_DIV1, num);
-    SRC_EA(env, den, OS_LONG, 0, NULL);
-    tcg_gen_mov_i32(QREG_DIV2, den);
-    if (ext & 0x0800) {
-        gen_helper_divs(cpu_env, tcg_const_i32(0));
+
+    sign = ext & 0x0800;
+
+    tcg_gen_movi_i32(QREG_CC_C, 0); /* used as 0 later */
+
+    SRC_EA(env, t0, OS_LONG, 0, NULL);
+
+    den = tcg_temp_local_new_i32();
+    tcg_gen_mov_i32(den, t0);
+    l1 = gen_new_label();
+    tcg_gen_brcondi_i32(TCG_COND_NE, den, 0, l1);
+    tcg_gen_movi_i32(QREG_PC, s->insn_pc);
+    gen_raise_exception(EXCP_DIV0);
+    gen_set_label(l1);
+
+    num64 = tcg_temp_new_i64();
+    den64 = tcg_temp_new_i64();
+    quot64 = tcg_temp_new_i64();
+    rem64 = tcg_temp_new_i64();
+    if (ext & 0x400) {
+        tcg_gen_concat_i32_i64(num64, DREG(ext, 12), DREG(ext, 0));
+    } else {
+        if (sign) {
+            tcg_gen_ext_i32_i64(num64, DREG(ext, 12));
+        } else {
+            tcg_gen_extu_i32_i64(num64, DREG(ext, 12));
+        }
+    }
+
+    if (sign) {
+        tcg_gen_ext_i32_i64(den64, den);
+        tcg_gen_div_i64(quot64, num64, den64);
+        tcg_gen_rem_i64(rem64, num64, den64);
+    } else {
+        tcg_gen_extu_i32_i64(den64, den);
+        tcg_gen_divu_i64(quot64, num64, den64);
+        tcg_gen_remu_i64(rem64, num64, den64);
+    }
+    tcg_temp_free_i64(den64);
+    tcg_temp_free_i64(num64);
+
+    /* compute result and overflow flag */
+
+    quot = tcg_temp_new();
+    tcg_gen_extr_i64_i32(quot, QREG_CC_V, quot64);
+
+    rem = tcg_temp_new();
+    minusone = tcg_const_i32(-1);
+    if (sign) {
+        tcg_gen_ext32s_i64(quot64, quot64);
+        tcg_gen_extrh_i64_i32(rem, quot64);
+        tcg_gen_movcond_i32(TCG_COND_EQ, QREG_CC_V,
+                            QREG_CC_V, rem,
+                            QREG_CC_C /* 0 */, minusone /* -1 */);
     } else {
-        gen_helper_divu(cpu_env, tcg_const_i32(0));
+        tcg_gen_movcond_i32(TCG_COND_EQ, QREG_CC_V,
+                            QREG_CC_V, QREG_CC_C /* 0 */ ,
+                            QREG_CC_V /* 0 */, minusone /* -1 */);
     }
-    if ((ext & 7) == ((ext >> 12) & 7)) {
-        /* div */
-        tcg_gen_mov_i32 (reg, QREG_DIV1);
+    tcg_temp_free(minusone);
+    tcg_temp_free_i64(quot64);
+
+    tcg_gen_extrl_i64_i32(rem, rem64);
+    tcg_temp_free_i64(rem64);
+
+    /* on overflow, operands and flags are unaffected */
+
+    if (m68k_feature(s->env, M68K_FEATURE_CF_ISA_A)) {
+        if (REG(ext, 0) == REG(ext, 12)) {
+            /* div */
+            tcg_gen_movcond_i32(TCG_COND_EQ, DREG(ext, 12),
+                                QREG_CC_V, QREG_CC_C /* zero */ ,
+                                quot, DREG(ext, 12));
+        } else {
+            /* rem */
+            tcg_gen_movcond_i32(TCG_COND_EQ, DREG(ext, 0),
+                                QREG_CC_V, QREG_CC_C /* zero */ ,
+                                rem, DREG(ext, 0));
+        }
     } else {
-        /* rem */
-        tcg_gen_mov_i32 (reg, QREG_DIV2);
+        tcg_gen_movcond_i32(TCG_COND_EQ, DREG(ext, 0),
+                            QREG_CC_V, QREG_CC_C /* zero */ ,
+                            rem, DREG(ext, 0));
+        tcg_gen_movcond_i32(TCG_COND_EQ, DREG(ext, 12),
+                            QREG_CC_V, QREG_CC_C /* zero */ ,
+                            quot, DREG(ext, 12));
     }
+    tcg_gen_movcond_i32(TCG_COND_EQ, QREG_CC_Z,
+                        QREG_CC_V, QREG_CC_C /* zero */ ,
+                        quot, QREG_CC_Z);
+    tcg_gen_movcond_i32(TCG_COND_EQ, QREG_CC_N,
+                        QREG_CC_V, QREG_CC_C /* zero */ ,
+                        quot, QREG_CC_N);
+    tcg_temp_free(quot);
+
     set_cc_op(s, CC_OP_FLAGS);
 }