diff mbox

[v2,3/3] target-m68k: add cas/cas2 ops

Message ID 1478121319-31986-4-git-send-email-laurent@vivier.eu (mailing list archive)
State New, archived
Headers show

Commit Message

Laurent Vivier Nov. 2, 2016, 9:15 p.m. UTC
Implement CAS using cmpxchg.
Implement CAS2 using helper and either cmpxchg when
the 32bit addresses are consecutive, or with
parallel_cpus+cpu_loop_exit_atomic() otherwise.

Suggested-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
---
 target-m68k/helper.h    |   2 +
 target-m68k/op_helper.c | 133 ++++++++++++++++++++++++++++++++++++++++++++
 target-m68k/translate.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 278 insertions(+)

Comments

Richard Henderson Nov. 3, 2016, 4:36 p.m. UTC | #1
On 11/02/2016 03:15 PM, Laurent Vivier wrote:
> +    if (c1 != l1) {
> +        env->cc_n = l1;
> +        env->cc_v = c1;
> +    } else {
> +        env->cc_n = l2;
> +        env->cc_v = c2;
> +    }
> +    env->cc_op = CC_OP_CMPL;
> +    env->dregs[Dc1] = deposit32(env->dregs[Dc1], 0, 16, l1);
> +    env->dregs[Dc2] = deposit32(env->dregs[Dc2], 0, 16, l2);

CC_OP_CMPW for cas2w.

> +void HELPER(cas2l)(CPUM68KState *env, uint32_t regs, uint32_t a1, uint32_t a2)
> +{
> +    uint32_t Dc1 = extract32(regs, 9, 3);
> +    uint32_t Dc2 = extract32(regs, 6, 3);
> +    uint32_t Du1 = extract32(regs, 3, 3);
> +    uint32_t Du2 = extract32(regs, 0, 3);
> +    uint32_t c1 = env->dregs[Dc1];
> +    uint32_t c2 = env->dregs[Dc2];
> +    uint32_t u1 = env->dregs[Du1];
> +    uint32_t u2 = env->dregs[Du2];
> +    uint32_t l1, l2;
> +    uint64_t c, u, l;
> +    uintptr_t ra = GETPC();
> +#ifndef CONFIG_USER_ONLY
> +    int mmu_idx = cpu_mmu_index(env, 0);
> +    TCGMemOpIdx oi;
> +#endif
> +
> +    if (parallel_cpus) {
> +        /* We're executing in a parallel context -- must be atomic.  */
> +        if ((a1 & 7) == 0 && a2 == a1 + 4) {
> +            c = deposit64(c2, 32, 32, c1);
> +            u = deposit64(u2, 32, 32, u1);
> +#ifdef CONFIG_USER_ONLY
> +            uint64_t *ha1 = g2h(a1);
> +            l = atomic_cmpxchg__nocheck(ha1, c, u);
> +#else
> +            oi = make_memop_idx(MO_BEQ, mmu_idx);
> +            l = helper_atomic_cmpxchgq_be_mmu(env, a1, c, u, oi, ra);
> +#endif

We do need a check here for CONFIG_ATOMIC64.  If that's not set, the host 
doesn't have 64-bit cmpxchg.  Probably arrange this as

if (parallel_cpus) {
#ifdef CONFIG_ATOMIC64
     if ((a1 & 7) ...) {
        ...
     } else if ((a2 & 7) ...) {
        ...
     } else
#endif
     {
         /* Tell the main loop we need to serialize this insn.  */
         cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
     }
} else {
    ...
}

Which is pretty ugly, but the best we can do without re-organizing the helpers.

> +   regs = tcg_const_i32(REG(ext2, 6) |
> +                        (REG(ext1, 6) << 3) |
> +                        (REG(ext2, 0) << 6) |
> +                        (REG(ext1, 0) << 9));
> +   gen_helper_cas2w(cpu_env, regs, addr1, addr2);
> +   tcg_temp_free(regs);

Need

   /* Note that cas2w also assigned to env->cc_op.  */
   s->cc_op = CC_OP_CMPW;
   s->cc_op_synced = 1;

> +DISAS_INSN(cas2l)
> +{
...
> +   regs = tcg_const_i32(REG(ext2, 6) |
> +                        (REG(ext1, 6) << 3) |
> +                        (REG(ext2, 0) << 6) |
> +                        (REG(ext1, 0) << 9));
> +   gen_helper_cas2w(cpu_env, regs, addr1, addr2);

cas2l.

Also need to set cc_op to CC_OP_CMPL, as above.


r~
Laurent Vivier Nov. 3, 2016, 6:03 p.m. UTC | #2
Le 03/11/2016 à 17:36, Richard Henderson a écrit :
> On 11/02/2016 03:15 PM, Laurent Vivier wrote:
>> +    if (c1 != l1) {
>> +        env->cc_n = l1;
>> +        env->cc_v = c1;
>> +    } else {
>> +        env->cc_n = l2;
>> +        env->cc_v = c2;
>> +    }
>> +    env->cc_op = CC_OP_CMPL;
>> +    env->dregs[Dc1] = deposit32(env->dregs[Dc1], 0, 16, l1);
>> +    env->dregs[Dc2] = deposit32(env->dregs[Dc2], 0, 16, l2);
> 
> CC_OP_CMPW for cas2w.

It was working because I have used helper_be_ldsw_mmu() to load values,
is it better to use helper_be_lduw_mmu with CC_OP_CMPW?

>> +DISAS_INSN(cas2l)
>> +{
> ...
>> +   regs = tcg_const_i32(REG(ext2, 6) |
>> +                        (REG(ext1, 6) << 3) |
>> +                        (REG(ext2, 0) << 6) |
>> +                        (REG(ext1, 0) << 9));
>> +   gen_helper_cas2w(cpu_env, regs, addr1, addr2);
> 
> cas2l.

I should not use values with the high word equal to the low word to test
this...

Many thanks,
Laurent
Richard Henderson Nov. 3, 2016, 7:20 p.m. UTC | #3
On 11/03/2016 12:03 PM, Laurent Vivier wrote:
>> CC_OP_CMPW for cas2w.
>
> It was working because I have used helper_be_ldsw_mmu() to load values,
> is it better to use helper_be_lduw_mmu with CC_OP_CMPW?

IIRC, one needs the extra sign-extension here:

     case CC_OP_CMPB:                                                       \
     case CC_OP_CMPW:                                                       \
     case CC_OP_CMPL:                                                       \
         src1 = n;                                                          \
         src2 = v;                                                          \
         res = EXTSIGN(src1 - src2, op - CC_OP_CMPB);                       \

to get all of the flags correct, even though just Z would be correct without.


r~
diff mbox

Patch

diff --git a/target-m68k/helper.h b/target-m68k/helper.h
index d863e55..17ec342 100644
--- a/target-m68k/helper.h
+++ b/target-m68k/helper.h
@@ -9,6 +9,8 @@  DEF_HELPER_4(divull, void, env, int, int, i32)
 DEF_HELPER_4(divsll, void, env, int, int, s32)
 DEF_HELPER_2(set_sr, void, env, i32)
 DEF_HELPER_3(movec, void, env, i32, i32)
+DEF_HELPER_4(cas2w, void, env, i32, i32, i32)
+DEF_HELPER_4(cas2l, void, env, i32, i32, i32)
 
 DEF_HELPER_2(f64_to_i32, f32, env, f64)
 DEF_HELPER_2(f64_to_f32, f32, env, f64)
diff --git a/target-m68k/op_helper.c b/target-m68k/op_helper.c
index a4bfa4e..ff27211 100644
--- a/target-m68k/op_helper.c
+++ b/target-m68k/op_helper.c
@@ -359,3 +359,136 @@  void HELPER(divsll)(CPUM68KState *env, int numr, int regr, int32_t den)
     env->dregs[regr] = rem;
     env->dregs[numr] = quot;
 }
+
+/*
+ * CAS2.W helper: compare-and-swap on two 16-bit memory operands.
+ *
+ * @regs packs four data register numbers, 3 bits each:
+ *   bits 11..9 = Dc1, bits 8..6 = Dc2 (compare operands),
+ *   bits  5..3 = Du1, bits 2..0 = Du2 (update operands).
+ * @a1 and @a2 are the guest addresses of the two words.
+ *
+ * When both memory words match the low words of Dc1/Dc2, the low words
+ * of Du1/Du2 are stored.  The loaded words are always written back into
+ * the low halves of Dc1/Dc2, and cc_n/cc_v/cc_op are set up so that the
+ * flags are later evaluated as a word compare.
+ */
+void HELPER(cas2w)(CPUM68KState *env, uint32_t regs, uint32_t a1, uint32_t a2)
+{
+    uint32_t Dc1 = extract32(regs, 9, 3);
+    uint32_t Dc2 = extract32(regs, 6, 3);
+    uint32_t Du1 = extract32(regs, 3, 3);
+    uint32_t Du2 = extract32(regs, 0, 3);
+    /* Only the low 16 bits of each register take part in the operation.  */
+    int16_t c1 = env->dregs[Dc1];
+    int16_t c2 = env->dregs[Dc2];
+    int16_t u1 = env->dregs[Du1];
+    int16_t u2 = env->dregs[Du2];
+    int16_t l1, l2;
+    uintptr_t ra = GETPC();
+
+    if (parallel_cpus) {
+        /*
+         * Tell the main loop we need to serialize this insn.
+         * NOTE(review): the code below reads l1/l2, so this call is
+         * presumably noreturn -- confirm.
+         */
+        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
+    } else {
+        /* We're executing in a serial context -- no need to be atomic.  */
+#ifdef CONFIG_USER_ONLY
+        int16_t *ha1 = g2h(a1);
+        int16_t *ha2 = g2h(a2);
+        l1 = ldsw_be_p(ha1);
+        l2 = ldsw_be_p(ha2);
+        if (l1 == c1 && l2 == c2) {
+            stw_be_p(ha1, u1);
+            stw_be_p(ha2, u2);
+        }
+#else
+        int mmu_idx = cpu_mmu_index(env, 0);
+        TCGMemOpIdx oi = make_memop_idx(MO_BEUW, mmu_idx);
+        l1 = helper_be_ldsw_mmu(env, a1, oi, ra);
+        l2 = helper_be_ldsw_mmu(env, a2, oi, ra);
+        if (l1 == c1 && l2 == c2) {
+            helper_be_stw_mmu(env, a1, u1, oi, ra);
+            helper_be_stw_mmu(env, a2, u2, oi, ra);
+        }
+#endif
+    }
+
+    /* Flags come from the first pair that differs, else the second pair.  */
+    if (c1 != l1) {
+        env->cc_n = l1;
+        env->cc_v = c1;
+    } else {
+        env->cc_n = l2;
+        env->cc_v = c2;
+    }
+    /* The operands are 16-bit, so this is a word compare, not a long one.  */
+    env->cc_op = CC_OP_CMPW;
+    env->dregs[Dc1] = deposit32(env->dregs[Dc1], 0, 16, l1);
+    env->dregs[Dc2] = deposit32(env->dregs[Dc2], 0, 16, l2);
+}
+
+/*
+ * CAS2.L helper: compare-and-swap on two 32-bit memory operands.
+ *
+ * @regs packs four data register numbers, 3 bits each:
+ *   bits 11..9 = Dc1, bits 8..6 = Dc2 (compare operands),
+ *   bits  5..3 = Du1, bits 2..0 = Du2 (update operands).
+ * @a1 and @a2 are the guest addresses of the two longwords.
+ *
+ * When both memory longwords match Dc1/Dc2, Du1/Du2 are stored.  The
+ * loaded values are always written back to Dc1/Dc2, and cc_n/cc_v/cc_op
+ * are set up so that the flags are later evaluated as a long compare.
+ *
+ * In a parallel context the operation is done with one 64-bit cmpxchg
+ * when the host supports it (CONFIG_ATOMIC64) and the two longwords form
+ * an 8-byte-aligned pair; otherwise the main loop is asked to serialize
+ * the instruction.
+ */
+void HELPER(cas2l)(CPUM68KState *env, uint32_t regs, uint32_t a1, uint32_t a2)
+{
+    uint32_t Dc1 = extract32(regs, 9, 3);
+    uint32_t Dc2 = extract32(regs, 6, 3);
+    uint32_t Du1 = extract32(regs, 3, 3);
+    uint32_t Du2 = extract32(regs, 0, 3);
+    uint32_t c1 = env->dregs[Dc1];
+    uint32_t c2 = env->dregs[Dc2];
+    uint32_t u1 = env->dregs[Du1];
+    uint32_t u2 = env->dregs[Du2];
+    uint32_t l1, l2;
+    uintptr_t ra = GETPC();
+#ifndef CONFIG_USER_ONLY
+    int mmu_idx = cpu_mmu_index(env, 0);
+    TCGMemOpIdx oi;
+#endif
+
+    if (parallel_cpus) {
+        /* We're executing in a parallel context -- must be atomic.  */
+#ifdef CONFIG_ATOMIC64
+        uint64_t c, u, l;
+
+        if ((a1 & 7) == 0 && a2 == a1 + 4) {
+            /* Pair based at a1: operand 1 is the high (first) longword.  */
+            c = deposit64(c2, 32, 32, c1);
+            u = deposit64(u2, 32, 32, u1);
+#ifdef CONFIG_USER_ONLY
+            /* Guest memory is big-endian; convert around the host op.  */
+            uint64_t *haddr = g2h(a1);
+            l = atomic_cmpxchg__nocheck(haddr, cpu_to_be64(c),
+                                        cpu_to_be64(u));
+            l = be64_to_cpu(l);
+#else
+            oi = make_memop_idx(MO_BEQ, mmu_idx);
+            l = helper_atomic_cmpxchgq_be_mmu(env, a1, c, u, oi, ra);
+#endif
+            l1 = l >> 32;
+            l2 = l;
+        } else if ((a2 & 7) == 0 && a1 == a2 + 4) {
+            /*
+             * Pair based at a2: operand 2 is the high (first) longword,
+             * so the 64-bit access must be made at a2, not a1.
+             */
+            c = deposit64(c1, 32, 32, c2);
+            u = deposit64(u1, 32, 32, u2);
+#ifdef CONFIG_USER_ONLY
+            /* Guest memory is big-endian; convert around the host op.  */
+            uint64_t *haddr = g2h(a2);
+            l = atomic_cmpxchg__nocheck(haddr, cpu_to_be64(c),
+                                        cpu_to_be64(u));
+            l = be64_to_cpu(l);
+#else
+            oi = make_memop_idx(MO_BEQ, mmu_idx);
+            l = helper_atomic_cmpxchgq_be_mmu(env, a2, c, u, oi, ra);
+#endif
+            l2 = l >> 32;
+            l1 = l;
+        } else
+#endif
+        {
+            /*
+             * Tell the main loop we need to serialize this insn.
+             * NOTE(review): the code below reads l1/l2, so this call is
+             * presumably noreturn -- confirm.
+             */
+            cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
+        }
+    } else {
+        /* We're executing in a serial context -- no need to be atomic.  */
+#ifdef CONFIG_USER_ONLY
+        uint32_t *ha1 = g2h(a1);
+        uint32_t *ha2 = g2h(a2);
+        l1 = ldl_be_p(ha1);
+        l2 = ldl_be_p(ha2);
+        if (l1 == c1 && l2 == c2) {
+            stl_be_p(ha1, u1);
+            stl_be_p(ha2, u2);
+        }
+#else
+        oi = make_memop_idx(MO_BEUL, mmu_idx);
+        l1 = helper_be_ldul_mmu(env, a1, oi, ra);
+        l2 = helper_be_ldul_mmu(env, a2, oi, ra);
+        if (l1 == c1 && l2 == c2) {
+            helper_be_stl_mmu(env, a1, u1, oi, ra);
+            helper_be_stl_mmu(env, a2, u2, oi, ra);
+        }
+#endif
+    }
+
+    /* Flags come from the first pair that differs, else the second pair.  */
+    if (c1 != l1) {
+        env->cc_n = l1;
+        env->cc_v = c1;
+    } else {
+        env->cc_n = l2;
+        env->cc_v = c2;
+    }
+    env->cc_op = CC_OP_CMPL;
+    env->dregs[Dc1] = l1;
+    env->dregs[Dc2] = l2;
+}
diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index 93f1270..68cb8d4 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -1880,6 +1880,146 @@  DISAS_INSN(arith_im)
     tcg_temp_free(dest);
 }
 
+/*
+ * Translate "cas Dc,Du,<EA>": atomic compare-and-swap of a byte, word or
+ * long at <EA>.
+ *
+ * Bits 10..9 of the opcode select the operand size (1 = byte, 2 = word,
+ * 3 = long).  The extension word names Dc (register field at bit 0) and
+ * Du (register field at bit 6).
+ */
+DISAS_INSN(cas)
+{
+    int opsize;
+    TCGv addr;
+    uint16_t ext;
+    TCGv load;
+    TCGv cmp;
+    TCGMemOp opc;
+
+    switch ((insn >> 9) & 3) {
+    case 1:
+        opsize = OS_BYTE;
+        opc = MO_UB;
+        break;
+    case 2:
+        opsize = OS_WORD;
+        opc = MO_TEUW;
+        break;
+    case 3:
+        opsize = OS_LONG;
+        opc = MO_TEUL;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    opc |= MO_ALIGN;
+
+    ext = read_im16(env, s);
+
+    /* cas Dc,Du,<EA> */
+
+    addr = gen_lea(env, s, insn, opsize);
+    if (IS_NULL_QREG(addr)) {
+        gen_addr_fault(s);
+        return;
+    }
+
+    cmp = gen_extend(DREG(ext, 0), opsize, 0);
+
+    /* if  <EA> == Dc then
+     *     <EA> = Du
+     *     Dc = <EA> (because <EA> == Dc)
+     * else
+     *     Dc = <EA>
+     */
+
+    load = tcg_temp_new();
+    tcg_gen_atomic_cmpxchg_i32(load, addr, cmp, DREG(ext, 6),
+                               IS_USER(s), opc);
+
+    /*
+     * Update the flags before writing Dc: cmp may alias Dc itself
+     * (NOTE(review): presumably when gen_extend() returns its input
+     * unchanged for OS_LONG -- confirm), in which case writing Dc first
+     * would make the flags compare the loaded value with itself.
+     */
+    gen_update_cc_cmp(s, load, cmp, opsize);
+    gen_partset_reg(opsize, DREG(ext, 0), load);
+
+    tcg_temp_free(load);
+}
+
+/*
+ * Translate "cas2.w Dc1:Dc2,Du1:Du2,(Rn1):(Rn2)".
+ *
+ * Two extension words follow the opcode, one per operand.  In each,
+ * bit 15 selects an address (1) or data (0) register as the pointer
+ * (register field at bit 12), the field at bit 6 names Du and the field
+ * at bit 0 names Dc.  The actual work is done by the cas2w helper.
+ */
+DISAS_INSN(cas2w)
+{
+    uint16_t ext1, ext2;
+    TCGv addr1, addr2;
+    TCGv regs;
+
+    /* cas2 Dc1:Dc2,Du1:Du2,(Rn1):(Rn2) */
+
+    ext1 = read_im16(env, s);
+
+    if (ext1 & 0x8000) {
+        /* Address Register */
+        addr1 = AREG(ext1, 12);
+    } else {
+        /* Data Register */
+        addr1 = DREG(ext1, 12);
+    }
+
+    ext2 = read_im16(env, s);
+    if (ext2 & 0x8000) {
+        /* Address Register */
+        addr2 = AREG(ext2, 12);
+    } else {
+        /* Data Register */
+        addr2 = DREG(ext2, 12);
+    }
+
+    /* if (R1) == Dc1 && (R2) == Dc2 then
+     *     (R1) = Du1
+     *     (R2) = Du2
+     * else
+     *     Dc1 = (R1)
+     *     Dc2 = (R2)
+     */
+
+    /* Pack the register numbers as Dc1:Dc2:Du1:Du2, 3 bits each.  */
+    regs = tcg_const_i32(REG(ext2, 6) |
+                         (REG(ext1, 6) << 3) |
+                         (REG(ext2, 0) << 6) |
+                         (REG(ext1, 0) << 9));
+    gen_helper_cas2w(cpu_env, regs, addr1, addr2);
+    tcg_temp_free(regs);
+
+    /* Note that cas2w also assigned to env->cc_op.  */
+    s->cc_op = CC_OP_CMPW;
+    s->cc_op_synced = 1;
+}
+
+/*
+ * Translate "cas2.l Dc1:Dc2,Du1:Du2,(Rn1):(Rn2)".
+ *
+ * Two extension words follow the opcode, one per operand.  In each,
+ * bit 15 selects an address (1) or data (0) register as the pointer
+ * (register field at bit 12), the field at bit 6 names Du and the field
+ * at bit 0 names Dc.  The actual work is done by the cas2l helper.
+ */
+DISAS_INSN(cas2l)
+{
+    uint16_t ext1, ext2;
+    TCGv addr1, addr2, regs;
+
+    /* cas2 Dc1:Dc2,Du1:Du2,(Rn1):(Rn2) */
+
+    ext1 = read_im16(env, s);
+
+    if (ext1 & 0x8000) {
+        /* Address Register */
+        addr1 = AREG(ext1, 12);
+    } else {
+        /* Data Register */
+        addr1 = DREG(ext1, 12);
+    }
+
+    ext2 = read_im16(env, s);
+    if (ext2 & 0x8000) {
+        /* Address Register */
+        addr2 = AREG(ext2, 12);
+    } else {
+        /* Data Register */
+        addr2 = DREG(ext2, 12);
+    }
+
+    /* if (R1) == Dc1 && (R2) == Dc2 then
+     *     (R1) = Du1
+     *     (R2) = Du2
+     * else
+     *     Dc1 = (R1)
+     *     Dc2 = (R2)
+     */
+
+    /* Pack the register numbers as Dc1:Dc2:Du1:Du2, 3 bits each.  */
+    regs = tcg_const_i32(REG(ext2, 6) |
+                         (REG(ext1, 6) << 3) |
+                         (REG(ext2, 0) << 6) |
+                         (REG(ext1, 0) << 9));
+    /* Long-sized operands: must use the cas2l helper, not cas2w.  */
+    gen_helper_cas2l(cpu_env, regs, addr1, addr2);
+    tcg_temp_free(regs);
+
+    /* Note that cas2l also assigned to env->cc_op.  */
+    s->cc_op = CC_OP_CMPL;
+    s->cc_op_synced = 1;
+}
+
 DISAS_INSN(byterev)
 {
     TCGv reg;
@@ -3885,6 +4025,9 @@  void register_m68k_insns (CPUM68KState *env)
     INSN(arith_im,  0680, fff8, CF_ISA_A);
     INSN(arith_im,  0c00, ff38, CF_ISA_A);
     INSN(arith_im,  0c00, ff00, M68000);
+    INSN(cas,       08c0, f9c0, CAS);
+    INSN(cas2w,     0cfc, ffff, CAS);
+    INSN(cas2l,     0efc, ffff, CAS);
     BASE(bitop_im,  0800, ffc0);
     BASE(bitop_im,  0840, ffc0);
     BASE(bitop_im,  0880, ffc0);