
[32/52] target-m68k: bitfield ops

Message ID: 1462392752-17703-33-git-send-email-laurent@vivier.eu
State: New, archived

Commit Message

Laurent Vivier May 4, 2016, 8:12 p.m. UTC
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
---
 target-m68k/helper.c    |  13 ++
 target-m68k/helper.h    |   4 +
 target-m68k/op_helper.c |  68 ++++++
 target-m68k/translate.c | 560 ++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 645 insertions(+)

Comments

Richard Henderson May 6, 2016, 7:11 p.m. UTC | #1
On 05/04/2016 10:12 AM, Laurent Vivier wrote:
> Signed-off-by: Laurent Vivier <laurent@vivier.eu>
> ---
>  target-m68k/helper.c    |  13 ++
>  target-m68k/helper.h    |   4 +
>  target-m68k/op_helper.c |  68 ++++++
>  target-m68k/translate.c | 560 ++++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 645 insertions(+)
>
> diff --git a/target-m68k/helper.c b/target-m68k/helper.c
> index e9e7cee..76dda44 100644
> --- a/target-m68k/helper.c
> +++ b/target-m68k/helper.c
> @@ -267,6 +267,19 @@ uint32_t HELPER(ff1)(uint32_t x)
>      return n;
>  }
>
> +uint32_t HELPER(bfffo)(uint32_t arg, uint32_t width)
> +{
> +    int n;
> +    uint32_t mask;
> +    mask = 0x80000000;
> +    for (n = 0; n < width; n++) {
> +       if (arg & mask)
> +           break;
> +       mask >>= 1;
> +    }
> +    return n;

   n = clz32(arg);
   return n < width ? n : width;
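
i.e. the whole helper reduces to (a sketch, assuming clz32 from
qemu/host-utils.h):

   uint32_t HELPER(bfffo)(uint32_t arg, uint32_t width)
   {
       /* clz32() returns 32 for a zero argument, so the
          comparison with width also covers the empty case.  */
       uint32_t n = clz32(arg);
       return n < width ? n : width;
   }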

> +DEF_HELPER_2(bfffo, i32, i32, i32)

DEF_HELPER_FLAGS_*
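
For bfffo, which has no side effects, presumably mirroring the sats
declaration already in this file:

   DEF_HELPER_FLAGS_2(bfffo, TCG_CALL_NO_RWG_SE, i32, i32, i32)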

> +DEF_HELPER_4(bitfield_load, i64, env, i32, i32, i32)
> +DEF_HELPER_5(bitfield_store, void, env, i32, i32, i32, i64)

Likewise.

> +        bitfield = cpu_ldub_data(env, addr);

Switch to using cpu_ldub_data_ra etc.  That will avoid having to store PC or 
update_cc before calling these helpers.
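
E.g. (a sketch; the _ra variants take the host return address so that
a fault unwinds precisely to the guest insn):

   uint64_t HELPER(bitfield_load)(CPUM68KState *env, uint32_t addr,
                                  uint32_t offset, uint32_t width)
   {
       uintptr_t ra = GETPC();
       uint64_t bitfield = 0;
       int size = (offset + width + 7) >> 3;

       switch (size) {
       case 1:
           bitfield = cpu_ldub_data_ra(env, addr, ra);
           bitfield <<= 56;
           break;
       /* ... and likewise for the other sizes ... */
       }
       return bitfield;
   }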

> +static inline void bcd_add(TCGv src, TCGv dest)

Did the bcd patch get squashed into this one by mistake?

Anyway, it might be nice to take off the inline marker for these, since they're 
fairly large functions.

> +static inline void bcd_neg(TCGv dest, TCGv src)

Likewise wrt inline.

> +    /* compute the 10's complement
> +     *
> +     *    bcd_add(0xff99 - (src + X), 0x0001)
> +     *
> +     *        t1 = 0xF9999999 - src - X
> +     *        t2 = t1 + 0x06666666
> +     *        t3 = t2 + 0x00000001
> +     *        t4 = t2 ^ 0x00000001
> +     *        t5 = t3 ^ t4
> +     *        t6 = ~t5 & 0x11111110
> +     *        t7 = (t6 >> 2) | (t6 >> 3)
> +     *        return t3 - t7
> +     *
> +     * reduced in:
> +     *
> +     *        t2 = 0xFFFFFFFF + (-src)
> +     *        t3 = (-src)
> +     *        t4 = t2  ^ (X ^ 1)
> +     *        t5 = (t3 - X) ^ t4
> +     *        t6 = ~t5 & 0x11111110
> +     *        t7 = (t6 >> 2) | (t6 >> 3)
> +     *        return (t3 - X) - t7

Is there a reason that you're computing the ten's complement to 7 digits when 
you're only going to use 3 of them?  Restricting to 3 means that these 
constants become smaller and therefore easier to build on a non-x86 host.
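
(Restricted to 3 digits, the derivation above would presumably read --
an untested sketch:

       t1 = 0xF99 - src - X
       t2 = t1 + 0x066
       t3 = t2 + 0x001
       t4 = t2 ^ 0x001
       t5 = t3 ^ t4
       t6 = ~t5 & 0x110
       t7 = (t6 >> 2) | (t6 >> 3)
       return t3 - t7
)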

> +DISAS_INSN(abcd_mem)
> +{
> +    TCGv src;
> +    TCGv addr_src;
> +    TCGv dest;
> +    TCGv addr_dest;
> +
> +    gen_flush_flags(s); /* !Z is sticky */
> +
> +    addr_src = AREG(insn, 0);
> +    tcg_gen_subi_i32(addr_src, addr_src, opsize_bytes(OS_BYTE));
> +    src = gen_load(s, OS_BYTE, addr_src, 0);
> +
> +    addr_dest = AREG(insn, 9);
> +    tcg_gen_subi_i32(addr_dest, addr_dest, opsize_bytes(OS_BYTE));
> +    dest = gen_load(s, OS_BYTE, addr_dest, 0);
> +
> +    bcd_add(src, dest);
> +
> +    gen_store(s, OS_BYTE, addr_dest, dest);

Surely the write-back to the address registers only happens after any possible 
exception due to the loads.  Otherwise you can't restart the insn after a page 
fault.
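
Something along these lines (a sketch; note that if Ax == Ay the second
subtract must still observe the first decrement):

   addr_src = tcg_temp_new();
   tcg_gen_subi_i32(addr_src, AREG(insn, 0), opsize_bytes(OS_BYTE));
   src = gen_load(s, OS_BYTE, addr_src, 0);

   addr_dest = tcg_temp_new();
   tcg_gen_subi_i32(addr_dest, AREG(insn, 9), opsize_bytes(OS_BYTE));
   dest = gen_load(s, OS_BYTE, addr_dest, 0);

   /* Both loads are done; no load fault is possible past this point,
      so commit the predecrements to the address registers.  */
   tcg_gen_mov_i32(AREG(insn, 0), addr_src);
   tcg_gen_mov_i32(AREG(insn, 9), addr_dest);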

> +    set_cc_op(s, CC_OP_FLAGS);

This is redundant with gen_flush_flags.

> +static void bitfield_param(uint16_t ext, TCGv *offset, TCGv *width, TCGv *mask)
> +{
> +    TCGv tmp;
> +
> +    /* offset */
> +
> +    if (ext & 0x0800) {
> +        *offset = tcg_temp_new_i32();
> +        tcg_gen_mov_i32(*offset, DREG(ext, 6));
> +    } else {
> +        *offset = tcg_temp_new_i32();
> +        tcg_gen_movi_i32(*offset, (ext >> 6) & 31);
> +    }

No need to have two copies of the *offset allocation.
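
I.e.:

   *offset = tcg_temp_new_i32();
   if (ext & 0x0800) {
       tcg_gen_mov_i32(*offset, DREG(ext, 6));
   } else {
       tcg_gen_movi_i32(*offset, (ext >> 6) & 31);
   }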

> +    tmp = tcg_temp_new_i32();
> +    tcg_gen_sub_i32(tmp, tcg_const_i32(32), *width);
> +    *mask = tcg_temp_new_i32();
> +    tcg_gen_shl_i32(*mask, tcg_const_i32(0xffffffff), tmp);

Fewer garbage temporaries if you do

   tmp = tcg_const_i32(32);
   tcg_gen_sub_i32(tmp, tmp, *width);
   *mask = tcg_const_i32(-1);
   tcg_gen_shl_i32(*mask, *mask, tmp);
   tcg_temp_free_i32(tmp);

> +    if (ext & 0x0800)
> +        tcg_gen_andi_i32(offset, offset, 31);

Watch the coding style.
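
QEMU style requires braces even around single-statement bodies:

   if (ext & 0x0800) {
       tcg_gen_andi_i32(offset, offset, 31);
   }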


> +    if (op == 7) {
> +        TCGv tmp2;
> +
> +        tmp2 = tcg_temp_new_i32();
> +        tcg_gen_sub_i32(tmp2, tcg_const_i32(32), width);
> +        tcg_gen_shl_i32(tmp2, reg2, tmp2);
> +        tcg_gen_and_i32(tmp2, tmp2, mask);
> +        gen_logic_cc(s, tmp2, OS_LONG);

A rotate right by the width is an easier way to put the field into the most 
significant bits.  Also, you need to do this AND before you rotate the mask.

A comment here that we're computing the flags for BFINS wouldn't go amiss.

> +        tcg_temp_free_i32(tmp1);

tmp2.

> +    } else {
> +        gen_logic_cc(s, tmp1, OS_LONG);
> +    }

I also question the logic of doing

	mask = mask >>> offset;
	tmp = reg & mask
	tmp = tmp <<< offset

when just

	tmp = reg <<< offset;
	tmp = tmp & mask

would do.

Let's assume we make this change, i.e. that we align the field at the MSB for 
both reg1 and reg2.  That gives us

     TCGv tmp = tcg_temp_new();
     bitfield_param(ext, &offset, &width, &mask);

     if (op != 7) { /* All except BFINS */
         tcg_gen_rotl_i32(tmp, reg, offset);
         tcg_gen_and_i32(tmp, tmp, mask);
         gen_logic_cc(s, tmp, OS_LONG);
     }

     switch (op) {
     case 0: /* bftst */
         break;
     case 1: /* bfextu */
         tcg_gen_rotl_i32(reg2, QREG_CC_N, width);
         break;
     case 2: /* bfchg */
         tcg_gen_rotr_i32(mask, mask, offset);
         tcg_gen_xor_i32(reg, reg, mask);
         break;
     case 3: /* bfexts */
         tcg_gen_subfi_i32(width, 32, width);
         tcg_gen_sar_i32(reg2, QREG_CC_N, width);
         break;
     case 4: /* bfclr */
         tcg_gen_rotr_i32(mask, mask, offset);
         tcg_gen_andc_i32(reg, reg, mask);
         break;
     case 5: /* bfffo */
         /* Set all bits outside of mask, so that we find one
            when searching via clz32.  */
         tcg_gen_orc_i32(tmp, QREG_CC_N, mask);
         tcg_gen_shr_i32(tmp, tmp, offset);
         gen_helper_clz(reg2, tmp);
         break;
     case 6: /* bfset */
         tcg_gen_rotr_i32(mask, mask, offset);
         tcg_gen_or_i32(reg, reg, mask);
         break;
     case 7: /* bfins */
         tcg_gen_rotr_i32(tmp, reg2, width);
         tcg_gen_and_i32(tmp, tmp, mask);
         gen_logic_cc(s, tmp, OS_LONG);
         /* ??? If constant offset and constant width,
            and offset + width <= 32, then we can use
            tcg_gen_deposit_i32 here.  */
         tcg_gen_rotr_i32(mask, mask, offset);
         tcg_gen_rotr_i32(tmp, tmp, offset);
         tcg_gen_andc_i32(reg, reg, mask);
         tcg_gen_or_i32(reg, reg, tmp);
         break;
     }
     tcg_temp_free(tmp);

> +    /* tmp = (bitfield << offset) >> 32 */
> +
> +    tcg_gen_shri_i64(tmp64, tmp64, 32ULL);
> +    dest = tcg_temp_new_i32();
> +    tcg_gen_extrl_i64_i32(dest, tmp64);

tcg_gen_extrh_i64_i32.
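
i.e. the shift-and-extract pair collapses to:

   dest = tcg_temp_new_i32();
   tcg_gen_extrh_i64_i32(dest, tmp64);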

> +static TCGv_i64 gen_bitfield_mask(TCGv offset, TCGv width)
> +{
> +    TCGv tmp;
> +    TCGv_i64 mask;
> +    TCGv_i64 shift;
> +
> +    mask = tcg_temp_new_i64();
> +
> +    /* mask = (1u << width) - 1; */
> +
> +    tcg_gen_extu_i32_i64(mask, width);
> +    tcg_gen_shl_i64(mask, tcg_const_i64(1), mask);
> +    tcg_gen_subi_i64(mask, mask, 1);
> +
> +    /* shift = 64 - (width + offset); */
> +
> +    tmp = tcg_temp_new_i32();
> +    tcg_gen_add_i32(tmp, offset, width);
> +    tcg_gen_sub_i32(tmp, tcg_const_i32(64), tmp);
> +    shift = tcg_temp_new_i64();
> +    tcg_gen_extu_i32_i64(shift, tmp);
> +
> +    /* mask <<= shift */
> +
> +    tcg_gen_shl_i64(mask, mask, shift);

Two fewer instructions with

   mask = -1 << width;
   mask <<= offset;
   mask >>= offset;
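
Taken literally, that pseudo-code maps onto TCG as (sketch):

   mask = tcg_const_i64(-1);
   shift = tcg_temp_new_i64();
   tcg_gen_extu_i32_i64(shift, width);
   tcg_gen_shl_i64(mask, mask, shift);   /* mask = -1 << width */
   tcg_gen_extu_i32_i64(shift, offset);
   tcg_gen_shl_i64(mask, mask, shift);   /* mask <<= offset */
   tcg_gen_shr_i64(mask, mask, shift);   /* mask >>= offset */
   tcg_temp_free_i64(shift);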

That said, similar comments apply to bitfield_mem re keeping the field at the MSB.

> +    gen_helper_bitfield_load(bitfield, cpu_env, src, offset, width);
...
> +        gen_logic_cc(s, val, OS_LONG);
...
> +        gen_helper_bitfield_store(cpu_env, src, offset, width, bitfield);

You risk clobbering CC before the exception if the page isn't writable.

One way to fix this is to have bitfield_load perform a probe_write (or two, if 
the field spans a page boundary).  That way when we come to bitfield_store we 
know that any exception will have already been taken.
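
A sketch of that inside bitfield_load (probe_write's exact signature has
varied between QEMU versions, so treat this as pseudo-code):

   int size = (offset + width + 7) >> 3;
   uintptr_t ra = GETPC();

   /* Take any write fault now, before anything is read, so that
      the later bitfield_store cannot raise an exception.  */
   probe_write(env, addr, cpu_mmu_index(env, false), ra);
   if (((addr + size - 1) & TARGET_PAGE_MASK) != (addr & TARGET_PAGE_MASK)) {
       probe_write(env, addr + size - 1, cpu_mmu_index(env, false), ra);
   }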

Note that you can also make the helpers perform the rotates into and out of the 
MSB.  That off-loads a little of the required tcg code.


r~

Patch

diff --git a/target-m68k/helper.c b/target-m68k/helper.c
index e9e7cee..76dda44 100644
--- a/target-m68k/helper.c
+++ b/target-m68k/helper.c
@@ -267,6 +267,19 @@  uint32_t HELPER(ff1)(uint32_t x)
     return n;
 }
 
+uint32_t HELPER(bfffo)(uint32_t arg, uint32_t width)
+{
+    int n;
+    uint32_t mask;
+    mask = 0x80000000;
+    for (n = 0; n < width; n++) {
+       if (arg & mask)
+           break;
+       mask >>= 1;
+    }
+    return n;
+}
+
 uint32_t HELPER(sats)(uint32_t val, uint32_t v)
 {
     /* The result has the opposite sign to the original value.  */
diff --git a/target-m68k/helper.h b/target-m68k/helper.h
index aae01f9..0b819cb 100644
--- a/target-m68k/helper.h
+++ b/target-m68k/helper.h
@@ -1,5 +1,6 @@ 
 DEF_HELPER_1(bitrev, i32, i32)
 DEF_HELPER_1(ff1, i32, i32)
+DEF_HELPER_2(bfffo, i32, i32, i32)
 DEF_HELPER_FLAGS_2(sats, TCG_CALL_NO_RWG_SE, i32, i32, i32)
 DEF_HELPER_2(divu, void, env, i32)
 DEF_HELPER_2(divs, void, env, i32)
@@ -44,3 +45,6 @@  DEF_HELPER_2(flush_flags, void, env, i32)
 DEF_HELPER_2(set_ccr, void, env, i32)
 DEF_HELPER_FLAGS_1(get_ccr, TCG_CALL_NO_WG_SE, i32, env)
 DEF_HELPER_2(raise_exception, void, env, i32)
+
+DEF_HELPER_4(bitfield_load, i64, env, i32, i32, i32)
+DEF_HELPER_5(bitfield_store, void, env, i32, i32, i32, i64)
diff --git a/target-m68k/op_helper.c b/target-m68k/op_helper.c
index 1c95ea0..71caba9 100644
--- a/target-m68k/op_helper.c
+++ b/target-m68k/op_helper.c
@@ -178,6 +178,74 @@  void HELPER(raise_exception)(CPUM68KState *env, uint32_t tt)
     raise_exception(env, tt);
 }
 
+/* load from a bitfield */
+
+uint64_t HELPER(bitfield_load)(CPUM68KState *env, uint32_t addr,
+                               uint32_t offset, uint32_t width)
+{
+    uint64_t bitfield = 0;
+    int size;
+
+    size = (offset + width + 7) >> 3;
+    switch (size) {
+    case 1:
+        bitfield = cpu_ldub_data(env, addr);
+        bitfield <<= 56;
+        break;
+    case 2:
+        bitfield = cpu_lduw_data(env, addr);
+        bitfield <<= 48;
+        break;
+    case 3:
+        bitfield = cpu_lduw_data(env, addr);
+        bitfield <<= 8;
+        bitfield |= cpu_ldub_data(env, addr + 2);
+        bitfield <<= 40;
+        break;
+    case 4:
+        bitfield = cpu_ldl_data(env, addr);
+        bitfield <<= 32;
+        break;
+    case 5:
+        bitfield = cpu_ldl_data(env, addr);
+        bitfield <<= 8;
+        bitfield |= cpu_ldub_data(env, addr + 4);
+        bitfield <<= 24;
+        break;
+    }
+
+    return bitfield;
+}
+
+/* store to a bitfield */
+
+void HELPER(bitfield_store)(CPUM68KState *env, uint32_t addr, uint32_t offset,
+                            uint32_t width, uint64_t bitfield)
+{
+    int size;
+
+    size = (offset + width + 7) >> 3;
+    switch (size) {
+    case 1:
+        cpu_stb_data(env, addr, bitfield >> 56);
+        break;
+    case 2:
+        cpu_stw_data(env, addr, bitfield >> 48);
+        break;
+    case 3:
+        cpu_stw_data(env, addr, bitfield >> 48);
+        cpu_stb_data(env, addr + 2, bitfield >> 40);
+        break;
+    case 4:
+        cpu_stl_data(env, addr, bitfield >> 32);
+        break;
+    case 5:
+        cpu_stl_data(env, addr, bitfield >> 32);
+        cpu_stb_data(env, addr + 4, bitfield >> 24);
+        break;
+    }
+}
+
 void HELPER(divu)(CPUM68KState *env, uint32_t word)
 {
     uint32_t num;
diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index 817f0b3..00fd2f1 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -1240,6 +1240,231 @@  DISAS_INSN(divl)
     set_cc_op(s, CC_OP_FLAGS);
 }
 
+static inline void bcd_add(TCGv src, TCGv dest)
+{
+    TCGv t0, t1;
+
+    /* t1 = (src + 0x0066) + dest
+     *    = result with a possible excess 0x6 in some digits
+     */
+
+    t0 = tcg_const_i32(0x0066);
+    tcg_gen_add_i32(t0, t0, src);
+
+    t1 = tcg_temp_new();
+    tcg_gen_add_i32(t1, t0, dest);
+
+    /* we will remove the excess 0x6 where there is no carry */
+
+    /* t0 = (src + 0x0066) ^ dest
+     *    = t1 without carries
+     */
+
+    tcg_gen_xor_i32(t0, t0, dest);
+
+    /* extract the carries
+     * t0 = t0 ^ t1
+     *    = only the carries
+     */
+
+    tcg_gen_xor_i32(t0, t0, t1);
+
+    /* generate 0x1 where there is no carry */
+
+    tcg_gen_not_i32(t0, t0);
+    tcg_gen_andi_i32(t0, t0, 0x110);
+
+    /* for each 0x10, generate a 0x6 */
+
+    tcg_gen_shri_i32(dest, t0, 2);
+    tcg_gen_shri_i32(t0, t0, 3);
+    tcg_gen_or_i32(dest, dest, t0);
+    tcg_temp_free(t0);
+
+    /* remove the excess 0x6
+     * for digits that have not generated a carry
+     */
+
+    tcg_gen_sub_i32(dest, t1, dest);
+    tcg_temp_free(t1);
+}
+
+static inline void bcd_neg(TCGv dest, TCGv src)
+{
+    TCGv t0, t1;
+
+    /* compute the 10's complement
+     *
+     *    bcd_add(0xff99 - (src + X), 0x0001)
+     *
+     *        t1 = 0xF9999999 - src - X
+     *        t2 = t1 + 0x06666666
+     *        t3 = t2 + 0x00000001
+     *        t4 = t2 ^ 0x00000001
+     *        t5 = t3 ^ t4
+     *        t6 = ~t5 & 0x11111110
+     *        t7 = (t6 >> 2) | (t6 >> 3)
+     *        return t3 - t7
+     *
+     * reduced in:
+     *
+     *        t2 = 0xFFFFFFFF + (-src)
+     *        t3 = (-src)
+     *        t4 = t2  ^ (X ^ 1)
+     *        t5 = (t3 - X) ^ t4
+     *        t6 = ~t5 & 0x11111110
+     *        t7 = (t6 >> 2) | (t6 >> 3)
+     *        return (t3 - X) - t7
+     *
+     */
+
+    tcg_gen_neg_i32(src, src);
+
+    t0 = tcg_temp_new();
+    tcg_gen_addi_i32(t0, src, 0xffff);
+    tcg_gen_xori_i32(t0, t0, 1);
+    tcg_gen_xor_i32(t0, t0, QREG_CC_X);
+    tcg_gen_sub_i32(src, src, QREG_CC_X);
+    tcg_gen_xor_i32(t0, t0, src);
+    tcg_gen_not_i32(t0, t0);
+    tcg_gen_andi_i32(t0, t0, 0x110);
+
+    t1 = tcg_temp_new();
+    tcg_gen_shri_i32(t1, t0, 2);
+    tcg_gen_shri_i32(t0, t0, 3);
+    tcg_gen_or_i32(t0, t0, t1);
+    tcg_temp_free(t1);
+
+    tcg_gen_sub_i32(dest, src, t0);
+    tcg_temp_free(t0);
+}
+
+static inline void bcd_flags(TCGv val)
+{
+    tcg_gen_andi_i32(QREG_CC_C, val, 0x00ff);
+    tcg_gen_or_i32(QREG_CC_Z, QREG_CC_Z, QREG_CC_C);
+
+    tcg_gen_movi_i32(QREG_CC_X, 0);
+    tcg_gen_andi_i32(val, val, 0xff00);
+    tcg_gen_setcond_i32(TCG_COND_NE, QREG_CC_C, val, QREG_CC_X);
+
+    tcg_gen_mov_i32(QREG_CC_X, QREG_CC_C);
+}
+
+DISAS_INSN(abcd_reg)
+{
+    TCGv src;
+    TCGv dest;
+
+    gen_flush_flags(s); /* !Z is sticky */
+
+    src = gen_extend(DREG(insn, 0), OS_BYTE, 0);
+    dest = gen_extend(DREG(insn, 9), OS_BYTE, 0);
+    bcd_add(src, dest);
+    gen_partset_reg(OS_BYTE, DREG(insn, 9), dest);
+
+    bcd_flags(dest);
+
+    set_cc_op(s, CC_OP_FLAGS);
+}
+
+DISAS_INSN(abcd_mem)
+{
+    TCGv src;
+    TCGv addr_src;
+    TCGv dest;
+    TCGv addr_dest;
+
+    gen_flush_flags(s); /* !Z is sticky */
+
+    addr_src = AREG(insn, 0);
+    tcg_gen_subi_i32(addr_src, addr_src, opsize_bytes(OS_BYTE));
+    src = gen_load(s, OS_BYTE, addr_src, 0);
+
+    addr_dest = AREG(insn, 9);
+    tcg_gen_subi_i32(addr_dest, addr_dest, opsize_bytes(OS_BYTE));
+    dest = gen_load(s, OS_BYTE, addr_dest, 0);
+
+    bcd_add(src, dest);
+
+    gen_store(s, OS_BYTE, addr_dest, dest);
+
+    bcd_flags(dest);
+
+    set_cc_op(s, CC_OP_FLAGS);
+}
+
+DISAS_INSN(sbcd_reg)
+{
+    TCGv src;
+    TCGv dest;
+    TCGv tmp;
+
+    gen_flush_flags(s); /* !Z is sticky */
+
+    src = gen_extend(DREG(insn, 0), OS_BYTE, 0);
+    dest = gen_extend(DREG(insn, 9), OS_BYTE, 0);
+
+    tmp = tcg_temp_new();
+    bcd_neg(tmp, src);
+    bcd_add(tmp, dest);
+    tcg_temp_free(tmp);
+
+    gen_partset_reg(OS_BYTE, DREG(insn, 9), dest);
+
+    bcd_flags(dest);
+
+    set_cc_op(s, CC_OP_FLAGS);
+}
+
+DISAS_INSN(sbcd_mem)
+{
+    TCGv src;
+    TCGv addr_src;
+    TCGv dest;
+    TCGv addr_dest;
+    TCGv tmp;
+
+    gen_flush_flags(s); /* !Z is sticky */
+
+    addr_src = AREG(insn, 0);
+    tcg_gen_subi_i32(addr_src, addr_src, opsize_bytes(OS_BYTE));
+    src = gen_load(s, OS_BYTE, addr_src, 0);
+
+    addr_dest = AREG(insn, 9);
+    tcg_gen_subi_i32(addr_dest, addr_dest, opsize_bytes(OS_BYTE));
+    dest = gen_load(s, OS_BYTE, addr_dest, 0);
+
+    tmp = tcg_temp_new();
+    bcd_neg(tmp, src);
+    bcd_add(tmp, dest);
+    tcg_temp_free(tmp);
+
+    gen_store(s, OS_BYTE, addr_dest, dest);
+
+    bcd_flags(dest);
+
+    set_cc_op(s, CC_OP_FLAGS);
+}
+
+DISAS_INSN(nbcd)
+{
+    TCGv dest;
+    TCGv addr;
+
+    gen_flush_flags(s); /* !Z is sticky */
+
+    SRC_EA(env, dest, OS_BYTE, 0, &addr);
+
+    bcd_neg(dest, dest);
+
+    DEST_EA(env, insn, OS_BYTE, dest, &addr);
+
+    bcd_flags(dest);
+
+    set_cc_op(s, CC_OP_FLAGS);
+}
+
 DISAS_INSN(addsub)
 {
     TCGv reg;
@@ -2329,6 +2554,334 @@  DISAS_INSN(shift_reg)
     set_cc_op(s, CC_OP_FLAGS);
 }
 
+static void bitfield_param(uint16_t ext, TCGv *offset, TCGv *width, TCGv *mask)
+{
+    TCGv tmp;
+
+    /* offset */
+
+    if (ext & 0x0800) {
+        *offset = tcg_temp_new_i32();
+        tcg_gen_mov_i32(*offset, DREG(ext, 6));
+    } else {
+        *offset = tcg_temp_new_i32();
+        tcg_gen_movi_i32(*offset, (ext >> 6) & 31);
+    }
+
+    /* width */
+
+    if (ext & 0x0020) {
+        *width = tcg_temp_new_i32();
+        tcg_gen_subi_i32(*width, DREG(ext, 0), 1);
+        tcg_gen_andi_i32(*width, *width, 31);
+        tcg_gen_addi_i32(*width, *width, 1);
+    } else {
+        *width = tcg_temp_new_i32();
+        tcg_gen_movi_i32(*width, ((ext - 1) & 31) + 1);
+    }
+
+    /* mask */
+
+    tmp = tcg_temp_new_i32();
+    tcg_gen_sub_i32(tmp, tcg_const_i32(32), *width);
+    *mask = tcg_temp_new_i32();
+    tcg_gen_shl_i32(*mask, tcg_const_i32(0xffffffff), tmp);
+}
+
+DISAS_INSN(bitfield_reg)
+{
+    uint16_t ext;
+    TCGv tmp;
+    TCGv tmp1;
+    TCGv reg;
+    TCGv offset;
+    TCGv width;
+    int op;
+    TCGv reg2;
+    TCGv mask;
+
+    reg = DREG(insn, 0);
+    op = (insn >> 8) & 7;
+    ext = read_im16(env, s);
+
+    bitfield_param(ext, &offset, &width, &mask);
+
+    if (ext & 0x0800)
+        tcg_gen_andi_i32(offset, offset, 31);
+    tcg_gen_rotr_i32(mask, mask, offset);
+
+    /* reg & mask */
+
+    tmp = tcg_temp_new_i32();
+    tcg_gen_and_i32(tmp, reg, mask);
+
+    tmp1 = tcg_temp_new_i32();
+    tcg_gen_rotl_i32(tmp1, tmp, offset);
+
+    reg2 = DREG(ext, 12);
+    if (op == 7) {
+        TCGv tmp2;
+
+        tmp2 = tcg_temp_new_i32();
+        tcg_gen_sub_i32(tmp2, tcg_const_i32(32), width);
+        tcg_gen_shl_i32(tmp2, reg2, tmp2);
+        tcg_gen_and_i32(tmp2, tmp2, mask);
+        gen_logic_cc(s, tmp2, OS_LONG);
+
+        tcg_temp_free_i32(tmp1);
+    } else {
+        gen_logic_cc(s, tmp1, OS_LONG);
+    }
+
+    switch (op) {
+    case 0: /* bftst */
+        break;
+    case 1: /* bfextu */
+        tcg_gen_add_i32(tmp1, offset, width);
+        tcg_gen_andi_i32(tmp1, tmp1, 31);
+        tcg_gen_rotl_i32(reg2, tmp, tmp1);
+        break;
+    case 2: /* bfchg */
+        tcg_gen_xor_i32(reg, reg, mask);
+        break;
+    case 3: /* bfexts */
+        tcg_gen_rotl_i32(reg2, tmp, offset);
+        tcg_gen_sub_i32(width, tcg_const_i32(32), width);
+        tcg_gen_sar_i32(reg2, reg2, width);
+        break;
+    case 4: /* bfclr */
+        tcg_gen_not_i32(mask, mask);
+        tcg_gen_and_i32(reg, reg, mask);
+        break;
+    case 5: /* bfffo */
+        tcg_gen_rotl_i32(reg2, tmp, offset);
+        gen_helper_bfffo(tmp, tmp, width);
+        tcg_gen_add_i32(reg2, tmp, offset);
+        break;
+    case 6: /* bfset */
+        tcg_gen_or_i32(reg, reg, mask);
+        break;
+    case 7: /* bfins */
+        tcg_gen_shl_i32(tmp1, tcg_const_i32(1), width);
+        tcg_gen_subi_i32(tmp1, tmp1, 1);
+        tcg_gen_and_i32(tmp, reg2, tmp1);
+        tcg_gen_add_i32(tmp1, offset, width);
+        tcg_gen_andi_i32(tmp1, tmp1, 31);
+        tcg_gen_rotr_i32(tmp, tmp, tmp1);
+        tcg_gen_not_i32(mask, mask);
+        tcg_gen_and_i32(reg, reg, mask);
+        tcg_gen_or_i32(reg, reg, tmp);
+        break;
+    }
+}
+
+static TCGv gen_bitfield_cc(DisasContext *s,
+                            TCGv offset, TCGv mask_cc, TCGv_i64 bitfield)
+{
+    TCGv dest;
+    TCGv_i64 tmp64;
+
+    /* move the bitfield into a 32bit value */
+
+    tmp64 = tcg_temp_new_i64();
+
+    tcg_gen_extu_i32_i64(tmp64, offset);
+
+    /* tmp64 = bitfield << offset */
+
+    tcg_gen_shl_i64(tmp64, bitfield, tmp64);
+
+    /* tmp = (bitfield << offset) >> 32 */
+
+    tcg_gen_shri_i64(tmp64, tmp64, 32ULL);
+    dest = tcg_temp_new_i32();
+    tcg_gen_extrl_i64_i32(dest, tmp64);
+    tcg_gen_and_i32(dest, dest, mask_cc);
+
+    return dest;
+}
+
+static TCGv_i64 gen_bitfield_mask(TCGv offset, TCGv width)
+{
+    TCGv tmp;
+    TCGv_i64 mask;
+    TCGv_i64 shift;
+
+    mask = tcg_temp_new_i64();
+
+    /* mask = (1u << width) - 1; */
+
+    tcg_gen_extu_i32_i64(mask, width);
+    tcg_gen_shl_i64(mask, tcg_const_i64(1), mask);
+    tcg_gen_subi_i64(mask, mask, 1);
+
+    /* shift = 64 - (width + offset); */
+
+    tmp = tcg_temp_new_i32();
+    tcg_gen_add_i32(tmp, offset, width);
+    tcg_gen_sub_i32(tmp, tcg_const_i32(64), tmp);
+    shift = tcg_temp_new_i64();
+    tcg_gen_extu_i32_i64(shift, tmp);
+
+    /* mask <<= shift */
+
+    tcg_gen_shl_i64(mask, mask, shift);
+
+    return mask;
+}
+
+static void gen_bitfield_ins(TCGv offset, TCGv width, TCGv src,
+                                 TCGv_i64 val)
+{
+    TCGv_i64 insert;
+    TCGv_i64 shift;
+    TCGv tmp;
+
+    tmp = tcg_temp_new_i32();
+
+    /* tmp = (1u << width) - 1; */
+
+    /* width is between 1 and 32;
+     * tcg_gen_shl_i32() cannot handle a shift count of 32
+     */
+    tcg_gen_subi_i32(tmp, width, 1);
+    tcg_gen_shl_i32(tmp, tcg_const_i32(2), tmp);
+    tcg_gen_subi_i32(tmp, tmp, 1);
+
+    /* tmp = tmp & src; */
+
+    tcg_gen_and_i32(tmp, tmp, src);
+
+    /* insert = (i64)tmp; */
+
+    insert = tcg_temp_new_i64();
+    tcg_gen_extu_i32_i64(insert, tmp);
+
+    /* tmp = 64 - (width + offset); */
+
+    tcg_gen_add_i32(tmp, offset, width);
+    tcg_gen_sub_i32(tmp, tcg_const_i32(64), tmp);
+    shift = tcg_temp_new_i64();
+    tcg_gen_extu_i32_i64(shift, tmp);
+
+    /* insert <<= shift */
+
+    tcg_gen_shl_i64(insert, insert, shift);
+
+    /* val |= insert */
+
+    tcg_gen_or_i64(val, val, insert);
+}
+
+DISAS_INSN(bitfield_mem)
+{
+    uint16_t ext;
+    int op;
+    TCGv_i64 bitfield;
+    TCGv_i64 mask_bitfield;
+    TCGv mask_cc;
+    TCGv shift;
+    TCGv val;
+    TCGv src;
+    TCGv offset;
+    TCGv width;
+    TCGv reg;
+    TCGv tmp;
+
+    op = (insn >> 8) & 7;
+    ext = read_im16(env, s);
+    src = gen_lea(env, s, insn, OS_LONG);
+    if (IS_NULL_QREG(src)) {
+       gen_addr_fault(s);
+       return;
+    }
+
+    bitfield_param(ext, &offset, &width, &mask_cc);
+
+    /* adjust src and offset */
+
+    /* src += offset >> 3; */
+
+    tmp = tcg_temp_new_i32();
+    tcg_gen_shri_i32(tmp, offset, 3);
+    tcg_gen_add_i32(src, src, tmp);
+
+    /* offset &= 7; */
+
+    tcg_gen_andi_i32(offset, offset, 7);
+
+    /* load */
+
+    bitfield = tcg_temp_new_i64();
+    gen_helper_bitfield_load(bitfield, cpu_env, src, offset, width);
+
+    /* compute CC and move the bitfield into a 32bit value */
+
+    val = gen_bitfield_cc(s, offset, mask_cc, bitfield);
+
+    /* execute operation */
+
+    reg = DREG(ext, 12);
+
+    if (op == 7) {
+        TCGv tmp1;
+
+        tmp1 = tcg_temp_new_i32();
+        tcg_gen_sub_i32(tmp1, tcg_const_i32(32), width);
+        tcg_gen_shl_i32(tmp1, reg, tmp1);
+        tcg_gen_and_i32(tmp1, tmp1, mask_cc);
+        gen_logic_cc(s, tmp1, OS_LONG);
+
+        tcg_temp_free_i32(tmp1);
+    } else {
+        gen_logic_cc(s, val, OS_LONG);
+    }
+
+    switch (op) {
+    case 0: /* bftst */
+        break;
+    case 1: /* bfextu */
+        shift = tcg_temp_new_i32();
+        tcg_gen_sub_i32(shift, tcg_const_i32(32), width);
+        tcg_gen_shr_i32(reg, val, shift);
+        break;
+    case 2: /* bfchg */
+        mask_bitfield = gen_bitfield_mask(offset, width);
+        tcg_gen_xor_i64(bitfield, bitfield, mask_bitfield);
+        gen_helper_bitfield_store(cpu_env, src, offset, width, bitfield);
+        break;
+    case 3: /* bfexts */
+        shift = tcg_temp_new_i32();
+        tcg_gen_sub_i32(shift, tcg_const_i32(32), width);
+        tcg_gen_sar_i32(reg, val, shift);
+        break;
+    case 4: /* bfclr */
+        mask_bitfield = gen_bitfield_mask(offset, width);
+        tcg_gen_not_i64(mask_bitfield, mask_bitfield);
+        tcg_gen_and_i64(bitfield, bitfield, mask_bitfield);
+        gen_helper_bitfield_store(cpu_env, src, offset, width, bitfield);
+        break;
+    case 5: /* bfffo */
+        gen_helper_bfffo(val, val, width);
+        tcg_gen_add_i32(reg, val, offset);
+        break;
+    case 6: /* bfset */
+        mask_bitfield = gen_bitfield_mask(offset, width);
+        tcg_gen_or_i64(bitfield, bitfield, mask_bitfield);
+        gen_helper_bitfield_store(cpu_env, src, offset, width, bitfield);
+        break;
+    case 7: /* bfins */
+        /* clear */
+        mask_bitfield = gen_bitfield_mask(offset, width);
+        tcg_gen_not_i64(mask_bitfield, mask_bitfield);
+        tcg_gen_and_i64(bitfield, bitfield, mask_bitfield);
+        /* insert */
+        gen_bitfield_ins(offset, width, reg, bitfield);
+        gen_helper_bitfield_store(cpu_env, src, offset, width, bitfield);
+        break;
+    }
+}
+
 DISAS_INSN(ff1)
 {
     TCGv reg;
@@ -3296,6 +3849,7 @@  void register_m68k_insns (CPUM68KState *env)
     INSN(not,       4600, ff00, M68000);
     INSN(undef,     46c0, ffc0, M68000);
     INSN(move_to_sr, 46c0, ffc0, CF_ISA_A);
+    INSN(nbcd,      4800, ffc0, M68000);
     BASE(pea,       4840, ffc0);
     BASE(swap,      4840, fff8);
     INSN(bkpt,      4848, fff8, M68000);
@@ -3350,6 +3904,8 @@  void register_m68k_insns (CPUM68KState *env)
     INSN(mvzs,      7100, f100, CF_ISA_B);
     BASE(or,        8000, f000);
     BASE(divw,      80c0, f0c0);
+    INSN(sbcd_reg,  8100, f1f8, M68000);
+    INSN(sbcd_mem,  8108, f1f8, M68000);
     BASE(addsub,    9000, f000);
     INSN(undef,     90c0, f0c0, CF_ISA_A);
     INSN(subx_reg,  9180, f1f8, CF_ISA_A);
@@ -3382,6 +3938,8 @@  void register_m68k_insns (CPUM68KState *env)
     INSN(eor,       b180, f1c0, CF_ISA_A);
     BASE(and,       c000, f000);
     BASE(mulw,      c0c0, f0c0);
+    INSN(abcd_reg,  c100, f1f8, M68000);
+    INSN(abcd_mem,  c108, f1f8, M68000);
     BASE(addsub,    d000, f000);
     INSN(undef,     d0c0, f0c0, CF_ISA_A);
     INSN(addx_reg,      d180, f1f8, CF_ISA_A);
@@ -3391,6 +3949,8 @@  void register_m68k_insns (CPUM68KState *env)
     INSN(adda,      d0c0, f0c0, M68000);
     INSN(shift_im,  e080, f0f0, CF_ISA_A);
     INSN(shift_reg, e0a0, f0f0, CF_ISA_A);
+    INSN(bitfield_mem,e8c0, f8c0, BITFIELD);
+    INSN(bitfield_reg,e8c0, f8f8, BITFIELD);
     INSN(undef_fpu, f000, f000, CF_ISA_A);
     INSN(fpu,       f200, ffc0, CF_FPU);
     INSN(fbcc,      f280, ffc0, CF_FPU);