diff mbox series

[15/30] tcg/loongarch: Implement clz/ctz ops

Message ID 20210920080451.408655-16-git@xen0n.name (mailing list archive)
State New, archived
Headers show
Series 64-bit LoongArch port of QEMU TCG | expand

Commit Message

WANG Xuerui Sept. 20, 2021, 8:04 a.m. UTC
Signed-off-by: WANG Xuerui <git@xen0n.name>
---
 tcg/loongarch/tcg-target-con-set.h |  1 +
 tcg/loongarch/tcg-target.c.inc     | 31 ++++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+)

Comments

Richard Henderson Sept. 20, 2021, 4:10 p.m. UTC | #1
On 9/20/21 1:04 AM, WANG Xuerui wrote:
> +static void tcg_out_clzctz(TCGContext *s, LoongArchInsn opc,
> +                           TCGReg a0, TCGReg a1, TCGReg a2)
> +{
> +    /* all clz/ctz insns belong to DJ-format */
> +    tcg_out32(s, encode_dj_insn(opc, TCG_REG_TMP0, a1));
> +    /* a0 = a1 ? REG_TMP0 : a2 */
> +    tcg_out_opc_maskeqz(s, TCG_REG_TMP0, TCG_REG_TMP0, a1);
> +    tcg_out_opc_masknez(s, a0, a2, a1);
> +    tcg_out_opc_or(s, a0, TCG_REG_TMP0, a0);
> +}

From Song Gao's translation, I believe that ctz(0) == 32 or 64, depending on the
operation width.  This is in fact the most common result, so it's worth specializing.
See tcg/i386/tcg-target.c.inc, tcg_out_clz, have_lzcnt.

But what's here looks ok.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~
diff mbox series

Patch

diff --git a/tcg/loongarch/tcg-target-con-set.h b/tcg/loongarch/tcg-target-con-set.h
index b0751c4bb0..417c97549a 100644
--- a/tcg/loongarch/tcg-target-con-set.h
+++ b/tcg/loongarch/tcg-target-con-set.h
@@ -18,4 +18,5 @@  C_O0_I1(r)
 C_O1_I1(r, r)
 C_O1_I2(r, r, r)
 C_O1_I2(r, r, rU)
+C_O1_I2(r, r, rZ)
 C_O1_I2(r, 0, rZ)
diff --git a/tcg/loongarch/tcg-target.c.inc b/tcg/loongarch/tcg-target.c.inc
index d617b833e5..e817964a7e 100644
--- a/tcg/loongarch/tcg-target.c.inc
+++ b/tcg/loongarch/tcg-target.c.inc
@@ -362,6 +362,17 @@  static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg)
     tcg_out_opc_addi_w(s, ret, arg, 0);
 }
 
+static void tcg_out_clzctz(TCGContext *s, LoongArchInsn opc,
+                           TCGReg a0, TCGReg a1, TCGReg a2)
+{
+    /* count into TMP0 first; all clz/ctz insns belong to DJ-format */
+    tcg_out32(s, encode_dj_insn(opc, TCG_REG_TMP0, a1));
+    /* a0 = a1 ? REG_TMP0 : a2, formed by OR-ing the two masked values */
+    tcg_out_opc_maskeqz(s, TCG_REG_TMP0, TCG_REG_TMP0, a1);
+    tcg_out_opc_masknez(s, a0, a2, a1);
+    tcg_out_opc_or(s, a0, TCG_REG_TMP0, a0);
+}
+
 /*
  * Entry-points
  */
@@ -488,6 +499,20 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_opc_revb_d(s, a0, a1);
         break;
 
+    case INDEX_op_clz_i32:
+        tcg_out_clzctz(s, OPC_CLZ_W, a0, a1, a2);
+        break;
+    case INDEX_op_clz_i64:
+        tcg_out_clzctz(s, OPC_CLZ_D, a0, a1, a2);
+        break;
+
+    case INDEX_op_ctz_i32:
+        tcg_out_clzctz(s, OPC_CTZ_W, a0, a1, a2);
+        break;
+    case INDEX_op_ctz_i64:
+        tcg_out_clzctz(s, OPC_CTZ_D, a0, a1, a2);
+        break;
+
     case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
     case INDEX_op_mov_i64:
     default:
@@ -541,6 +566,12 @@  static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
         /* LoongArch reg-imm bitops have their imms ZERO-extended */
         return C_O1_I2(r, r, rU);
 
+    case INDEX_op_clz_i32:
+    case INDEX_op_clz_i64:
+    case INDEX_op_ctz_i32:
+    case INDEX_op_ctz_i64:
+        return C_O1_I2(r, r, rZ);
+
     case INDEX_op_deposit_i32:
     case INDEX_op_deposit_i64:
         /* Must deposit into the same register as input */