diff mbox series

[v3,08/11] Hexagon (target/hexagon) Add overrides for compound compare and jump

Message ID 20221104192631.29434-9-tsimpson@quicinc.com (mailing list archive)
State New, archived
Headers show
Series Hexagon (target/hexagon) performance and bug fixes | expand

Commit Message

Taylor Simpson Nov. 4, 2022, 7:26 p.m. UTC
Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
 target/hexagon/gen_tcg.h | 177 +++++++++++++++++++++++++++++++++++++++
 target/hexagon/genptr.c  |  72 ++++++++++++++++
 2 files changed, 249 insertions(+)

Comments

Richard Henderson Nov. 5, 2022, 12:15 a.m. UTC | #1
On 11/5/22 06:26, Taylor Simpson wrote:
> Signed-off-by: Taylor Simpson<tsimpson@quicinc.com>
> ---
>   target/hexagon/gen_tcg.h | 177 +++++++++++++++++++++++++++++++++++++++
>   target/hexagon/genptr.c  |  72 ++++++++++++++++
>   2 files changed, 249 insertions(+)

Acked-by: Richard Henderson <richard.henderson@linaro.org>

r~
diff mbox series

Patch

diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
index fe0a5e9c13..8b311d16e0 100644
--- a/target/hexagon/gen_tcg.h
+++ b/target/hexagon/gen_tcg.h
@@ -620,6 +620,183 @@ 
 #define fGEN_TCG_J2_callf(SHORTCODE) \
     gen_cond_call(ctx, PuV, false, riV)
 
+/*
+ * Compound compare and jump instructions
+ * Here is a primer to understand the tag names
+ *
+ * Comparison
+ *      cmpeqi   compare equal to an immediate
+ *      cmpgti   compare greater than an immediate
+ *      cmpgtiu  compare greater than an unsigned immediate
+ *      cmpeqn1  compare equal to negative 1
+ *      cmpgtn1  compare greater than negative 1
+ *      cmpeq    compare equal (two registers)
+ *      cmpgtu   compare greater than unsigned (two registers)
+ *      tstbit0  test bit zero
+ *
+ * Condition
+ *      tp0      p0 is true     p0 = cmp.eq(r0,#5); if (p0.new) jump:nt address
+ *      fp0      p0 is false    p0 = cmp.eq(r0,#5); if (!p0.new) jump:nt address
+ *      tp1      p1 is true     p1 = cmp.eq(r0,#5); if (p1.new) jump:nt address
+ *      fp1      p1 is false    p1 = cmp.eq(r0,#5); if (!p1.new) jump:nt address
+ *
+ * Prediction (not modelled in qemu)
+ *      _nt      not taken
+ *      _t       taken
+ */
+#define fGEN_TCG_J4_cmpeq_tp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_jmp(ctx, 0, TCG_COND_EQ, true, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpeq_tp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_jmp(ctx, 0, TCG_COND_EQ, true, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpeq_fp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_jmp(ctx, 0, TCG_COND_EQ, false, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpeq_fp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_jmp(ctx, 0, TCG_COND_EQ, false, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpeq_tp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_jmp(ctx, 1, TCG_COND_EQ, true, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpeq_tp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_jmp(ctx, 1, TCG_COND_EQ, true, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpeq_fp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_jmp(ctx, 1, TCG_COND_EQ, false, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpeq_fp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_jmp(ctx, 1, TCG_COND_EQ, false, RsV, RtV, riV)
+
+#define fGEN_TCG_J4_cmpgt_tp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_jmp(ctx, 0, TCG_COND_GT, true, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgt_tp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_jmp(ctx, 0, TCG_COND_GT, true, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgt_fp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_jmp(ctx, 0, TCG_COND_GT, false, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgt_fp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_jmp(ctx, 0, TCG_COND_GT, false, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgt_tp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_jmp(ctx, 1, TCG_COND_GT, true, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgt_tp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_jmp(ctx, 1, TCG_COND_GT, true, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgt_fp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_jmp(ctx, 1, TCG_COND_GT, false, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgt_fp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_jmp(ctx, 1, TCG_COND_GT, false, RsV, RtV, riV)
+
+#define fGEN_TCG_J4_cmpgtu_tp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_jmp(ctx, 0, TCG_COND_GTU, true, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgtu_tp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_jmp(ctx, 0, TCG_COND_GTU, true, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgtu_fp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_jmp(ctx, 0, TCG_COND_GTU, false, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgtu_fp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_jmp(ctx, 0, TCG_COND_GTU, false, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgtu_tp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_jmp(ctx, 1, TCG_COND_GTU, true, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgtu_tp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_jmp(ctx, 1, TCG_COND_GTU, true, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgtu_fp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_jmp(ctx, 1, TCG_COND_GTU, false, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgtu_fp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_jmp(ctx, 1, TCG_COND_GTU, false, RsV, RtV, riV)
+
+#define fGEN_TCG_J4_cmpeqi_tp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp(ctx, 0, TCG_COND_EQ, true, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpeqi_tp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp(ctx, 0, TCG_COND_EQ, true, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpeqi_fp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp(ctx, 0, TCG_COND_EQ, false, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpeqi_fp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp(ctx, 0, TCG_COND_EQ, false, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpeqi_tp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp(ctx, 1, TCG_COND_EQ, true, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpeqi_tp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp(ctx, 1, TCG_COND_EQ, true, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpeqi_fp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp(ctx, 1, TCG_COND_EQ, false, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpeqi_fp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp(ctx, 1, TCG_COND_EQ, false, RsV, UiV, riV)
+
+#define fGEN_TCG_J4_cmpgti_tp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp(ctx, 0, TCG_COND_GT, true, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgti_tp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp(ctx, 0, TCG_COND_GT, true, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgti_fp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp(ctx, 0, TCG_COND_GT, false, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgti_fp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp(ctx, 0, TCG_COND_GT, false, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgti_tp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp(ctx, 1, TCG_COND_GT, true, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgti_tp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp(ctx, 1, TCG_COND_GT, true, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgti_fp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp(ctx, 1, TCG_COND_GT, false, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgti_fp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp(ctx, 1, TCG_COND_GT, false, RsV, UiV, riV)
+
+#define fGEN_TCG_J4_cmpgtui_tp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp(ctx, 0, TCG_COND_GTU, true, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgtui_tp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp(ctx, 0, TCG_COND_GTU, true, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgtui_fp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp(ctx, 0, TCG_COND_GTU, false, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgtui_fp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp(ctx, 0, TCG_COND_GTU, false, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgtui_tp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp(ctx, 1, TCG_COND_GTU, true, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgtui_tp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp(ctx, 1, TCG_COND_GTU, true, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgtui_fp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp(ctx, 1, TCG_COND_GTU, false, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgtui_fp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp(ctx, 1, TCG_COND_GTU, false, RsV, UiV, riV)
+
+#define fGEN_TCG_J4_cmpeqn1_tp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp(ctx, 0, TCG_COND_EQ, true, RsV, riV)
+#define fGEN_TCG_J4_cmpeqn1_tp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp(ctx, 0, TCG_COND_EQ, true, RsV, riV)
+#define fGEN_TCG_J4_cmpeqn1_fp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp(ctx, 0, TCG_COND_EQ, false, RsV, riV)
+#define fGEN_TCG_J4_cmpeqn1_fp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp(ctx, 0, TCG_COND_EQ, false, RsV, riV)
+#define fGEN_TCG_J4_cmpeqn1_tp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp(ctx, 1, TCG_COND_EQ, true, RsV, riV)
+#define fGEN_TCG_J4_cmpeqn1_tp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp(ctx, 1, TCG_COND_EQ, true, RsV, riV)
+#define fGEN_TCG_J4_cmpeqn1_fp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp(ctx, 1, TCG_COND_EQ, false, RsV, riV)
+#define fGEN_TCG_J4_cmpeqn1_fp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp(ctx, 1, TCG_COND_EQ, false, RsV, riV)
+
+#define fGEN_TCG_J4_cmpgtn1_tp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp(ctx, 0, TCG_COND_GT, true, RsV, riV)
+#define fGEN_TCG_J4_cmpgtn1_tp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp(ctx, 0, TCG_COND_GT, true, RsV, riV)
+#define fGEN_TCG_J4_cmpgtn1_fp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp(ctx, 0, TCG_COND_GT, false, RsV, riV)
+#define fGEN_TCG_J4_cmpgtn1_fp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp(ctx, 0, TCG_COND_GT, false, RsV, riV)
+#define fGEN_TCG_J4_cmpgtn1_tp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp(ctx, 1, TCG_COND_GT, true, RsV, riV)
+#define fGEN_TCG_J4_cmpgtn1_tp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp(ctx, 1, TCG_COND_GT, true, RsV, riV)
+#define fGEN_TCG_J4_cmpgtn1_fp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp(ctx, 1, TCG_COND_GT, false, RsV, riV)
+#define fGEN_TCG_J4_cmpgtn1_fp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp(ctx, 1, TCG_COND_GT, false, RsV, riV)
+
+#define fGEN_TCG_J4_tstbit0_tp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_tstbit0_jmp(ctx, 0, true, RsV, riV)
+#define fGEN_TCG_J4_tstbit0_tp0_jump_t(SHORTCODE) \
+    gen_cmpnd_tstbit0_jmp(ctx, 0, true, RsV, riV)
+#define fGEN_TCG_J4_tstbit0_fp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_tstbit0_jmp(ctx, 0, false, RsV, riV)
+#define fGEN_TCG_J4_tstbit0_fp0_jump_t(SHORTCODE) \
+    gen_cmpnd_tstbit0_jmp(ctx, 0, false, RsV, riV)
+#define fGEN_TCG_J4_tstbit0_tp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_tstbit0_jmp(ctx, 1, true, RsV, riV)
+#define fGEN_TCG_J4_tstbit0_tp1_jump_t(SHORTCODE) \
+    gen_cmpnd_tstbit0_jmp(ctx, 1, true, RsV, riV)
+#define fGEN_TCG_J4_tstbit0_fp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_tstbit0_jmp(ctx, 1, false, RsV, riV)
+#define fGEN_TCG_J4_tstbit0_fp1_jump_t(SHORTCODE) \
+    gen_cmpnd_tstbit0_jmp(ctx, 1, false, RsV, riV)
+
 #define fGEN_TCG_J2_pause(SHORTCODE) \
     do { \
         uiV = uiV; \
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index 88e6898027..a2a088cd25 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -499,6 +499,78 @@  static void gen_set_usr_fieldi(int field, int x)
     gen_set_usr_field(field, val);
 }
 
+static void gen_compare(TCGCond cond, TCGv res, TCGv arg1, TCGv arg2)
+{
+    TCGv one = tcg_constant_tl(0xff);
+    TCGv zero = tcg_constant_tl(0);
+
+    tcg_gen_movcond_tl(cond, res, arg1, arg2, one, zero);
+}
+
+static void gen_cond_jump(DisasContext *ctx, TCGv pred, int pc_off)
+{
+    gen_write_new_pc_pcrel(ctx, pc_off, pred);
+}
+
+static void gen_cmpnd_cmp_jmp(DisasContext *ctx,
+                              int pnum, TCGCond cond,
+                              bool sense, TCGv arg1, TCGv arg2,
+                              int pc_off)
+{
+    if (ctx->insn->part1) {
+        TCGv pred = tcg_temp_new();
+        gen_compare(cond, pred, arg1, arg2);
+        gen_log_pred_write(ctx, pnum, pred);
+        tcg_temp_free(pred);
+    } else {
+        TCGv pred = tcg_temp_new();
+
+        tcg_gen_mov_tl(pred, hex_new_pred_value[pnum]);
+        if (!sense) {
+            tcg_gen_xori_tl(pred, pred, 0xff);
+        }
+
+        gen_cond_jump(ctx, pred, pc_off);
+
+        tcg_temp_free(pred);
+    }
+}
+
+static void gen_cmpnd_cmpi_jmp(DisasContext *ctx,
+                               int pnum, TCGCond cond,
+                               bool sense, TCGv arg1, int arg2,
+                               int pc_off)
+{
+    TCGv tmp = tcg_constant_tl(arg2);
+    gen_cmpnd_cmp_jmp(ctx, pnum, cond, sense, arg1, tmp, pc_off);
+}
+
+static void gen_cmpnd_cmp_n1_jmp(DisasContext *ctx, int pnum, TCGCond cond,
+                                 bool sense, TCGv arg, int pc_off)
+{
+    gen_cmpnd_cmpi_jmp(ctx, pnum, cond, sense, arg, -1, pc_off);
+}
+
+static void gen_cmpnd_tstbit0_jmp(DisasContext *ctx,
+                                  int pnum, bool sense, TCGv arg, int pc_off)
+{
+    if (ctx->insn->part1) {
+        TCGv pred = tcg_temp_new();
+        tcg_gen_andi_tl(pred, arg, 1);
+        gen_8bitsof(pred, pred);
+        gen_log_pred_write(ctx, pnum, pred);
+        tcg_temp_free(pred);
+    } else {
+        TCGv pred = tcg_temp_new();
+        tcg_gen_mov_tl(pred, hex_new_pred_value[pnum]);
+        if (!sense) {
+            tcg_gen_xori_tl(pred, pred, 0xff);
+        }
+        gen_cond_jump(ctx, pred, pc_off);
+        tcg_temp_free(pred);
+    }
+}
+
 static void gen_call(DisasContext *ctx, int pc_off)
 {
     TCGv next_PC =