diff mbox series

[v4,08/11] Hexagon (target/hexagon) Add overrides for compound compare and jump

Message ID 20221108040552.22175-9-tsimpson@quicinc.com (mailing list archive)
State New, archived
Headers show
Series Hexagon (target/hexagon) performance and bug fixes | expand

Commit Message

Taylor Simpson Nov. 8, 2022, 4:05 a.m. UTC
Acked-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
 target/hexagon/gen_tcg.h | 177 +++++++++++++++++++++++++++++++++++++++
 target/hexagon/genptr.c  |  90 ++++++++++++++++++++
 2 files changed, 267 insertions(+)
diff mbox series

Patch

diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
index 1bdc787a02..506b454e4e 100644
--- a/target/hexagon/gen_tcg.h
+++ b/target/hexagon/gen_tcg.h
@@ -620,6 +620,183 @@ 
 #define fGEN_TCG_J2_callf(SHORTCODE) \
     gen_cond_call(ctx, PuV, TCG_COND_NE, riV)
 
+/*
+ * Compound compare and jump instructions
+ * Here is a primer to understand the tag names
+ *
+ * Comparison
+ *      cmpeqi   compare equal to an immediate
+ *      cmpgti   compare greater than an immediate
+ *      cmpgtiu  compare greater than an unsigned immediate
+ *      cmpeqn1  compare equal to negative 1
+ *      cmpgtn1  compare greater than negative 1
+ *      cmpeq    compare equal (two registers)
+ *      cmpgtu   compare greater than unsigned (two registers)
+ *      tstbit0  test bit zero
+ *
+ * Condition
+ *      tp0      p0 is true     p0 = cmp.eq(r0,#5); if (p0.new) jump:nt address
+ *      fp0      p0 is false    p0 = cmp.eq(r0,#5); if (!p0.new) jump:nt address
+ *      tp1      p1 is true     p1 = cmp.eq(r0,#5); if (p1.new) jump:nt address
+ *      fp1      p1 is false    p1 = cmp.eq(r0,#5); if (!p1.new) jump:nt address
+ *
+ * Prediction (not modelled in qemu)
+ *      _nt      not taken
+ *      _t       taken
+ */
+#define fGEN_TCG_J4_cmpeq_tp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_jmp_t(ctx, 0, TCG_COND_EQ, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpeq_tp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_jmp_t(ctx, 0, TCG_COND_EQ, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpeq_fp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_jmp_f(ctx, 0, TCG_COND_EQ, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpeq_fp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_jmp_f(ctx, 0, TCG_COND_EQ, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpeq_tp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_jmp_t(ctx, 1, TCG_COND_EQ, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpeq_tp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_jmp_t(ctx, 1, TCG_COND_EQ, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpeq_fp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_jmp_f(ctx, 1, TCG_COND_EQ, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpeq_fp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_jmp_f(ctx, 1, TCG_COND_EQ, RsV, RtV, riV)
+
+#define fGEN_TCG_J4_cmpgt_tp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_jmp_t(ctx, 0, TCG_COND_GT, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgt_tp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_jmp_t(ctx, 0, TCG_COND_GT, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgt_fp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_jmp_f(ctx, 0, TCG_COND_GT, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgt_fp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_jmp_f(ctx, 0, TCG_COND_GT, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgt_tp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_jmp_t(ctx, 1, TCG_COND_GT, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgt_tp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_jmp_t(ctx, 1, TCG_COND_GT, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgt_fp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_jmp_f(ctx, 1, TCG_COND_GT, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgt_fp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_jmp_f(ctx, 1, TCG_COND_GT, RsV, RtV, riV)
+
+#define fGEN_TCG_J4_cmpgtu_tp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_jmp_t(ctx, 0, TCG_COND_GTU, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgtu_tp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_jmp_t(ctx, 0, TCG_COND_GTU, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgtu_fp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_jmp_f(ctx, 0, TCG_COND_GTU, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgtu_fp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_jmp_f(ctx, 0, TCG_COND_GTU, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgtu_tp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_jmp_t(ctx, 1, TCG_COND_GTU, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgtu_tp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_jmp_t(ctx, 1, TCG_COND_GTU, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgtu_fp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_jmp_f(ctx, 1, TCG_COND_GTU, RsV, RtV, riV)
+#define fGEN_TCG_J4_cmpgtu_fp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_jmp_f(ctx, 1, TCG_COND_GTU, RsV, RtV, riV)
+
+#define fGEN_TCG_J4_cmpeqi_tp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp_t(ctx, 0, TCG_COND_EQ, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpeqi_tp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp_t(ctx, 0, TCG_COND_EQ, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpeqi_fp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp_f(ctx, 0, TCG_COND_EQ, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpeqi_fp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp_f(ctx, 0, TCG_COND_EQ, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpeqi_tp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp_t(ctx, 1, TCG_COND_EQ, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpeqi_tp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp_t(ctx, 1, TCG_COND_EQ, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpeqi_fp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp_f(ctx, 1, TCG_COND_EQ, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpeqi_fp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp_f(ctx, 1, TCG_COND_EQ, RsV, UiV, riV)
+
+#define fGEN_TCG_J4_cmpgti_tp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp_t(ctx, 0, TCG_COND_GT, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgti_tp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp_t(ctx, 0, TCG_COND_GT, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgti_fp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp_f(ctx, 0, TCG_COND_GT, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgti_fp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp_f(ctx, 0, TCG_COND_GT, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgti_tp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp_t(ctx, 1, TCG_COND_GT, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgti_tp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp_t(ctx, 1, TCG_COND_GT, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgti_fp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp_f(ctx, 1, TCG_COND_GT, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgti_fp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp_f(ctx, 1, TCG_COND_GT, RsV, UiV, riV)
+
+#define fGEN_TCG_J4_cmpgtui_tp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp_t(ctx, 0, TCG_COND_GTU, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgtui_tp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp_t(ctx, 0, TCG_COND_GTU, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgtui_fp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp_f(ctx, 0, TCG_COND_GTU, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgtui_fp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp_f(ctx, 0, TCG_COND_GTU, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgtui_tp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp_t(ctx, 1, TCG_COND_GTU, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgtui_tp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp_t(ctx, 1, TCG_COND_GTU, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgtui_fp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp_f(ctx, 1, TCG_COND_GTU, RsV, UiV, riV)
+#define fGEN_TCG_J4_cmpgtui_fp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmpi_jmp_f(ctx, 1, TCG_COND_GTU, RsV, UiV, riV)
+
+#define fGEN_TCG_J4_cmpeqn1_tp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp_t(ctx, 0, TCG_COND_EQ, RsV, riV)
+#define fGEN_TCG_J4_cmpeqn1_tp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp_t(ctx, 0, TCG_COND_EQ, RsV, riV)
+#define fGEN_TCG_J4_cmpeqn1_fp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp_f(ctx, 0, TCG_COND_EQ, RsV, riV)
+#define fGEN_TCG_J4_cmpeqn1_fp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp_f(ctx, 0, TCG_COND_EQ, RsV, riV)
+#define fGEN_TCG_J4_cmpeqn1_tp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp_t(ctx, 1, TCG_COND_EQ, RsV, riV)
+#define fGEN_TCG_J4_cmpeqn1_tp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp_t(ctx, 1, TCG_COND_EQ, RsV, riV)
+#define fGEN_TCG_J4_cmpeqn1_fp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp_f(ctx, 1, TCG_COND_EQ, RsV, riV)
+#define fGEN_TCG_J4_cmpeqn1_fp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp_f(ctx, 1, TCG_COND_EQ, RsV, riV)
+
+#define fGEN_TCG_J4_cmpgtn1_tp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp_t(ctx, 0, TCG_COND_GT, RsV, riV)
+#define fGEN_TCG_J4_cmpgtn1_tp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp_t(ctx, 0, TCG_COND_GT, RsV, riV)
+#define fGEN_TCG_J4_cmpgtn1_fp0_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp_f(ctx, 0, TCG_COND_GT, RsV, riV)
+#define fGEN_TCG_J4_cmpgtn1_fp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp_f(ctx, 0, TCG_COND_GT, RsV, riV)
+#define fGEN_TCG_J4_cmpgtn1_tp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp_t(ctx, 1, TCG_COND_GT, RsV, riV)
+#define fGEN_TCG_J4_cmpgtn1_tp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp_t(ctx, 1, TCG_COND_GT, RsV, riV)
+#define fGEN_TCG_J4_cmpgtn1_fp1_jump_t(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp_f(ctx, 1, TCG_COND_GT, RsV, riV)
+#define fGEN_TCG_J4_cmpgtn1_fp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_cmp_n1_jmp_f(ctx, 1, TCG_COND_GT, RsV, riV)
+
+#define fGEN_TCG_J4_tstbit0_tp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_tstbit0_jmp(ctx, 0, RsV, TCG_COND_EQ, riV)
+#define fGEN_TCG_J4_tstbit0_tp0_jump_t(SHORTCODE) \
+    gen_cmpnd_tstbit0_jmp(ctx, 0, RsV, TCG_COND_EQ, riV)
+#define fGEN_TCG_J4_tstbit0_fp0_jump_nt(SHORTCODE) \
+    gen_cmpnd_tstbit0_jmp(ctx, 0, RsV, TCG_COND_NE, riV)
+#define fGEN_TCG_J4_tstbit0_fp0_jump_t(SHORTCODE) \
+    gen_cmpnd_tstbit0_jmp(ctx, 0, RsV, TCG_COND_NE, riV)
+#define fGEN_TCG_J4_tstbit0_tp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_tstbit0_jmp(ctx, 1, RsV, TCG_COND_EQ, riV)
+#define fGEN_TCG_J4_tstbit0_tp1_jump_t(SHORTCODE) \
+    gen_cmpnd_tstbit0_jmp(ctx, 1, RsV, TCG_COND_EQ, riV)
+#define fGEN_TCG_J4_tstbit0_fp1_jump_nt(SHORTCODE) \
+    gen_cmpnd_tstbit0_jmp(ctx, 1, RsV, TCG_COND_NE, riV)
+#define fGEN_TCG_J4_tstbit0_fp1_jump_t(SHORTCODE) \
+    gen_cmpnd_tstbit0_jmp(ctx, 1, RsV, TCG_COND_NE, riV)
+
 #define fGEN_TCG_J2_pause(SHORTCODE) \
     do { \
         uiV = uiV; \
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index ffaea5a639..00df8851a2 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -487,6 +487,96 @@  static void gen_write_new_pc_pcrel(DisasContext *ctx, int pc_off,
     gen_write_new_pc_addr(ctx, tcg_constant_tl(dest), cond, pred);
 }
 
+static void gen_compare(TCGCond cond, TCGv res, TCGv arg1, TCGv arg2)
+{
+    TCGv one = tcg_constant_tl(0xff);
+    TCGv zero = tcg_constant_tl(0);
+
+    tcg_gen_movcond_tl(cond, res, arg1, arg2, one, zero);
+}
+
+static void gen_cond_jump(DisasContext *ctx, TCGCond cond, TCGv pred,
+                          int pc_off)
+{
+    gen_write_new_pc_pcrel(ctx, pc_off, cond, pred);
+}
+
+static void gen_cmpnd_cmp_jmp(DisasContext *ctx,
+                              int pnum, TCGCond cond1, TCGv arg1, TCGv arg2,
+                              TCGCond cond2, int pc_off)
+{
+    if (ctx->insn->part1) {
+        TCGv pred = tcg_temp_new();
+        gen_compare(cond1, pred, arg1, arg2);
+        gen_log_pred_write(ctx, pnum, pred);
+        tcg_temp_free(pred);
+    } else {
+        TCGv pred = tcg_temp_new();
+        tcg_gen_mov_tl(pred, hex_new_pred_value[pnum]);
+        gen_cond_jump(ctx, cond2, pred, pc_off);
+        tcg_temp_free(pred);
+    }
+}
+
+static void gen_cmpnd_cmp_jmp_t(DisasContext *ctx,
+                                int pnum, TCGCond cond, TCGv arg1, TCGv arg2,
+                                int pc_off)
+{
+    gen_cmpnd_cmp_jmp(ctx, pnum, cond, arg1, arg2, TCG_COND_EQ, pc_off);
+}
+
+static void gen_cmpnd_cmp_jmp_f(DisasContext *ctx,
+                                int pnum, TCGCond cond, TCGv arg1, TCGv arg2,
+                                int pc_off)
+{
+    gen_cmpnd_cmp_jmp(ctx, pnum, cond, arg1, arg2, TCG_COND_NE, pc_off);
+}
+
+static void gen_cmpnd_cmpi_jmp_t(DisasContext *ctx,
+                                 int pnum, TCGCond cond, TCGv arg1, int arg2,
+                                 int pc_off)
+{
+    TCGv tmp = tcg_constant_tl(arg2);
+    gen_cmpnd_cmp_jmp(ctx, pnum, cond, arg1, tmp, TCG_COND_EQ, pc_off);
+}
+
+static void gen_cmpnd_cmpi_jmp_f(DisasContext *ctx,
+                                 int pnum, TCGCond cond, TCGv arg1, int arg2,
+                                 int pc_off)
+{
+    TCGv tmp = tcg_constant_tl(arg2);
+    gen_cmpnd_cmp_jmp(ctx, pnum, cond, arg1, tmp, TCG_COND_NE, pc_off);
+}
+
+static void gen_cmpnd_cmp_n1_jmp_t(DisasContext *ctx, int pnum, TCGCond cond,
+                                   TCGv arg, int pc_off)
+{
+    gen_cmpnd_cmpi_jmp_t(ctx, pnum, cond, arg, -1, pc_off);
+}
+
+static void gen_cmpnd_cmp_n1_jmp_f(DisasContext *ctx, int pnum, TCGCond cond,
+                                   TCGv arg, int pc_off)
+{
+    gen_cmpnd_cmpi_jmp_f(ctx, pnum, cond, arg, -1, pc_off);
+}
+
+static void gen_cmpnd_tstbit0_jmp(DisasContext *ctx,
+                                  int pnum, TCGv arg, TCGCond cond, int pc_off)
+{
+    if (ctx->insn->part1) {
+        TCGv pred = tcg_temp_new();
+        tcg_gen_andi_tl(pred, arg, 1);
+        gen_8bitsof(pred, pred);
+        gen_log_pred_write(ctx, pnum, pred);
+        tcg_temp_free(pred);
+    } else {
+        TCGv pred = tcg_temp_new();
+        tcg_gen_mov_tl(pred, hex_new_pred_value[pnum]);
+        gen_cond_jump(ctx, cond, pred, pc_off);
+        tcg_temp_free(pred);
+    }
+}
+
 static void gen_call(DisasContext *ctx, int pc_off)
 {
     TCGv next_PC =