@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -98,7 +98,6 @@ typedef struct CPUArchState {
target_ulong pred_written;
MemLog mem_log_stores[STORES_MAX];
- target_ulong pkt_has_store_s1;
target_ulong dczero_addr;
float_status fp_status;
@@ -508,6 +508,60 @@
#define fGEN_TCG_S2_storerinew_pcr(SHORTCODE) \
fGEN_TCG_STORE_pcr(2, fSTORE(1, 4, EA, NtN))
+/*
+ * dealloc_return
+ * Assembler mapped to
+ * r31:30 = dealloc_return(r30):raw
+ */
+#define fGEN_TCG_L4_return(SHORTCODE) \
+ gen_return(ctx, RddV, RsV)
+
+/*
+ * sub-instruction version (no RddV, so handle it manually)
+ */
+#define fGEN_TCG_SL2_return(SHORTCODE) \
+ do { \
+ TCGv_i64 RddV = tcg_temp_new_i64(); \
+ gen_return(ctx, RddV, hex_gpr[HEX_REG_FP]); \
+ gen_log_reg_write_pair(HEX_REG_FP, RddV); \
+ tcg_temp_free_i64(RddV); \
+ } while (0)
+
+/*
+ * Conditional returns follow this naming convention
+ * _t predicate true
+ * _f predicate false
+ * _tnew_pt predicate.new true predict taken
+ * _fnew_pt predicate.new false predict taken
+ * _tnew_pnt predicate.new true predict not taken
+ * _fnew_pnt predicate.new false predict not taken
+ * Predictions are not modelled in QEMU
+ *
+ * Example:
+ * if (p1) r31:30 = dealloc_return(r30):raw
+ */
+#define fGEN_TCG_L4_return_t(SHORTCODE) \
+ gen_cond_return(ctx, RddV, RsV, PvV, TCG_COND_EQ);
+#define fGEN_TCG_L4_return_f(SHORTCODE) \
+ gen_cond_return(ctx, RddV, RsV, PvV, TCG_COND_NE)
+#define fGEN_TCG_L4_return_tnew_pt(SHORTCODE) \
+ gen_cond_return(ctx, RddV, RsV, PvN, TCG_COND_EQ)
+#define fGEN_TCG_L4_return_fnew_pt(SHORTCODE) \
+ gen_cond_return(ctx, RddV, RsV, PvN, TCG_COND_NE)
+#define fGEN_TCG_L4_return_tnew_pnt(SHORTCODE) \
+ gen_cond_return(ctx, RddV, RsV, PvN, TCG_COND_EQ)
+#define fGEN_TCG_L4_return_fnew_pnt(SHORTCODE) \
+ gen_cond_return(ctx, RddV, RsV, PvN, TCG_COND_NE)
+
+#define fGEN_TCG_SL2_return_t(SHORTCODE) \
+ gen_cond_return_subinsn(ctx, TCG_COND_EQ, hex_pred[0])
+#define fGEN_TCG_SL2_return_f(SHORTCODE) \
+ gen_cond_return_subinsn(ctx, TCG_COND_NE, hex_pred[0])
+#define fGEN_TCG_SL2_return_tnew(SHORTCODE) \
+ gen_cond_return_subinsn(ctx, TCG_COND_EQ, hex_new_pred_value[0])
+#define fGEN_TCG_SL2_return_fnew(SHORTCODE) \
+ gen_cond_return_subinsn(ctx, TCG_COND_NE, hex_new_pred_value[0])
+
/*
* Mathematical operations with more than one definition require
* special handling
@@ -746,6 +746,92 @@ static void gen_cond_callr(DisasContext *ctx,
gen_set_label(skip);
}
+/* frame ^= (int64_t)FRAMEKEY << 32 */
+static void gen_frame_unscramble(TCGv_i64 frame)
+{
+ TCGv_i64 framekey = tcg_temp_new_i64();
+ tcg_gen_extu_i32_i64(framekey, hex_gpr[HEX_REG_FRAMEKEY]);
+ tcg_gen_shli_i64(framekey, framekey, 32);
+ tcg_gen_xor_i64(frame, frame, framekey);
+ tcg_temp_free_i64(framekey);
+}
+
+static void gen_load_frame(DisasContext *ctx, TCGv_i64 frame, TCGv EA)
+{
+ Insn *insn = ctx->insn; /* Needed for CHECK_NOSHUF */
+ CHECK_NOSHUF(EA, 8);
+ tcg_gen_qemu_ld64(frame, EA, ctx->mem_idx);
+}
+
+static void gen_return_base(DisasContext *ctx, TCGv_i64 dst, TCGv src,
+ TCGv r29)
+{
+ /*
+ * frame = *src
+ * dst = frame_unscramble(frame)
+ * SP = src + 8
+ * PC = dst.w[1]
+ */
+ TCGv_i64 frame = tcg_temp_new_i64();
+ TCGv r31 = tcg_temp_new();
+
+ gen_load_frame(ctx, frame, src);
+ gen_frame_unscramble(frame);
+ tcg_gen_mov_i64(dst, frame);
+ tcg_gen_addi_tl(r29, src, 8);
+ tcg_gen_extrh_i64_i32(r31, dst);
+ gen_jumpr(ctx, r31);
+
+ tcg_temp_free_i64(frame);
+ tcg_temp_free(r31);
+}
+
+static void gen_return(DisasContext *ctx, TCGv_i64 dst, TCGv src)
+{
+ TCGv r29 = tcg_temp_new();
+ gen_return_base(ctx, dst, src, r29);
+ gen_log_reg_write(HEX_REG_SP, r29);
+ tcg_temp_free(r29);
+}
+
+/* if (pred) dst = dealloc_return(src):raw */
+static void gen_cond_return(DisasContext *ctx, TCGv_i64 dst, TCGv src,
+ TCGv pred, TCGCond cond)
+{
+ TCGv LSB = tcg_temp_new();
+ TCGv mask = tcg_temp_new();
+ TCGv r29 = tcg_temp_local_new();
+ TCGLabel *skip = gen_new_label();
+ tcg_gen_andi_tl(LSB, pred, 1);
+
+ /* Initialize the results in case the predicate is false */
+ tcg_gen_movi_i64(dst, 0);
+ tcg_gen_movi_tl(r29, 0);
+
+ /* Set the bit in hex_slot_cancelled if the predicate is flase */
+ tcg_gen_movi_tl(mask, 1 << ctx->insn->slot);
+ tcg_gen_or_tl(mask, hex_slot_cancelled, mask);
+ tcg_gen_movcond_tl(cond, hex_slot_cancelled, LSB, tcg_constant_tl(0),
+ mask, hex_slot_cancelled);
+ tcg_temp_free(mask);
+
+ tcg_gen_brcondi_tl(cond, LSB, 0, skip);
+ tcg_temp_free(LSB);
+ gen_return_base(ctx, dst, src, r29);
+ gen_set_label(skip);
+ gen_log_predicated_reg_write(HEX_REG_SP, r29, ctx->insn->slot);
+ tcg_temp_free(r29);
+}
+
+/* sub-instruction version (no RddV, so handle it manually) */
+static void gen_cond_return_subinsn(DisasContext *ctx, TCGCond cond, TCGv pred)
+{
+ TCGv_i64 RddV = tcg_temp_local_new_i64();
+ gen_cond_return(ctx, RddV, hex_gpr[HEX_REG_FP], pred, cond);
+ gen_log_predicated_reg_write_pair(HEX_REG_FP, RddV, ctx->insn->slot);
+ tcg_temp_free_i64(RddV);
+}
+
static void gen_endloop0(DisasContext *ctx)
{
TCGv lpcfg = tcg_temp_local_new();
@@ -105,30 +105,6 @@ void log_store64(CPUHexagonState *env, target_ulong addr,
env->mem_log_stores[slot].data64 = val;
}
-void write_new_pc(CPUHexagonState *env, bool pkt_has_multi_cof,
- target_ulong addr)
-{
- HEX_DEBUG_LOG("write_new_pc(0x" TARGET_FMT_lx ")\n", addr);
-
- if (pkt_has_multi_cof) {
- /*
- * If more than one branch is taken in a packet, only the first one
- * is actually done.
- */
- if (env->branch_taken) {
- HEX_DEBUG_LOG("INFO: multiple branches taken in same packet, "
- "ignoring the second one\n");
- } else {
- fCHECK_PCALIGN(addr);
- env->gpr[HEX_REG_PC] = addr;
- env->branch_taken = 1;
- }
- } else {
- fCHECK_PCALIGN(addr);
- env->gpr[HEX_REG_PC] = addr;
- }
-}
-
/* Handy place to set a breakpoint */
void HELPER(debug_start_packet)(CPUHexagonState *env)
{
@@ -525,51 +501,6 @@ void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask,
}
}
-/*
- * mem_noshuf
- * Section 5.5 of the Hexagon V67 Programmer's Reference Manual
- *
- * If the load is in slot 0 and there is a store in slot1 (that
- * wasn't cancelled), we have to do the store first.
- */
-static void check_noshuf(CPUHexagonState *env, uint32_t slot,
- target_ulong vaddr, int size)
-{
- if (slot == 0 && env->pkt_has_store_s1 &&
- ((env->slot_cancelled & (1 << 1)) == 0)) {
- HELPER(probe_noshuf_load)(env, vaddr, size, MMU_USER_IDX);
- HELPER(commit_store)(env, 1);
- }
-}
-
-uint8_t mem_load1(CPUHexagonState *env, uint32_t slot, target_ulong vaddr)
-{
- uintptr_t ra = GETPC();
- check_noshuf(env, slot, vaddr, 1);
- return cpu_ldub_data_ra(env, vaddr, ra);
-}
-
-uint16_t mem_load2(CPUHexagonState *env, uint32_t slot, target_ulong vaddr)
-{
- uintptr_t ra = GETPC();
- check_noshuf(env, slot, vaddr, 2);
- return cpu_lduw_data_ra(env, vaddr, ra);
-}
-
-uint32_t mem_load4(CPUHexagonState *env, uint32_t slot, target_ulong vaddr)
-{
- uintptr_t ra = GETPC();
- check_noshuf(env, slot, vaddr, 4);
- return cpu_ldl_data_ra(env, vaddr, ra);
-}
-
-uint64_t mem_load8(CPUHexagonState *env, uint32_t slot, target_ulong vaddr)
-{
- uintptr_t ra = GETPC();
- check_noshuf(env, slot, vaddr, 8);
- return cpu_ldq_data_ra(env, vaddr, ra);
-}
-
/* Floating point */
float64 HELPER(conv_sf2df)(CPUHexagonState *env, float32 RsV)
{
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -42,7 +42,6 @@ TCGv hex_store_addr[STORES_MAX];
TCGv hex_store_width[STORES_MAX];
TCGv hex_store_val32[STORES_MAX];
TCGv_i64 hex_store_val64[STORES_MAX];
-TCGv hex_pkt_has_store_s1;
TCGv hex_dczero_addr;
TCGv hex_llsc_addr;
TCGv hex_llsc_val;
@@ -287,7 +286,6 @@ static void gen_start_packet(DisasContext *ctx)
for (i = 0; i < STORES_MAX; i++) {
ctx->store_width[i] = 0;
}
- tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1);
ctx->s1_store_processed = false;
ctx->pre_commit = true;
@@ -1026,8 +1024,6 @@ void hexagon_translate_init(void)
offsetof(CPUHexagonState, slot_cancelled), "slot_cancelled");
hex_branch_taken = tcg_global_mem_new(cpu_env,
offsetof(CPUHexagonState, branch_taken), "branch_taken");
- hex_pkt_has_store_s1 = tcg_global_mem_new(cpu_env,
- offsetof(CPUHexagonState, pkt_has_store_s1), "pkt_has_store_s1");
hex_dczero_addr = tcg_global_mem_new(cpu_env,
offsetof(CPUHexagonState, dczero_addr), "dczero_addr");
hex_llsc_addr = tcg_global_mem_new(cpu_env,
These instructions perform a deallocframe+return (jumpr r31) Add overrides for L4_return SL2_return L4_return_t L4_return_f L4_return_tnew_pt L4_return_fnew_pt L4_return_tnew_pnt L4_return_fnew_pnt SL2_return_t SL2_return_f SL2_return_tnew SL2_return_fnew This patch eliminates the last helper that uses write_new_pc, so we remove it from op_helper.c This patch also eliminates the last helper for load instructions, so we remove the pkt_has_store_s1 runtime field as well as the mem_load[1248] functions. Signed-off-by: Taylor Simpson <tsimpson@quicinc.com> --- target/hexagon/cpu.h | 3 +- target/hexagon/gen_tcg.h | 54 ++++++++++++++++++++++++ target/hexagon/genptr.c | 86 ++++++++++++++++++++++++++++++++++++++ target/hexagon/op_helper.c | 69 ------------------------------ target/hexagon/translate.c | 6 +-- 5 files changed, 142 insertions(+), 76 deletions(-)