@@ -110,11 +110,8 @@ typedef struct CPUArchState {
MMVector future_VRegs[VECTOR_TEMPS_MAX] QEMU_ALIGNED(16);
MMVector tmp_VRegs[VECTOR_TEMPS_MAX] QEMU_ALIGNED(16);
- VRegMask VRegs_updated;
-
MMQReg QRegs[NUM_QREGS] QEMU_ALIGNED(16);
MMQReg future_QRegs[NUM_QREGS] QEMU_ALIGNED(16);
- QRegMask QRegs_updated;
/* Temporaries used within instructions */
MMVectorPair VuuV QEMU_ALIGNED(16);
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -133,17 +133,12 @@ static inline void assert_vhist_tmp(DisasContext *ctx)
do { \
TCGv lsb = tcg_temp_new(); \
TCGLabel *false_label = gen_new_label(); \
- TCGLabel *end_label = gen_new_label(); \
tcg_gen_andi_tl(lsb, PsV, 1); \
tcg_gen_brcondi_tl(TCG_COND_NE, lsb, PRED, false_label); \
tcg_temp_free(lsb); \
tcg_gen_gvec_mov(MO_64, VdV_off, VuV_off, \
sizeof(MMVector), sizeof(MMVector)); \
- tcg_gen_br(end_label); \
gen_set_label(false_label); \
- tcg_gen_ori_tl(hex_slot_cancelled, hex_slot_cancelled, \
- 1 << insn->slot); \
- gen_set_label(end_label); \
} while (0)
@@ -560,18 +555,13 @@ static inline void assert_vhist_tmp(DisasContext *ctx)
do { \
TCGv LSB = tcg_temp_new(); \
TCGLabel *false_label = gen_new_label(); \
- TCGLabel *end_label = gen_new_label(); \
GET_EA; \
PRED; \
tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, false_label); \
tcg_temp_free(LSB); \
gen_vreg_load(ctx, DSTOFF, EA, true); \
INC; \
- tcg_gen_br(end_label); \
gen_set_label(false_label); \
- tcg_gen_ori_tl(hex_slot_cancelled, hex_slot_cancelled, \
- 1 << insn->slot); \
- gen_set_label(end_label); \
} while (0)
#define fGEN_TCG_PRED_VEC_LOAD_pred_pi \
@@ -731,18 +721,13 @@ static inline void assert_vhist_tmp(DisasContext *ctx)
do { \
TCGv LSB = tcg_temp_new(); \
TCGLabel *false_label = gen_new_label(); \
- TCGLabel *end_label = gen_new_label(); \
GET_EA; \
PRED; \
tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, false_label); \
tcg_temp_free(LSB); \
gen_vreg_store(ctx, EA, SRCOFF, insn->slot, ALIGN); \
INC; \
- tcg_gen_br(end_label); \
gen_set_label(false_label); \
- tcg_gen_ori_tl(hex_slot_cancelled, hex_slot_cancelled, \
- 1 << insn->slot); \
- gen_set_label(end_label); \
} while (0)
#define fGEN_TCG_PRED_VEC_STORE_pred_pi(ALIGN) \
@@ -49,7 +49,6 @@ typedef struct DisasContext {
int tmp_vregs_idx;
int tmp_vregs_num[VECTOR_TEMPS_MAX];
int vreg_log[NUM_VREGS];
- bool vreg_is_predicated[NUM_VREGS];
int vreg_log_idx;
DECLARE_BITMAP(vregs_updated_tmp, NUM_VREGS);
DECLARE_BITMAP(vregs_updated, NUM_VREGS);
@@ -57,7 +56,6 @@ typedef struct DisasContext {
DECLARE_BITMAP(predicated_future_vregs, NUM_VREGS);
DECLARE_BITMAP(predicated_tmp_vregs, NUM_VREGS);
int qreg_log[NUM_QREGS];
- bool qreg_is_predicated[NUM_QREGS];
int qreg_log_idx;
bool pre_commit;
TCGCond branch_cond;
@@ -110,11 +108,12 @@ static inline void ctx_log_vreg_write(DisasContext *ctx,
bool is_predicated)
{
if (type != EXT_TMP) {
- ctx->vreg_log[ctx->vreg_log_idx] = rnum;
- ctx->vreg_is_predicated[ctx->vreg_log_idx] = is_predicated;
- ctx->vreg_log_idx++;
+ if (!test_bit(rnum, ctx->vregs_updated)) {
+ ctx->vreg_log[ctx->vreg_log_idx] = rnum;
+ ctx->vreg_log_idx++;
+ set_bit(rnum, ctx->vregs_updated);
+ }
- set_bit(rnum, ctx->vregs_updated);
if (is_predicated) {
set_bit(rnum, ctx->predicated_future_vregs);
}
@@ -139,10 +138,9 @@ static inline void ctx_log_vreg_write_pair(DisasContext *ctx,
}
static inline void ctx_log_qreg_write(DisasContext *ctx,
- int rnum, bool is_predicated)
+ int rnum)
{
ctx->qreg_log[ctx->qreg_log_idx] = rnum;
- ctx->qreg_is_predicated[ctx->qreg_log_idx] = is_predicated;
ctx->qreg_log_idx++;
}
@@ -163,8 +161,6 @@ extern TCGv hex_dczero_addr;
extern TCGv hex_llsc_addr;
extern TCGv hex_llsc_val;
extern TCGv_i64 hex_llsc_val_i64;
-extern TCGv hex_VRegs_updated;
-extern TCGv hex_QRegs_updated;
extern TCGv hex_vstore_addr[VSTORES_MAX];
extern TCGv hex_vstore_size[VSTORES_MAX];
extern TCGv hex_vstore_pending[VSTORES_MAX];
@@ -1029,70 +1029,27 @@ static intptr_t vreg_src_off(DisasContext *ctx, int num)
}
static void gen_log_vreg_write(DisasContext *ctx, intptr_t srcoff, int num,
- VRegWriteType type, int slot_num,
- bool is_predicated)
+ VRegWriteType type)
{
- TCGLabel *label_end = NULL;
intptr_t dstoff;
- if (is_predicated) {
- TCGv cancelled = tcg_temp_local_new();
- label_end = gen_new_label();
-
- /* Don't do anything if the slot was cancelled */
- tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1);
- tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end);
- tcg_temp_free(cancelled);
- }
-
if (type != EXT_TMP) {
dstoff = ctx_future_vreg_off(ctx, num, 1, true);
tcg_gen_gvec_mov(MO_64, dstoff, srcoff,
sizeof(MMVector), sizeof(MMVector));
- tcg_gen_ori_tl(hex_VRegs_updated, hex_VRegs_updated, 1 << num);
} else {
dstoff = ctx_tmp_vreg_off(ctx, num, 1, false);
tcg_gen_gvec_mov(MO_64, dstoff, srcoff,
sizeof(MMVector), sizeof(MMVector));
}
-
- if (is_predicated) {
- gen_set_label(label_end);
- }
}
static void gen_log_vreg_write_pair(DisasContext *ctx, intptr_t srcoff, int num,
- VRegWriteType type, int slot_num,
- bool is_predicated)
+ VRegWriteType type)
{
- gen_log_vreg_write(ctx, srcoff, num ^ 0, type, slot_num, is_predicated);
+ gen_log_vreg_write(ctx, srcoff, num ^ 0, type);
srcoff += sizeof(MMVector);
- gen_log_vreg_write(ctx, srcoff, num ^ 1, type, slot_num, is_predicated);
-}
-
-static void gen_log_qreg_write(intptr_t srcoff, int num, int vnew,
- int slot_num, bool is_predicated)
-{
- TCGLabel *label_end = NULL;
- intptr_t dstoff;
-
- if (is_predicated) {
- TCGv cancelled = tcg_temp_local_new();
- label_end = gen_new_label();
-
- /* Don't do anything if the slot was cancelled */
- tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1);
- tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end);
- tcg_temp_free(cancelled);
- }
-
- dstoff = offsetof(CPUHexagonState, future_QRegs[num]);
- tcg_gen_gvec_mov(MO_64, dstoff, srcoff, sizeof(MMQReg), sizeof(MMQReg));
-
- if (is_predicated) {
- tcg_gen_ori_tl(hex_QRegs_updated, hex_QRegs_updated, 1 << num);
- gen_set_label(label_end);
- }
+ gen_log_vreg_write(ctx, srcoff, num ^ 1, type);
}
static void gen_vreg_load(DisasContext *ctx, intptr_t dstoff, TCGv src,
@@ -55,8 +55,6 @@ TCGv hex_dczero_addr;
TCGv hex_llsc_addr;
TCGv hex_llsc_val;
TCGv_i64 hex_llsc_val_i64;
-TCGv hex_VRegs_updated;
-TCGv hex_QRegs_updated;
TCGv hex_vstore_addr[VSTORES_MAX];
TCGv hex_vstore_size[VSTORES_MAX];
TCGv hex_vstore_pending[VSTORES_MAX];
@@ -247,12 +245,11 @@ static bool check_for_attrib(Packet *pkt, int attrib)
static bool need_slot_cancelled(Packet *pkt)
{
- /* We only need slot_cancelled for conditional store and HVX instructions */
+ /* We only need slot_cancelled for conditional store instructions */
for (int i = 0; i < pkt->num_insns; i++) {
uint16_t opcode = pkt->insn[i].opcode;
if (GET_ATTRIB(opcode, A_CONDEXEC) &&
- (GET_ATTRIB(opcode, A_STORE) ||
- GET_ATTRIB(opcode, A_CVI))) {
+ GET_ATTRIB(opcode, A_SCALAR_STORE)) {
return true;
}
}
@@ -447,11 +444,6 @@ static void gen_start_packet(DisasContext *ctx)
i = find_next_bit(ctx->predicated_tmp_vregs, NUM_VREGS, i + 1);
}
}
-
- if (pkt->pkt_has_hvx) {
- tcg_gen_movi_tl(hex_VRegs_updated, 0);
- tcg_gen_movi_tl(hex_QRegs_updated, 0);
- }
}
bool is_gather_store_insn(DisasContext *ctx)
@@ -724,67 +716,31 @@ static void gen_commit_hvx(DisasContext *ctx)
/*
* for (i = 0; i < ctx->vreg_log_idx; i++) {
* int rnum = ctx->vreg_log[i];
- * if (ctx->vreg_is_predicated[i]) {
- * if (env->VRegs_updated & (1 << rnum)) {
- * env->VRegs[rnum] = env->future_VRegs[rnum];
- * }
- * } else {
- * env->VRegs[rnum] = env->future_VRegs[rnum];
- * }
+ * env->VRegs[rnum] = env->future_VRegs[rnum];
* }
*/
for (i = 0; i < ctx->vreg_log_idx; i++) {
int rnum = ctx->vreg_log[i];
- bool is_predicated = ctx->vreg_is_predicated[i];
intptr_t dstoff = offsetof(CPUHexagonState, VRegs[rnum]);
intptr_t srcoff = ctx_future_vreg_off(ctx, rnum, 1, false);
size_t size = sizeof(MMVector);
- if (is_predicated) {
- TCGv cmp = tcg_temp_new();
- TCGLabel *label_skip = gen_new_label();
-
- tcg_gen_andi_tl(cmp, hex_VRegs_updated, 1 << rnum);
- tcg_gen_brcondi_tl(TCG_COND_EQ, cmp, 0, label_skip);
- tcg_temp_free(cmp);
- tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
- gen_set_label(label_skip);
- } else {
- tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
- }
+ tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
}
/*
* for (i = 0; i < ctx->qreg_log_idx; i++) {
* int rnum = ctx->qreg_log[i];
- * if (ctx->qreg_is_predicated[i]) {
- * if (env->QRegs_updated) & (1 << rnum)) {
- * env->QRegs[rnum] = env->future_QRegs[rnum];
- * }
- * } else {
- * env->QRegs[rnum] = env->future_QRegs[rnum];
- * }
+ * env->QRegs[rnum] = env->future_QRegs[rnum];
* }
*/
for (i = 0; i < ctx->qreg_log_idx; i++) {
int rnum = ctx->qreg_log[i];
- bool is_predicated = ctx->qreg_is_predicated[i];
intptr_t dstoff = offsetof(CPUHexagonState, QRegs[rnum]);
intptr_t srcoff = offsetof(CPUHexagonState, future_QRegs[rnum]);
size_t size = sizeof(MMQReg);
- if (is_predicated) {
- TCGv cmp = tcg_temp_new();
- TCGLabel *label_skip = gen_new_label();
-
- tcg_gen_andi_tl(cmp, hex_QRegs_updated, 1 << rnum);
- tcg_gen_brcondi_tl(TCG_COND_EQ, cmp, 0, label_skip);
- tcg_temp_free(cmp);
- tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
- gen_set_label(label_skip);
- } else {
- tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
- }
+ tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
}
if (pkt_has_hvx_store(ctx->pkt)) {
@@ -1117,10 +1073,6 @@ void hexagon_translate_init(void)
offsetof(CPUHexagonState, llsc_val), "llsc_val");
hex_llsc_val_i64 = tcg_global_mem_new_i64(cpu_env,
offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64");
- hex_VRegs_updated = tcg_global_mem_new(cpu_env,
- offsetof(CPUHexagonState, VRegs_updated), "VRegs_updated");
- hex_QRegs_updated = tcg_global_mem_new(cpu_env,
- offsetof(CPUHexagonState, QRegs_updated), "QRegs_updated");
for (i = 0; i < STORES_MAX; i++) {
snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i);
hex_store_addr[i] = tcg_global_mem_new(cpu_env,
@@ -137,31 +137,25 @@ For HVX vectors, the generator behaves slightly differently. The wide vectors
won't fit in a TCGv or TCGv_i64, so we pass TCGv_ptr variables to pass the
address to helper functions. Here's an example for an HVX vector-add-word
instruction.
- static void generate_V6_vaddw(
- CPUHexagonState *env,
- DisasContext *ctx,
- Insn *insn,
- Packet *pkt)
+ static void generate_V6_vaddw(DisasContext *ctx)
{
+ Insn *insn __attribute__((unused)) = ctx->insn;
const int VdN = insn->regno[0];
const intptr_t VdV_off =
ctx_future_vreg_off(ctx, VdN, 1, true);
- TCGv_ptr VdV = tcg_temp_local_new_ptr();
+ TCGv_ptr VdV = tcg_temp_new_ptr();
tcg_gen_addi_ptr(VdV, cpu_env, VdV_off);
const int VuN = insn->regno[1];
const intptr_t VuV_off =
vreg_src_off(ctx, VuN);
- TCGv_ptr VuV = tcg_temp_local_new_ptr();
+ TCGv_ptr VuV = tcg_temp_new_ptr();
const int VvN = insn->regno[2];
const intptr_t VvV_off =
vreg_src_off(ctx, VvN);
- TCGv_ptr VvV = tcg_temp_local_new_ptr();
+ TCGv_ptr VvV = tcg_temp_new_ptr();
tcg_gen_addi_ptr(VuV, cpu_env, VuV_off);
tcg_gen_addi_ptr(VvV, cpu_env, VvV_off);
- TCGv slot = tcg_constant_tl(insn->slot);
- gen_helper_V6_vaddw(cpu_env, VdV, VuV, VvV, slot);
- tcg_temp_free(slot);
- gen_log_vreg_write(ctx, VdV_off, VdN, EXT_DFL, insn->slot, false);
+ gen_helper_V6_vaddw(cpu_env, VdV, VuV, VvV);
tcg_temp_free_ptr(VdV);
tcg_temp_free_ptr(VuV);
tcg_temp_free_ptr(VvV);
@@ -177,12 +171,9 @@ functions from tcg-op-gvec.h. Here's the override for this instruction.
Finally, we notice that the override doesn't use the TCGv_ptr variables, so
we don't generate them when an override is present. Here is what we generate
when the override is present.
- static void generate_V6_vaddw(
- CPUHexagonState *env,
- DisasContext *ctx,
- Insn *insn,
- Packet *pkt)
+ static void generate_V6_vaddw(DisasContext *ctx)
{
+ Insn *insn __attribute__((unused)) = ctx->insn;
const int VdN = insn->regno[0];
const intptr_t VdV_off =
ctx_future_vreg_off(ctx, VdN, 1, true);
@@ -193,7 +184,6 @@ when the override is present.
const intptr_t VvV_off =
vreg_src_off(ctx, VvN);
fGEN_TCG_V6_vaddw({ fHIDE(int i;) fVFOREACH(32, i) { VdV.w[i] = VuV.w[i] + VvV.w[i] ; } });
- gen_log_vreg_write(ctx, VdV_off, VdN, EXT_DFL, insn->slot, false);
}
We also generate an analyze_<tag> function for each instruction. Currently,
@@ -286,10 +276,8 @@ For Hexagon Vector eXtensions (HVX), the following fields are used
VRegs Vector registers
future_VRegs Registers to be stored during packet commit
tmp_VRegs Temporary registers *not* stored during commit
- VRegs_updated Mask of predicated vector writes
QRegs Q (vector predicate) registers
future_QRegs Registers to be stored during packet commit
- QRegs_updated Mask of predicated vector writes
*** Debugging ***
@@ -110,8 +110,7 @@ def analyze_opn_old(f, tag, regtype, regid, regno):
if (regid in {"d", "e", "x"}):
f.write(" const int %s = insn->regno[%d];\n" % \
(regN, regno))
- f.write(" ctx_log_qreg_write(ctx, %s, %s);\n" % \
- (regN, predicated))
+ f.write(" ctx_log_qreg_write(ctx, %s);\n" % (regN))
elif (regid in {"s", "t", "u", "v"}):
f.write("// const int %s = insn->regno[%d];\n" % \
(regN, regno))
@@ -472,36 +472,18 @@ def genptr_dst_write(f, tag, regtype, regid):
def genptr_dst_write_ext(f, tag, regtype, regid, newv="EXT_DFL"):
if (regtype == "V"):
- if (regid in {"dd", "xx", "yy"}):
- if ('A_CONDEXEC' in hex_common.attribdict[tag]):
- is_predicated = "true"
- else:
- is_predicated = "false"
+ if (regid in {"xx"}):
f.write(" gen_log_vreg_write_pair(ctx, %s%sV_off, %s%sN, " % \
(regtype, regid, regtype, regid))
- f.write("%s, insn->slot, %s);\n" % \
- (newv, is_predicated))
- elif (regid in {"d", "x", "y"}):
- if ('A_CONDEXEC' in hex_common.attribdict[tag]):
- is_predicated = "true"
- else:
- is_predicated = "false"
- f.write(" gen_log_vreg_write(ctx, %s%sV_off, %s%sN, %s, " % \
+ f.write("%s);\n" % \
+ (newv))
+ elif (regid in {"y"}):
+ f.write(" gen_log_vreg_write(ctx, %s%sV_off, %s%sN, %s);\n" % \
(regtype, regid, regtype, regid, newv))
- f.write("insn->slot, %s);\n" % \
- (is_predicated))
- else:
+ elif (regid not in {"dd", "d", "x"}):
print("Bad register parse: ", regtype, regid)
elif (regtype == "Q"):
- if (regid in {"d", "e", "x"}):
- if ('A_CONDEXEC' in hex_common.attribdict[tag]):
- is_predicated = "true"
- else:
- is_predicated = "false"
- f.write(" gen_log_qreg_write(%s%sV_off, %s%sN, %s, " % \
- (regtype, regid, regtype, regid, newv))
- f.write("insn->slot, %s);\n" % (is_predicated))
- else:
+ if (regid not in {"d", "e", "x"}):
print("Bad register parse: ", regtype, regid)
else:
print("Bad register parse: ", regtype, regid)
The following improvements are made for predicated HVX instructions:
    - During gen_commit_hvx, unconditionally move the "new" value into the dest
    - Don't set slot_cancelled
    - Remove runtime bookkeeping of which registers were updated
    - Reduce the cases where gen_log_vreg_write[_pair] is called
      (it's only needed for special operands VxxV and VyV)
    - Remove gen_log_qreg_write

Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
 target/hexagon/cpu.h                |  3 --
 target/hexagon/gen_tcg_hvx.h        | 17 +-------
 target/hexagon/translate.h          | 16 +++-----
 target/hexagon/genptr.c             | 51 ++----------------------
 target/hexagon/translate.c          | 60 +++--------------------------
 target/hexagon/README               | 28 ++++----------
 target/hexagon/gen_analyze_funcs.py |  3 +-
 target/hexagon/gen_tcg_funcs.py     | 32 ++++-----------
 8 files changed, 33 insertions(+), 177 deletions(-)