Message ID | 20220920080746.26791-3-tsimpson@quicinc.com (mailing list archive)
---|---
State | New, archived
Series | Hexagon (target/hexagon) improve store handling
On 9/20/22 01:07, Taylor Simpson wrote:
> The store width is needed for packet commit, so it is stored in
> ctx->store_width.  Currently, it is set when a store has a TCG
> override instead of a QEMU helper.  In the QEMU helper case,
> ctx->store_width is not set, so we invoke a helper during packet
> commit that uses the runtime store width.
>
> This patch ensures ctx->store_width is set for all store instructions,
> improving performance because packet commit can generate the proper
> TCG store rather than the generic helper.
>
> We do this by
> - Using the attributes from the instructions during translation to
>   set ctx->store_width
> - Removing the setting of ctx->store_width from genptr.c
>
> Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
> ---
>  target/hexagon/macros.h    |  8 ++++----
>  target/hexagon/genptr.c    | 36 ++++++++++++------------------------
>  target/hexagon/translate.c | 26 ++++++++++++++++++++++++++
>  3 files changed, 42 insertions(+), 28 deletions(-)
>
> [...]
>
> diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
> index 0e8a0772f7..bc02870b9f 100644
> --- a/target/hexagon/translate.c
> +++ b/target/hexagon/translate.c
> @@ -327,6 +327,31 @@ static void mark_implicit_pred_writes(DisasContext *ctx, Insn *insn)
>      mark_implicit_pred_write(ctx, insn, A_IMPLICIT_WRITES_P3, 3);
>  }
>
> +static void mark_store_width(DisasContext *ctx, Insn *insn)
> +{
> +    uint16_t opcode = insn->opcode;
> +    uint32_t slot = insn->slot;
> +
> +    if (GET_ATTRIB(opcode, A_STORE)) {
> +        if (GET_ATTRIB(opcode, A_MEMSIZE_1B)) {
> +            ctx->store_width[slot] = 1;
> +            return;
> +        }
> +        if (GET_ATTRIB(opcode, A_MEMSIZE_2B)) {
> +            ctx->store_width[slot] = 2;
> +            return;
> +        }
> +        if (GET_ATTRIB(opcode, A_MEMSIZE_4B)) {
> +            ctx->store_width[slot] = 4;
> +            return;
> +        }
> +        if (GET_ATTRIB(opcode, A_MEMSIZE_8B)) {
> +            ctx->store_width[slot] = 8;
> +            return;
> +        }

Hmm.  Perhaps

    int size = 0;

    if (GET_ATTRIB(opcode, A_MEMSIZE_1B)) {
        size |= 1;
    }
    ...
    tcg_debug_assert(is_power_of_2(size));
    ctx->store_width[slot] = size;

just to make sure you get exactly one of the above cases.

Otherwise, LGTM,
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

r~
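Spelled out, the variant suggested in the review would look roughly like the sketch below. This is not code from the posted patch; it reuses GET_ATTRIB and the A_MEMSIZE_* attributes shown above, and assumes is_power_of_2() from QEMU's "qemu/host-utils.h" and tcg_debug_assert() are available in translate.c.

```c
/*
 * Sketch of the reviewer's suggested variant (not part of the posted
 * patch): accumulate the size bits and assert that exactly one
 * A_MEMSIZE_* attribute is set, instead of returning from a chain of
 * ifs.  is_power_of_2(0) is false, so a store opcode with no size
 * attribute also trips the assertion.
 */
static void mark_store_width(DisasContext *ctx, Insn *insn)
{
    uint16_t opcode = insn->opcode;
    uint32_t slot = insn->slot;

    if (GET_ATTRIB(opcode, A_STORE)) {
        int size = 0;

        if (GET_ATTRIB(opcode, A_MEMSIZE_1B)) {
            size |= 1;
        }
        if (GET_ATTRIB(opcode, A_MEMSIZE_2B)) {
            size |= 2;
        }
        if (GET_ATTRIB(opcode, A_MEMSIZE_4B)) {
            size |= 4;
        }
        if (GET_ATTRIB(opcode, A_MEMSIZE_8B)) {
            size |= 8;
        }
        tcg_debug_assert(is_power_of_2(size));
        ctx->store_width[slot] = size;
    }
}
```

This is the shape the review asks for: exactly one size attribute per store opcode, enforced at translation time rather than silently taking the first match.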
diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h
index 92eb8bbf05..c8805bdaeb 100644
--- a/target/hexagon/macros.h
+++ b/target/hexagon/macros.h
@@ -156,7 +156,7 @@
         __builtin_choose_expr(TYPE_TCGV(X), \
             gen_store1, (void)0))
 #define MEM_STORE1(VA, DATA, SLOT) \
-    MEM_STORE1_FUNC(DATA)(cpu_env, VA, DATA, ctx, SLOT)
+    MEM_STORE1_FUNC(DATA)(cpu_env, VA, DATA, SLOT)
 
 #define MEM_STORE2_FUNC(X) \
     __builtin_choose_expr(TYPE_INT(X), \
@@ -164,7 +164,7 @@
         __builtin_choose_expr(TYPE_TCGV(X), \
             gen_store2, (void)0))
 #define MEM_STORE2(VA, DATA, SLOT) \
-    MEM_STORE2_FUNC(DATA)(cpu_env, VA, DATA, ctx, SLOT)
+    MEM_STORE2_FUNC(DATA)(cpu_env, VA, DATA, SLOT)
 
 #define MEM_STORE4_FUNC(X) \
     __builtin_choose_expr(TYPE_INT(X), \
@@ -172,7 +172,7 @@
         __builtin_choose_expr(TYPE_TCGV(X), \
             gen_store4, (void)0))
 #define MEM_STORE4(VA, DATA, SLOT) \
-    MEM_STORE4_FUNC(DATA)(cpu_env, VA, DATA, ctx, SLOT)
+    MEM_STORE4_FUNC(DATA)(cpu_env, VA, DATA, SLOT)
 
 #define MEM_STORE8_FUNC(X) \
     __builtin_choose_expr(TYPE_INT(X), \
@@ -180,7 +180,7 @@
         __builtin_choose_expr(TYPE_TCGV_I64(X), \
             gen_store8, (void)0))
 #define MEM_STORE8(VA, DATA, SLOT) \
-    MEM_STORE8_FUNC(DATA)(cpu_env, VA, DATA, ctx, SLOT)
+    MEM_STORE8_FUNC(DATA)(cpu_env, VA, DATA, SLOT)
 #else
 #define MEM_LOAD1s(VA) ((int8_t)mem_load1(env, slot, VA))
 #define MEM_LOAD1u(VA) ((uint8_t)mem_load1(env, slot, VA))
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index 8a334ba07b..806d0974ff 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -401,62 +401,50 @@ static inline void gen_store32(TCGv vaddr, TCGv src, int width, int slot)
     tcg_gen_mov_tl(hex_store_val32[slot], src);
 }
 
-static inline void gen_store1(TCGv_env cpu_env, TCGv vaddr, TCGv src,
-                              DisasContext *ctx, int slot)
+static inline void gen_store1(TCGv_env cpu_env, TCGv vaddr, TCGv src, int slot)
 {
     gen_store32(vaddr, src, 1, slot);
-    ctx->store_width[slot] = 1;
 }
 
-static inline void gen_store1i(TCGv_env cpu_env, TCGv vaddr, int32_t src,
-                               DisasContext *ctx, int slot)
+static inline void gen_store1i(TCGv_env cpu_env, TCGv vaddr, int32_t src, int slot)
 {
     TCGv tmp = tcg_constant_tl(src);
-    gen_store1(cpu_env, vaddr, tmp, ctx, slot);
+    gen_store1(cpu_env, vaddr, tmp, slot);
 }
 
-static inline void gen_store2(TCGv_env cpu_env, TCGv vaddr, TCGv src,
-                              DisasContext *ctx, int slot)
+static inline void gen_store2(TCGv_env cpu_env, TCGv vaddr, TCGv src, int slot)
 {
     gen_store32(vaddr, src, 2, slot);
-    ctx->store_width[slot] = 2;
 }
 
-static inline void gen_store2i(TCGv_env cpu_env, TCGv vaddr, int32_t src,
-                               DisasContext *ctx, int slot)
+static inline void gen_store2i(TCGv_env cpu_env, TCGv vaddr, int32_t src, int slot)
 {
     TCGv tmp = tcg_constant_tl(src);
-    gen_store2(cpu_env, vaddr, tmp, ctx, slot);
+    gen_store2(cpu_env, vaddr, tmp, slot);
 }
 
-static inline void gen_store4(TCGv_env cpu_env, TCGv vaddr, TCGv src,
-                              DisasContext *ctx, int slot)
+static inline void gen_store4(TCGv_env cpu_env, TCGv vaddr, TCGv src, int slot)
 {
     gen_store32(vaddr, src, 4, slot);
-    ctx->store_width[slot] = 4;
 }
 
-static inline void gen_store4i(TCGv_env cpu_env, TCGv vaddr, int32_t src,
-                               DisasContext *ctx, int slot)
+static inline void gen_store4i(TCGv_env cpu_env, TCGv vaddr, int32_t src, int slot)
 {
     TCGv tmp = tcg_constant_tl(src);
-    gen_store4(cpu_env, vaddr, tmp, ctx, slot);
+    gen_store4(cpu_env, vaddr, tmp, slot);
 }
 
-static inline void gen_store8(TCGv_env cpu_env, TCGv vaddr, TCGv_i64 src,
-                              DisasContext *ctx, int slot)
+static inline void gen_store8(TCGv_env cpu_env, TCGv vaddr, TCGv_i64 src, int slot)
 {
     tcg_gen_mov_tl(hex_store_addr[slot], vaddr);
     tcg_gen_movi_tl(hex_store_width[slot], 8);
     tcg_gen_mov_i64(hex_store_val64[slot], src);
-    ctx->store_width[slot] = 8;
 }
 
-static inline void gen_store8i(TCGv_env cpu_env, TCGv vaddr, int64_t src,
-                               DisasContext *ctx, int slot)
+static inline void gen_store8i(TCGv_env cpu_env, TCGv vaddr, int64_t src, int slot)
 {
     TCGv_i64 tmp = tcg_constant_i64(src);
-    gen_store8(cpu_env, vaddr, tmp, ctx, slot);
+    gen_store8(cpu_env, vaddr, tmp, slot);
 }
 
 static TCGv gen_8bitsof(TCGv result, TCGv value)
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 0e8a0772f7..bc02870b9f 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -327,6 +327,31 @@ static void mark_implicit_pred_writes(DisasContext *ctx, Insn *insn)
     mark_implicit_pred_write(ctx, insn, A_IMPLICIT_WRITES_P3, 3);
 }
 
+static void mark_store_width(DisasContext *ctx, Insn *insn)
+{
+    uint16_t opcode = insn->opcode;
+    uint32_t slot = insn->slot;
+
+    if (GET_ATTRIB(opcode, A_STORE)) {
+        if (GET_ATTRIB(opcode, A_MEMSIZE_1B)) {
+            ctx->store_width[slot] = 1;
+            return;
+        }
+        if (GET_ATTRIB(opcode, A_MEMSIZE_2B)) {
+            ctx->store_width[slot] = 2;
+            return;
+        }
+        if (GET_ATTRIB(opcode, A_MEMSIZE_4B)) {
+            ctx->store_width[slot] = 4;
+            return;
+        }
+        if (GET_ATTRIB(opcode, A_MEMSIZE_8B)) {
+            ctx->store_width[slot] = 8;
+            return;
+        }
+    }
+}
+
 static void gen_insn(CPUHexagonState *env, DisasContext *ctx,
                      Insn *insn, Packet *pkt)
 {
@@ -334,6 +359,7 @@ static void gen_insn(CPUHexagonState *env, DisasContext *ctx,
         mark_implicit_reg_writes(ctx, insn);
         insn->generate(env, ctx, insn, pkt);
         mark_implicit_pred_writes(ctx, insn);
+        mark_store_width(ctx, insn);
     } else {
         gen_exception_end_tb(ctx, HEX_EXCP_INVALID_OPCODE);
     }
The store width is needed for packet commit, so it is stored in
ctx->store_width.  Currently, it is set when a store has a TCG
override instead of a QEMU helper.  In the QEMU helper case,
ctx->store_width is not set, so we invoke a helper during packet
commit that uses the runtime store width.

This patch ensures ctx->store_width is set for all store instructions,
improving performance because packet commit can generate the proper
TCG store rather than the generic helper.

We do this by
- Using the attributes from the instructions during translation to
  set ctx->store_width
- Removing the setting of ctx->store_width from genptr.c

Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
 target/hexagon/macros.h    |  8 ++++----
 target/hexagon/genptr.c    | 36 ++++++++++++------------------------
 target/hexagon/translate.c | 26 ++++++++++++++++++++++++++
 3 files changed, 42 insertions(+), 28 deletions(-)
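For context on the performance claim: once ctx->store_width[slot] is known at translation time, packet commit can emit a TCG store of that exact width instead of calling the generic commit helper. The sketch below illustrates the idea only; it is not the actual packet-commit code in genptr.c. hex_store_addr, hex_store_val32 and hex_store_val64 are the per-slot TCG globals already used in the patch, while ctx->mem_idx and gen_helper_commit_store() are assumed names for illustration.

```c
/*
 * Illustrative sketch only -- not the real packet-commit path from
 * genptr.c.  If the translator recorded a store width for this slot,
 * emit a TCG store of that width directly; otherwise fall back to a
 * runtime helper that reads hex_store_width[slot].  ctx->mem_idx and
 * the helper name are assumptions made for this example.
 */
static void commit_store_sketch(DisasContext *ctx, int slot)
{
    switch (ctx->store_width[slot]) {
    case 1:
        tcg_gen_qemu_st_tl(hex_store_val32[slot], hex_store_addr[slot],
                           ctx->mem_idx, MO_UB);
        break;
    case 2:
        tcg_gen_qemu_st_tl(hex_store_val32[slot], hex_store_addr[slot],
                           ctx->mem_idx, MO_TEUW);
        break;
    case 4:
        tcg_gen_qemu_st_tl(hex_store_val32[slot], hex_store_addr[slot],
                           ctx->mem_idx, MO_TEUL);
        break;
    case 8:
        tcg_gen_qemu_st_i64(hex_store_val64[slot], hex_store_addr[slot],
                            ctx->mem_idx, MO_TEUQ);
        break;
    default:
        /* Width unknown at translation time: use the generic helper. */
        gen_helper_commit_store(cpu_env, tcg_constant_tl(slot));
        break;
    }
}
```

With this patch the default case should no longer be reached for ordinary stores, which is exactly where the speedup comes from.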