Message ID | 20210413211129.457272-6-luis.pires@eldorado.org.br (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Base for adding PowerPC 64-bit instructions | expand |
Hi Luis, On 4/13/21 11:11 PM, Luis Pires wrote: > This implements the Power ISA 3.1 prefixed (64-bit) paddi > instruction, while also replacing the legacy addi implementation. > Both using the decode tree. > > Signed-off-by: Luis Pires <luis.pires@eldorado.org.br> > Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br> > --- > target/ppc/ppc.decode | 8 +++++++ > target/ppc/translate.c | 15 +------------ > target/ppc/translate/fixedpoint-impl.c.inc | 26 ++++++++++++++++++++++ > 3 files changed, 35 insertions(+), 14 deletions(-) > create mode 100644 target/ppc/translate/fixedpoint-impl.c.inc > > diff --git a/target/ppc/ppc.decode b/target/ppc/ppc.decode > index 84bb73ac19..c87174dc20 100644 > --- a/target/ppc/ppc.decode > +++ b/target/ppc/ppc.decode > @@ -16,3 +16,11 @@ > # You should have received a copy of the GNU Lesser General Public > # License along with this library; if not, see <http://www.gnu.org/licenses/>. > # > + > +%p_D8_SI 32:s18 0:16 > + > +# Fixed-Point Facility Instructions > +&addi r rt ra si > + > +paddi 000001 10 0 -- r:1 -- .................. 001110 rt:5 ra:5 ................ si=%p_D8_SI &addi IIUC you should be able to do something like catch ra=0 first ...: { addi_movi 000001 10 0 -- r:1 -- .................. 001110 rt:5 ..... ................ si=%p_D8_SI &addi ra=0 addi 000001 10 0 -- r:1 -- .................. 001110 rt:5 ra:5 ................ si=%p_D8_SI &addi } > +addi 001110 rt:5 ra:5 si:s16 &addi r=0 > diff --git a/target/ppc/translate.c b/target/ppc/translate.c > index 0f123f7b80..2ff192c9e5 100644 > --- a/target/ppc/translate.c > +++ b/target/ppc/translate.c > @@ -945,19 +945,6 @@ GEN_INT_ARITH_ADD(addex, 0x05, cpu_ov, 1, 1, 0); > /* addze addze. addzeo addzeo.*/ > GEN_INT_ARITH_ADD_CONST(addze, 0x06, 0, cpu_ca, 1, 1, 0) > GEN_INT_ARITH_ADD_CONST(addzeo, 0x16, 0, cpu_ca, 1, 1, 1) > -/* addi */ > -static void gen_addi(DisasContext *ctx) > -{ > - target_long simm = SIMM(ctx->opcode); > - > - if (rA(ctx->opcode) == 0) { > - /* li case */ > - tcg_gen_movi_tl(cpu_gpr[rD(ctx->opcode)], simm); > - } else { > - tcg_gen_addi_tl(cpu_gpr[rD(ctx->opcode)], > - cpu_gpr[rA(ctx->opcode)], simm); > - } > -} > /* addic addic.*/ > static inline void gen_op_addic(DisasContext *ctx, bool compute_rc0) > { > @@ -6967,6 +6954,7 @@ static target_ulong ppc_peek_next_insn_size(DisasContext *ctx) > } > > #include "decode-ppc.c.inc" > +#include "translate/fixedpoint-impl.c.inc" > > #include "translate/fp-impl.c.inc" > > @@ -7091,7 +7079,6 @@ GEN_HANDLER_E(cmpeqb, 0x1F, 0x00, 0x07, 0x00600000, PPC_NONE, PPC2_ISA300), > GEN_HANDLER_E(cmpb, 0x1F, 0x1C, 0x0F, 0x00000001, PPC_NONE, PPC2_ISA205), > GEN_HANDLER_E(cmprb, 0x1F, 0x00, 0x06, 0x00400001, PPC_NONE, PPC2_ISA300), > GEN_HANDLER(isel, 0x1F, 0x0F, 0xFF, 0x00000001, PPC_ISEL), > -GEN_HANDLER(addi, 0x0E, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), > GEN_HANDLER(addic, 0x0C, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), > GEN_HANDLER2(addic_, "addic.", 0x0D, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), > GEN_HANDLER(addis, 0x0F, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), > diff --git a/target/ppc/translate/fixedpoint-impl.c.inc b/target/ppc/translate/fixedpoint-impl.c.inc > new file mode 100644 > index 0000000000..8620954b57 > --- /dev/null > +++ b/target/ppc/translate/fixedpoint-impl.c.inc > @@ -0,0 +1,26 @@ > +static bool trans_paddi(DisasContext *ctx, arg_paddi *a) (Nitpick, use the format: arg_addi, not arg_paddi). > +{ > + if (a->r == 0) { > + if (a->ra == 0) { > + /* li case */ > + tcg_gen_movi_tl(cpu_gpr[a->rt], a->si); > + } else { > + tcg_gen_addi_tl(cpu_gpr[a->rt], > + cpu_gpr[a->ra], a->si); > + } > + } else { > + if (a->ra == 0) { > + tcg_gen_addi_tl(cpu_gpr[a->rt], cpu_nip, a->si); > + } else { > + /* invalid form */ > + gen_invalid(ctx); > + } > + } > + > + return true; > +} > + > +static bool trans_addi(DisasContext *ctx, arg_addi *a) > +{ > + return trans_paddi(ctx, a); > +} ... which then simplifies the trans_OPCODE() logic: static bool trans_addi_movi(DisasContext *ctx, arg_addi *a) { if (a->r == 0) { /* li case */ tcg_gen_movi_tl(cpu_gpr[a->rt], a->si); } else { tcg_gen_addi_tl(cpu_gpr[a->rt], cpu_nip, a->si); } return true; } static bool trans_addi(DisasContext *ctx, arg_addi *a) { if (a->r == 0) { tcg_gen_addi_tl(cpu_gpr[a->rt], cpu_gpr[a->ra], a->si); } else { /* invalid form */ gen_invalid(ctx); } return true; } Maybe you can do the same with the r bit to remove the dual addi_movi. Regards, Phil.
Hi Phil, > > + > > +%p_D8_SI 32:s18 0:16 > > + > > +# Fixed-Point Facility Instructions > > +&addi r rt ra si > > + > > +paddi 000001 10 0 -- r:1 -- .................. 001110 rt:5 ra:5 ................ > si=%p_D8_SI &addi > > IIUC you should be able to do something like catch ra=0 first ...: > > { > addi_movi 000001 10 0 -- r:1 -- .................. 001110 rt:5 ..... > ................ si=%p_D8_SI &addi ra=0 > addi 000001 10 0 -- r:1 -- .................. 001110 rt:5 ra:5 > ................ si=%p_D8_SI &addi > } > > +++ b/target/ppc/translate/fixedpoint-impl.c.inc > > @@ -0,0 +1,26 @@ > > +static bool trans_paddi(DisasContext *ctx, arg_paddi *a) > > (Nitpick, use the format: arg_addi, not arg_paddi). Sure, will do! I was using the exact function signature generated by decodetree, but using 'arg_addi' makes more sense, as it will make it clearer that it's the same struct. > > > +{ > > + if (a->r == 0) { > > + if (a->ra == 0) { > > + /* li case */ > > + tcg_gen_movi_tl(cpu_gpr[a->rt], a->si); > > + } else { > > + tcg_gen_addi_tl(cpu_gpr[a->rt], > > + cpu_gpr[a->ra], a->si); > > + } > > + } else { > > + if (a->ra == 0) { > > + tcg_gen_addi_tl(cpu_gpr[a->rt], cpu_nip, a->si); > > + } else { > > + /* invalid form */ > > + gen_invalid(ctx); > > + } > > + } > > + > > + return true; > > +} > > + > > +static bool trans_addi(DisasContext *ctx, arg_addi *a) { > > + return trans_paddi(ctx, a); > > +} > > ... which then simplifies the trans_OPCODE() logic: > > static bool trans_addi_movi(DisasContext *ctx, arg_addi *a) { > if (a->r == 0) { > /* li case */ > tcg_gen_movi_tl(cpu_gpr[a->rt], a->si); > } else { > tcg_gen_addi_tl(cpu_gpr[a->rt], cpu_nip, a->si); > } > > return true; > } > > static bool trans_addi(DisasContext *ctx, arg_addi *a) { > if (a->r == 0) { > tcg_gen_addi_tl(cpu_gpr[a->rt], cpu_gpr[a->ra], a->si); > } else { > /* invalid form */ > gen_invalid(ctx); > } > > return true; > } > > Maybe you can do the same with the r bit to remove the dual addi_movi. Right, that can be done. On the other hand, keeping this logic inside trans_paddi and not in the .decode file makes it simpler (and less error-prone) to check that the implementation matches the official ISA documentation, when opening both side by side. This is because the ISA specifies the instruction format for paddi as a whole (which can be easily matched to what would be in the .decode file) and, after that, the ISA specifies how each case should be handled (which could then be checked by just looking at the trans_paddi implementation, which would be in a single function). Since the trans_paddi implementation with the ra/r handling is not that complex either, I would recommend keeping the clearer separation between the instruction formats (in the .decode file) and the handling of each case in the trans_OPCODE() logic. What do you think? Thanks, Luis
On 4/13/21 2:11 PM, Luis Pires wrote: > +++ b/target/ppc/translate/fixedpoint-impl.c.inc > @@ -0,0 +1,26 @@ Missing copyright+license header. > +static bool trans_paddi(DisasContext *ctx, arg_paddi *a) > +{ > + if (a->r == 0) { > + if (a->ra == 0) { > + /* li case */ > + tcg_gen_movi_tl(cpu_gpr[a->rt], a->si); > + } else { > + tcg_gen_addi_tl(cpu_gpr[a->rt], > + cpu_gpr[a->ra], a->si); > + } > + } else { > + if (a->ra == 0) { > + tcg_gen_addi_tl(cpu_gpr[a->rt], cpu_nip, a->si); > + } else { > + /* invalid form */ > + gen_invalid(ctx); > + } > + } > + > + return true; > +} > + > +static bool trans_addi(DisasContext *ctx, arg_addi *a) > +{ > + return trans_paddi(ctx, a); > +} Just a note about decodetree: this kind of thing is where you would use the same name for both patterns, so that you would not need to have a separate symbol for addi (or vice versa). That said, I'm now having a bit of a read-up on power10, and I have some suggestions. First, type 2 and type 3 prefixes modify existing instructions. Which means that you are going to wind up with a lot of duplication. Preferentially, we should avoid that. One example of how to approach this is target/microblaze, which has an "imm" instruction prefix to extend a 16-bit immediate to a 32-bit immediate. This can be worked into decodetree directly: # Include any IMM prefix in the value reported. %extimm 0:s16 !function=typeb_imm @typeb ...... rd:5 ra:5 ................ \ &typeb imm=%extimm static int typeb_imm(DisasContext *dc, int x) { if (dc->tb_flags & IMM_FLAG) { return deposit32(dc->ext_imm, 0, 16, x); } return x; } static bool trans_imm(DisasContext *dc, arg_imm *arg) { if (invalid_delay_slot(dc, "imm")) { return true; } dc->ext_imm = arg->imm << 16; tcg_gen_movi_i32(cpu_imm, dc->ext_imm); dc->tb_flags_to_set = IMM_FLAG; return true; } We decode "imm" as a separate instruction, set some bits in DisasContext, and then use those bits while decoding the next instruction. I think the exact mechanism that microblaze uses is going to be too simplistic for Power10, but the basic idea of modifying the "normal" instructions is still sound, I think. Using addi+paddi as an example, what about # All ppc formats have names -- use them. %MLS r ie prefix_MLS 000001 10 -- r:1 -- ie:s18 &MLS # TODO: decodetree extension -- allow :type after name. # The SI field needs to be 64-bit for MLS:D-form. %D rt ra si:int64_t @D ...... rt:5 ra:5 si:s16 ADDI 001110 ..... ..... ................ @D static bool trans_prefix_MLS(DisasContext *ctx, arg_MLS *a) { if (cpu does not support prefixes || ctx->prefix_type != PREFIX_NONE) { return false; } /* Record the prefix for the next instruction. */ ctx->prefix_type = PREFIX_MLS; ctx->prefix_data.mls = *a; return true; } static bool allow_prefix_MLS(DisasContext *ctx, arg_D *a) { int64_t imm; /* Require MLS prefix or no prefix. */ if (ctx->prefix_type != PREFIX_MLS) { if (ctx->prefix_type == PREFIX_NONE) { return true; } gen_invalid(ctx); return false; } /* * Concatenate the two immediate fields. * Note that IE is sign-extended 18 bits, * so this forms a signed 34-bit constant. */ imm = deposit64(a->si, 16, 48, ctx->prefix_data.mls.ie); /* * If R, then the constant is pc-relative, * and RA must be 0. */ if (ctx->prefix_data.mls.r) { if (ctx->prefix_data.mls.ra != 0) { gen_invalid(ctx); return false; } imm += ctx->cia; } a->si = imm; return true; } static bool trans_ADDI(DisasContext *ctx, arg_D *a) { if (!allow_prefix_MLS(ctx, a)) { return true; } if (a->ra) { tcg_gen_addi_tl(cpu_gpr[a->rt], cpu_gpr[a->ra], a->si); } else { tcg_gen_movi_tl(cpu_gpr[a->rt], a->si); } return true; } This approach seems like it will work fine for MLS and MMIR prefixes. For 8LS, 8RR, and MRR prefixes, we'll need some extra help within ppc_tr_translate_insn. E.g. insn = translator_ldl_swap(env, ctx->base.pc_next, need_byteswap(ctx)); switch (ctx->prefix_type) { case PREFIX_NONE: ok = decode_opcode_space_0(ctx, insn) || decode_legacy(ctx, insn); break; case PREFIX_MLS: case PREFIX_MMIRR: ok = decode_opcode_space_0(ctx, insn); break; case PREFIX_8LS: case PREFIX_8RR: ok = decode_opcode_space_1(ctx, insn); break; case PREFIX_MRR: /* * The only instruction with this prefix is PNOP. * TODO: diagnose the set of patterns that are illegal: * branches, rfebb, sync other than isync, or a * service processor attention. * The Engineering Note allows us to either diagnose * these as illegal, or treat them all as no-op. */ ok = true; break; default: g_assert_not_reached(); } if (!ok) { gen_invalid(ctx); } r~
On 4/14/21 12:11 PM, Richard Henderson wrote: > static bool > allow_prefix_MLS(DisasContext *ctx, arg_D *a) > { > int64_t imm; > > /* Require MLS prefix or no prefix. */ > if (ctx->prefix_type != PREFIX_MLS) { > if (ctx->prefix_type == PREFIX_NONE) { > return true; > } > gen_invalid(ctx); > return false; > } Combined with the switch on prefix_type in translate_insn, I think this can just simplify to if (ctx->prefix_type != PREFIX_MLS) { return ctx->prefix_type == PREFIX_NONE; } because decode_legacy is only called from within PREFIX_NONE. r~
On 4/14/21 12:11 PM, Richard Henderson wrote: > This approach seems like it will work fine for MLS and MMIR prefixes. For 8LS, > 8RR, and MRR prefixes, we'll need some extra help within ppc_tr_translate_insn. > E.g. > > insn = translator_ldl_swap(env, ctx->base.pc_next, > need_byteswap(ctx)); > switch (ctx->prefix_type) { > case PREFIX_NONE: > ok = decode_opcode_space_0(ctx, insn) || > decode_legacy(ctx, insn); > break; > case PREFIX_MLS: > case PREFIX_MMIRR: > ok = decode_opcode_space_0(ctx, insn); > break; I played about with this last night, and there's an interesting trade-off: (1) The thousands of 32-bit insns which do not allow prefixes now each require 3 lines to assert that no prefix is present, (2) There are only 12 MLS and 29 MMIRR prefixed insns. I think it may well be that eliminating boiler-plate from thousands of insns it a good trade-off to special-casing 41 insns. At which point, considering the multiple variations on 8RR and MMIRR prefixes, seems to indicate that we should decode all 64 bits all at once. r~
diff --git a/target/ppc/ppc.decode b/target/ppc/ppc.decode index 84bb73ac19..c87174dc20 100644 --- a/target/ppc/ppc.decode +++ b/target/ppc/ppc.decode @@ -16,3 +16,11 @@ # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, see <http://www.gnu.org/licenses/>. # + +%p_D8_SI 32:s18 0:16 + +# Fixed-Point Facility Instructions +&addi r rt ra si + +paddi 000001 10 0 -- r:1 -- .................. 001110 rt:5 ra:5 ................ si=%p_D8_SI &addi +addi 001110 rt:5 ra:5 si:s16 &addi r=0 diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 0f123f7b80..2ff192c9e5 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -945,19 +945,6 @@ GEN_INT_ARITH_ADD(addex, 0x05, cpu_ov, 1, 1, 0); /* addze addze. addzeo addzeo.*/ GEN_INT_ARITH_ADD_CONST(addze, 0x06, 0, cpu_ca, 1, 1, 0) GEN_INT_ARITH_ADD_CONST(addzeo, 0x16, 0, cpu_ca, 1, 1, 1) -/* addi */ -static void gen_addi(DisasContext *ctx) -{ - target_long simm = SIMM(ctx->opcode); - - if (rA(ctx->opcode) == 0) { - /* li case */ - tcg_gen_movi_tl(cpu_gpr[rD(ctx->opcode)], simm); - } else { - tcg_gen_addi_tl(cpu_gpr[rD(ctx->opcode)], - cpu_gpr[rA(ctx->opcode)], simm); - } -} /* addic addic.*/ static inline void gen_op_addic(DisasContext *ctx, bool compute_rc0) { @@ -6967,6 +6954,7 @@ static target_ulong ppc_peek_next_insn_size(DisasContext *ctx) } #include "decode-ppc.c.inc" +#include "translate/fixedpoint-impl.c.inc" #include "translate/fp-impl.c.inc" @@ -7091,7 +7079,6 @@ GEN_HANDLER_E(cmpeqb, 0x1F, 0x00, 0x07, 0x00600000, PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(cmpb, 0x1F, 0x1C, 0x0F, 0x00000001, PPC_NONE, PPC2_ISA205), GEN_HANDLER_E(cmprb, 0x1F, 0x00, 0x06, 0x00400001, PPC_NONE, PPC2_ISA300), GEN_HANDLER(isel, 0x1F, 0x0F, 0xFF, 0x00000001, PPC_ISEL), -GEN_HANDLER(addi, 0x0E, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), GEN_HANDLER(addic, 0x0C, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), GEN_HANDLER2(addic_, "addic.", 0x0D, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), GEN_HANDLER(addis, 0x0F, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), diff --git a/target/ppc/translate/fixedpoint-impl.c.inc b/target/ppc/translate/fixedpoint-impl.c.inc new file mode 100644 index 0000000000..8620954b57 --- /dev/null +++ b/target/ppc/translate/fixedpoint-impl.c.inc @@ -0,0 +1,26 @@ +static bool trans_paddi(DisasContext *ctx, arg_paddi *a) +{ + if (a->r == 0) { + if (a->ra == 0) { + /* li case */ + tcg_gen_movi_tl(cpu_gpr[a->rt], a->si); + } else { + tcg_gen_addi_tl(cpu_gpr[a->rt], + cpu_gpr[a->ra], a->si); + } + } else { + if (a->ra == 0) { + tcg_gen_addi_tl(cpu_gpr[a->rt], cpu_nip, a->si); + } else { + /* invalid form */ + gen_invalid(ctx); + } + } + + return true; +} + +static bool trans_addi(DisasContext *ctx, arg_addi *a) +{ + return trans_paddi(ctx, a); +}