Message ID | 1468346602-20700-5-git-send-email-nikunj@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, Jul 12, 2016 at 11:33:20PM +0530, Nikunj A Dadhania wrote: > ISA 3.0 Compare Ranged Byte instruction useful for > isupper/islower/isaplha kind of operation. At least until you have locale-aware versions of those... > Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> > --- > target-ppc/translate.c | 40 ++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 40 insertions(+) > > diff --git a/target-ppc/translate.c b/target-ppc/translate.c > index 93c7c66..8de217f 100644 > --- a/target-ppc/translate.c > +++ b/target-ppc/translate.c > @@ -817,6 +817,45 @@ static void gen_cmpli(DisasContext *ctx) > } > } > > +/* cmprb - range comparison: isupper, isaplha, islower*/ > +static void gen_cmprb(DisasContext *ctx) > +{ > + TCGLabel *lab1 = gen_new_label(); > + TCGLabel *lab2 = gen_new_label(); > + TCGv src1 = tcg_temp_local_new(); > + TCGv src2 = tcg_temp_local_new(); > + TCGv src2lo = tcg_temp_local_new(); > + TCGv src2hi = tcg_temp_local_new(); > + > + tcg_gen_andi_tl(src1, cpu_gpr[rA(ctx->opcode)], 0xFF); > + tcg_gen_andi_tl(src2, cpu_gpr[rB(ctx->opcode)], 0xFFFFFFFF); > + > + tcg_gen_andi_tl(src2lo, src2, 0xFF); > + tcg_gen_shri_tl(src2hi, src2, 8); > + tcg_gen_andi_tl(src2hi, src2hi, 0xFF); > + > + tcg_gen_brcond_tl(TCG_COND_GTU, src1, src2hi, lab1); > + tcg_gen_brcond_tl(TCG_COND_LTU, src1, src2lo, lab1); > + tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 1 << CRF_GT); > + tcg_gen_br(lab2); > + gen_set_label(lab1); > + > + if (ctx->opcode & 0x00200000) { > + tcg_gen_shri_tl(src2hi, src2, 24); > + tcg_gen_andi_tl(src2hi, src2hi, 0xFF); > + tcg_gen_shri_tl(src2lo, src2, 16); > + tcg_gen_andi_tl(src2lo, src2lo, 0xFF); > + tcg_gen_brcond_tl(TCG_COND_GTU, src1, src2hi, lab2); > + tcg_gen_brcond_tl(TCG_COND_LTU, src1, src2lo, lab2); > + tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 1 << CRF_GT); > + } > + gen_set_label(lab2); > + tcg_temp_free(src1); > + tcg_temp_free(src2); > + tcg_temp_free(src2lo); 
> + tcg_temp_free(src2hi); > +} > + > /* isel (PowerPC 2.03 specification) */ > static void gen_isel(DisasContext *ctx) > { > @@ -9898,6 +9937,7 @@ GEN_HANDLER(cmpi, 0x0B, 0xFF, 0xFF, 0x00400000, PPC_INTEGER), > GEN_HANDLER(cmpl, 0x1F, 0x00, 0x01, 0x00400000, PPC_INTEGER), > GEN_HANDLER(cmpli, 0x0A, 0xFF, 0xFF, 0x00400000, PPC_INTEGER), > GEN_HANDLER_E(cmpb, 0x1F, 0x1C, 0x0F, 0x00000001, PPC_NONE, PPC2_ISA205), > +GEN_HANDLER_E(cmprb, 0x1F, 0x00, 0x06, 0x00400001, PPC_NONE, PPC2_ISA300), > GEN_HANDLER(isel, 0x1F, 0x0F, 0xFF, 0x00000001, PPC_ISEL), > GEN_HANDLER(addi, 0x0E, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), > GEN_HANDLER(addic, 0x0C, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
On 07/12/2016 11:33 PM, Nikunj A Dadhania wrote: > +/* cmprb - range comparison: isupper, isaplha, islower*/ > +static void gen_cmprb(DisasContext *ctx) > +{ > + TCGLabel *lab1 = gen_new_label(); > + TCGLabel *lab2 = gen_new_label(); > + TCGv src1 = tcg_temp_local_new(); > + TCGv src2 = tcg_temp_local_new(); > + TCGv src2lo = tcg_temp_local_new(); > + TCGv src2hi = tcg_temp_local_new(); > + > + tcg_gen_andi_tl(src1, cpu_gpr[rA(ctx->opcode)], 0xFF); > + tcg_gen_andi_tl(src2, cpu_gpr[rB(ctx->opcode)], 0xFFFFFFFF); There's no point in this mask, since it's covered by > + > + tcg_gen_andi_tl(src2lo, src2, 0xFF); > + tcg_gen_shri_tl(src2hi, src2, 8); > + tcg_gen_andi_tl(src2hi, src2hi, 0xFF); these ones. > + > + tcg_gen_brcond_tl(TCG_COND_GTU, src1, src2hi, lab1); > + tcg_gen_brcond_tl(TCG_COND_LTU, src1, src2lo, lab1); > + tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 1 << CRF_GT); > + tcg_gen_br(lab2); > + gen_set_label(lab1); > + > + if (ctx->opcode & 0x00200000) { > + tcg_gen_shri_tl(src2hi, src2, 24); > + tcg_gen_andi_tl(src2hi, src2hi, 0xFF); > + tcg_gen_shri_tl(src2lo, src2, 16); > + tcg_gen_andi_tl(src2lo, src2lo, 0xFF); > + tcg_gen_brcond_tl(TCG_COND_GTU, src1, src2hi, lab2); > + tcg_gen_brcond_tl(TCG_COND_LTU, src1, src2lo, lab2); > + tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 1 << CRF_GT); > + } > + gen_set_label(lab2); > + tcg_temp_free(src1); > + tcg_temp_free(src2); > + tcg_temp_free(src2lo); > + tcg_temp_free(src2hi); > +} You've forgotten to clear crf in the false case. 
This is better implemented without branches, like TCGv_i32 src1, src2, src2lo, src2hi; TCGv_i32 crf = cpu_crf[cdfD(ctx->opcode)]; // allocate all 4 "src" temps tcg_gen_trunc_tl_i32(src1, cpu_gpr[rA(ctx->opcode)]); tcg_gen_trunc_tl_i32(src2, cpu_gpr[rB(ctx->opcode)]); tcg_gen_ext8u_i32(src2lo, src2); tcg_gen_shri_i32(src2, src2, 8); tcg_gen_extu8_i32(src2hi, src2hi); tcg_gen_setcond_tl(TCG_COND_LEU, src2lo, src2lo, src1); tcg_gen_setcond_tl(TCG_COND_LEU, src2hi, src1, src2hi); tcg_gen_and_tl(crf, src2lo, src2hi); if (ctx->opcode & 0x00200000) { tcg_gen_shri_i32(src2, src2, 8); tcg_gen_ext8u_i32(src2lo, src2); tcg_gen_shri_i32(src2, src2, 8); tcg_gen_ext8u_i32(src2hi, src2); tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1); tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi); tcg_gen_and_i32(src2lo, src2lo, src2hi); tcg_gen_or_i32(crf, crf, src2lo); } tcg_gen_shli_i32(crf, crf, CRF_GT); // free all 4 "src" temps r~
Richard Henderson <rth@twiddle.net> writes: > On 07/12/2016 11:33 PM, Nikunj A Dadhania wrote: >> +/* cmprb - range comparison: isupper, isaplha, islower*/ >> +static void gen_cmprb(DisasContext *ctx) >> +{ >> + TCGLabel *lab1 = gen_new_label(); >> + TCGLabel *lab2 = gen_new_label(); >> + TCGv src1 = tcg_temp_local_new(); >> + TCGv src2 = tcg_temp_local_new(); >> + TCGv src2lo = tcg_temp_local_new(); >> + TCGv src2hi = tcg_temp_local_new(); >> + >> + tcg_gen_andi_tl(src1, cpu_gpr[rA(ctx->opcode)], 0xFF); >> + tcg_gen_andi_tl(src2, cpu_gpr[rB(ctx->opcode)], 0xFFFFFFFF); > > There's no point in this mask, since it's covered by > >> + >> + tcg_gen_andi_tl(src2lo, src2, 0xFF); >> + tcg_gen_shri_tl(src2hi, src2, 8); >> + tcg_gen_andi_tl(src2hi, src2hi, 0xFF); > > these ones. Right. >> + >> + tcg_gen_brcond_tl(TCG_COND_GTU, src1, src2hi, lab1); >> + tcg_gen_brcond_tl(TCG_COND_LTU, src1, src2lo, lab1); >> + tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 1 << CRF_GT); >> + tcg_gen_br(lab2); >> + gen_set_label(lab1); >> + >> + if (ctx->opcode & 0x00200000) { >> + tcg_gen_shri_tl(src2hi, src2, 24); >> + tcg_gen_andi_tl(src2hi, src2hi, 0xFF); >> + tcg_gen_shri_tl(src2lo, src2, 16); >> + tcg_gen_andi_tl(src2lo, src2lo, 0xFF); >> + tcg_gen_brcond_tl(TCG_COND_GTU, src1, src2hi, lab2); >> + tcg_gen_brcond_tl(TCG_COND_LTU, src1, src2lo, lab2); >> + tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 1 << CRF_GT); >> + } >> + gen_set_label(lab2); >> + tcg_temp_free(src1); >> + tcg_temp_free(src2); >> + tcg_temp_free(src2lo); >> + tcg_temp_free(src2hi); >> +} > > You've forgotten to clear crf in the false case. Yes, next version has the fix. 
> This is better implemented without branches, like > > TCGv_i32 src1, src2, src2lo, src2hi; > TCGv_i32 crf = cpu_crf[cdfD(ctx->opcode)]; > > // allocate all 4 "src" temps > > tcg_gen_trunc_tl_i32(src1, cpu_gpr[rA(ctx->opcode)]); > tcg_gen_trunc_tl_i32(src2, cpu_gpr[rB(ctx->opcode)]); > > tcg_gen_ext8u_i32(src2lo, src2); > tcg_gen_shri_i32(src2, src2, 8); > tcg_gen_extu8_i32(src2hi, src2hi); > > tcg_gen_setcond_tl(TCG_COND_LEU, src2lo, src2lo, src1); > tcg_gen_setcond_tl(TCG_COND_LEU, src2hi, src1, src2hi); > tcg_gen_and_tl(crf, src2lo, src2hi); > > if (ctx->opcode & 0x00200000) { > tcg_gen_shri_i32(src2, src2, 8); > tcg_gen_ext8u_i32(src2lo, src2); > tcg_gen_shri_i32(src2, src2, 8); > tcg_gen_ext8u_i32(src2hi, src2); > tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1); > tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi); > tcg_gen_and_i32(src2lo, src2lo, src2hi); > tcg_gen_or_i32(crf, crf, src2lo); > } > > tcg_gen_shli_i32(crf, crf, CRF_GT); > > // free all 4 "src" temps Sure. Regards Nikunj
Richard Henderson <rth@twiddle.net> writes: > This is better implemented without branches, like > > TCGv_i32 src1, src2, src2lo, src2hi; > TCGv_i32 crf = cpu_crf[cdfD(ctx->opcode)]; > > // allocate all 4 "src" temps > > tcg_gen_trunc_tl_i32(src1, cpu_gpr[rA(ctx->opcode)]); > tcg_gen_trunc_tl_i32(src2, cpu_gpr[rB(ctx->opcode)]); > As the user input can pass something more than 0xFF, we need only the RA(56:63). Anton's fuzzer test hit this bug. :-) GPR06 0x0706050403021101 cmprb cr5,1,r6,r6 Should be a match (RA = 0x01, RB=0x03021101), but fails tcg_gen_andi_i32(src1, src1, 0xFF); Will send an fix patch, we can probably squash with the original one. > tcg_gen_ext8u_i32(src2lo, src2); > tcg_gen_shri_i32(src2, src2, 8); > tcg_gen_extu8_i32(src2hi, src2hi); > > tcg_gen_setcond_tl(TCG_COND_LEU, src2lo, src2lo, src1); > tcg_gen_setcond_tl(TCG_COND_LEU, src2hi, src1, src2hi); > tcg_gen_and_tl(crf, src2lo, src2hi); > > if (ctx->opcode & 0x00200000) { > tcg_gen_shri_i32(src2, src2, 8); > tcg_gen_ext8u_i32(src2lo, src2); > tcg_gen_shri_i32(src2, src2, 8); > tcg_gen_ext8u_i32(src2hi, src2); > tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1); > tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi); > tcg_gen_and_i32(src2lo, src2lo, src2hi); > tcg_gen_or_i32(crf, crf, src2lo); > } > > tcg_gen_shli_i32(crf, crf, CRF_GT); > > // free all 4 "src" temps Regards Nikunj
diff --git a/target-ppc/translate.c b/target-ppc/translate.c index 93c7c66..8de217f 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -817,6 +817,45 @@ static void gen_cmpli(DisasContext *ctx) } } +/* cmprb - Compare Ranged Byte: range comparison for isupper, isalpha, islower. NOTE(review): crf is only written when a range matches; it is not cleared otherwise. */ +static void gen_cmprb(DisasContext *ctx) +{ + TCGLabel *lab1 = gen_new_label(); + TCGLabel *lab2 = gen_new_label(); + TCGv src1 = tcg_temp_local_new(); + TCGv src2 = tcg_temp_local_new(); + TCGv src2lo = tcg_temp_local_new(); + TCGv src2hi = tcg_temp_local_new(); + + tcg_gen_andi_tl(src1, cpu_gpr[rA(ctx->opcode)], 0xFF); + tcg_gen_andi_tl(src2, cpu_gpr[rB(ctx->opcode)], 0xFFFFFFFF); + + tcg_gen_andi_tl(src2lo, src2, 0xFF); + tcg_gen_shri_tl(src2hi, src2, 8); + tcg_gen_andi_tl(src2hi, src2hi, 0xFF); + + tcg_gen_brcond_tl(TCG_COND_GTU, src1, src2hi, lab1); + tcg_gen_brcond_tl(TCG_COND_LTU, src1, src2lo, lab1); + tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 1 << CRF_GT); + tcg_gen_br(lab2); + gen_set_label(lab1); + + if (ctx->opcode & 0x00200000) { + tcg_gen_shri_tl(src2hi, src2, 24); + tcg_gen_andi_tl(src2hi, src2hi, 0xFF); + tcg_gen_shri_tl(src2lo, src2, 16); + tcg_gen_andi_tl(src2lo, src2lo, 0xFF); + tcg_gen_brcond_tl(TCG_COND_GTU, src1, src2hi, lab2); + tcg_gen_brcond_tl(TCG_COND_LTU, src1, src2lo, lab2); + tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 1 << CRF_GT); + } + gen_set_label(lab2); + tcg_temp_free(src1); + tcg_temp_free(src2); + tcg_temp_free(src2lo); + tcg_temp_free(src2hi); +} + /* isel (PowerPC 2.03 specification) */ static void gen_isel(DisasContext *ctx) { @@ -9898,6 +9937,7 @@ GEN_HANDLER(cmpi, 0x0B, 0xFF, 0xFF, 0x00400000, PPC_INTEGER), GEN_HANDLER(cmpl, 0x1F, 0x00, 0x01, 0x00400000, PPC_INTEGER), GEN_HANDLER(cmpli, 0x0A, 0xFF, 0xFF, 0x00400000, PPC_INTEGER), GEN_HANDLER_E(cmpb, 0x1F, 0x1C, 0x0F, 0x00000001, PPC_NONE, PPC2_ISA205), +GEN_HANDLER_E(cmprb, 0x1F, 0x00, 0x06, 0x00400001, PPC_NONE, PPC2_ISA300), GEN_HANDLER(isel, 0x1F, 0x0F, 0xFF, 0x00000001, PPC_ISEL), 
GEN_HANDLER(addi, 0x0E, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), GEN_HANDLER(addic, 0x0C, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
ISA 3.0 Compare Ranged Byte instruction, useful for isupper/islower/isalpha kinds of operations. Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> --- target-ppc/translate.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+)