diff mbox

[RFC,4/6] target-ppc: add cmprb instruction

Message ID 1468346602-20700-5-git-send-email-nikunj@linux.vnet.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Nikunj A. Dadhania July 12, 2016, 6:03 p.m. UTC
ISA 3.0 Compare Ranged Byte instruction, useful for
isupper/islower/isalpha kinds of operations.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/translate.c | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

Comments

David Gibson July 18, 2016, 2 a.m. UTC | #1
On Tue, Jul 12, 2016 at 11:33:20PM +0530, Nikunj A Dadhania wrote:
> ISA 3.0 Compare Ranged Byte instruction, useful for
> isupper/islower/isalpha kinds of operations.

At least until you have locale-aware versions of those...

> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
>  target-ppc/translate.c | 40 ++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 40 insertions(+)
> 
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 93c7c66..8de217f 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -817,6 +817,45 @@ static void gen_cmpli(DisasContext *ctx)
>      }
>  }
>  
> +/* cmprb - range comparison: isupper, isaplha, islower*/
> +static void gen_cmprb(DisasContext *ctx)
> +{
> +    TCGLabel *lab1 = gen_new_label();
> +    TCGLabel *lab2 = gen_new_label();
> +    TCGv src1 = tcg_temp_local_new();
> +    TCGv src2 = tcg_temp_local_new();
> +    TCGv src2lo = tcg_temp_local_new();
> +    TCGv src2hi = tcg_temp_local_new();
> +
> +    tcg_gen_andi_tl(src1, cpu_gpr[rA(ctx->opcode)], 0xFF);
> +    tcg_gen_andi_tl(src2, cpu_gpr[rB(ctx->opcode)], 0xFFFFFFFF);
> +
> +    tcg_gen_andi_tl(src2lo, src2, 0xFF);
> +    tcg_gen_shri_tl(src2hi, src2, 8);
> +    tcg_gen_andi_tl(src2hi, src2hi, 0xFF);
> +
> +    tcg_gen_brcond_tl(TCG_COND_GTU, src1, src2hi, lab1);
> +    tcg_gen_brcond_tl(TCG_COND_LTU, src1, src2lo, lab1);
> +    tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 1 << CRF_GT);
> +    tcg_gen_br(lab2);
> +    gen_set_label(lab1);
> +
> +    if (ctx->opcode & 0x00200000) {
> +        tcg_gen_shri_tl(src2hi, src2, 24);
> +        tcg_gen_andi_tl(src2hi, src2hi, 0xFF);
> +        tcg_gen_shri_tl(src2lo, src2, 16);
> +        tcg_gen_andi_tl(src2lo, src2lo, 0xFF);
> +        tcg_gen_brcond_tl(TCG_COND_GTU, src1, src2hi, lab2);
> +        tcg_gen_brcond_tl(TCG_COND_LTU, src1, src2lo, lab2);
> +        tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 1 << CRF_GT);
> +    }
> +    gen_set_label(lab2);
> +    tcg_temp_free(src1);
> +    tcg_temp_free(src2);
> +    tcg_temp_free(src2lo);
> +    tcg_temp_free(src2hi);
> +}
> +
>  /* isel (PowerPC 2.03 specification) */
>  static void gen_isel(DisasContext *ctx)
>  {
> @@ -9898,6 +9937,7 @@ GEN_HANDLER(cmpi, 0x0B, 0xFF, 0xFF, 0x00400000, PPC_INTEGER),
>  GEN_HANDLER(cmpl, 0x1F, 0x00, 0x01, 0x00400000, PPC_INTEGER),
>  GEN_HANDLER(cmpli, 0x0A, 0xFF, 0xFF, 0x00400000, PPC_INTEGER),
>  GEN_HANDLER_E(cmpb, 0x1F, 0x1C, 0x0F, 0x00000001, PPC_NONE, PPC2_ISA205),
> +GEN_HANDLER_E(cmprb, 0x1F, 0x00, 0x06, 0x00400001, PPC_NONE, PPC2_ISA300),
>  GEN_HANDLER(isel, 0x1F, 0x0F, 0xFF, 0x00000001, PPC_ISEL),
>  GEN_HANDLER(addi, 0x0E, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
>  GEN_HANDLER(addic, 0x0C, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
Richard Henderson July 21, 2016, 6:17 a.m. UTC | #2
On 07/12/2016 11:33 PM, Nikunj A Dadhania wrote:
> +/* cmprb - range comparison: isupper, isaplha, islower*/
> +static void gen_cmprb(DisasContext *ctx)
> +{
> +    TCGLabel *lab1 = gen_new_label();
> +    TCGLabel *lab2 = gen_new_label();
> +    TCGv src1 = tcg_temp_local_new();
> +    TCGv src2 = tcg_temp_local_new();
> +    TCGv src2lo = tcg_temp_local_new();
> +    TCGv src2hi = tcg_temp_local_new();
> +
> +    tcg_gen_andi_tl(src1, cpu_gpr[rA(ctx->opcode)], 0xFF);
> +    tcg_gen_andi_tl(src2, cpu_gpr[rB(ctx->opcode)], 0xFFFFFFFF);

There's no point in this mask, since it's covered by

> +
> +    tcg_gen_andi_tl(src2lo, src2, 0xFF);
> +    tcg_gen_shri_tl(src2hi, src2, 8);
> +    tcg_gen_andi_tl(src2hi, src2hi, 0xFF);

these ones.

> +
> +    tcg_gen_brcond_tl(TCG_COND_GTU, src1, src2hi, lab1);
> +    tcg_gen_brcond_tl(TCG_COND_LTU, src1, src2lo, lab1);
> +    tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 1 << CRF_GT);
> +    tcg_gen_br(lab2);
> +    gen_set_label(lab1);
> +
> +    if (ctx->opcode & 0x00200000) {
> +        tcg_gen_shri_tl(src2hi, src2, 24);
> +        tcg_gen_andi_tl(src2hi, src2hi, 0xFF);
> +        tcg_gen_shri_tl(src2lo, src2, 16);
> +        tcg_gen_andi_tl(src2lo, src2lo, 0xFF);
> +        tcg_gen_brcond_tl(TCG_COND_GTU, src1, src2hi, lab2);
> +        tcg_gen_brcond_tl(TCG_COND_LTU, src1, src2lo, lab2);
> +        tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 1 << CRF_GT);
> +    }
> +    gen_set_label(lab2);
> +    tcg_temp_free(src1);
> +    tcg_temp_free(src2);
> +    tcg_temp_free(src2lo);
> +    tcg_temp_free(src2hi);
> +}

You've forgotten to clear crf in the false case.

This is better implemented without branches, like

   TCGv_i32 src1, src2, src2lo, src2hi;
   TCGv_i32 crf = cpu_crf[crfD(ctx->opcode)];

   // allocate all 4 "src" temps

   tcg_gen_trunc_tl_i32(src1, cpu_gpr[rA(ctx->opcode)]);
   tcg_gen_trunc_tl_i32(src2, cpu_gpr[rB(ctx->opcode)]);

   tcg_gen_ext8u_i32(src2lo, src2);
   tcg_gen_shri_i32(src2, src2, 8);
   tcg_gen_ext8u_i32(src2hi, src2);

   tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1);
   tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi);
   tcg_gen_and_i32(crf, src2lo, src2hi);

   if (ctx->opcode & 0x00200000) {
     tcg_gen_shri_i32(src2, src2, 8);
     tcg_gen_ext8u_i32(src2lo, src2);
     tcg_gen_shri_i32(src2, src2, 8);
     tcg_gen_ext8u_i32(src2hi, src2);
     tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1);
     tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi);
     tcg_gen_and_i32(src2lo, src2lo, src2hi);
     tcg_gen_or_i32(crf, crf, src2lo);
   }

   tcg_gen_shli_i32(crf, crf, CRF_GT);

   // free all 4 "src" temps


r~
Nikunj A. Dadhania July 21, 2016, 8:08 a.m. UTC | #3
Richard Henderson <rth@twiddle.net> writes:

> On 07/12/2016 11:33 PM, Nikunj A Dadhania wrote:
>> +/* cmprb - range comparison: isupper, isaplha, islower*/
>> +static void gen_cmprb(DisasContext *ctx)
>> +{
>> +    TCGLabel *lab1 = gen_new_label();
>> +    TCGLabel *lab2 = gen_new_label();
>> +    TCGv src1 = tcg_temp_local_new();
>> +    TCGv src2 = tcg_temp_local_new();
>> +    TCGv src2lo = tcg_temp_local_new();
>> +    TCGv src2hi = tcg_temp_local_new();
>> +
>> +    tcg_gen_andi_tl(src1, cpu_gpr[rA(ctx->opcode)], 0xFF);
>> +    tcg_gen_andi_tl(src2, cpu_gpr[rB(ctx->opcode)], 0xFFFFFFFF);
>
> There's no point in this mask, since it's covered by
>
>> +
>> +    tcg_gen_andi_tl(src2lo, src2, 0xFF);
>> +    tcg_gen_shri_tl(src2hi, src2, 8);
>> +    tcg_gen_andi_tl(src2hi, src2hi, 0xFF);
>
> these ones.

Right.


>> +
>> +    tcg_gen_brcond_tl(TCG_COND_GTU, src1, src2hi, lab1);
>> +    tcg_gen_brcond_tl(TCG_COND_LTU, src1, src2lo, lab1);
>> +    tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 1 << CRF_GT);
>> +    tcg_gen_br(lab2);
>> +    gen_set_label(lab1);
>> +
>> +    if (ctx->opcode & 0x00200000) {
>> +        tcg_gen_shri_tl(src2hi, src2, 24);
>> +        tcg_gen_andi_tl(src2hi, src2hi, 0xFF);
>> +        tcg_gen_shri_tl(src2lo, src2, 16);
>> +        tcg_gen_andi_tl(src2lo, src2lo, 0xFF);
>> +        tcg_gen_brcond_tl(TCG_COND_GTU, src1, src2hi, lab2);
>> +        tcg_gen_brcond_tl(TCG_COND_LTU, src1, src2lo, lab2);
>> +        tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 1 << CRF_GT);
>> +    }
>> +    gen_set_label(lab2);
>> +    tcg_temp_free(src1);
>> +    tcg_temp_free(src2);
>> +    tcg_temp_free(src2lo);
>> +    tcg_temp_free(src2hi);
>> +}
>
> You've forgotten to clear crf in the false case.

Yes, next version has the fix.

> This is better implemented without branches, like
>
>    TCGv_i32 src1, src2, src2lo, src2hi;
>    TCGv_i32 crf = cpu_crf[crfD(ctx->opcode)];
>
>    // allocate all 4 "src" temps
>
>    tcg_gen_trunc_tl_i32(src1, cpu_gpr[rA(ctx->opcode)]);
>    tcg_gen_trunc_tl_i32(src2, cpu_gpr[rB(ctx->opcode)]);
>
>    tcg_gen_ext8u_i32(src2lo, src2);
>    tcg_gen_shri_i32(src2, src2, 8);
>    tcg_gen_ext8u_i32(src2hi, src2);
>
>    tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1);
>    tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi);
>    tcg_gen_and_i32(crf, src2lo, src2hi);
>
>    if (ctx->opcode & 0x00200000) {
>      tcg_gen_shri_i32(src2, src2, 8);
>      tcg_gen_ext8u_i32(src2lo, src2);
>      tcg_gen_shri_i32(src2, src2, 8);
>      tcg_gen_ext8u_i32(src2hi, src2);
>      tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1);
>      tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi);
>      tcg_gen_and_i32(src2lo, src2lo, src2hi);
>      tcg_gen_or_i32(crf, crf, src2lo);
>    }
>
>    tcg_gen_shli_i32(crf, crf, CRF_GT);
>
>    // free all 4 "src" temps

Sure.

Regards
Nikunj
Nikunj A. Dadhania Aug. 2, 2016, 7:02 a.m. UTC | #4
Richard Henderson <rth@twiddle.net> writes:

> This is better implemented without branches, like
>
>    TCGv_i32 src1, src2, src2lo, src2hi;
>    TCGv_i32 crf = cpu_crf[crfD(ctx->opcode)];
>
>    // allocate all 4 "src" temps
>
>    tcg_gen_trunc_tl_i32(src1, cpu_gpr[rA(ctx->opcode)]);
>    tcg_gen_trunc_tl_i32(src2, cpu_gpr[rB(ctx->opcode)]);
>

Since the user can pass a value larger than 0xFF in RA, src1 must be limited
to RA(56:63). Anton's fuzzer test hit this bug. :-)

GPR06 0x0706050403021101
cmprb   cr5,1,r6,r6

This should be a match (RA = 0x01, RB = 0x03021101), but it fails unless src1 is masked first:

     tcg_gen_andi_i32(src1, src1, 0xFF);

I will send a fix patch; we can probably squash it into the original one.

>    tcg_gen_ext8u_i32(src2lo, src2);
>    tcg_gen_shri_i32(src2, src2, 8);
>    tcg_gen_ext8u_i32(src2hi, src2);
>
>    tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1);
>    tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi);
>    tcg_gen_and_i32(crf, src2lo, src2hi);
>
>    if (ctx->opcode & 0x00200000) {
>      tcg_gen_shri_i32(src2, src2, 8);
>      tcg_gen_ext8u_i32(src2lo, src2);
>      tcg_gen_shri_i32(src2, src2, 8);
>      tcg_gen_ext8u_i32(src2hi, src2);
>      tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1);
>      tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi);
>      tcg_gen_and_i32(src2lo, src2lo, src2hi);
>      tcg_gen_or_i32(crf, crf, src2lo);
>    }
>
>    tcg_gen_shli_i32(crf, crf, CRF_GT);
>
>    // free all 4 "src" temps

Regards
Nikunj
diff mbox

Patch

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 93c7c66..8de217f 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -817,6 +817,45 @@  static void gen_cmpli(DisasContext *ctx)
     }
 }
 
+/* cmprb - range comparison: isupper, isaplha, islower*/
+static void gen_cmprb(DisasContext *ctx)
+{
+    TCGLabel *lab1 = gen_new_label();
+    TCGLabel *lab2 = gen_new_label();
+    TCGv src1 = tcg_temp_local_new();
+    TCGv src2 = tcg_temp_local_new();
+    TCGv src2lo = tcg_temp_local_new();
+    TCGv src2hi = tcg_temp_local_new();
+
+    tcg_gen_andi_tl(src1, cpu_gpr[rA(ctx->opcode)], 0xFF);
+    tcg_gen_andi_tl(src2, cpu_gpr[rB(ctx->opcode)], 0xFFFFFFFF);
+
+    tcg_gen_andi_tl(src2lo, src2, 0xFF);
+    tcg_gen_shri_tl(src2hi, src2, 8);
+    tcg_gen_andi_tl(src2hi, src2hi, 0xFF);
+
+    tcg_gen_brcond_tl(TCG_COND_GTU, src1, src2hi, lab1);
+    tcg_gen_brcond_tl(TCG_COND_LTU, src1, src2lo, lab1);
+    tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 1 << CRF_GT);
+    tcg_gen_br(lab2);
+    gen_set_label(lab1);
+
+    if (ctx->opcode & 0x00200000) {
+        tcg_gen_shri_tl(src2hi, src2, 24);
+        tcg_gen_andi_tl(src2hi, src2hi, 0xFF);
+        tcg_gen_shri_tl(src2lo, src2, 16);
+        tcg_gen_andi_tl(src2lo, src2lo, 0xFF);
+        tcg_gen_brcond_tl(TCG_COND_GTU, src1, src2hi, lab2);
+        tcg_gen_brcond_tl(TCG_COND_LTU, src1, src2lo, lab2);
+        tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 1 << CRF_GT);
+    }
+    gen_set_label(lab2);
+    tcg_temp_free(src1);
+    tcg_temp_free(src2);
+    tcg_temp_free(src2lo);
+    tcg_temp_free(src2hi);
+}
+
 /* isel (PowerPC 2.03 specification) */
 static void gen_isel(DisasContext *ctx)
 {
@@ -9898,6 +9937,7 @@  GEN_HANDLER(cmpi, 0x0B, 0xFF, 0xFF, 0x00400000, PPC_INTEGER),
 GEN_HANDLER(cmpl, 0x1F, 0x00, 0x01, 0x00400000, PPC_INTEGER),
 GEN_HANDLER(cmpli, 0x0A, 0xFF, 0xFF, 0x00400000, PPC_INTEGER),
 GEN_HANDLER_E(cmpb, 0x1F, 0x1C, 0x0F, 0x00000001, PPC_NONE, PPC2_ISA205),
+GEN_HANDLER_E(cmprb, 0x1F, 0x00, 0x06, 0x00400001, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER(isel, 0x1F, 0x0F, 0xFF, 0x00000001, PPC_ISEL),
 GEN_HANDLER(addi, 0x0E, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
 GEN_HANDLER(addic, 0x0C, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),