diff mbox

[11/15] tcg-mips: Use mips64r6 instructions in tcg_out_movi

Message ID 1455014403-10742-12-git-send-email-rth@twiddle.net (mailing list archive)
State New, archived
Headers show

Commit Message

Richard Henderson Feb. 9, 2016, 10:39 a.m. UTC
The DAHI and DATI instructions can eliminate two insns
off the pre-r6 path.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/mips/tcg-target.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

Comments

James Hogan Feb. 9, 2016, 4:50 p.m. UTC | #1
Hi Richard,

On Tue, Feb 09, 2016 at 09:39:59PM +1100, Richard Henderson wrote:
> The DAHI and DATI instructions can eliminate two insns
> off the pre-r6 path.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/mips/tcg-target.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 63 insertions(+)
> 
> diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
> index 97f9251..f7f4331 100644
> --- a/tcg/mips/tcg-target.c
> +++ b/tcg/mips/tcg-target.c
> @@ -303,7 +303,9 @@ typedef enum {
>      OPC_ORI      = 015 << 26,
>      OPC_XORI     = 016 << 26,
>      OPC_LUI      = 017 << 26,
> +    OPC_AUI      = OPC_LUI,
>      OPC_DADDIU   = 031 << 26,
> +    OPC_DAUI     = 035 << 26,
>      OPC_LB       = 040 << 26,
>      OPC_LH       = 041 << 26,
>      OPC_LW       = 043 << 26,
> @@ -383,6 +385,8 @@ typedef enum {
>      OPC_REGIMM   = 001 << 26,
>      OPC_BLTZ     = OPC_REGIMM | (000 << 16),
>      OPC_BGEZ     = OPC_REGIMM | (001 << 16),
> +    OPC_DAHI     = OPC_REGIMM | (006 << 16),
> +    OPC_DATI     = OPC_REGIMM | (036 << 16),
>  
>      OPC_SPECIAL2 = 034 << 26,
>      OPC_MUL_R5   = OPC_SPECIAL2 | 002,
> @@ -402,6 +406,10 @@ typedef enum {
>      OPC_SEB      = OPC_SPECIAL3 | 02040,
>      OPC_SEH      = OPC_SPECIAL3 | 03040,
>  
> +    OPC_PCREL    = 073 << 26,
> +    OPC_ADDIUPC  = OPC_PCREL | (0 << 19),
> +    OPC_ALUIPC   = OPC_PCREL | (3 << 19) | (7 << 16),
> +
>      /* MIPS r6 doesn't have JR, JALR should be used instead */
>      OPC_JR       = use_mips32r6_instructions ? OPC_JALR : OPC_JR_R5,
>  
> @@ -448,6 +456,17 @@ static inline void tcg_out_opc_imm(TCGContext *s, MIPSInsn opc,
>      tcg_out32(s, inst);
>  }
>  
> +static inline void tcg_out_opc_pc19(TCGContext *s, MIPSInsn opc,
> +                                    TCGReg rs, TCGArg imm)
> +{
> +    int32_t inst;
> +
> +    inst = opc;
> +    inst |= (rs & 0x1F) << 21;
> +    inst |= (imm & 0x7ffff);
> +    tcg_out32(s, inst);
> +}
> +
>  /*
>   * Type bitfield
>   */
> @@ -589,6 +608,50 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
>      }
>      if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
>          tcg_out_opc_imm(s, OPC_LUI, ret, TCG_REG_ZERO, arg >> 16);
> +    } else if (use_mips32r6_instructions) {
> +        tcg_target_long disp = arg - (intptr_t)s->code_ptr;
> +        if (disp == sextract32(disp, 2, 19) * 4) {
> +            tcg_out_opc_pc19(s, OPC_ADDIUPC, ret, disp >> 2);
> +            return;
> +        } else if ((disp & ~(tcg_target_long)0xffff)
> +                   == sextract32(disp, 16, 16) * 0x10000) {
> +            tcg_out_opc_imm(s, OPC_ALUIPC, ret, 0, disp >> 16);

I think ret and 0 are the wrong way around here. You're putting 0 in rs
(the destination register), which causes a seg fault.

OUT: [size=56] 
0xfff30b0064:  lw       s1,-8(s0) 
0xfff30b0068:  bnezalc  zero,s1,0xfff30b0090 
0xfff30b006c:  nop 
0xfff30b0070:  j        0xfff0000000 
0xfff30b0074:  nop 
0xfff30b0078:  lui      s1,0xbfc0 
0xfff30b007c:  ori      s1,s1,0x580 
0xfff30b0080:  sd       s1,256(s0) 
0xfff30b0084:  aluipc   zero,0xfeb7 
0xfff30b0088:  j        0xfff30b0034 
0xfff30b008c:  ori      v0,v0,0x4010 
0xfff30b0090:  aluipc   zero,0xfeb7 
0xfff30b0094:  j        0xfff30b0034 
0xfff30b0098:  ori      v0,v0,0x4013

Cheers
James



> +        } else {
> +            TCGReg in = TCG_REG_ZERO;
> +            tcg_target_long tmp = (int16_t)arg;
> +
> +            /* The R6 manual recommends construction of immediates in
> +               order of low to high (ADDI, AUI, DAHI, DATI) in order
> +               to simplify hardware recognizing these sequences.  */
> +
> +            if (tmp) {
> +                tcg_out_opc_imm(s, OPC_ADDIU, ret, in, tmp);
> +                in = ret;
> +            }
> +            arg = (arg - tmp) >> 16;
> +            tmp = (int16_t)arg;
> +
> +            /* Note that DAHI and DATI only have one register operand,
> +               and thus we must put a zero low part in place.  Also
> +               note that we already eliminated simple 32-bit constants
> +               so we know this must happen.  */
> +            if (tmp || in != ret) {
> +                tcg_out_opc_imm(s, OPC_AUI, ret, in, tmp);
> +            }
> +            arg = (arg - tmp) >> 16;
> +            tmp = (int16_t)arg;
> +
> +            if (tmp) {
> +                tcg_out_opc_imm(s, OPC_DAHI, ret, 0, tmp);
> +            }
> +            arg = (arg - tmp) >> 16;
> +            tcg_debug_assert(arg == (int16_t)arg);
> +
> +            if (arg) {
> +                tcg_out_opc_imm(s, OPC_DATI, ret, 0, arg);
> +            }
> +            return;
> +        }
>      } else {
>          tcg_out_movi(s, TCG_TYPE_I32, ret, arg >> 31 >> 1);
>          if (arg & 0xffff0000ull) {
> -- 
> 2.5.0
>
Richard Henderson Feb. 9, 2016, 5:20 p.m. UTC | #2
On 02/10/2016 03:50 AM, James Hogan wrote:
>> +        } else if ((disp & ~(tcg_target_long)0xffff)
>> +                   == sextract32(disp, 16, 16) * 0x10000) {
>> +            tcg_out_opc_imm(s, OPC_ALUIPC, ret, 0, disp >> 16);
>
> I think ret and 0 are the wrong way around here. You're putting 0 in rs
> (the destination register), which causes a seg fault.

Yep, thanks.


r~
Richard Henderson Feb. 9, 2016, 5:25 p.m. UTC | #3
On 02/10/2016 03:50 AM, James Hogan wrote:
> I think ret and 0 are the wrong way around here. You're putting 0 in rs
> (the destination register), which causes a seg fault.
>
> OUT: [size=56]
> 0xfff30b0064:  lw       s1,-8(s0)
> 0xfff30b0068:  bnezalc  zero,s1,0xfff30b0090
> 0xfff30b006c:  nop
> 0xfff30b0070:  j        0xfff0000000
> 0xfff30b0074:  nop
> 0xfff30b0078:  lui      s1,0xbfc0
> 0xfff30b007c:  ori      s1,s1,0x580
> 0xfff30b0080:  sd       s1,256(s0)
> 0xfff30b0084:  aluipc   zero,0xfeb7
> 0xfff30b0088:  j        0xfff30b0034
> 0xfff30b008c:  ori      v0,v0,0x4010
> 0xfff30b0090:  aluipc   zero,0xfeb7
> 0xfff30b0094:  j        0xfff30b0034
> 0xfff30b0098:  ori      v0,v0,0x4013
>
> Cheers
> James
>
>
>
>> +        } else {
>> +            TCGReg in = TCG_REG_ZERO;
>> +            tcg_target_long tmp = (int16_t)arg;
>> +
>> +            /* The R6 manual recommends construction of immediates in
>> +               order of low to high (ADDI, AUI, DAHI, DATI) in order
>> +               to simplify hardware recognizing these sequences.  */
>> +
>> +            if (tmp) {
>> +                tcg_out_opc_imm(s, OPC_ADDIU, ret, in, tmp);
>> +                in = ret;
>> +            }
>> +            arg = (arg - tmp) >> 16;
>> +            tmp = (int16_t)arg;
>> +
>> +            /* Note that DAHI and DATI only have one register operand,
>> +               and thus we must put a zero low part in place.  Also
>> +               note that we already eliminated simple 32-bit constants
>> +               so we know this must happen.  */
>> +            if (tmp || in != ret) {
>> +                tcg_out_opc_imm(s, OPC_AUI, ret, in, tmp);
>> +            }
>> +            arg = (arg - tmp) >> 16;
>> +            tmp = (int16_t)arg;
>> +
>> +            if (tmp) {
>> +                tcg_out_opc_imm(s, OPC_DAHI, ret, 0, tmp);
>> +            }
>> +            arg = (arg - tmp) >> 16;
>> +            tcg_debug_assert(arg == (int16_t)arg);
>> +
>> +            if (arg) {
>> +                tcg_out_opc_imm(s, OPC_DATI, ret, 0, arg);

Same mistake here for DAHI/DATI.


r~
diff mbox

Patch

diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index 97f9251..f7f4331 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -303,7 +303,9 @@  typedef enum {
     OPC_ORI      = 015 << 26,
     OPC_XORI     = 016 << 26,
     OPC_LUI      = 017 << 26,
+    OPC_AUI      = OPC_LUI,
     OPC_DADDIU   = 031 << 26,
+    OPC_DAUI     = 035 << 26,
     OPC_LB       = 040 << 26,
     OPC_LH       = 041 << 26,
     OPC_LW       = 043 << 26,
@@ -383,6 +385,8 @@  typedef enum {
     OPC_REGIMM   = 001 << 26,
     OPC_BLTZ     = OPC_REGIMM | (000 << 16),
     OPC_BGEZ     = OPC_REGIMM | (001 << 16),
+    OPC_DAHI     = OPC_REGIMM | (006 << 16),
+    OPC_DATI     = OPC_REGIMM | (036 << 16),
 
     OPC_SPECIAL2 = 034 << 26,
     OPC_MUL_R5   = OPC_SPECIAL2 | 002,
@@ -402,6 +406,10 @@  typedef enum {
     OPC_SEB      = OPC_SPECIAL3 | 02040,
     OPC_SEH      = OPC_SPECIAL3 | 03040,
 
+    OPC_PCREL    = 073 << 26,
+    OPC_ADDIUPC  = OPC_PCREL | (0 << 19),
+    OPC_ALUIPC   = OPC_PCREL | (3 << 19) | (7 << 16),
+
     /* MIPS r6 doesn't have JR, JALR should be used instead */
     OPC_JR       = use_mips32r6_instructions ? OPC_JALR : OPC_JR_R5,
 
@@ -448,6 +456,17 @@  static inline void tcg_out_opc_imm(TCGContext *s, MIPSInsn opc,
     tcg_out32(s, inst);
 }
 
+static inline void tcg_out_opc_pc19(TCGContext *s, MIPSInsn opc,
+                                    TCGReg rs, TCGArg imm)
+{
+    int32_t inst;
+
+    inst = opc;
+    inst |= (rs & 0x1F) << 21;
+    inst |= (imm & 0x7ffff);
+    tcg_out32(s, inst);
+}
+
 /*
  * Type bitfield
  */
@@ -589,6 +608,50 @@  static void tcg_out_movi(TCGContext *s, TCGType type,
     }
     if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
         tcg_out_opc_imm(s, OPC_LUI, ret, TCG_REG_ZERO, arg >> 16);
+    } else if (use_mips32r6_instructions) {
+        tcg_target_long disp = arg - (intptr_t)s->code_ptr;
+        if (disp == sextract32(disp, 2, 19) * 4) {
+            tcg_out_opc_pc19(s, OPC_ADDIUPC, ret, disp >> 2);
+            return;
+        } else if ((disp & ~(tcg_target_long)0xffff)
+                   == sextract32(disp, 16, 16) * 0x10000) {
+            tcg_out_opc_imm(s, OPC_ALUIPC, ret, 0, disp >> 16);
+        } else {
+            TCGReg in = TCG_REG_ZERO;
+            tcg_target_long tmp = (int16_t)arg;
+
+            /* The R6 manual recommends construction of immediates in
+               order of low to high (ADDI, AUI, DAHI, DATI) in order
+               to simplify hardware recognizing these sequences.  */
+
+            if (tmp) {
+                tcg_out_opc_imm(s, OPC_ADDIU, ret, in, tmp);
+                in = ret;
+            }
+            arg = (arg - tmp) >> 16;
+            tmp = (int16_t)arg;
+
+            /* Note that DAHI and DATI only have one register operand,
+               and thus we must put a zero low part in place.  Also
+               note that we already eliminated simple 32-bit constants
+               so we know this must happen.  */
+            if (tmp || in != ret) {
+                tcg_out_opc_imm(s, OPC_AUI, ret, in, tmp);
+            }
+            arg = (arg - tmp) >> 16;
+            tmp = (int16_t)arg;
+
+            if (tmp) {
+                tcg_out_opc_imm(s, OPC_DAHI, ret, 0, tmp);
+            }
+            arg = (arg - tmp) >> 16;
+            tcg_debug_assert(arg == (int16_t)arg);
+
+            if (arg) {
+                tcg_out_opc_imm(s, OPC_DATI, ret, 0, arg);
+            }
+            return;
+        }
     } else {
         tcg_out_movi(s, TCG_TYPE_I32, ret, arg >> 31 >> 1);
         if (arg & 0xffff0000ull) {