diff mbox series

[for-9.1,04/19] target/i386: do not use s->tmp0 and s->tmp4 to compute flags

Message ID 20240409164323.776660-5-pbonzini@redhat.com (mailing list archive)
State New, archived
Headers show
Series target/i386: convert 1-byte opcodes to new decoder | expand

Commit Message

Paolo Bonzini April 9, 2024, 4:43 p.m. UTC
Create a new temporary whenever flags have to use one, instead of using
s->tmp0 or s->tmp4.  NULL can now be passed as the scratch register
to gen_prepare_*.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/tcg/translate.c | 54 +++++++++++++++++++++----------------
 1 file changed, 31 insertions(+), 23 deletions(-)

Comments

Richard Henderson April 10, 2024, 6:34 a.m. UTC | #1
On 4/9/24 06:43, Paolo Bonzini wrote:
> Create a new temporary whenever flags have to use one, instead of using
> s->tmp0 or s->tmp4.  NULL can now be passed as the scratch register
> to gen_prepare_*.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>   target/i386/tcg/translate.c | 54 +++++++++++++++++++++----------------
>   1 file changed, 31 insertions(+), 23 deletions(-)
> 
> diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
> index 197cccb6c96..debc1b27283 100644
> --- a/target/i386/tcg/translate.c
> +++ b/target/i386/tcg/translate.c
> @@ -947,9 +947,9 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
>       case CC_OP_SUBB ... CC_OP_SUBQ:
>           /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
>           size = s->cc_op - CC_OP_SUBB;
> -        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
> -        /* If no temporary was used, be careful not to alias t1 and t0.  */
> -        t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
> +        /* Be careful not to alias t1 and t0.  */
> +        t1 = gen_ext_tl(NULL, cpu_cc_src, size, false);
> +        t0 = (reg == t1 || !reg) ? tcg_temp_new() : reg;
>           tcg_gen_mov_tl(t0, s->cc_srcT);
>           gen_extu(size, t0);

The tcg_temp_new, mov, and extu can be had with gen_ext_tl...

>           goto add_sub;
> @@ -957,8 +957,9 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
>       case CC_OP_ADDB ... CC_OP_ADDQ:
>           /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
>           size = s->cc_op - CC_OP_ADDB;
> -        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
> -        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
> +        /* Be careful not to alias t1 and t0.  */
> +        t1 = gen_ext_tl(NULL, cpu_cc_src, size, false);
> +        t0 = gen_ext_tl(reg == t1 ? NULL : reg, cpu_cc_dst, size, false);

... like this.

It would be helpful to update the function comments (nothing is 'compute ... to reg' in 
these functions).  Future cleanup, perhaps rename 'reg' to 'scratch', or remove the 
argument entirely where applicable.

> @@ -1109,11 +1113,13 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
>           size = s->cc_op - CC_OP_SUBB;
>           switch (jcc_op) {
>           case JCC_BE:
> -            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
> -            gen_extu(size, s->tmp4);
> -            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
> -            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
> -                               .reg2 = t0, .use_reg2 = true };
> +            /* Be careful not to alias t1 and t0.  */
> +            t1 = gen_ext_tl(NULL, cpu_cc_src, size, false);
> +            t0 = (reg == t1 || !reg) ? tcg_temp_new() : reg;
> +            tcg_gen_mov_tl(t0, s->cc_srcT);
> +            gen_extu(size, t0);

gen_ext_tl

> +            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = t0,
> +                               .reg2 = t1, .use_reg2 = true };
>               break;
>   
>           case JCC_L:
> @@ -1122,11 +1128,13 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
>           case JCC_LE:
>               cond = TCG_COND_LE;
>           fast_jcc_l:
> -            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
> -            gen_exts(size, s->tmp4);
> -            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
> -            cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
> -                               .reg2 = t0, .use_reg2 = true };
> +            /* Be careful not to alias t1 and t0.  */
> +            t1 = gen_ext_tl(NULL, cpu_cc_src, size, true);
> +            t0 = (reg == t1 || !reg) ? tcg_temp_new() : reg;
> +            tcg_gen_mov_tl(t0, s->cc_srcT);
> +            gen_exts(size, t0);

gen_ext_tl

With that,
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~
Paolo Bonzini April 10, 2024, 6:33 p.m. UTC | #2
Il mer 10 apr 2024, 08:35 Richard Henderson <richard.henderson@linaro.org>
ha scritto:

> On 4/9/24 06:43, Paolo Bonzini wrote:
> > Create a new temporary whenever flags have to use one, instead of using
> > s->tmp0 or s->tmp4.  NULL can now be passed as the scratch register
> > to gen_prepare_*.
> >
> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > ---
> >   target/i386/tcg/translate.c | 54 +++++++++++++++++++++----------------
> >   1 file changed, 31 insertions(+), 23 deletions(-)
> >
> > diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
> > index 197cccb6c96..debc1b27283 100644
> > --- a/target/i386/tcg/translate.c
> > +++ b/target/i386/tcg/translate.c
> > @@ -947,9 +947,9 @@ static CCPrepare gen_prepare_eflags_c(DisasContext
> *s, TCGv reg)
> >       case CC_OP_SUBB ... CC_OP_SUBQ:
> >           /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
> >           size = s->cc_op - CC_OP_SUBB;
> > -        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
> > -        /* If no temporary was used, be careful not to alias t1 and
> t0.  */
> > -        t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
> > +        /* Be careful not to alias t1 and t0.  */
> > +        t1 = gen_ext_tl(NULL, cpu_cc_src, size, false);
> > +        t0 = (reg == t1 || !reg) ? tcg_temp_new() : reg;
> >           tcg_gen_mov_tl(t0, s->cc_srcT);
> >           gen_extu(size, t0);
>
> The tcg_temp_new, mov, and extu can be had with gen_ext_tl...
>

There's actually a lot more that can be done now that I looked more closely
at gen_ext_tl. It is fine (modulo bugs elsewhere) to just extend cc_* in
place. In fact this lets the optimizer work better, even allows (rare)
cross tb optimization because it effectively bumps CC_OP_ADD* to
target_long size, and is just as effective in removing tmp0/tmp4.

Paolo


> >           goto add_sub;
> > @@ -957,8 +957,9 @@ static CCPrepare gen_prepare_eflags_c(DisasContext
> *s, TCGv reg)
> >       case CC_OP_ADDB ... CC_OP_ADDQ:
> >           /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
> >           size = s->cc_op - CC_OP_ADDB;
> > -        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
> > -        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
> > +        /* Be careful not to alias t1 and t0.  */
> > +        t1 = gen_ext_tl(NULL, cpu_cc_src, size, false);
> > +        t0 = gen_ext_tl(reg == t1 ? NULL : reg, cpu_cc_dst, size,
> false);
>
> ... like this.
>
> It would be helpful to update the function comments (nothing is 'compute
> ... to reg' in
> these functions).  Future cleanup, perhaps rename 'reg' to 'scratch', or
> remove the
> argument entirely where applicable.
>
> > @@ -1109,11 +1113,13 @@ static CCPrepare gen_prepare_cc(DisasContext *s,
> int b, TCGv reg)
> >           size = s->cc_op - CC_OP_SUBB;
> >           switch (jcc_op) {
> >           case JCC_BE:
> > -            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
> > -            gen_extu(size, s->tmp4);
> > -            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
> > -            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
> > -                               .reg2 = t0, .use_reg2 = true };
> > +            /* Be careful not to alias t1 and t0.  */
> > +            t1 = gen_ext_tl(NULL, cpu_cc_src, size, false);
> > +            t0 = (reg == t1 || !reg) ? tcg_temp_new() : reg;
> > +            tcg_gen_mov_tl(t0, s->cc_srcT);
> > +            gen_extu(size, t0);
>
> gen_ext_tl
>
> > +            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = t0,
> > +                               .reg2 = t1, .use_reg2 = true };
> >               break;
> >
> >           case JCC_L:
> > @@ -1122,11 +1128,13 @@ static CCPrepare gen_prepare_cc(DisasContext *s,
> int b, TCGv reg)
> >           case JCC_LE:
> >               cond = TCG_COND_LE;
> >           fast_jcc_l:
> > -            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
> > -            gen_exts(size, s->tmp4);
> > -            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
> > -            cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
> > -                               .reg2 = t0, .use_reg2 = true };
> > +            /* Be careful not to alias t1 and t0.  */
> > +            t1 = gen_ext_tl(NULL, cpu_cc_src, size, true);
> > +            t0 = (reg == t1 || !reg) ? tcg_temp_new() : reg;
> > +            tcg_gen_mov_tl(t0, s->cc_srcT);
> > +            gen_exts(size, t0);
>
> gen_ext_tl
>
> With that,
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>
>
> r~
>
>
diff mbox series

Patch

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 197cccb6c96..debc1b27283 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -947,9 +947,9 @@  static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
     case CC_OP_SUBB ... CC_OP_SUBQ:
         /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
         size = s->cc_op - CC_OP_SUBB;
-        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
-        /* If no temporary was used, be careful not to alias t1 and t0.  */
-        t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
+        /* Be careful not to alias t1 and t0.  */
+        t1 = gen_ext_tl(NULL, cpu_cc_src, size, false);
+        t0 = (reg == t1 || !reg) ? tcg_temp_new() : reg;
         tcg_gen_mov_tl(t0, s->cc_srcT);
         gen_extu(size, t0);
         goto add_sub;
@@ -957,8 +957,9 @@  static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
     case CC_OP_ADDB ... CC_OP_ADDQ:
         /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
         size = s->cc_op - CC_OP_ADDB;
-        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
-        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
+        /* Be careful not to alias t1 and t0.  */
+        t1 = gen_ext_tl(NULL, cpu_cc_src, size, false);
+        t0 = gen_ext_tl(reg == t1 ? NULL : reg, cpu_cc_dst, size, false);
     add_sub:
         return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                              .reg2 = t1, .use_reg2 = true };
@@ -1002,6 +1003,9 @@  static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
        /* The need to compute only C from CC_OP_DYNAMIC is important
           in efficiently implementing e.g. INC at the start of a TB.  */
        gen_update_cc_op(s);
+       if (!reg) {
+           reg = tcg_temp_new();
+       }
        gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                                cpu_cc_src2, cpu_cc_op);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
@@ -1098,7 +1102,7 @@  static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
     int inv, jcc_op, cond;
     MemOp size;
     CCPrepare cc;
-    TCGv t0;
+    TCGv t0, t1;
 
     inv = b & 1;
     jcc_op = (b >> 1) & 7;
@@ -1109,11 +1113,13 @@  static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
         size = s->cc_op - CC_OP_SUBB;
         switch (jcc_op) {
         case JCC_BE:
-            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
-            gen_extu(size, s->tmp4);
-            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
-            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
-                               .reg2 = t0, .use_reg2 = true };
+            /* Be careful not to alias t1 and t0.  */
+            t1 = gen_ext_tl(NULL, cpu_cc_src, size, false);
+            t0 = (reg == t1 || !reg) ? tcg_temp_new() : reg;
+            tcg_gen_mov_tl(t0, s->cc_srcT);
+            gen_extu(size, t0);
+            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = t0,
+                               .reg2 = t1, .use_reg2 = true };
             break;
 
         case JCC_L:
@@ -1122,11 +1128,13 @@  static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
         case JCC_LE:
             cond = TCG_COND_LE;
         fast_jcc_l:
-            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
-            gen_exts(size, s->tmp4);
-            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
-            cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
-                               .reg2 = t0, .use_reg2 = true };
+            /* Be careful not to alias t1 and t0.  */
+            t1 = gen_ext_tl(NULL, cpu_cc_src, size, true);
+            t0 = (reg == t1 || !reg) ? tcg_temp_new() : reg;
+            tcg_gen_mov_tl(t0, s->cc_srcT);
+            gen_exts(size, t0);
+            cc = (CCPrepare) { .cond = cond, .reg = t0,
+                               .reg2 = t1, .use_reg2 = true };
             break;
 
         default:
@@ -1160,8 +1168,8 @@  static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
             break;
         case JCC_L:
             gen_compute_eflags(s);
-            if (reg == cpu_cc_src) {
-                reg = s->tmp0;
+            if (reg == cpu_cc_src || !reg) {
+                reg = tcg_temp_new();
             }
             tcg_gen_addi_tl(reg, cpu_cc_src, CC_O - CC_S);
             cc = (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = reg,
@@ -1170,8 +1178,8 @@  static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
         default:
         case JCC_LE:
             gen_compute_eflags(s);
-            if (reg == cpu_cc_src) {
-                reg = s->tmp0;
+            if (reg == cpu_cc_src || !reg) {
+                reg = tcg_temp_new();
             }
             tcg_gen_addi_tl(reg, cpu_cc_src, CC_O - CC_S);
             cc = (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = reg,
@@ -1216,7 +1224,7 @@  static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
    value 'b'. In the fast case, T0 is guaranteed not to be used. */
 static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
 {
-    CCPrepare cc = gen_prepare_cc(s, b, s->T0);
+    CCPrepare cc = gen_prepare_cc(s, b, NULL);
 
     if (cc.use_reg2) {
         tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
@@ -1230,7 +1238,7 @@  static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
    A translation block must end soon.  */
 static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
 {
-    CCPrepare cc = gen_prepare_cc(s, b, s->T0);
+    CCPrepare cc = gen_prepare_cc(s, b, NULL);
 
     gen_update_cc_op(s);
     set_cc_op(s, CC_OP_DYNAMIC);
@@ -2495,7 +2503,7 @@  static void gen_jcc(DisasContext *s, int b, int diff)
 
 static void gen_cmovcc1(DisasContext *s, int b, TCGv dest, TCGv src)
 {
-    CCPrepare cc = gen_prepare_cc(s, b, s->T1);
+    CCPrepare cc = gen_prepare_cc(s, b, NULL);
 
     if (!cc.use_reg2) {
         cc.reg2 = tcg_constant_tl(cc.imm);