[09/16] tcg: Use per-temp state data in liveness
diff mbox

Message ID 20170621024831.26019-10-rth@twiddle.net
State New
Headers show

Commit Message

Richard Henderson June 21, 2017, 2:48 a.m. UTC
This avoids having to allocate external memory for each temporary.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/tcg.c | 232 ++++++++++++++++++++++++++++++--------------------------------
 tcg/tcg.h |   6 ++
 2 files changed, 120 insertions(+), 118 deletions(-)

Comments

Alex Bennée June 27, 2017, 8:57 a.m. UTC | #1
Richard Henderson <rth@twiddle.net> writes:

> This avoids having to allocate external memory for each temporary.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/tcg.c | 232 ++++++++++++++++++++++++++++++--------------------------------
>  tcg/tcg.h |   6 ++
>  2 files changed, 120 insertions(+), 118 deletions(-)
>
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index 0d758e4..e78140b 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -1399,42 +1399,54 @@ TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
>
>  /* liveness analysis: end of function: all temps are dead, and globals
>     should be in memory. */
> -static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state)
> +static void tcg_la_func_end(TCGContext *s)
>  {
> -    memset(temp_state, TS_DEAD | TS_MEM, s->nb_globals);
> -    memset(temp_state + s->nb_globals, TS_DEAD, s->nb_temps - s->nb_globals);
> +    int ng = s->nb_globals;
> +    int nt = s->nb_temps;
> +    int i;
> +
> +    for (i = 0; i < ng; ++i) {
> +        s->temps[i].state = TS_DEAD | TS_MEM;
> +    }
> +    for (i = ng; i < nt; ++i) {
> +        s->temps[i].state = TS_DEAD;
> +    }
>  }
>
>  /* liveness analysis: end of basic block: all temps are dead, globals
>     and local temps should be in memory. */
> -static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state)
> +static void tcg_la_bb_end(TCGContext *s)
>  {
> -    int i, n;
> +    int ng = s->nb_globals;
> +    int nt = s->nb_temps;
> +    int i;
>
> -    tcg_la_func_end(s, temp_state);
> -    for (i = s->nb_globals, n = s->nb_temps; i < n; i++) {
> -        if (s->temps[i].temp_local) {
> -            temp_state[i] |= TS_MEM;
> -        }
> +    for (i = 0; i < ng; ++i) {
> +        s->temps[i].state = TS_DEAD | TS_MEM;
> +    }
> +    for (i = ng; i < nt; ++i) {
> +        s->temps[i].state = (s->temps[i].temp_local
> +                             ? TS_DEAD | TS_MEM
> +                             : TS_DEAD);
>      }
>  }
>
>  /* Liveness analysis : update the opc_arg_life array to tell if a
>     given input arguments is dead. Instructions updating dead
>     temporaries are removed. */
> -static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
> +static void liveness_pass_1(TCGContext *s)
>  {
>      int nb_globals = s->nb_globals;
>      int oi, oi_prev;
>
> -    tcg_la_func_end(s, temp_state);
> +    tcg_la_func_end(s);
>
>      for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
>          int i, nb_iargs, nb_oargs;
>          TCGOpcode opc_new, opc_new2;
>          bool have_opc_new2;
>          TCGLifeData arg_life = 0;
> -        TCGArg arg;
> +        TCGTemp *arg_ts;
>
>          TCGOp * const op = &s->gen_op_buf[oi];
>          TCGOpcode opc = op->opc;
> @@ -1454,8 +1466,8 @@ static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
>                  /* pure functions can be removed if their result is unused */
>                  if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
>                      for (i = 0; i < nb_oargs; i++) {
> -                        arg = op->args[i];
> -                        if (temp_state[arg] != TS_DEAD) {
> +                        arg_ts = arg_temp(op->args[i]);
> +                        if (arg_ts->state != TS_DEAD) {
>                              goto do_not_remove_call;
>                          }
>                      }
> @@ -1465,41 +1477,41 @@ static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
>
>                      /* output args are dead */
>                      for (i = 0; i < nb_oargs; i++) {
> -                        arg = op->args[i];
> -                        if (temp_state[arg] & TS_DEAD) {
> +                        arg_ts = arg_temp(op->args[i]);
> +                        if (arg_ts->state & TS_DEAD) {
>                              arg_life |= DEAD_ARG << i;
>                          }
> -                        if (temp_state[arg] & TS_MEM) {
> +                        if (arg_ts->state & TS_MEM) {
>                              arg_life |= SYNC_ARG << i;
>                          }
> -                        temp_state[arg] = TS_DEAD;
> +                        arg_ts->state = TS_DEAD;
>                      }
>
>                      if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
>                                          TCG_CALL_NO_READ_GLOBALS))) {
>                          /* globals should go back to memory */
> -                        memset(temp_state, TS_DEAD | TS_MEM, nb_globals);
> +                        for (i = 0; i < nb_globals; i++) {
> +                            s->temps[i].state = TS_DEAD | TS_MEM;
> +                        }
>                      } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
>                          /* globals should be synced to memory */
>                          for (i = 0; i < nb_globals; i++) {
> -                            temp_state[i] |= TS_MEM;
> +                            s->temps[i].state |= TS_MEM;
>                          }
>                      }
>
>                      /* record arguments that die in this helper */
>                      for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
> -                        arg = op->args[i];
> -                        if (arg != TCG_CALL_DUMMY_ARG) {
> -                            if (temp_state[arg] & TS_DEAD) {
> -                                arg_life |= DEAD_ARG << i;
> -                            }
> +                        arg_ts = arg_temp(op->args[i]);
> +                        if (arg_ts && arg_ts->state & TS_DEAD) {
> +                            arg_life |= DEAD_ARG << i;
>                          }
>                      }
>                      /* input arguments are live for preceding opcodes */
>                      for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
> -                        arg = op->args[i];
> -                        if (arg != TCG_CALL_DUMMY_ARG) {
> -                            temp_state[arg] &= ~TS_DEAD;
> +                        arg_ts = arg_temp(op->args[i]);
> +                        if (arg_ts) {
> +                            arg_ts->state &= ~TS_DEAD;
>                          }
>                      }
>                  }
> @@ -1509,7 +1521,7 @@ static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
>              break;
>          case INDEX_op_discard:
>              /* mark the temporary as dead */
> -            temp_state[op->args[0]] = TS_DEAD;
> +            arg_temp(op->args[0])->state = TS_DEAD;
>              break;
>
>          case INDEX_op_add2_i32:
> @@ -1530,8 +1542,8 @@ static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
>                 the low part.  The result can be optimized to a simple
>                 add or sub.  This happens often for x86_64 guest when the
>                 cpu mode is set to 32 bit.  */
> -            if (temp_state[op->args[1]] == TS_DEAD) {
> -                if (temp_state[op->args[0]] == TS_DEAD) {
> +            if (arg_temp(op->args[1])->state == TS_DEAD) {
> +                if (arg_temp(op->args[0])->state == TS_DEAD) {
>                      goto do_remove;
>                  }
>                  /* Replace the opcode and adjust the args in place,
> @@ -1568,8 +1580,8 @@ static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
>          do_mul2:
>              nb_iargs = 2;
>              nb_oargs = 2;
> -            if (temp_state[op->args[1]] == TS_DEAD) {
> -                if (temp_state[op->args[0]] == TS_DEAD) {
> +            if (arg_temp(op->args[1])->state == TS_DEAD) {
> +                if (arg_temp(op->args[0])->state == TS_DEAD) {
>                      /* Both parts of the operation are dead.  */
>                      goto do_remove;
>                  }
> @@ -1577,7 +1589,7 @@ static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
>                  op->opc = opc = opc_new;
>                  op->args[1] = op->args[2];
>                  op->args[2] = op->args[3];
> -            } else if (temp_state[op->args[0]] == TS_DEAD && have_opc_new2) {
> +            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
>                  /* The low part of the operation is dead; generate the high. */
>                  op->opc = opc = opc_new2;
>                  op->args[0] = op->args[1];
> @@ -1600,7 +1612,7 @@ static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
>                 implies side effects */
>              if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
>                  for (i = 0; i < nb_oargs; i++) {
> -                    if (temp_state[op->args[i]] != TS_DEAD) {
> +                    if (arg_temp(op->args[i])->state != TS_DEAD) {
>                          goto do_not_remove;
>                      }
>                  }
> @@ -1610,36 +1622,36 @@ static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
>              do_not_remove:
>                  /* output args are dead */
>                  for (i = 0; i < nb_oargs; i++) {
> -                    arg = op->args[i];
> -                    if (temp_state[arg] & TS_DEAD) {
> +                    arg_ts = arg_temp(op->args[i]);
> +                    if (arg_ts->state & TS_DEAD) {
>                          arg_life |= DEAD_ARG << i;
>                      }
> -                    if (temp_state[arg] & TS_MEM) {
> +                    if (arg_ts->state & TS_MEM) {
>                          arg_life |= SYNC_ARG << i;
>                      }
> -                    temp_state[arg] = TS_DEAD;
> +                    arg_ts->state = TS_DEAD;
>                  }
>
>                  /* if end of basic block, update */
>                  if (def->flags & TCG_OPF_BB_END) {
> -                    tcg_la_bb_end(s, temp_state);
> +                    tcg_la_bb_end(s);
>                  } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
>                      /* globals should be synced to memory */
>                      for (i = 0; i < nb_globals; i++) {
> -                        temp_state[i] |= TS_MEM;
> +                        s->temps[i].state |= TS_MEM;
>                      }
>                  }
>
>                  /* record arguments that die in this opcode */
>                  for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
> -                    arg = op->args[i];
> -                    if (temp_state[arg] & TS_DEAD) {
> +                    arg_ts = arg_temp(op->args[i]);
> +                    if (arg_ts->state & TS_DEAD) {
>                          arg_life |= DEAD_ARG << i;
>                      }
>                  }
>                  /* input arguments are live for preceding opcodes */
>                  for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
> -                    temp_state[op->args[i]] &= ~TS_DEAD;
> +                    arg_temp(op->args[i])->state &= ~TS_DEAD;
>                  }
>              }
>              break;
> @@ -1649,16 +1661,12 @@ static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
>  }
>
>  /* Liveness analysis: Convert indirect regs to direct temporaries.  */
> -static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
> +static bool liveness_pass_2(TCGContext *s)
>  {
>      int nb_globals = s->nb_globals;
> -    int16_t *dir_temps;
>      int i, oi, oi_next;
>      bool changes = false;
>
> -    dir_temps = tcg_malloc(nb_globals * sizeof(int16_t));
> -    memset(dir_temps, 0, nb_globals * sizeof(int16_t));
> -
>      /* Create a temporary for each indirect global.  */
>      for (i = 0; i < nb_globals; ++i) {
>          TCGTemp *its = &s->temps[i];
> @@ -1666,19 +1674,19 @@ static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
>              TCGTemp *dts = tcg_temp_alloc(s);
>              dts->type = its->type;
>              dts->base_type = its->base_type;
> -            dir_temps[i] = temp_idx(s, dts);
> +            its->state_ptr = dts;
>          }
> +        /* All globals begin dead.  */
> +        its->state = TS_DEAD;
>      }
>
> -    memset(temp_state, TS_DEAD, nb_globals);
> -
>      for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
>          TCGOp *op = &s->gen_op_buf[oi];
>          TCGOpcode opc = op->opc;
>          const TCGOpDef *def = &tcg_op_defs[opc];
>          TCGLifeData arg_life = op->life;
>          int nb_iargs, nb_oargs, call_flags;
> -        TCGArg arg, dir;
> +        TCGTemp *arg_ts, *dir_ts;
>
>          oi_next = op->next;
>
> @@ -1706,24 +1714,20 @@ static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
>
>          /* Make sure that input arguments are available.  */
>          for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
> -            arg = op->args[i];
> -            /* Note this unsigned test catches TCG_CALL_ARG_DUMMY too.  */
> -            if (arg < nb_globals) {

This test is gone but....

> -                dir = dir_temps[arg];
> -                if (dir != 0 && temp_state[arg] == TS_DEAD) {
> -                    TCGTemp *its = arg_temp(arg);
> -                    TCGOpcode lopc = (its->type == TCG_TYPE_I32
> -                                      ? INDEX_op_ld_i32
> -                                      : INDEX_op_ld_i64);
> -                    TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
> -
> -                    lop->args[0] = dir;
> -                    lop->args[1] = temp_arg(its->mem_base);
> -                    lop->args[2] = its->mem_offset;
> -
> -                    /* Loaded, but synced with memory.  */
> -                    temp_state[arg] = TS_MEM;
> -                }
> +            arg_ts = arg_temp(op->args[i]);
> +            dir_ts = arg_ts->state_ptr;
> +            if (dir_ts && arg_ts->state == TS_DEAD) {

...we de-ref arg_ts here. So what if it was a TCG_CALL_DUMMY_ARG?

> +                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
> +                                  ? INDEX_op_ld_i32
> +                                  : INDEX_op_ld_i64);
> +                TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
> +
> +                lop->args[0] = temp_arg(dir_ts);
> +                lop->args[1] = temp_arg(arg_ts->mem_base);
> +                lop->args[2] = arg_ts->mem_offset;
> +
> +                /* Loaded, but synced with memory.  */
> +                arg_ts->state = TS_MEM;
>              }
>          }
>
> @@ -1731,15 +1735,13 @@ static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
>             No action is required except keeping temp_state up to date
>             so that we reload when needed.  */
>          for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
> -            arg = op->args[i];
> -            if (arg < nb_globals) {
> -                dir = dir_temps[arg];
> -                if (dir != 0) {
> -                    op->args[i] = dir;
> -                    changes = true;
> -                    if (IS_DEAD_ARG(i)) {
> -                        temp_state[arg] = TS_DEAD;
> -                    }
> +            arg_ts = arg_temp(op->args[i]);
> +            dir_ts = arg_ts->state_ptr;
> +            if (dir_ts) {
> +                op->args[i] = temp_arg(dir_ts);
> +                changes = true;
> +                if (IS_DEAD_ARG(i)) {
> +                    arg_ts->state = TS_DEAD;
>                  }
>              }
>          }
> @@ -1752,51 +1754,49 @@ static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
>              for (i = 0; i < nb_globals; ++i) {
>                  /* Liveness should see that globals are synced back,
>                     that is, either TS_DEAD or TS_MEM.  */
> -                tcg_debug_assert(dir_temps[i] == 0
> -                                 || temp_state[i] != 0);
> +                arg_ts = &s->temps[i];
> +                tcg_debug_assert(arg_ts->state_ptr == 0
> +                                 || arg_ts->state != 0);
>              }
>          } else {
>              for (i = 0; i < nb_globals; ++i) {
>                  /* Liveness should see that globals are saved back,
>                     that is, TS_DEAD, waiting to be reloaded.  */
> -                tcg_debug_assert(dir_temps[i] == 0
> -                                 || temp_state[i] == TS_DEAD);
> +                arg_ts = &s->temps[i];
> +                tcg_debug_assert(arg_ts->state_ptr == 0
> +                                 || arg_ts->state == TS_DEAD);
>              }
>          }
>
>          /* Outputs become available.  */
>          for (i = 0; i < nb_oargs; i++) {
> -            arg = op->args[i];
> -            if (arg >= nb_globals) {
> -                continue;
> -            }
> -            dir = dir_temps[arg];
> -            if (dir == 0) {
> +            arg_ts = arg_temp(op->args[i]);
> +            dir_ts = arg_ts->state_ptr;
> +            if (!dir_ts) {
>                  continue;
>              }
> -            op->args[i] = dir;
> +            op->args[i] = temp_arg(dir_ts);
>              changes = true;
>
>              /* The output is now live and modified.  */
> -            temp_state[arg] = 0;
> +            arg_ts->state = 0;
>
>              /* Sync outputs upon their last write.  */
>              if (NEED_SYNC_ARG(i)) {
> -                TCGTemp *its = arg_temp(arg);
> -                TCGOpcode sopc = (its->type == TCG_TYPE_I32
> +                TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
>                                    ? INDEX_op_st_i32
>                                    : INDEX_op_st_i64);
>                  TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
>
> -                sop->args[0] = dir;
> -                sop->args[1] = temp_arg(its->mem_base);
> -                sop->args[2] = its->mem_offset;
> +                sop->args[0] = temp_arg(dir_ts);
> +                sop->args[1] = temp_arg(arg_ts->mem_base);
> +                sop->args[2] = arg_ts->mem_offset;
>
> -                temp_state[arg] = TS_MEM;
> +                arg_ts->state = TS_MEM;
>              }
>              /* Drop outputs that are dead.  */
>              if (IS_DEAD_ARG(i)) {
> -                temp_state[arg] = TS_DEAD;
> +                arg_ts->state = TS_DEAD;
>              }
>          }
>      }
> @@ -2569,27 +2569,23 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
>      s->la_time -= profile_getclock();
>  #endif
>
> -    {
> -        uint8_t *temp_state = tcg_malloc(s->nb_temps + s->nb_indirects);
> -
> -        liveness_pass_1(s, temp_state);
> +    liveness_pass_1(s);
>
> -        if (s->nb_indirects > 0) {
> +    if (s->nb_indirects > 0) {
>  #ifdef DEBUG_DISAS
> -            if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
> -                         && qemu_log_in_addr_range(tb->pc))) {
> -                qemu_log_lock();
> -                qemu_log("OP before indirect lowering:\n");
> -                tcg_dump_ops(s);
> -                qemu_log("\n");
> -                qemu_log_unlock();
> -            }
> +        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
> +                     && qemu_log_in_addr_range(tb->pc))) {
> +            qemu_log_lock();
> +            qemu_log("OP before indirect lowering:\n");
> +            tcg_dump_ops(s);
> +            qemu_log("\n");
> +            qemu_log_unlock();
> +        }
>  #endif
> -            /* Replace indirect temps with direct temps.  */
> -            if (liveness_pass_2(s, temp_state)) {
> -                /* If changes were made, re-run liveness.  */
> -                liveness_pass_1(s, temp_state);
> -            }
> +        /* Replace indirect temps with direct temps.  */
> +        if (liveness_pass_2(s)) {
> +            /* If changes were made, re-run liveness.  */
> +            liveness_pass_1(s);
>          }
>      }
>
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index 80012b5..1eeeca5 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -599,6 +599,12 @@ typedef struct TCGTemp {
>      struct TCGTemp *mem_base;
>      intptr_t mem_offset;
>      const char *name;
> +
> +    /* Pass-specific information that can be stored for a temporary.
> +       One word worth of integer data, and one pointer to data
> +       allocated separately.  */
> +    uintptr_t state;
> +    void *state_ptr;
>  } TCGTemp;
>
>  typedef struct TCGContext TCGContext;


--
Alex Bennée
Richard Henderson June 27, 2017, 4:39 p.m. UTC | #2
On 06/27/2017 01:57 AM, Alex Bennée wrote:
>> -            /* Note this unsigned test catches TCG_CALL_ARG_DUMMY too.  */
>> -            if (arg < nb_globals) {
> 
> This test is gone but....
> 
>> -                dir = dir_temps[arg];
>> -                if (dir != 0 && temp_state[arg] == TS_DEAD) {
>> -                    TCGTemp *its = arg_temp(arg);
>> -                    TCGOpcode lopc = (its->type == TCG_TYPE_I32
>> -                                      ? INDEX_op_ld_i32
>> -                                      : INDEX_op_ld_i64);
>> -                    TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
>> -
>> -                    lop->args[0] = dir;
>> -                    lop->args[1] = temp_arg(its->mem_base);
>> -                    lop->args[2] = its->mem_offset;
>> -
>> -                    /* Loaded, but synced with memory.  */
>> -                    temp_state[arg] = TS_MEM;
>> -                }
>> +            arg_ts = arg_temp(op->args[i]);
>> +            dir_ts = arg_ts->state_ptr;
>> +            if (dir_ts && arg_ts->state == TS_DEAD) {
> 
> ...we de-ref arg_ts here. So what if it was a TCG_CALL_DUMMY_ARG?

Good catch.  I need to do more testing on a host that actually uses this padding...


r~

Patch
diff mbox

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 0d758e4..e78140b 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1399,42 +1399,54 @@  TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
 
 /* liveness analysis: end of function: all temps are dead, and globals
    should be in memory. */
-static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state)
+static void tcg_la_func_end(TCGContext *s)
 {
-    memset(temp_state, TS_DEAD | TS_MEM, s->nb_globals);
-    memset(temp_state + s->nb_globals, TS_DEAD, s->nb_temps - s->nb_globals);
+    int ng = s->nb_globals;
+    int nt = s->nb_temps;
+    int i;
+
+    for (i = 0; i < ng; ++i) {
+        s->temps[i].state = TS_DEAD | TS_MEM;
+    }
+    for (i = ng; i < nt; ++i) {
+        s->temps[i].state = TS_DEAD;
+    }
 }
 
 /* liveness analysis: end of basic block: all temps are dead, globals
    and local temps should be in memory. */
-static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state)
+static void tcg_la_bb_end(TCGContext *s)
 {
-    int i, n;
+    int ng = s->nb_globals;
+    int nt = s->nb_temps;
+    int i;
 
-    tcg_la_func_end(s, temp_state);
-    for (i = s->nb_globals, n = s->nb_temps; i < n; i++) {
-        if (s->temps[i].temp_local) {
-            temp_state[i] |= TS_MEM;
-        }
+    for (i = 0; i < ng; ++i) {
+        s->temps[i].state = TS_DEAD | TS_MEM;
+    }
+    for (i = ng; i < nt; ++i) {
+        s->temps[i].state = (s->temps[i].temp_local
+                             ? TS_DEAD | TS_MEM
+                             : TS_DEAD);
     }
 }
 
 /* Liveness analysis : update the opc_arg_life array to tell if a
    given input arguments is dead. Instructions updating dead
    temporaries are removed. */
-static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
+static void liveness_pass_1(TCGContext *s)
 {
     int nb_globals = s->nb_globals;
     int oi, oi_prev;
 
-    tcg_la_func_end(s, temp_state);
+    tcg_la_func_end(s);
 
     for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
         int i, nb_iargs, nb_oargs;
         TCGOpcode opc_new, opc_new2;
         bool have_opc_new2;
         TCGLifeData arg_life = 0;
-        TCGArg arg;
+        TCGTemp *arg_ts;
 
         TCGOp * const op = &s->gen_op_buf[oi];
         TCGOpcode opc = op->opc;
@@ -1454,8 +1466,8 @@  static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
                 /* pure functions can be removed if their result is unused */
                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                     for (i = 0; i < nb_oargs; i++) {
-                        arg = op->args[i];
-                        if (temp_state[arg] != TS_DEAD) {
+                        arg_ts = arg_temp(op->args[i]);
+                        if (arg_ts->state != TS_DEAD) {
                             goto do_not_remove_call;
                         }
                     }
@@ -1465,41 +1477,41 @@  static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
 
                     /* output args are dead */
                     for (i = 0; i < nb_oargs; i++) {
-                        arg = op->args[i];
-                        if (temp_state[arg] & TS_DEAD) {
+                        arg_ts = arg_temp(op->args[i]);
+                        if (arg_ts->state & TS_DEAD) {
                             arg_life |= DEAD_ARG << i;
                         }
-                        if (temp_state[arg] & TS_MEM) {
+                        if (arg_ts->state & TS_MEM) {
                             arg_life |= SYNC_ARG << i;
                         }
-                        temp_state[arg] = TS_DEAD;
+                        arg_ts->state = TS_DEAD;
                     }
 
                     if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                         TCG_CALL_NO_READ_GLOBALS))) {
                         /* globals should go back to memory */
-                        memset(temp_state, TS_DEAD | TS_MEM, nb_globals);
+                        for (i = 0; i < nb_globals; i++) {
+                            s->temps[i].state = TS_DEAD | TS_MEM;
+                        }
                     } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                         /* globals should be synced to memory */
                         for (i = 0; i < nb_globals; i++) {
-                            temp_state[i] |= TS_MEM;
+                            s->temps[i].state |= TS_MEM;
                         }
                     }
 
                     /* record arguments that die in this helper */
                     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
-                        arg = op->args[i];
-                        if (arg != TCG_CALL_DUMMY_ARG) {
-                            if (temp_state[arg] & TS_DEAD) {
-                                arg_life |= DEAD_ARG << i;
-                            }
+                        arg_ts = arg_temp(op->args[i]);
+                        if (arg_ts && arg_ts->state & TS_DEAD) {
+                            arg_life |= DEAD_ARG << i;
                         }
                     }
                     /* input arguments are live for preceding opcodes */
                     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
-                        arg = op->args[i];
-                        if (arg != TCG_CALL_DUMMY_ARG) {
-                            temp_state[arg] &= ~TS_DEAD;
+                        arg_ts = arg_temp(op->args[i]);
+                        if (arg_ts) {
+                            arg_ts->state &= ~TS_DEAD;
                         }
                     }
                 }
@@ -1509,7 +1521,7 @@  static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
             break;
         case INDEX_op_discard:
             /* mark the temporary as dead */
-            temp_state[op->args[0]] = TS_DEAD;
+            arg_temp(op->args[0])->state = TS_DEAD;
             break;
 
         case INDEX_op_add2_i32:
@@ -1530,8 +1542,8 @@  static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
                the low part.  The result can be optimized to a simple
                add or sub.  This happens often for x86_64 guest when the
                cpu mode is set to 32 bit.  */
-            if (temp_state[op->args[1]] == TS_DEAD) {
-                if (temp_state[op->args[0]] == TS_DEAD) {
+            if (arg_temp(op->args[1])->state == TS_DEAD) {
+                if (arg_temp(op->args[0])->state == TS_DEAD) {
                     goto do_remove;
                 }
                 /* Replace the opcode and adjust the args in place,
@@ -1568,8 +1580,8 @@  static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
         do_mul2:
             nb_iargs = 2;
             nb_oargs = 2;
-            if (temp_state[op->args[1]] == TS_DEAD) {
-                if (temp_state[op->args[0]] == TS_DEAD) {
+            if (arg_temp(op->args[1])->state == TS_DEAD) {
+                if (arg_temp(op->args[0])->state == TS_DEAD) {
                     /* Both parts of the operation are dead.  */
                     goto do_remove;
                 }
@@ -1577,7 +1589,7 @@  static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
                 op->opc = opc = opc_new;
                 op->args[1] = op->args[2];
                 op->args[2] = op->args[3];
-            } else if (temp_state[op->args[0]] == TS_DEAD && have_opc_new2) {
+            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
                 /* The low part of the operation is dead; generate the high. */
                 op->opc = opc = opc_new2;
                 op->args[0] = op->args[1];
@@ -1600,7 +1612,7 @@  static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
                implies side effects */
             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                 for (i = 0; i < nb_oargs; i++) {
-                    if (temp_state[op->args[i]] != TS_DEAD) {
+                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                         goto do_not_remove;
                     }
                 }
@@ -1610,36 +1622,36 @@  static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
             do_not_remove:
                 /* output args are dead */
                 for (i = 0; i < nb_oargs; i++) {
-                    arg = op->args[i];
-                    if (temp_state[arg] & TS_DEAD) {
+                    arg_ts = arg_temp(op->args[i]);
+                    if (arg_ts->state & TS_DEAD) {
                         arg_life |= DEAD_ARG << i;
                     }
-                    if (temp_state[arg] & TS_MEM) {
+                    if (arg_ts->state & TS_MEM) {
                         arg_life |= SYNC_ARG << i;
                     }
-                    temp_state[arg] = TS_DEAD;
+                    arg_ts->state = TS_DEAD;
                 }
 
                 /* if end of basic block, update */
                 if (def->flags & TCG_OPF_BB_END) {
-                    tcg_la_bb_end(s, temp_state);
+                    tcg_la_bb_end(s);
                 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                     /* globals should be synced to memory */
                     for (i = 0; i < nb_globals; i++) {
-                        temp_state[i] |= TS_MEM;
+                        s->temps[i].state |= TS_MEM;
                     }
                 }
 
                 /* record arguments that die in this opcode */
                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
-                    arg = op->args[i];
-                    if (temp_state[arg] & TS_DEAD) {
+                    arg_ts = arg_temp(op->args[i]);
+                    if (arg_ts->state & TS_DEAD) {
                         arg_life |= DEAD_ARG << i;
                     }
                 }
                 /* input arguments are live for preceding opcodes */
                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
-                    temp_state[op->args[i]] &= ~TS_DEAD;
+                    arg_temp(op->args[i])->state &= ~TS_DEAD;
                 }
             }
             break;
@@ -1649,16 +1661,12 @@  static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
 }
 
 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
-static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
+static bool liveness_pass_2(TCGContext *s)
 {
     int nb_globals = s->nb_globals;
-    int16_t *dir_temps;
     int i, oi, oi_next;
     bool changes = false;
 
-    dir_temps = tcg_malloc(nb_globals * sizeof(int16_t));
-    memset(dir_temps, 0, nb_globals * sizeof(int16_t));
-
     /* Create a temporary for each indirect global.  */
     for (i = 0; i < nb_globals; ++i) {
         TCGTemp *its = &s->temps[i];
@@ -1666,19 +1674,19 @@  static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
             TCGTemp *dts = tcg_temp_alloc(s);
             dts->type = its->type;
             dts->base_type = its->base_type;
-            dir_temps[i] = temp_idx(s, dts);
+            its->state_ptr = dts;
         }
+        /* All globals begin dead.  */
+        its->state = TS_DEAD;
     }
 
-    memset(temp_state, TS_DEAD, nb_globals);
-
     for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
         TCGOp *op = &s->gen_op_buf[oi];
         TCGOpcode opc = op->opc;
         const TCGOpDef *def = &tcg_op_defs[opc];
         TCGLifeData arg_life = op->life;
         int nb_iargs, nb_oargs, call_flags;
-        TCGArg arg, dir;
+        TCGTemp *arg_ts, *dir_ts;
 
         oi_next = op->next;
 
@@ -1706,24 +1714,20 @@  static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
 
         /* Make sure that input arguments are available.  */
         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
-            arg = op->args[i];
-            /* Note this unsigned test catches TCG_CALL_ARG_DUMMY too.  */
-            if (arg < nb_globals) {
-                dir = dir_temps[arg];
-                if (dir != 0 && temp_state[arg] == TS_DEAD) {
-                    TCGTemp *its = arg_temp(arg);
-                    TCGOpcode lopc = (its->type == TCG_TYPE_I32
-                                      ? INDEX_op_ld_i32
-                                      : INDEX_op_ld_i64);
-                    TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
-
-                    lop->args[0] = dir;
-                    lop->args[1] = temp_arg(its->mem_base);
-                    lop->args[2] = its->mem_offset;
-
-                    /* Loaded, but synced with memory.  */
-                    temp_state[arg] = TS_MEM;
-                }
+            arg_ts = arg_temp(op->args[i]);
+            dir_ts = arg_ts->state_ptr;
+            if (dir_ts && arg_ts->state == TS_DEAD) {
+                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
+                                  ? INDEX_op_ld_i32
+                                  : INDEX_op_ld_i64);
+                TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
+
+                lop->args[0] = temp_arg(dir_ts);
+                lop->args[1] = temp_arg(arg_ts->mem_base);
+                lop->args[2] = arg_ts->mem_offset;
+
+                /* Loaded, but synced with memory.  */
+                arg_ts->state = TS_MEM;
             }
         }
 
@@ -1731,15 +1735,13 @@  static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
            No action is required except keeping temp_state up to date
            so that we reload when needed.  */
         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
-            arg = op->args[i];
-            if (arg < nb_globals) {
-                dir = dir_temps[arg];
-                if (dir != 0) {
-                    op->args[i] = dir;
-                    changes = true;
-                    if (IS_DEAD_ARG(i)) {
-                        temp_state[arg] = TS_DEAD;
-                    }
+            arg_ts = arg_temp(op->args[i]);
+            dir_ts = arg_ts->state_ptr;
+            if (dir_ts) {
+                op->args[i] = temp_arg(dir_ts);
+                changes = true;
+                if (IS_DEAD_ARG(i)) {
+                    arg_ts->state = TS_DEAD;
                 }
             }
         }
@@ -1752,51 +1754,49 @@  static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
             for (i = 0; i < nb_globals; ++i) {
                 /* Liveness should see that globals are synced back,
                    that is, either TS_DEAD or TS_MEM.  */
-                tcg_debug_assert(dir_temps[i] == 0
-                                 || temp_state[i] != 0);
+                arg_ts = &s->temps[i];
+                tcg_debug_assert(arg_ts->state_ptr == 0
+                                 || arg_ts->state != 0);
             }
         } else {
             for (i = 0; i < nb_globals; ++i) {
                 /* Liveness should see that globals are saved back,
                    that is, TS_DEAD, waiting to be reloaded.  */
-                tcg_debug_assert(dir_temps[i] == 0
-                                 || temp_state[i] == TS_DEAD);
+                arg_ts = &s->temps[i];
+                tcg_debug_assert(arg_ts->state_ptr == 0
+                                 || arg_ts->state == TS_DEAD);
             }
         }
 
         /* Outputs become available.  */
         for (i = 0; i < nb_oargs; i++) {
-            arg = op->args[i];
-            if (arg >= nb_globals) {
-                continue;
-            }
-            dir = dir_temps[arg];
-            if (dir == 0) {
+            arg_ts = arg_temp(op->args[i]);
+            dir_ts = arg_ts->state_ptr;
+            if (!dir_ts) {
                 continue;
             }
-            op->args[i] = dir;
+            op->args[i] = temp_arg(dir_ts);
             changes = true;
 
             /* The output is now live and modified.  */
-            temp_state[arg] = 0;
+            arg_ts->state = 0;
 
             /* Sync outputs upon their last write.  */
             if (NEED_SYNC_ARG(i)) {
-                TCGTemp *its = arg_temp(arg);
-                TCGOpcode sopc = (its->type == TCG_TYPE_I32
+                TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                   ? INDEX_op_st_i32
                                   : INDEX_op_st_i64);
                 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
 
-                sop->args[0] = dir;
-                sop->args[1] = temp_arg(its->mem_base);
-                sop->args[2] = its->mem_offset;
+                sop->args[0] = temp_arg(dir_ts);
+                sop->args[1] = temp_arg(arg_ts->mem_base);
+                sop->args[2] = arg_ts->mem_offset;
 
-                temp_state[arg] = TS_MEM;
+                arg_ts->state = TS_MEM;
             }
             /* Drop outputs that are dead.  */
             if (IS_DEAD_ARG(i)) {
-                temp_state[arg] = TS_DEAD;
+                arg_ts->state = TS_DEAD;
             }
         }
     }
@@ -2569,27 +2569,23 @@  int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
     s->la_time -= profile_getclock();
 #endif
 
-    {
-        uint8_t *temp_state = tcg_malloc(s->nb_temps + s->nb_indirects);
-
-        liveness_pass_1(s, temp_state);
+    liveness_pass_1(s);
 
-        if (s->nb_indirects > 0) {
+    if (s->nb_indirects > 0) {
 #ifdef DEBUG_DISAS
-            if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
-                         && qemu_log_in_addr_range(tb->pc))) {
-                qemu_log_lock();
-                qemu_log("OP before indirect lowering:\n");
-                tcg_dump_ops(s);
-                qemu_log("\n");
-                qemu_log_unlock();
-            }
+        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
+                     && qemu_log_in_addr_range(tb->pc))) {
+            qemu_log_lock();
+            qemu_log("OP before indirect lowering:\n");
+            tcg_dump_ops(s);
+            qemu_log("\n");
+            qemu_log_unlock();
+        }
 #endif
-            /* Replace indirect temps with direct temps.  */
-            if (liveness_pass_2(s, temp_state)) {
-                /* If changes were made, re-run liveness.  */
-                liveness_pass_1(s, temp_state);
-            }
+        /* Replace indirect temps with direct temps.  */
+        if (liveness_pass_2(s)) {
+            /* If changes were made, re-run liveness.  */
+            liveness_pass_1(s);
         }
     }
 
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 80012b5..1eeeca5 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -599,6 +599,12 @@  typedef struct TCGTemp {
     struct TCGTemp *mem_base;
     intptr_t mem_offset;
     const char *name;
+
+    /* Pass-specific information that can be stored for a temporary.
+       One word worth of integer data, and one pointer to data
+       allocated separately.  */
+    uintptr_t state;
+    void *state_ptr;
 } TCGTemp;
 
 typedef struct TCGContext TCGContext;