[v2,10/16] tcg-mips: Adjust qemu_ld/st for mips64
diff mbox

Message ID 1455507754-8978-11-git-send-email-rth@twiddle.net
State New
Headers show

Commit Message

Richard Henderson Feb. 15, 2016, 3:42 a.m. UTC
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/mips/tcg-target.c | 188 ++++++++++++++++++++++++++++++++++----------------
 1 file changed, 130 insertions(+), 58 deletions(-)

Patch
diff mbox

diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index 25b079b..f61d6c7 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -33,8 +33,14 @@ 
 # define MIPS_BE  0
 #endif
 
-#define LO_OFF    (MIPS_BE * 4)
-#define HI_OFF    (4 - LO_OFF)
+#if TCG_TARGET_REG_BITS == 32
+# define LO_OFF  (MIPS_BE * 4)
+# define HI_OFF  (4 - LO_OFF)
+#else
+extern int link_error(void);
+# define LO_OFF  link_error()
+# define HI_OFF  link_error()
+#endif
 
 #ifndef NDEBUG
 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
@@ -188,7 +194,7 @@  static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         tcg_regset_set(ct->u.regs, 0xffffffff);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
 #if defined(CONFIG_SOFTMMU)
-        if (TARGET_LONG_BITS == 64) {
+        if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
             tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2);
         }
 #endif
@@ -198,11 +204,11 @@  static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         tcg_regset_set(ct->u.regs, 0xffffffff);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
 #if defined(CONFIG_SOFTMMU)
-        if (TARGET_LONG_BITS == 32) {
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1);
-        } else {
+        if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
             tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2);
             tcg_regset_reset_reg(ct->u.regs, TCG_REG_A3);
+        } else {
+            tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1);
         }
 #endif
         break;
@@ -1080,6 +1086,10 @@  static void * const qemu_ld_helpers[16] = {
     [MO_BESW] = helper_be_ldsw_mmu,
     [MO_BEUL] = helper_be_ldul_mmu,
     [MO_BEQ]  = helper_be_ldq_mmu,
+#if TCG_TARGET_REG_BITS == 64
+    [MO_LESL] = helper_le_ldsl_mmu,
+    [MO_BESL] = helper_be_ldsl_mmu,
+#endif
 };
 
 static void * const qemu_st_helpers[16] = {
@@ -1107,6 +1117,9 @@  static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg)
     if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
         tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[i], arg);
     } else {
+        /* For N32 and N64, the initial offset is different.  But there
+           we also have 8 argument registers so we don't run out here.  */
+        tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
         tcg_out_st(s, TCG_TYPE_REG, arg, TCG_REG_SP, 4 * i);
     }
     return i + 1;
@@ -1148,6 +1161,7 @@  static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg)
 
 static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah)
 {
+    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
     i = (i + 1) & ~1;
     i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? ah : al));
     i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? al : ah));
@@ -1161,6 +1175,7 @@  static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
                              tcg_insn_unit *label_ptr[2], bool is_load)
 {
     TCGMemOp s_bits = get_memop(oi) & MO_SIZE;
+    target_ulong mask = TARGET_PAGE_MASK | ((1 << s_bits) - 1);
     int mem_index = get_mmuidx(oi);
     int cmp_off
         = (is_load
@@ -1168,11 +1183,11 @@  static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
            : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
     int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
 
-    tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addrl,
+    tcg_out_opc_sa(s, ALIAS_TSRL, TCG_REG_A0, addrl,
                    TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
     tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0,
                     (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
-    tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, TCG_AREG0);
+    tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, TCG_REG_A0, TCG_AREG0);
 
     /* Compensate for very large offsets.  */
     if (add_off >= 0x8000) {
@@ -1182,43 +1197,48 @@  static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
         QEMU_BUILD_BUG_ON(offsetof(CPUArchState,
                                    tlb_table[NB_MMU_MODES - 1][1])
                           > 0x7ff0 + 0x7fff);
-        tcg_out_opc_imm(s, OPC_ADDIU, TCG_REG_A0, TCG_REG_A0, 0x7ff0);
+        tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, TCG_REG_A0, 0x7ff0);
         cmp_off -= 0x7ff0;
         add_off -= 0x7ff0;
     }
 
-    /* Load the (low half) tlb comparator.  */
-    tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0,
-                    cmp_off + (TARGET_LONG_BITS == 64 ? LO_OFF : 0));
-
-    /* Mask the page bits, keeping the alignment bits to compare against.
-       In between on 32-bit targets, load the tlb addend for the fast path.  */
-    tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1,
-                 TARGET_PAGE_MASK | ((1 << s_bits) - 1));
-    if (TARGET_LONG_BITS == 32) {
-        tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off);
+    /* Load the (low half) tlb comparator.  Mask the page bits, keeping the
+       alignment bits to compare against.  */
+    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+        tcg_out_ld(s, TCG_TYPE_I32, TCG_TMP0, TCG_REG_A0, cmp_off + LO_OFF);
+        tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, mask);
+    } else {
+        tcg_out_ld(s, TCG_TYPE_TL, TCG_TMP0, TCG_REG_A0, cmp_off);
+        tcg_out_movi(s, TCG_TYPE_TL, TCG_TMP1, mask);
+        /* No second compare is required here;
+           load the tlb addend for the fast path.  */
+        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_REG_A0, add_off);
     }
     tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl);
 
+    /* Zero extend a 32-bit guest address for a 64-bit host.  */
+    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+        tcg_out_ext32u(s, base, addrl);
+        addrl = base;
+    }
+
     label_ptr[0] = s->code_ptr;
     tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
 
     /* Load and test the high half tlb comparator.  */
-    if (TARGET_LONG_BITS == 64) {
+    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
         /* delay slot */
-        tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, cmp_off + HI_OFF);
+        tcg_out_ld(s, TCG_TYPE_I32, TCG_TMP0, TCG_REG_A0, cmp_off + HI_OFF);
 
-        /* Load the tlb addend for the fast path. We can't do it earlier with
-           64-bit targets or we'll clobber a0 before reading the high half tlb
-           comparator.  */
-        tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off);
+        /* Load the tlb addend for the fast path.  */
+        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_REG_A0, add_off);
 
         label_ptr[1] = s->code_ptr;
         tcg_out_opc_br(s, OPC_BNE, addrh, TCG_TMP0);
     }
 
     /* delay slot */
-    tcg_out_opc_reg(s, OPC_ADDU, base, TCG_REG_A0, addrl);
+    tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_REG_A0, addrl);
 }
 
 static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi,
@@ -1236,7 +1256,7 @@  static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi,
     label->addrhi_reg = addrhi;
     label->raddr = raddr;
     label->label_ptr[0] = label_ptr[0];
-    if (TARGET_LONG_BITS == 64) {
+    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
         label->label_ptr[1] = label_ptr[1];
     }
 }
@@ -1250,12 +1270,12 @@  static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 
     /* resolve label address */
     reloc_pc16(l->label_ptr[0], s->code_ptr);
-    if (TARGET_LONG_BITS == 64) {
+    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
         reloc_pc16(l->label_ptr[1], s->code_ptr);
     }
 
     i = 1;
-    if (TARGET_LONG_BITS == 64) {
+    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
         i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg);
     } else {
         i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg);
@@ -1267,7 +1287,7 @@  static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
     tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
 
     v0 = l->datalo_reg;
-    if ((opc & MO_SIZE) == MO_64) {
+    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
         /* We eliminated V0 from the possible output registers, so it
            cannot be clobbered here.  So we must move V1 first.  */
         if (MIPS_BE) {
@@ -1293,12 +1313,12 @@  static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 
     /* resolve label address */
     reloc_pc16(l->label_ptr[0], s->code_ptr);
-    if (TARGET_LONG_BITS == 64) {
+    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
         reloc_pc16(l->label_ptr[1], s->code_ptr);
     }
 
     i = 1;
-    if (TARGET_LONG_BITS == 64) {
+    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
         i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg);
     } else {
         i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg);
@@ -1310,14 +1330,15 @@  static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
     case MO_16:
         i = tcg_out_call_iarg_reg16(s, i, l->datalo_reg);
         break;
-    case MO_32:
-        i = tcg_out_call_iarg_reg(s, i, l->datalo_reg);
-        break;
     case MO_64:
-        i = tcg_out_call_iarg_reg2(s, i, l->datalo_reg, l->datahi_reg);
-        break;
+        if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+            i = tcg_out_call_iarg_reg2(s, i, l->datalo_reg, l->datahi_reg);
+            break;
+        }
+        /* FALLTHRU */
     default:
-        tcg_abort();
+        i = tcg_out_call_iarg_reg(s, i, l->datalo_reg);
+        break;
     }
     i = tcg_out_call_iarg_imm(s, i, oi);
 
@@ -1332,7 +1353,7 @@  static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 #endif
 
 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
-                                   TCGReg base, TCGMemOp opc)
+                                   TCGReg base, TCGMemOp opc, bool is_64)
 {
     switch (opc & (MO_SSIZE | MO_BSWAP)) {
     case MO_UB:
@@ -1341,6 +1362,7 @@  static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
     case MO_SB:
         tcg_out_opc_imm(s, OPC_LB, lo, base, 0);
         break;
+
     case MO_UW | MO_BSWAP:
         tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
         tcg_out_bswap16(s, lo, TCG_TMP1);
@@ -1348,6 +1370,7 @@  static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
     case MO_UW:
         tcg_out_opc_imm(s, OPC_LHU, lo, base, 0);
         break;
+
     case MO_SW | MO_BSWAP:
         tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
         tcg_out_bswap16s(s, lo, TCG_TMP1);
@@ -1355,7 +1378,22 @@  static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
     case MO_SW:
         tcg_out_opc_imm(s, OPC_LH, lo, base, 0);
         break;
+
     case MO_UL | MO_BSWAP:
+        if (TCG_TARGET_REG_BITS == 64 && is_64) {
+            if (use_mips32r2_instructions) {
+                tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
+                tcg_out_bswap32u(s, lo, lo);
+            } else {
+                tcg_out_bswap_subr(s, bswap32u_addr);
+                /* delay slot */
+                tcg_out_opc_imm(s, OPC_LWU, TCG_TMP0, base, 0);
+                tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3);
+            }
+            break;
+        }
+        /* FALLTHRU */
+    case MO_SL | MO_BSWAP:
         if (use_mips32r2_instructions) {
             tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
             tcg_out_bswap32(s, lo, lo);
@@ -1366,11 +1404,29 @@  static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
             tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_TMP3);
         }
         break;
+
     case MO_UL:
+        if (TCG_TARGET_REG_BITS == 64 && is_64) {
+            tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
+            break;
+        }
+        /* FALLTHRU */
+    case MO_SL:
         tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
         break;
+
     case MO_Q | MO_BSWAP:
-        if (use_mips32r2_instructions) {
+        if (TCG_TARGET_REG_BITS == 64) {
+            if (use_mips32r2_instructions) {
+                tcg_out_opc_imm(s, OPC_LD, lo, base, 0);
+                tcg_out_bswap64(s, lo, lo);
+            } else {
+                tcg_out_bswap_subr(s, bswap64_addr);
+                /* delay slot */
+                tcg_out_opc_imm(s, OPC_LD, TCG_TMP0, base, 0);
+                tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3);
+            }
+        } else if (use_mips32r2_instructions) {
             tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0);
             tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 4);
             tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, TCG_TMP0);
@@ -1390,7 +1446,9 @@  static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
         break;
     case MO_Q:
         /* Prefer to load from offset 0 first, but allow for overlap.  */
-        if (MIPS_BE ? hi != base : lo == base) {
+        if (TCG_TARGET_REG_BITS == 64) {
+            tcg_out_opc_imm(s, OPC_LD, lo, base, 0);
+        } else if (MIPS_BE ? hi != base : lo == base) {
             tcg_out_opc_imm(s, OPC_LW, hi, base, HI_OFF);
             tcg_out_opc_imm(s, OPC_LW, lo, base, LO_OFF);
         } else {
@@ -1415,27 +1473,31 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
     TCGReg base = TCG_REG_A0;
 
     data_regl = *args++;
-    data_regh = (is_64 ? *args++ : 0);
+    data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
     addr_regl = *args++;
-    addr_regh = (TARGET_LONG_BITS == 64 ? *args++ : 0);
+    addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
     oi = *args++;
     opc = get_memop(oi);
 
 #if defined(CONFIG_SOFTMMU)
     tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 1);
-    tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc);
+    tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
     add_qemu_ldst_label(s, 1, oi, data_regl, data_regh, addr_regl, addr_regh,
                         s->code_ptr, label_ptr);
 #else
+    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+        tcg_out_ext32u(s, base, addr_regl);
+        addr_regl = base;
+    }
     if (guest_base == 0 && data_regl != addr_regl) {
         base = addr_regl;
     } else if (guest_base == (int16_t)guest_base) {
-        tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, guest_base);
+        tcg_out_opc_imm(s, ALIAS_PADDI, base, addr_regl, guest_base);
     } else {
         tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base);
-        tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl);
+        tcg_out_opc_reg(s, ALIAS_PADD, base, base, addr_regl);
     }
-    tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc);
+    tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
 #endif
 }
 
@@ -1470,7 +1532,10 @@  static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
         break;
 
     case MO_64 | MO_BSWAP:
-        if (use_mips32r2_instructions) {
+        if (TCG_TARGET_REG_BITS == 64) {
+            tcg_out_bswap64(s, TCG_TMP3, lo);
+            tcg_out_opc_imm(s, OPC_SD, TCG_TMP3, base, 0);
+        } else if (use_mips32r2_instructions) {
             tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, MIPS_BE ? lo : hi);
             tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, MIPS_BE ? hi : lo);
             tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP0, TCG_TMP0, 16);
@@ -1485,8 +1550,12 @@  static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
         }
         break;
     case MO_64:
-        tcg_out_opc_imm(s, OPC_SW, MIPS_BE ? hi : lo, base, 0);
-        tcg_out_opc_imm(s, OPC_SW, MIPS_BE ? lo : hi, base, 4);
+        if (TCG_TARGET_REG_BITS == 64) {
+            tcg_out_opc_imm(s, OPC_SD, lo, base, 0);
+        } else {
+            tcg_out_opc_imm(s, OPC_SW, MIPS_BE ? hi : lo, base, 0);
+            tcg_out_opc_imm(s, OPC_SW, MIPS_BE ? lo : hi, base, 4);
+        }
         break;
 
     default:
@@ -1506,9 +1575,9 @@  static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
     TCGReg base = TCG_REG_A0;
 
     data_regl = *args++;
-    data_regh = (is_64 ? *args++ : 0);
+    data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
     addr_regl = *args++;
-    addr_regh = (TARGET_LONG_BITS == 64 ? *args++ : 0);
+    addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
     oi = *args++;
     opc = get_memop(oi);
 
@@ -1518,15 +1587,18 @@  static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
     add_qemu_ldst_label(s, 0, oi, data_regl, data_regh, addr_regl, addr_regh,
                         s->code_ptr, label_ptr);
 #else
+    base = TCG_REG_A0;
+    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+        tcg_out_ext32u(s, base, addr_regl);
+        addr_regl = base;
+    }
     if (guest_base == 0) {
         base = addr_regl;
+    } else if (guest_base == (int16_t)guest_base) {
+        tcg_out_opc_imm(s, ALIAS_PADDI, base, addr_regl, guest_base);
     } else {
-        if (guest_base == (int16_t)guest_base) {
-            tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, guest_base);
-        } else {
-            tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base);
-            tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl);
-        }
+        tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base);
+        tcg_out_opc_reg(s, ALIAS_PADD, base, base, addr_regl);
     }
     tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
 #endif