diff mbox series

[v2,23/30] tcg/loongarch64: Add softmmu load/store helpers, implement qemu_ld/qemu_st ops

Message ID 20210921201915.601245-24-git@xen0n.name (mailing list archive)
State New, archived
Headers show
Series LoongArch64 port of QEMU TCG | expand

Commit Message

WANG Xuerui Sept. 21, 2021, 8:19 p.m. UTC
Signed-off-by: WANG Xuerui <git@xen0n.name>
---
 tcg/loongarch64/tcg-target-con-set.h |   2 +
 tcg/loongarch64/tcg-target.c.inc     | 332 +++++++++++++++++++++++++++
 2 files changed, 334 insertions(+)

Comments

Richard Henderson Sept. 22, 2021, 4:29 p.m. UTC | #1
On 9/21/21 1:19 PM, WANG Xuerui wrote:
> +    /* Compare masked address with the TLB entry.  */
> +    label_ptr[0] = s->code_ptr;
> +    tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0);
> +
> +    /* TLB Hit - translate address using addend.  */
> +    tcg_out_opc_add_d(s, TCG_REG_TMP0, TCG_REG_TMP2, addrl);

You removed a little too much here.  You still need

     if (TARGET_LONG_BITS == 32) {
         tcg_out_ext32u(s, TCG_REG_TMP0, addrl);
         addrl = TCG_REG_TMP0;
     }
     tcg_out_opc_add_d(s, TCG_REG_TMP0, TCG_REG_TMP2, addrl);

> +static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi,
> +                                TCGReg datalo, TCGReg addrlo,
> +                                void *raddr, tcg_insn_unit **label_ptr)
> +{
> +    TCGLabelQemuLdst *label = new_ldst_label(s);
> +
> +    label->is_ld = is_ld;
> +    label->oi = oi;
> +    label->type = 0;

Type should be set based on "is_64" argument to tcg_out_qemu_ld (or indeed, is_64 could be 
replaced by "type", which would probably make more sense).

This will be used to fix...

> +    if (opc & MO_SIGN) {
> +        /* Sign-extend directly into destination.  */
> +        switch (size) {
> +        case MO_8:
> +            tcg_out_ext8s(s, l->datalo_reg, TCG_REG_A0);
> +            break;
> +        case MO_16:
> +            tcg_out_ext16s(s, l->datalo_reg, TCG_REG_A0);
> +            break;
> +        case MO_32:
> +            tcg_out_ext32s(s, l->datalo_reg, TCG_REG_A0);
> +            break;
> +        default:
> +            g_assert_not_reached();
> +            break;
> +        }
> +    } else {
> +        tcg_out_mov(s, size == MO_64, l->datalo_reg, TCG_REG_A0);
> +    }

... this, where TCG_TYPE_I32 loads should always be sign-extended from 32-bits.  Something 
like

     switch (opc & MO_SSIZE) {
     case MO_SB:
         ext8s;
         break;
     case MO_SH:
         ext16s;
         break;
     case MO_SL:
         ext32s;
         break;
     case MO_UL:
         if (type == TCG_TYPE_I32) {
             ext32s;
             break;
         }
         /* fall through */
     default:
         tcg_out_mov(s, TCG_TYPE_REG, datalo, A0);
         break;
     }

> +    case MO_64:
> +        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A2, l->datalo_reg);

TCG_TYPE_I64, to match MO_64.

> +    if (USE_GUEST_BASE) {
> +        tcg_out_opc_add_d(s, base, TCG_GUEST_BASE_REG, addr_regl);
> +    } else {
> +        tcg_out_opc_add_d(s, base, addr_regl, TCG_REG_ZERO);
> +    }

Still adding zero in tcg_out_qemu_st.


r~
WANG Xuerui Sept. 22, 2021, 5:32 p.m. UTC | #2
Hi Richard,

On 9/23/21 00:29, Richard Henderson wrote:
> On 9/21/21 1:19 PM, WANG Xuerui wrote:
>> +    /* Compare masked address with the TLB entry.  */
>> +    label_ptr[0] = s->code_ptr;
>> +    tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0);
>> +
>> +    /* TLB Hit - translate address using addend.  */
>> +    tcg_out_opc_add_d(s, TCG_REG_TMP0, TCG_REG_TMP2, addrl);
>
> You removed a little too much here.  You still need
>
>     if (TARGET_LONG_BITS == 32) {
>         tcg_out_ext32u(s, TCG_REG_TMP0, addrl);
>         addrl = TCG_REG_TMP0;
>     }
>     tcg_out_opc_add_d(s, TCG_REG_TMP0, TCG_REG_TMP2, addrl);
Ah, of course that was necessary because TARGET_LONG_BITS depends on the 
*target*... silly me. Thanks for catching this, I was just about to 
debug the "mysterious" segfaults on all the 32-bit targets :facepalm:
>
>> +static void add_qemu_ldst_label(TCGContext *s, int is_ld, 
>> TCGMemOpIdx oi,
>> +                                TCGReg datalo, TCGReg addrlo,
>> +                                void *raddr, tcg_insn_unit **label_ptr)
>> +{
>> +    TCGLabelQemuLdst *label = new_ldst_label(s);
>> +
>> +    label->is_ld = is_ld;
>> +    label->oi = oi;
>> +    label->type = 0;
>
> Type should be set based on "is_64" argument to tcg_out_qemu_ld (or 
> indeed, is_64 could be replaced by "type", which would probably make 
> more sense).
>
> This will be used to fix...
>
>> +    if (opc & MO_SIGN) {
>> +        /* Sign-extend directly into destination.  */
>> +        switch (size) {
>> +        case MO_8:
>> +            tcg_out_ext8s(s, l->datalo_reg, TCG_REG_A0);
>> +            break;
>> +        case MO_16:
>> +            tcg_out_ext16s(s, l->datalo_reg, TCG_REG_A0);
>> +            break;
>> +        case MO_32:
>> +            tcg_out_ext32s(s, l->datalo_reg, TCG_REG_A0);
>> +            break;
>> +        default:
>> +            g_assert_not_reached();
>> +            break;
>> +        }
>> +    } else {
>> +        tcg_out_mov(s, size == MO_64, l->datalo_reg, TCG_REG_A0);
>> +    }
>
> ... this, where TCG_TYPE_I32 loads should always be sign-extended from 
> 32-bits.  Something like
>
>     switch (opc & MO_SSIZE) {
>     case MO_SB:
>         ext8s;
>         break;
>     case MO_SH:
>         ext16s;
>         break;
>     case MO_SL:
>         ext32s;
>         break;
>     case MO_UL:
>         if (type == TCG_TYPE_I32) {
>             ext32s;
>             break;
>         }
>         /* fall through */
>     default:
>         tcg_out_mov(s, TCG_TYPE_REG, datalo, A0);
>         break;
>     }
Ack; I'll modify tcg_out_qemu_ld and add_qemu_ldst_label to take a TCGType.
>
>> +    case MO_64:
>> +        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A2, l->datalo_reg);
>
> TCG_TYPE_I64, to match MO_64.
Ack; will fix.
>
>> +    if (USE_GUEST_BASE) {
>> +        tcg_out_opc_add_d(s, base, TCG_GUEST_BASE_REG, addr_regl);
>> +    } else {
>> +        tcg_out_opc_add_d(s, base, addr_regl, TCG_REG_ZERO);
>> +    }
>
> Still adding zero in tcg_out_qemu_st.
One probably should not even attempt to fix code at 4 am, esp. while 
going through tens of review comments...
>
>
> r~
diff mbox series

Patch

diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h
index 3ab0416d9f..8fd3a2f4a1 100644
--- a/tcg/loongarch64/tcg-target-con-set.h
+++ b/tcg/loongarch64/tcg-target-con-set.h
@@ -17,7 +17,9 @@ 
 C_O0_I1(r)
 C_O0_I2(rZ, r)
 C_O0_I2(rZ, rZ)
+C_O0_I2(LZ, L)
 C_O1_I1(r, r)
+C_O1_I1(r, L)
 C_O1_I2(r, r, rC)
 C_O1_I2(r, r, ri)
 C_O1_I2(r, r, rI)
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 18b2473d9c..bbb6b7f47d 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -117,6 +117,11 @@  static const int tcg_target_call_oarg_regs[] = {
     TCG_REG_A1,
 };
 
+#ifndef CONFIG_SOFTMMU
+#define USE_GUEST_BASE     (guest_base != 0)
+#define TCG_GUEST_BASE_REG TCG_REG_S1
+#endif
+
 #define TCG_CT_CONST_ZERO  0x100
 #define TCG_CT_CONST_S12   0x200
 #define TCG_CT_CONST_N12   0x400
@@ -591,6 +596,312 @@  static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
     return false;
 }
 
+/*
+ * Load/store helpers for SoftMMU, and qemu_ld/st implementations
+ */
+
+#if defined(CONFIG_SOFTMMU)
+#include "../tcg-ldst.c.inc"
+
+/*
+ * helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
+ *                                     TCGMemOpIdx oi, uintptr_t ra)
+ */
+static void * const qemu_ld_helpers[4] = {
+    [MO_8]  = helper_ret_ldub_mmu,
+    [MO_16] = helper_le_lduw_mmu,
+    [MO_32] = helper_le_ldul_mmu,
+    [MO_64] = helper_le_ldq_mmu,
+};
+
+/*
+ * helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
+ *                                     uintxx_t val, TCGMemOpIdx oi,
+ *                                     uintptr_t ra)
+ */
+static void * const qemu_st_helpers[4] = {
+    [MO_8]  = helper_ret_stb_mmu,
+    [MO_16] = helper_le_stw_mmu,
+    [MO_32] = helper_le_stl_mmu,
+    [MO_64] = helper_le_stq_mmu,
+};
+
+/* We expect to use a 12-bit negative offset from ENV.  */
+QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
+QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
+
+static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
+{
+    tcg_out_opc_b(s, 0);
+    return reloc_br_sd10k16(s->code_ptr - 1, target);
+}
+
+static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, TCGMemOpIdx oi,
+                             tcg_insn_unit **label_ptr, bool is_load)
+{
+    MemOp opc = get_memop(oi);
+    unsigned s_bits = opc & MO_SIZE;
+    unsigned a_bits = get_alignment_bits(opc);
+    tcg_target_long compare_mask;
+    int mem_index = get_mmuidx(oi);
+    int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
+    int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
+    int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
+
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
+
+    tcg_out_opc_srli_d(s, TCG_REG_TMP2, addrl,
+                    TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+    tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
+    tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
+
+    /* Load the tlb comparator and the addend.  */
+    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
+               is_load ? offsetof(CPUTLBEntry, addr_read)
+               : offsetof(CPUTLBEntry, addr_write));
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
+               offsetof(CPUTLBEntry, addend));
+
+    /* We don't support unaligned accesses.  */
+    if (a_bits < s_bits) {
+        a_bits = s_bits;
+    }
+    /* Clear the non-page, non-alignment bits from the address.  */
+    compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
+    tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
+    tcg_out_opc_and(s, TCG_REG_TMP1, TCG_REG_TMP1, addrl);
+
+    /* Compare masked address with the TLB entry.  */
+    label_ptr[0] = s->code_ptr;
+    tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0);
+
+    /* TLB Hit - translate address using addend.  */
+    tcg_out_opc_add_d(s, TCG_REG_TMP0, TCG_REG_TMP2, addrl);
+}
+
+static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi,
+                                TCGReg datalo, TCGReg addrlo,
+                                void *raddr, tcg_insn_unit **label_ptr)
+{
+    TCGLabelQemuLdst *label = new_ldst_label(s);
+
+    label->is_ld = is_ld;
+    label->oi = oi;
+    label->type = 0;
+    label->datalo_reg = datalo;
+    label->datahi_reg = 0;
+    label->addrlo_reg = addrlo;
+    label->addrhi_reg = 0;
+    label->raddr = tcg_splitwx_to_rx(raddr);
+    label->label_ptr[0] = label_ptr[0];
+}
+
+static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+    TCGMemOpIdx oi = l->oi;
+    MemOp opc = get_memop(oi);
+    MemOp size = opc & MO_SIZE;
+
+    /* resolve label address */
+    if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
+        return false;
+    }
+
+    /* call load helper */
+    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
+    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg);
+    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A2, oi);
+    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, (tcg_target_long)l->raddr);
+
+    tcg_out_call(s, qemu_ld_helpers[size]);
+    if (opc & MO_SIGN) {
+        /* Sign-extend directly into destination.  */
+        switch (size) {
+        case MO_8:
+            tcg_out_ext8s(s, l->datalo_reg, TCG_REG_A0);
+            break;
+        case MO_16:
+            tcg_out_ext16s(s, l->datalo_reg, TCG_REG_A0);
+            break;
+        case MO_32:
+            tcg_out_ext32s(s, l->datalo_reg, TCG_REG_A0);
+            break;
+        default:
+            g_assert_not_reached();
+            break;
+        }
+    } else {
+        tcg_out_mov(s, size == MO_64, l->datalo_reg, TCG_REG_A0);
+    }
+
+    return tcg_out_goto(s, l->raddr);
+}
+
+static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+    TCGMemOpIdx oi = l->oi;
+    MemOp opc = get_memop(oi);
+    MemOp size = opc & MO_SIZE;
+
+    /* resolve label address */
+    if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
+        return false;
+    }
+
+    /* call store helper */
+    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
+    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg);
+    switch (size) {
+    case MO_8:
+        tcg_out_ext8u(s, TCG_REG_A2, l->datalo_reg);
+        break;
+    case MO_16:
+        tcg_out_ext16u(s, TCG_REG_A2, l->datalo_reg);
+        break;
+    case MO_32:
+        tcg_out_ext32u(s, TCG_REG_A2, l->datalo_reg);
+        break;
+    case MO_64:
+        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A2, l->datalo_reg);
+        break;
+    default:
+        g_assert_not_reached();
+        break;
+    }
+    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, oi);
+    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A4, (tcg_target_long)l->raddr);
+
+    tcg_out_call(s, qemu_st_helpers[size]);
+
+    return tcg_out_goto(s, l->raddr);
+}
+#endif /* CONFIG_SOFTMMU */
+
+static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg base,
+                                   MemOp opc, bool is_64)
+{
+    /* Byte swapping is left to middle-end expansion.  */
+    tcg_debug_assert((opc & MO_BSWAP) == 0);
+
+    switch (opc & MO_SSIZE) {
+    case MO_UB:
+        tcg_out_opc_ld_bu(s, lo, base, 0);
+        break;
+    case MO_SB:
+        tcg_out_opc_ld_b(s, lo, base, 0);
+        break;
+    case MO_UW:
+        tcg_out_opc_ld_hu(s, lo, base, 0);
+        break;
+    case MO_SW:
+        tcg_out_opc_ld_h(s, lo, base, 0);
+        break;
+    case MO_UL:
+        if (is_64) {
+            tcg_out_opc_ld_wu(s, lo, base, 0);
+            break;
+        }
+        /* fallthrough */
+    case MO_SL:
+        tcg_out_opc_ld_w(s, lo, base, 0);
+        break;
+    case MO_Q:
+        tcg_out_opc_ld_d(s, lo, base, 0);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
+{
+    TCGReg addr_regl;
+    TCGReg data_regl;
+    TCGMemOpIdx oi;
+    MemOp opc;
+#if defined(CONFIG_SOFTMMU)
+    tcg_insn_unit *label_ptr[1];
+#endif
+    TCGReg base = TCG_REG_TMP0;
+
+    data_regl = *args++;
+    addr_regl = *args++;
+    oi = *args++;
+    opc = get_memop(oi);
+
+#if defined(CONFIG_SOFTMMU)
+    tcg_out_tlb_load(s, addr_regl, oi, label_ptr, 1);
+    tcg_out_qemu_ld_direct(s, data_regl, base, opc, is_64);
+    add_qemu_ldst_label(s, 1, oi,
+                        data_regl, addr_regl,
+                        s->code_ptr, label_ptr);
+#else
+    if (USE_GUEST_BASE) {
+        tcg_out_opc_add_d(s, base, TCG_GUEST_BASE_REG, addr_regl);
+    } else {
+        base = addr_regl;
+    }
+    tcg_out_qemu_ld_direct(s, data_regl, base, opc, is_64);
+#endif
+}
+
+static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo,
+                                   TCGReg base, MemOp opc)
+{
+    /* Byte swapping is left to middle-end expansion.  */
+    tcg_debug_assert((opc & MO_BSWAP) == 0);
+
+    switch (opc & MO_SIZE) {
+    case MO_8:
+        tcg_out_opc_st_b(s, lo, base, 0);
+        break;
+    case MO_16:
+        tcg_out_opc_st_h(s, lo, base, 0);
+        break;
+    case MO_32:
+        tcg_out_opc_st_w(s, lo, base, 0);
+        break;
+    case MO_64:
+        tcg_out_opc_st_d(s, lo, base, 0);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args)
+{
+    TCGReg addr_regl;
+    TCGReg data_regl;
+    TCGMemOpIdx oi;
+    MemOp opc;
+#if defined(CONFIG_SOFTMMU)
+    tcg_insn_unit *label_ptr[1];
+#endif
+    TCGReg base = TCG_REG_TMP0;
+
+    data_regl = *args++;
+    addr_regl = *args++;
+    oi = *args++;
+    opc = get_memop(oi);
+
+#if defined(CONFIG_SOFTMMU)
+    tcg_out_tlb_load(s, addr_regl, oi, label_ptr, 0);
+    tcg_out_qemu_st_direct(s, data_regl, base, opc);
+    add_qemu_ldst_label(s, 0, oi,
+                        data_regl, addr_regl,
+                        s->code_ptr, label_ptr);
+#else
+    if (USE_GUEST_BASE) {
+        tcg_out_opc_add_d(s, base, TCG_GUEST_BASE_REG, addr_regl);
+    } else {
+        tcg_out_opc_add_d(s, base, addr_regl, TCG_REG_ZERO);
+    }
+    tcg_out_qemu_st_direct(s, data_regl, base, opc);
+#endif
+}
+
 /*
  * Entry-points
  */
@@ -983,6 +1294,19 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_ldst(s, OPC_ST_D, a0, a1, a2);
         break;
 
+    case INDEX_op_qemu_ld_i32:
+        tcg_out_qemu_ld(s, args, false);
+        break;
+    case INDEX_op_qemu_ld_i64:
+        tcg_out_qemu_ld(s, args, true);
+        break;
+    case INDEX_op_qemu_st_i32:
+        tcg_out_qemu_st(s, args);
+        break;
+    case INDEX_op_qemu_st_i64:
+        tcg_out_qemu_st(s, args);
+        break;
+
     case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
     case INDEX_op_mov_i64:
     case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
@@ -1010,6 +1334,10 @@  static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_brcond_i64:
         return C_O0_I2(rZ, rZ);
 
+    case INDEX_op_qemu_st_i32:
+    case INDEX_op_qemu_st_i64:
+        return C_O0_I2(LZ, L);
+
     case INDEX_op_ext8s_i32:
     case INDEX_op_ext8s_i64:
     case INDEX_op_ext8u_i32:
@@ -1045,6 +1373,10 @@  static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_ld_i64:
         return C_O1_I1(r, r);
 
+    case INDEX_op_qemu_ld_i32:
+    case INDEX_op_qemu_ld_i64:
+        return C_O1_I1(r, L);
+
     case INDEX_op_andc_i32:
     case INDEX_op_andc_i64:
     case INDEX_op_eqv_i32: