diff mbox series

[RFC,v1,16/23] riscv: tcg-target: Add slowpath load and store instructions

Message ID bd8f5c781f7086c5b26e1bd4c8e6570b5534b22d.1542321076.git.alistair.francis@wdc.com (mailing list archive)
State New, archived
Headers show
Series Add RISC-V TCG backend support | expand

Commit Message

Alistair Francis Nov. 15, 2018, 10:36 p.m. UTC
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Michael Clark <mjc@sifive.com>
---
 tcg/riscv/tcg-target.inc.c | 240 +++++++++++++++++++++++++++++++++++++
 1 file changed, 240 insertions(+)

Comments

Richard Henderson Nov. 16, 2018, 9:24 a.m. UTC | #1
On 11/15/18 11:36 PM, Alistair Francis wrote:
> +static void tcg_out_mb(TCGContext *s, TCGArg a0)
> +{
> +    static const RISCVInsn fence[] = {
> +        [0 ... TCG_MO_ALL] = OPC_FENCE_RW_RW,
> +        [TCG_MO_LD_LD]     = OPC_FENCE_R_R,
> +        [TCG_MO_ST_LD]     = OPC_FENCE_W_R,
> +        [TCG_MO_LD_ST]     = OPC_FENCE_R_W,
> +        [TCG_MO_ST_ST]     = OPC_FENCE_W_W,
> +        [TCG_BAR_LDAQ]     = OPC_FENCE_R_RW,
> +        [TCG_BAR_STRL]     = OPC_FENCE_RW_W,
> +        [TCG_BAR_SC]       = OPC_FENCE_RW_RW,
> +    };
> +    tcg_out32(s, fence[a0 & TCG_MO_ALL]);
> +}
> +

TCG_MO_* and TCG_BAR_* are two different bitmasks, or'ed together.
Which you've separated by "& TCG_MO_ALL".  Thus the TCG_BAR_* constants should
not appear in this table.


> +static void * const qemu_ld_helpers[16] = {
> +    [MO_UB]   = helper_ret_ldub_mmu,
> +    [MO_SB]   = helper_ret_ldsb_mmu,
> +    [MO_LEUW] = helper_le_lduw_mmu,
> +    [MO_LESW] = helper_le_ldsw_mmu,
> +    [MO_LEUL] = helper_le_ldul_mmu,
> +    [MO_LESL] = helper_le_ldsl_mmu,
> +    [MO_LEQ]  = helper_le_ldq_mmu,
> +    [MO_BEUW] = helper_be_lduw_mmu,
> +    [MO_BESW] = helper_be_ldsw_mmu,
> +    [MO_BEUL] = helper_be_ldul_mmu,
> +    [MO_BESL] = helper_be_ldsl_mmu,
> +    [MO_BEQ]  = helper_be_ldq_mmu,
> +};

The LESL and BESL functions will not be present for rv32 -> link error.  Here
you do need an ifdef.

> +        } else {
> +            adj = cmp_off - sextract32(cmp_off, 0, 12);
> +            tcg_debug_assert(add_off - adj >= -0x1000
> +                             && add_off - adj < 0x1000);
> +
> +            tcg_out_opc_upper(s, OPC_LUI, base, adj);
> +            tcg_out_opc_reg(s, OPC_ADD, base, TCG_REG_ZERO, TCG_AREG0);

base, base, TCG_AREG0.

> +    /* Compare masked address with the TLB entry. */
> +    label_ptr[0] = s->code_ptr;
> +    tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);

Another case of a potential out-of-range branch.

It might be worthwhile to move all of this out-of-line from the start, where
that branch will always be short.  See

http://lists.nongnu.org/archive/html/qemu-devel/2018-11/msg02234.html


r~
Alistair Francis Nov. 21, 2018, 12:18 a.m. UTC | #2
On Fri, Nov 16, 2018 at 1:24 AM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 11/15/18 11:36 PM, Alistair Francis wrote:
> > +static void tcg_out_mb(TCGContext *s, TCGArg a0)
> > +{
> > +    static const RISCVInsn fence[] = {
> > +        [0 ... TCG_MO_ALL] = OPC_FENCE_RW_RW,
> > +        [TCG_MO_LD_LD]     = OPC_FENCE_R_R,
> > +        [TCG_MO_ST_LD]     = OPC_FENCE_W_R,
> > +        [TCG_MO_LD_ST]     = OPC_FENCE_R_W,
> > +        [TCG_MO_ST_ST]     = OPC_FENCE_W_W,
> > +        [TCG_BAR_LDAQ]     = OPC_FENCE_R_RW,
> > +        [TCG_BAR_STRL]     = OPC_FENCE_RW_W,
> > +        [TCG_BAR_SC]       = OPC_FENCE_RW_RW,
> > +    };
> > +    tcg_out32(s, fence[a0 & TCG_MO_ALL]);
> > +}
> > +
>
> TCG_MO_* and TCG_BAR_* are two different bitmasks, or'ed together.
> Which you've separated by "& TCG_MO_ALL".  Thus the TCG_BAR_* constants should
> not appear in this table.
>
>
> > +static void * const qemu_ld_helpers[16] = {
> > +    [MO_UB]   = helper_ret_ldub_mmu,
> > +    [MO_SB]   = helper_ret_ldsb_mmu,
> > +    [MO_LEUW] = helper_le_lduw_mmu,
> > +    [MO_LESW] = helper_le_ldsw_mmu,
> > +    [MO_LEUL] = helper_le_ldul_mmu,
> > +    [MO_LESL] = helper_le_ldsl_mmu,
> > +    [MO_LEQ]  = helper_le_ldq_mmu,
> > +    [MO_BEUW] = helper_be_lduw_mmu,
> > +    [MO_BESW] = helper_be_ldsw_mmu,
> > +    [MO_BEUL] = helper_be_ldul_mmu,
> > +    [MO_BESL] = helper_be_ldsl_mmu,
> > +    [MO_BEQ]  = helper_be_ldq_mmu,
> > +};
>
> The LESL and BESL functions will not be present for rv32 -> link error.  Here
> you do need an ifdef.
>
> > +        } else {
> > +            adj = cmp_off - sextract32(cmp_off, 0, 12);
> > +            tcg_debug_assert(add_off - adj >= -0x1000
> > +                             && add_off - adj < 0x1000);
> > +
> > +            tcg_out_opc_upper(s, OPC_LUI, base, adj);
> > +            tcg_out_opc_reg(s, OPC_ADD, base, TCG_REG_ZERO, TCG_AREG0);
>
> base, base, TCG_AREG0.
>
> > +    /* Compare masked address with the TLB entry. */
> > +    label_ptr[0] = s->code_ptr;
> > +    tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
>
> Another case of a potential out-of-range branch.
>
> It might be worthwhile to move all of this out-of-line from the start, where
> that branch will always be short.  See
>
> http://lists.nongnu.org/archive/html/qemu-devel/2018-11/msg02234.html

That does look cool, but it's not in the tree yet.

Otherwise can we directly just call tcg_out_brcond()?

PS: Thanks for your review. I have gone through most of your comments.
I now don't see any segfaults when running. My guest still doesn't
boot, but it's getting further then it used to :)

Alistair

>
>
> r~
Richard Henderson Nov. 21, 2018, 7:43 a.m. UTC | #3
On 11/21/18 1:18 AM, Alistair Francis wrote:
> On Fri, Nov 16, 2018 at 1:24 AM Richard Henderson
> <richard.henderson@linaro.org> wrote:
>>
>> On 11/15/18 11:36 PM, Alistair Francis wrote:
>>> +static void tcg_out_mb(TCGContext *s, TCGArg a0)
>>> +{
>>> +    static const RISCVInsn fence[] = {
>>> +        [0 ... TCG_MO_ALL] = OPC_FENCE_RW_RW,
>>> +        [TCG_MO_LD_LD]     = OPC_FENCE_R_R,
>>> +        [TCG_MO_ST_LD]     = OPC_FENCE_W_R,
>>> +        [TCG_MO_LD_ST]     = OPC_FENCE_R_W,
>>> +        [TCG_MO_ST_ST]     = OPC_FENCE_W_W,
>>> +        [TCG_BAR_LDAQ]     = OPC_FENCE_R_RW,
>>> +        [TCG_BAR_STRL]     = OPC_FENCE_RW_W,
>>> +        [TCG_BAR_SC]       = OPC_FENCE_RW_RW,
>>> +    };
>>> +    tcg_out32(s, fence[a0 & TCG_MO_ALL]);
>>> +}
>>> +
>>
>> TCG_MO_* and TCG_BAR_* are two different bitmasks, or'ed together.
>> Which you've separated by "& TCG_MO_ALL".  Thus the TCG_BAR_* constants should
>> not appear in this table.
>>
>>
>>> +static void * const qemu_ld_helpers[16] = {
>>> +    [MO_UB]   = helper_ret_ldub_mmu,
>>> +    [MO_SB]   = helper_ret_ldsb_mmu,
>>> +    [MO_LEUW] = helper_le_lduw_mmu,
>>> +    [MO_LESW] = helper_le_ldsw_mmu,
>>> +    [MO_LEUL] = helper_le_ldul_mmu,
>>> +    [MO_LESL] = helper_le_ldsl_mmu,
>>> +    [MO_LEQ]  = helper_le_ldq_mmu,
>>> +    [MO_BEUW] = helper_be_lduw_mmu,
>>> +    [MO_BESW] = helper_be_ldsw_mmu,
>>> +    [MO_BEUL] = helper_be_ldul_mmu,
>>> +    [MO_BESL] = helper_be_ldsl_mmu,
>>> +    [MO_BEQ]  = helper_be_ldq_mmu,
>>> +};
>>
>> The LESL and BESL functions will not be present for rv32 -> link error.  Here
>> you do need an ifdef.
>>
>>> +        } else {
>>> +            adj = cmp_off - sextract32(cmp_off, 0, 12);
>>> +            tcg_debug_assert(add_off - adj >= -0x1000
>>> +                             && add_off - adj < 0x1000);
>>> +
>>> +            tcg_out_opc_upper(s, OPC_LUI, base, adj);
>>> +            tcg_out_opc_reg(s, OPC_ADD, base, TCG_REG_ZERO, TCG_AREG0);
>>
>> base, base, TCG_AREG0.
>>
>>> +    /* Compare masked address with the TLB entry. */
>>> +    label_ptr[0] = s->code_ptr;
>>> +    tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
>>
>> Another case of a potential out-of-range branch.
>>
>> It might be worthwhile to move all of this out-of-line from the start, where
>> that branch will always be short.  See
>>
>> http://lists.nongnu.org/archive/html/qemu-devel/2018-11/msg02234.html
> 
> That does look cool, but it's not in the tree yet.

No, but it'll probably be in tree before this code is.  ;-)
I'll put it into my tcg-next-for-4.0 branch, and you can base off that.

> Otherwise can we directly just call tcg_out_brcond()?

Not quite, because there's no label structure.  But you can break out
subroutines and use those.

> PS: Thanks for your review. I have gone through most of your comments.
> I now don't see any segfaults when running. My guest still doesn't
> boot, but it's getting further then it used to :)

Excellent.


r~
diff mbox series

Patch

diff --git a/tcg/riscv/tcg-target.inc.c b/tcg/riscv/tcg-target.inc.c
index b449e17295..5fe6935e24 100644
--- a/tcg/riscv/tcg-target.inc.c
+++ b/tcg/riscv/tcg-target.inc.c
@@ -718,6 +718,246 @@  static void tcg_out_call(TCGContext *s, tcg_insn_unit *arg)
     tcg_out_call_int(s, arg, false);
 }
 
+static void tcg_out_mb(TCGContext *s, TCGArg a0)
+{
+    static const RISCVInsn fence[] = {
+        [0 ... TCG_MO_ALL] = OPC_FENCE_RW_RW,
+        [TCG_MO_LD_LD]     = OPC_FENCE_R_R,
+        [TCG_MO_ST_LD]     = OPC_FENCE_W_R,
+        [TCG_MO_LD_ST]     = OPC_FENCE_R_W,
+        [TCG_MO_ST_ST]     = OPC_FENCE_W_W,
+        [TCG_BAR_LDAQ]     = OPC_FENCE_R_RW,
+        [TCG_BAR_STRL]     = OPC_FENCE_RW_W,
+        [TCG_BAR_SC]       = OPC_FENCE_RW_RW,
+    };
+    tcg_out32(s, fence[a0 & TCG_MO_ALL]);
+}
+
+/*
+ * Load/store and TLB
+ */
+
+#if defined(CONFIG_SOFTMMU)
+#include "tcg-ldst.inc.c"
+
+/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
+ *                                     TCGMemOpIdx oi, uintptr_t ra)
+ */
+static void * const qemu_ld_helpers[16] = {
+    [MO_UB]   = helper_ret_ldub_mmu,
+    [MO_SB]   = helper_ret_ldsb_mmu,
+    [MO_LEUW] = helper_le_lduw_mmu,
+    [MO_LESW] = helper_le_ldsw_mmu,
+    [MO_LEUL] = helper_le_ldul_mmu,
+    [MO_LESL] = helper_le_ldsl_mmu,
+    [MO_LEQ]  = helper_le_ldq_mmu,
+    [MO_BEUW] = helper_be_lduw_mmu,
+    [MO_BESW] = helper_be_ldsw_mmu,
+    [MO_BEUL] = helper_be_ldul_mmu,
+    [MO_BESL] = helper_be_ldsl_mmu,
+    [MO_BEQ]  = helper_be_ldq_mmu,
+};
+
+/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
+ *                                     uintxx_t val, TCGMemOpIdx oi,
+ *                                     uintptr_t ra)
+ */
+static void * const qemu_st_helpers[16] = {
+    [MO_UB]   = helper_ret_stb_mmu,
+    [MO_LEUW] = helper_le_stw_mmu,
+    [MO_LEUL] = helper_le_stl_mmu,
+    [MO_LEQ]  = helper_le_stq_mmu,
+    [MO_BEUW] = helper_be_stw_mmu,
+    [MO_BEUL] = helper_be_stl_mmu,
+    [MO_BEQ]  = helper_be_stq_mmu,
+};
+
+static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
+                             TCGReg addrh, TCGMemOpIdx oi,
+                             tcg_insn_unit **label_ptr, bool is_load)
+{
+    TCGMemOp opc = get_memop(oi);
+    unsigned s_bits = opc & MO_SIZE;
+    unsigned a_bits = get_alignment_bits(opc);
+    target_ulong mask;
+    int mem_index = get_mmuidx(oi);
+    int cmp_off
+        = (is_load
+           ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
+           : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
+    int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
+    int addend_offset = (offsetof(CPUTLBEntry, addend)) -
+                            (is_load ? offsetof(CPUTLBEntry, addr_read)
+                                     : offsetof(CPUTLBEntry, addr_write));
+    RISCVInsn load_cmp_op = (TARGET_LONG_BITS == 64 ? OPC_LD :
+                             TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW);
+    RISCVInsn load_add_op = TCG_TARGET_REG_BITS == 64 ? OPC_LD : OPC_LW;
+    TCGReg base = TCG_AREG0;
+    TCGReg cmpr;
+
+    /* We don't support oversize guests */
+    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+        g_assert_not_reached();
+    }
+
+    /* We don't support unaligned accesses. */
+    if (a_bits < s_bits) {
+        a_bits = s_bits;
+    }
+    mask = (target_ulong)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
+
+
+    /* Compensate for very large offsets.  */
+    if (add_off >= 0x1000) {
+        int adj;
+        base = TCG_REG_TMP2;
+        if (cmp_off <= 2 * 0xfff) {
+            adj = 0xfff;
+            tcg_out_opc_imm(s, OPC_ADDI, base, TCG_AREG0, adj);
+        } else {
+            adj = cmp_off - sextract32(cmp_off, 0, 12);
+            tcg_debug_assert(add_off - adj >= -0x1000
+                             && add_off - adj < 0x1000);
+
+            tcg_out_opc_upper(s, OPC_LUI, base, adj);
+            tcg_out_opc_reg(s, OPC_ADD, base, TCG_REG_ZERO, TCG_AREG0);
+        }
+        add_off -= adj;
+        cmp_off -= adj;
+    }
+
+    /* Extract the page index.  */
+    if (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS < 12) {
+        tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP0, addrl,
+                        TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+        tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP0, TCG_REG_TMP0,
+                        MAKE_64BIT_MASK(CPU_TLB_ENTRY_BITS, CPU_TLB_BITS));
+    } else {
+        tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP0, addrl, TARGET_PAGE_BITS);
+        tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP0, TCG_REG_TMP0,
+                        MAKE_64BIT_MASK(0, CPU_TLB_BITS));
+        tcg_out_opc_imm(s, OPC_SLLI, TCG_REG_TMP0, TCG_REG_TMP0,
+                        CPU_TLB_ENTRY_BITS);
+    }
+
+    /* Add that to the base address to index the tlb.  */
+    tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, base, TCG_REG_TMP0);
+    base = TCG_REG_TMP2;
+
+    /* Load the tlb comparator and the addend.  */
+    tcg_out_ldst(s, load_cmp_op, TCG_REG_TMP0, base, cmp_off);
+    tcg_out_ldst(s, load_cmp_op, TCG_REG_TMP2, base, add_off);
+
+    /* Clear the non-page, non-alignment bits from the address.  */
+    if (mask == sextract64(mask, 0, 12)) {
+        tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addrl, mask);
+    } else {
+        tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, mask);
+        tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addrl);
+     }
+
+    /* Compare masked address with the TLB entry. */
+    label_ptr[0] = s->code_ptr;
+    tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
+
+    /* TLB Hit - translate address using addend.  */
+    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+        tcg_out_ext32u(s, TCG_REG_TMP0, addrl);
+        addrl = TCG_REG_TMP0;
+    }
+    tcg_out_opc_reg(s, OPC_ADD, TCG_REG_L0, TCG_REG_TMP2, addrl);
+}
+
+static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi,
+                                TCGType ext,
+                                TCGReg datalo, TCGReg datahi,
+                                TCGReg addrlo, TCGReg addrhi,
+                                void *raddr, tcg_insn_unit **label_ptr)
+{
+    TCGLabelQemuLdst *label = new_ldst_label(s);
+
+    label->is_ld = is_ld;
+    label->oi = oi;
+    label->type = ext;
+    label->datalo_reg = datalo;
+    label->datahi_reg = datahi;
+    label->addrlo_reg = addrlo;
+    label->addrhi_reg = addrhi;
+    label->raddr = raddr;
+    label->label_ptr[0] = label_ptr[0];
+}
+
+static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+    TCGMemOpIdx oi = l->oi;
+    TCGMemOp opc = get_memop(oi);
+    TCGReg a0 = tcg_target_call_iarg_regs[0];
+    TCGReg a1 = tcg_target_call_iarg_regs[1];
+    TCGReg a2 = tcg_target_call_iarg_regs[2];
+    TCGReg a3 = tcg_target_call_iarg_regs[3];
+
+    /* We don't support oversize guests */
+    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+        g_assert_not_reached();
+    }
+
+    /* resolve label address */
+    reloc_sbimm12(l->label_ptr[0], s->code_ptr);
+
+    /* call load helper */
+    tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0);
+    tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg);
+    tcg_out_movi(s, TCG_TYPE_PTR, a2, oi);
+    tcg_out_movi(s, TCG_TYPE_PTR, a3, (tcg_target_long)l->raddr);
+
+    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
+    tcg_out_mov(s, (opc & MO_SIZE) == MO_64, l->datalo_reg, a0);
+
+    tcg_out_goto(s, l->raddr);
+}
+
+static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+    TCGMemOpIdx oi = l->oi;
+    TCGMemOp opc = get_memop(oi);
+    TCGMemOp s_bits = opc & MO_SIZE;
+    TCGReg a0 = tcg_target_call_iarg_regs[0];
+    TCGReg a1 = tcg_target_call_iarg_regs[1];
+    TCGReg a2 = tcg_target_call_iarg_regs[2];
+    TCGReg a3 = tcg_target_call_iarg_regs[3];
+    TCGReg a4 = tcg_target_call_iarg_regs[4];
+
+    /* We don't support oversize guests */
+    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+        g_assert_not_reached();
+    }
+
+    /* resolve label address */
+    reloc_sbimm12(l->label_ptr[0], s->code_ptr);
+
+    /* call store helper */
+    tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0);
+    tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg);
+    tcg_out_mov(s, TCG_TYPE_PTR, a2, l->datalo_reg);
+    switch (s_bits) {
+    case MO_8:
+        tcg_out_ext8u(s, a2, a2);
+        break;
+    case MO_16:
+        tcg_out_ext16u(s, a2, a2);
+        break;
+    default:
+        break;
+    }
+    tcg_out_movi(s, TCG_TYPE_PTR, a3, oi);
+    tcg_out_movi(s, TCG_TYPE_PTR, a4, (tcg_target_long)l->raddr);
+
+    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
+
+    tcg_out_goto(s, l->raddr);
+}
+#endif /* CONFIG_SOFTMMU */
+
 void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
                               uintptr_t addr)
 {