Message ID | 20240904142739.854-6-zhiwei_liu@linux.alibaba.com
---|---
State | New, archived
Series | Add support for vector
On 9/4/24 07:27, LIU Zhiwei wrote:
> @@ -811,31 +849,52 @@ static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg)
>       tcg_out_ext32s(s, ret, arg);
>   }
>
> -static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
> -                         TCGReg addr, intptr_t offset)
> +static intptr_t split_offset_scalar(TCGContext *s, TCGReg *addr,
> +                                    intptr_t offset)
>   {
>       intptr_t imm12 = sextreg(offset, 0, 12);
>
>       if (offset != imm12) {
>           intptr_t diff = tcg_pcrel_diff(s, (void *)offset);
>
> -        if (addr == TCG_REG_ZERO && diff == (int32_t)diff) {
> +        if (*addr == TCG_REG_ZERO && diff == (int32_t)diff) {
>               imm12 = sextreg(diff, 0, 12);
>               tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP2, diff - imm12);
>           } else {
>               tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12);
> -            if (addr != TCG_REG_ZERO) {
> -                tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, addr);
> +            if (*addr != TCG_REG_ZERO) {
> +                tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, *addr);
>               }
>           }
> -        addr = TCG_REG_TMP2;
> +        *addr = TCG_REG_TMP2;
> +    }
> +    return imm12;
> +}
> +
> +static void split_offset_vector(TCGContext *s, TCGReg *addr, intptr_t offset)
> +{
> +    if (offset != 0) {
> +        if (offset == sextreg(offset, 0, 12)) {
> +            tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP0, *addr, offset);
> +        } else {
> +            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
> +            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP0, *addr);
> +        }
> +        *addr = TCG_REG_TMP0;
>       }
> +}
> +
> +static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
> +                         TCGReg addr, intptr_t offset)
> +{
> +    intptr_t imm12;
>
>       switch (opc) {
>       case OPC_SB:
>       case OPC_SH:
>       case OPC_SW:
>       case OPC_SD:
> +        imm12 = split_offset_scalar(s, &addr, offset);
>           tcg_out_opc_store(s, opc, addr, data, imm12);
>           break;
>       case OPC_LB:
> @@ -845,8 +904,31 @@ static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
>       case OPC_LW:
>       case OPC_LWU:
>       case OPC_LD:
> +        imm12 = split_offset_scalar(s, &addr, offset);
>           tcg_out_opc_imm(s, opc, data, addr, imm12);
>           break;
> +    case OPC_VSE8_V:
> +    case OPC_VSE16_V:
> +    case OPC_VSE32_V:
> +    case OPC_VSE64_V:
> +    case OPC_VS1R_V:
> +    case OPC_VS2R_V:
> +    case OPC_VS4R_V:
> +    case OPC_VS8R_V:
> +        split_offset_vector(s, &addr, offset);
> +        tcg_out_opc_ldst_vec(s, opc, data, addr, true);
> +        break;
> +    case OPC_VLE8_V:
> +    case OPC_VLE16_V:
> +    case OPC_VLE32_V:
> +    case OPC_VLE64_V:
> +    case OPC_VL1RE64_V:
> +    case OPC_VL2RE64_V:
> +    case OPC_VL4RE64_V:
> +    case OPC_VL8RE64_V:
> +        split_offset_vector(s, &addr, offset);
> +        tcg_out_opc_ldst_vec(s, opc, data, addr, true);
> +        break;
>       default:
>           g_assert_not_reached();
>       }

This is more complicated than it needs to be, calling a combined function,
then using a switch to separate, then calling separate functions.
Calling separate functions in the first place is simpler.  E.g.

static void tcg_out_vec_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
                             TCGReg addr, intptr_t offset)
{
    tcg_debug_assert(data >= TCG_REG_V0);
    tcg_debug_assert(addr < TCG_REG_V0);

    if (offset) {
        tcg_debug_assert(addr != TCG_REG_ZERO);
        if (offset == sextreg(offset, 0, 12)) {
            tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP0, addr, offset);
        } else {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP0, addr);
        }
        addr = TCG_REG_TMP0;
    }

    tcg_out32(s, opc | ((data & 0x1f) << 7) | (addr << 15) | (1 << 25));
}

> static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
>                        TCGReg arg1, intptr_t arg2)
> {
> -    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_LW : OPC_LD;
> +    RISCVInsn insn;
> +
> +    if (type < TCG_TYPE_V64) {
> +        insn = (type == TCG_TYPE_I32) ? OPC_LW : OPC_LD;
> +    } else {
> +        int nf = get_vec_type_bytes(type) / riscv_vlenb;
> +
> +        switch (nf) {
> +        case 1:
> +            insn = OPC_VL1RE64_V;
> +            break;
> +        case 2:
> +            insn = OPC_VL2RE64_V;
> +            break;
> +        case 4:
> +            insn = OPC_VL4RE64_V;
> +            break;
> +        case 8:
> +            insn = OPC_VL8RE64_V;
> +            break;
> +        default:
> +        {
> +            int prev_vsew = riscv_set_vec_config_vl(s, type);
> +
> +            switch (prev_vsew) {
> +            case MO_8:
> +                insn = OPC_VLE8_V;
> +                break;
> +            case MO_16:
> +                insn = OPC_VLE16_V;
> +                break;
> +            case MO_32:
> +                insn = OPC_VLE32_V;
> +                break;
> +            case MO_64:
> +                insn = OPC_VLE64_V;
> +                break;
> +            default:
> +                g_assert_not_reached();
> +            }
> +        }
> +        break;

This can be simplified:

    switch (type) {
    case TCG_TYPE_I32:
        tcg_out_ldst(s, OPC_LW, data, base, offset);
        break;
    case TCG_TYPE_I64:
        tcg_out_ldst(s, OPC_LD, data, base, offset);
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        if (type >= riscv_lg2_vlenb) {
            static const RISCVInsn whole_reg_ld[] = {
                OPC_VL1RE64_V, OPC_VL2RE64_V, OPC_VL4RE64_V, OPC_VL8RE64_V
            };
            unsigned idx = type - riscv_lg2_vlenb;
            insn = whole_reg_ld[idx];
        } else {
            static const RISCVInsn unit_stride_ld[] = {
                OPC_VLE8_V, OPC_VLE16_V, OPC_VLE32_V, OPC_VLE64_V
            };
            MemOp prev_vsew = set_vtype_len(s, type);
            insn = unit_stride_ld[prev_vsew];
        }
        tcg_out_vec_ldst(s, insn, data, base, offset);
        break;
    default:
        g_assert_not_reached();
    }

and similar for store.


r~
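For readers decoding the final tcg_out32() line in the suggestion above, here is a minimal, self-contained sketch (not QEMU code; pack_vec_ldst is a made-up name for this example) of how those shifts map onto the RVV memory-instruction fields, assuming the layout from the RISC-V "V" spec: vd/vs3 in bits 11:7, rs1 in bits 19:15, and vm (1 = unmasked) in bit 25, with the opcode constant supplying the remaining fixed bits.

#include <stdint.h>
#include <stdio.h>

/* Sketch of the field packing in the tcg_out32() line above. */
static uint32_t pack_vec_ldst(uint32_t opc, unsigned vreg, unsigned xreg)
{
    return opc | ((vreg & 0x1f) << 7) | (xreg << 15) | (1u << 25);
}

int main(void)
{
    /* 0x6007: LOAD-FP major opcode plus the 32-bit width funct3,
       i.e. the fixed bits of vle32.v.  Expected result: 0x02056107. */
    uint32_t insn = pack_vec_ldst(0x6007, 2, 10);   /* vle32.v v2, (a0) */

    printf("insn = 0x%08x: vd=%u rs1=%u vm=%u\n", insn,
           (insn >> 7) & 0x1f, (insn >> 15) & 0x1f, (insn >> 25) & 1);
    return 0;
}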
On 2024/9/5 14:39, Richard Henderson wrote:
> On 9/4/24 07:27, LIU Zhiwei wrote:
> [...]
>
> This is more complicated than it needs to be, calling a combined
> function, then using a switch to separate, then calling separate
> functions. Calling separate functions in the first place is simpler.
> [...]
>
> This can be simplified:
> [...]
> and similar for store.

Great. We will take this way.

Zhiwei
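The index arithmetic being adopted here works because, assuming the usual TCGType enum order (I32, I64, I128, V64, V128, V256), a vector type's enum value equals the log2 of its size in bytes: TCG_TYPE_V64 is 3 (2^3 = 8 bytes), V128 is 4, V256 is 5. With riscv_lg2_vlenb holding log2 of the register length in bytes, the difference is the log2 of the whole-register count. A standalone sketch of just that arithmetic, with local stand-in names rather than the patch's identifiers:

#include <stdio.h>

/* Why "type - riscv_lg2_vlenb" indexes the whole-register table. */
enum { TYPE_V64 = 3, TYPE_V128 = 4, TYPE_V256 = 5 };

static const char *whole_reg_ld[] = {
    "vl1re64.v", "vl2re64.v", "vl4re64.v", "vl8re64.v"
};

int main(void)
{
    int lg2_vlenb = 4;                  /* VLEN = 128 bits, 16 bytes */

    for (int type = TYPE_V64; type <= TYPE_V256; type++) {
        if (type >= lg2_vlenb) {
            int idx = type - lg2_vlenb; /* log2 of the register count */
            printf("V%d: %d whole register(s) -> %s\n",
                   8 << type, 1 << idx, whole_reg_ld[idx]);
        } else {
            printf("V%d: fraction of a register -> vsetvli + unit-stride vleN.v\n",
                   8 << type);
        }
    }
    return 0;
}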
diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h
index aac5ceee2b..d73a62b0f2 100644
--- a/tcg/riscv/tcg-target-con-set.h
+++ b/tcg/riscv/tcg-target-con-set.h
@@ -21,3 +21,5 @@ C_O1_I2(r, rZ, rZ)
 C_N1_I2(r, r, rM)
 C_O1_I4(r, r, rI, rM, rM)
 C_O2_I4(r, r, rZ, rZ, rM, rM)
+C_O0_I2(v, r)
+C_O1_I1(v, r)
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index df96d350a3..4b1079fc6f 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -174,6 +174,11 @@ static bool tcg_target_const_match(int64_t val, int ct,
 #define V_OPMVX (0x6 << 12)
 #define V_OPCFG (0x7 << 12)
 
+/* NF <= 7 && BNF >= 0 */
+#define V_NF(x) (x << 29)
+#define V_UNIT_STRIDE (0x0 << 20)
+#define V_UNIT_STRIDE_WHOLE_REG (0x8 << 20)
+
 typedef enum {
     VLMUL_M1 = 0, /* LMUL=1 */
     VLMUL_M2,     /* LMUL=2 */
@@ -285,6 +290,25 @@ typedef enum {
     OPC_VSETVLI = 0x57 | V_OPCFG,
     OPC_VSETIVLI = 0xc0000057 | V_OPCFG,
     OPC_VSETVL = 0x80000057 | V_OPCFG,
+
+    OPC_VLE8_V = 0x7 | V_UNIT_STRIDE,
+    OPC_VLE16_V = 0x5007 | V_UNIT_STRIDE,
+    OPC_VLE32_V = 0x6007 | V_UNIT_STRIDE,
+    OPC_VLE64_V = 0x7007 | V_UNIT_STRIDE,
+    OPC_VSE8_V = 0x27 | V_UNIT_STRIDE,
+    OPC_VSE16_V = 0x5027 | V_UNIT_STRIDE,
+    OPC_VSE32_V = 0x6027 | V_UNIT_STRIDE,
+    OPC_VSE64_V = 0x7027 | V_UNIT_STRIDE,
+
+    OPC_VL1RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(0),
+    OPC_VL2RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(1),
+    OPC_VL4RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(3),
+    OPC_VL8RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(7),
+
+    OPC_VS1R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(0),
+    OPC_VS2R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(1),
+    OPC_VS4R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(3),
+    OPC_VS8R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(7),
 } RISCVInsn;
 
 /*
@@ -646,6 +670,20 @@ static void tcg_target_set_vec_config(TCGContext *s, TCGType type,
     }
 }
 
+static int riscv_set_vec_config_vl(TCGContext *s, TCGType type)
+{
+    int prev_vsew = s->riscv_host_vtype < 0 ? MO_8 :
+                    ((s->riscv_host_vtype >> 3) & 0x7);
+    tcg_target_set_vec_config(s, type, prev_vsew);
+    return prev_vsew;
+}
+
+static void riscv_set_vec_config_vl_vece(TCGContext *s, TCGType type,
+                                         unsigned vece)
+{
+    tcg_target_set_vec_config(s, type, vece);
+}
+
 /*
  * TCG intrinsics
  */
@@ -811,31 +849,52 @@ static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg)
     tcg_out_ext32s(s, ret, arg);
 }
 
-static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
-                         TCGReg addr, intptr_t offset)
+static intptr_t split_offset_scalar(TCGContext *s, TCGReg *addr,
+                                    intptr_t offset)
 {
     intptr_t imm12 = sextreg(offset, 0, 12);
 
     if (offset != imm12) {
         intptr_t diff = tcg_pcrel_diff(s, (void *)offset);
 
-        if (addr == TCG_REG_ZERO && diff == (int32_t)diff) {
+        if (*addr == TCG_REG_ZERO && diff == (int32_t)diff) {
             imm12 = sextreg(diff, 0, 12);
             tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP2, diff - imm12);
         } else {
             tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12);
-            if (addr != TCG_REG_ZERO) {
-                tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, addr);
+            if (*addr != TCG_REG_ZERO) {
+                tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, *addr);
             }
         }
-        addr = TCG_REG_TMP2;
+        *addr = TCG_REG_TMP2;
+    }
+    return imm12;
+}
+
+static void split_offset_vector(TCGContext *s, TCGReg *addr, intptr_t offset)
+{
+    if (offset != 0) {
+        if (offset == sextreg(offset, 0, 12)) {
+            tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP0, *addr, offset);
+        } else {
+            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
+            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP0, *addr);
+        }
+        *addr = TCG_REG_TMP0;
     }
+}
+
+static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
+                         TCGReg addr, intptr_t offset)
+{
+    intptr_t imm12;
 
     switch (opc) {
     case OPC_SB:
     case OPC_SH:
     case OPC_SW:
     case OPC_SD:
+        imm12 = split_offset_scalar(s, &addr, offset);
         tcg_out_opc_store(s, opc, addr, data, imm12);
         break;
     case OPC_LB:
@@ -845,8 +904,31 @@ static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
     case OPC_LW:
     case OPC_LWU:
     case OPC_LD:
+        imm12 = split_offset_scalar(s, &addr, offset);
         tcg_out_opc_imm(s, opc, data, addr, imm12);
         break;
+    case OPC_VSE8_V:
+    case OPC_VSE16_V:
+    case OPC_VSE32_V:
+    case OPC_VSE64_V:
+    case OPC_VS1R_V:
+    case OPC_VS2R_V:
+    case OPC_VS4R_V:
+    case OPC_VS8R_V:
+        split_offset_vector(s, &addr, offset);
+        tcg_out_opc_ldst_vec(s, opc, data, addr, true);
+        break;
+    case OPC_VLE8_V:
+    case OPC_VLE16_V:
+    case OPC_VLE32_V:
+    case OPC_VLE64_V:
+    case OPC_VL1RE64_V:
+    case OPC_VL2RE64_V:
+    case OPC_VL4RE64_V:
+    case OPC_VL8RE64_V:
+        split_offset_vector(s, &addr, offset);
+        tcg_out_opc_ldst_vec(s, opc, data, addr, true);
+        break;
     default:
         g_assert_not_reached();
     }
@@ -855,14 +937,101 @@ static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
                        TCGReg arg1, intptr_t arg2)
 {
-    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_LW : OPC_LD;
+    RISCVInsn insn;
+
+    if (type < TCG_TYPE_V64) {
+        insn = (type == TCG_TYPE_I32) ? OPC_LW : OPC_LD;
+    } else {
+        int nf = get_vec_type_bytes(type) / riscv_vlenb;
+
+        switch (nf) {
+        case 1:
+            insn = OPC_VL1RE64_V;
+            break;
+        case 2:
+            insn = OPC_VL2RE64_V;
+            break;
+        case 4:
+            insn = OPC_VL4RE64_V;
+            break;
+        case 8:
+            insn = OPC_VL8RE64_V;
+            break;
+        default:
+        {
+            int prev_vsew = riscv_set_vec_config_vl(s, type);
+
+            switch (prev_vsew) {
+            case MO_8:
+                insn = OPC_VLE8_V;
+                break;
+            case MO_16:
+                insn = OPC_VLE16_V;
+                break;
+            case MO_32:
+                insn = OPC_VLE32_V;
+                break;
+            case MO_64:
+                insn = OPC_VLE64_V;
+                break;
+            default:
+                g_assert_not_reached();
+            }
+        }
+        break;
+        }
+    }
     tcg_out_ldst(s, insn, arg, arg1, arg2);
 }
 
 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                        TCGReg arg1, intptr_t arg2)
 {
-    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_SW : OPC_SD;
+    RISCVInsn insn;
+
+    if (type < TCG_TYPE_V64) {
+        insn = (type == TCG_TYPE_I32) ? OPC_SW : OPC_SD;
+        tcg_out_ldst(s, insn, arg, arg1, arg2);
+    } else {
+        int nf = get_vec_type_bytes(type) / riscv_vlenb;
+
+        switch (nf) {
+        case 1:
+            insn = OPC_VS1R_V;
+            break;
+        case 2:
+            insn = OPC_VS2R_V;
+            break;
+        case 4:
+            insn = OPC_VS4R_V;
+            break;
+        case 8:
+            insn = OPC_VS8R_V;
+            break;
+        default:
+        {
+            int prev_vsew = riscv_set_vec_config_vl(s, type);
+
+            switch (prev_vsew) {
+            case MO_8:
+                insn = OPC_VSE8_V;
+                break;
+            case MO_16:
+                insn = OPC_VSE16_V;
+                break;
+            case MO_32:
+                insn = OPC_VSE32_V;
+                break;
+            case MO_64:
+                insn = OPC_VSE64_V;
+                break;
+            default:
+                g_assert_not_reached();
+            }
+        }
+        break;
+        }
+    }
     tcg_out_ldst(s, insn, arg, arg1, arg2);
 }
 
@@ -2057,7 +2226,20 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                            const TCGArg args[TCG_MAX_OP_ARGS],
                            const int const_args[TCG_MAX_OP_ARGS])
 {
+    TCGType type = vecl + TCG_TYPE_V64;
+    TCGArg a0, a1, a2;
+
+    a0 = args[0];
+    a1 = args[1];
+    a2 = args[2];
+
     switch (opc) {
+    case INDEX_op_ld_vec:
+        tcg_out_ld(s, type, a0, a1, a2);
+        break;
+    case INDEX_op_st_vec:
+        tcg_out_st(s, type, a0, a1, a2);
+        break;
     case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
     case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
     default:
@@ -2221,6 +2403,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_qemu_st_a64_i64:
         return C_O0_I2(rZ, r);
 
+    case INDEX_op_st_vec:
+        return C_O0_I2(v, r);
+    case INDEX_op_ld_vec:
+        return C_O1_I1(v, r);
     default:
         g_assert_not_reached();
     }
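One detail of the diff worth spelling out: the whole-register opcodes encode the register count in the NF field (bits 31:29), which per the RVV spec holds nf - 1. That is why the patch pairs VL2RE64_V with V_NF(1), VL4RE64_V with V_NF(3), and VL8RE64_V with V_NF(7). A standalone sketch (not QEMU code) that rebuilds those constants and reads the field back:

#include <stdint.h>
#include <stdio.h>

/* Rebuild the whole-register load opcodes from the diff's macros. */
#define V_NF(x)                 ((uint32_t)(x) << 29)
#define V_UNIT_STRIDE_WHOLE_REG (0x8u << 20)

int main(void)
{
    static const unsigned nregs[] = { 1, 2, 4, 8 };

    for (int i = 0; i < 4; i++) {
        uint32_t opc = 0x2007007u | V_UNIT_STRIDE_WHOLE_REG
                     | V_NF(nregs[i] - 1);       /* NF holds nf - 1 */
        printf("vl%ure64.v: fixed bits 0x%08x, NF field = %u\n",
               nregs[i], opc, (opc >> 29) & 7);
    }
    return 0;
}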