Message ID | 20200625170018.64265-5-ljp@linux.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add several Power ISA 3.1 32/64-bit vector instructions | expand |
On 6/25/20 10:00 AM, Lijun Pan wrote: > vmulld: Vector Multiply Low Doubleword. > > Signed-off-by: Lijun Pan <ljp@linux.ibm.com> > --- > v3: use tcg_gen_gvec_mul() > > target/ppc/translate/vmx-impl.inc.c | 1 + > target/ppc/translate/vmx-ops.inc.c | 4 ++++ This part looks fine. > tcg/ppc/tcg-target.h | 2 ++ > tcg/ppc/tcg-target.inc.c | 7 +++++-- This part must be a separate patch. > @@ -3149,6 +3150,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, > static const uint32_t > add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM }, > sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM }, > + mul_op[4] = { 0, 0, VMULUWM, VMULLD }, > neg_op[4] = { 0, 0, VNEGW, VNEGD }, > eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD }, > ne_op[4] = { VCMPNEB, VCMPNEH, VCMPNEW, 0 }, > @@ -3199,8 +3201,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, > a1 = 0; > break; > case INDEX_op_mul_vec: > - tcg_debug_assert(vece == MO_32 && have_isa_2_07); > - insn = VMULUWM; > + tcg_debug_assert((vece == MO_32 && have_isa_2_07) || > + (vece == MO_64 && have_isa_3_10)); > + insn = mul_op[vece]; I think it would be ok to just index mul_op here, since the real isa check is to be done elsewhere. Missing a change to tcg_can_emit_vec_op to do that isa check, and allow INDEX_op_mul_vec to be used for MO_64. Missing a change to tcg_target_init to test for PPC_FEATURE2_ARCH_3_1. r~
> On Jun 25, 2020, at 1:25 PM, Richard Henderson <richard.henderson@linaro.org> wrote: > > On 6/25/20 10:00 AM, Lijun Pan wrote: >> vmulld: Vector Multiply Low Doubleword. >> >> Signed-off-by: Lijun Pan <ljp@linux.ibm.com> >> --- >> v3: use tcg_gen_gvec_mul() >> >> target/ppc/translate/vmx-impl.inc.c | 1 + >> target/ppc/translate/vmx-ops.inc.c | 4 ++++ > > This part looks fine. > >> tcg/ppc/tcg-target.h | 2 ++ >> tcg/ppc/tcg-target.inc.c | 7 +++++-- > > This part must be a separate patch. > > >> @@ -3149,6 +3150,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, >> static const uint32_t >> add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM }, >> sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM }, >> + mul_op[4] = { 0, 0, VMULUWM, VMULLD }, >> neg_op[4] = { 0, 0, VNEGW, VNEGD }, >> eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD }, >> ne_op[4] = { VCMPNEB, VCMPNEH, VCMPNEW, 0 }, >> @@ -3199,8 +3201,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, >> a1 = 0; >> break; >> case INDEX_op_mul_vec: >> - tcg_debug_assert(vece == MO_32 && have_isa_2_07); >> - insn = VMULUWM; >> + tcg_debug_assert((vece == MO_32 && have_isa_2_07) || >> + (vece == MO_64 && have_isa_3_10)); >> + insn = mul_op[vece]; > > I think it would be ok to just index mul_op here, since the real isa check is > to be done elsewhere. Just keep "insn = mul_op[vece];" and remove" tcg_debug_assert((vece == MO_32 && have_isa_2_07) || (vece == MO_64 && have_isa_3_10));“? > > Missing a change to tcg_can_emit_vec_op to do that isa check, and allow > INDEX_op_mul_vec to be used for MO_64. something like below? " @@ -3016,6 +3016,8 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) return -1; case MO_32: return have_isa_2_07 ? 1 : -1; + case MO_64: + return have_isa_3_10 ? 1 : -1; } " > > Missing a change to tcg_target_init to test for PPC_FEATURE2_ARCH_3_1. something like below? @@ -3712,6 +3712,11 @@ static void tcg_target_init(TCGContext *s) have_isa = tcg_isa_3_00; } #endif +#ifdef PPC_FEATURE2_ARCH_3_10 + if (hwcap2 & PPC_FEATURE2_ARCH_3_10) { + have_isa = tcg_isa_3_10; + } +#endif +++ b/include/elf.h @@ -554,6 +554,7 @@ typedef struct { #define PPC_FEATURE2_HTM_NOSC 0x01000000 #define PPC_FEATURE2_ARCH_3_00 0x00800000 #define PPC_FEATURE2_HAS_IEEE128 0x00400000 +#define PPC_FEATURE2_ARCH_3_10 0x00200000 Thanks, Lijun
On 6/25/20 2:13 PM, Lijun Pan wrote: >>> case INDEX_op_mul_vec: >>> - tcg_debug_assert(vece == MO_32 && have_isa_2_07); >>> - insn = VMULUWM; >>> + tcg_debug_assert((vece == MO_32 && have_isa_2_07) || >>> + (vece == MO_64 && have_isa_3_10)); >>> + insn = mul_op[vece]; >> >> I think it would be ok to just index mul_op here, since the real isa check is >> to be done elsewhere. > > Just keep "insn = mul_op[vece];" > and remove" tcg_debug_assert((vece == MO_32 && have_isa_2_07) || > (vece == MO_64 && have_isa_3_10));“? Yes. > @@ -3016,6 +3016,8 @@int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, > unsigned vece) > return -1; > case MO_32: > return have_isa_2_07 ? 1 : -1; > + case MO_64: > + return have_isa_3_10 ? 1 : -1; > } Actually, just "return have_isa_3_10". Returning 1 means that the opcode is supported directly. Returning -1 means that the opcode can be expanded by tcg_expand_vec_op. Returning 0 means that the tcg backend does not support the opcode at all. > something like below? > @@ -3712,6 +3712,11 @@static void tcg_target_init(TCGContext *s) > have_isa = tcg_isa_3_00; > } > #endif > +#ifdef PPC_FEATURE2_ARCH_3_10 > + if (hwcap2 & PPC_FEATURE2_ARCH_3_10) { > + have_isa = tcg_isa_3_10; > + } > +#endif Certainly this. > @@ -554,6 +554,7 @@typedef struct { > #define PPC_FEATURE2_HTM_NOSC 0x01000000 > #define PPC_FEATURE2_ARCH_3_00 0x00800000 > #define PPC_FEATURE2_HAS_IEEE128 0x00400000 > +#define PPC_FEATURE2_ARCH_3_10 0x00200000 Of this I'm not sure. I didn't even realize these defines were here in include/elf.h. For other tcg backends we get the defines from <sys/auxv.h>. If we do want to update include/elf.h, it should be a separate patch. CC'ing Laurent for this. r~
diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c index 6e79ffa650..8c89738552 100644 --- a/target/ppc/translate/vmx-impl.inc.c +++ b/target/ppc/translate/vmx-impl.inc.c @@ -807,6 +807,7 @@ GEN_VXFORM_DUAL(vmulouw, PPC_ALTIVEC, PPC_NONE, GEN_VXFORM(vmulosb, 4, 4); GEN_VXFORM(vmulosh, 4, 5); GEN_VXFORM(vmulosw, 4, 6); +GEN_VXFORM_V(vmulld, MO_64, tcg_gen_gvec_mul, 4, 7); GEN_VXFORM(vmuleub, 4, 8); GEN_VXFORM(vmuleuh, 4, 9); GEN_VXFORM(vmuleuw, 4, 10); diff --git a/target/ppc/translate/vmx-ops.inc.c b/target/ppc/translate/vmx-ops.inc.c index 84e05fb827..b49787ac97 100644 --- a/target/ppc/translate/vmx-ops.inc.c +++ b/target/ppc/translate/vmx-ops.inc.c @@ -48,6 +48,9 @@ GEN_HANDLER_E(name, 0x04, opc2, opc3, inval, PPC_NONE, PPC2_ISA300) GEN_HANDLER_E_2(name, 0x04, opc2, opc3, opc4, 0x00000000, PPC_NONE, \ PPC2_ISA300) +#define GEN_VXFORM_310(name, opc2, opc3) \ +GEN_HANDLER_E(name, 0x04, opc2, opc3, 0x00000000, PPC_NONE, PPC2_ISA310) + #define GEN_VXFORM_DUAL(name0, name1, opc2, opc3, type0, type1) \ GEN_HANDLER_E(name0##_##name1, 0x4, opc2, opc3, 0x00000000, type0, type1) @@ -104,6 +107,7 @@ GEN_VXFORM_DUAL(vmulouw, vmuluwm, 4, 2, PPC_ALTIVEC, PPC_NONE), GEN_VXFORM(vmulosb, 4, 4), GEN_VXFORM(vmulosh, 4, 5), GEN_VXFORM_207(vmulosw, 4, 6), +GEN_VXFORM_310(vmulld, 4, 7), GEN_VXFORM(vmuleub, 4, 8), GEN_VXFORM(vmuleuh, 4, 9), GEN_VXFORM_207(vmuleuw, 4, 10), diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 4fa21f0e71..ff1249ef8e 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -63,6 +63,7 @@ typedef enum { tcg_isa_2_06, tcg_isa_2_07, tcg_isa_3_00, + tcg_isa_3_10, } TCGPowerISA; extern TCGPowerISA have_isa; @@ -72,6 +73,7 @@ extern bool have_vsx; #define have_isa_2_06 (have_isa >= tcg_isa_2_06) #define have_isa_2_07 (have_isa >= tcg_isa_2_07) #define have_isa_3_00 (have_isa >= tcg_isa_3_00) +#define have_isa_3_10 (have_isa >= tcg_isa_3_10) /* optional instructions automatically implemented */ #define TCG_TARGET_HAS_ext8u_i32 0 /* andi */ diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c index ee1f9227c1..de0feaf7a8 100644 --- a/tcg/ppc/tcg-target.inc.c +++ b/tcg/ppc/tcg-target.inc.c @@ -564,6 +564,7 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, #define VMULOUH VX4(72) #define VMULOUW VX4(136) /* v2.07 */ #define VMULUWM VX4(137) /* v2.07 */ +#define VMULLD VX4(457) /* v3.10 */ #define VMSUMUHM VX4(38) #define VMRGHB VX4(12) @@ -3149,6 +3150,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, static const uint32_t add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM }, sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM }, + mul_op[4] = { 0, 0, VMULUWM, VMULLD }, neg_op[4] = { 0, 0, VNEGW, VNEGD }, eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD }, ne_op[4] = { VCMPNEB, VCMPNEH, VCMPNEW, 0 }, @@ -3199,8 +3201,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, a1 = 0; break; case INDEX_op_mul_vec: - tcg_debug_assert(vece == MO_32 && have_isa_2_07); - insn = VMULUWM; + tcg_debug_assert((vece == MO_32 && have_isa_2_07) || + (vece == MO_64 && have_isa_3_10)); + insn = mul_op[vece]; break; case INDEX_op_ssadd_vec: insn = ssadd_op[vece];
vmulld: Vector Multiply Low Doubleword. Signed-off-by: Lijun Pan <ljp@linux.ibm.com> --- v3: use tcg_gen_gvec_mul() target/ppc/translate/vmx-impl.inc.c | 1 + target/ppc/translate/vmx-ops.inc.c | 4 ++++ tcg/ppc/tcg-target.h | 2 ++ tcg/ppc/tcg-target.inc.c | 7 +++++-- 4 files changed, 12 insertions(+), 2 deletions(-)