diff mbox series

[v3,4/8] target/ppc: add vmulld instruction

Message ID 20200625170018.64265-5-ljp@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series Add several Power ISA 3.1 32/64-bit vector instructions | expand

Commit Message

Lijun Pan June 25, 2020, 5 p.m. UTC
vmulld: Vector Multiply Low Doubleword.

Signed-off-by: Lijun Pan <ljp@linux.ibm.com>
---
v3: use tcg_gen_gvec_mul()

 target/ppc/translate/vmx-impl.inc.c | 1 +
 target/ppc/translate/vmx-ops.inc.c  | 4 ++++
 tcg/ppc/tcg-target.h                | 2 ++
 tcg/ppc/tcg-target.inc.c            | 7 +++++--
 4 files changed, 12 insertions(+), 2 deletions(-)

Comments

Richard Henderson June 25, 2020, 6:25 p.m. UTC | #1
On 6/25/20 10:00 AM, Lijun Pan wrote:
> vmulld: Vector Multiply Low Doubleword.
> 
> Signed-off-by: Lijun Pan <ljp@linux.ibm.com>
> ---
> v3: use tcg_gen_gvec_mul()
> 
>  target/ppc/translate/vmx-impl.inc.c | 1 +
>  target/ppc/translate/vmx-ops.inc.c  | 4 ++++

This part looks fine.

>  tcg/ppc/tcg-target.h                | 2 ++
>  tcg/ppc/tcg-target.inc.c            | 7 +++++--

This part must be a separate patch.


> @@ -3149,6 +3150,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
>      static const uint32_t
>          add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
>          sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
> +        mul_op[4] = { 0, 0, VMULUWM, VMULLD },
>          neg_op[4] = { 0, 0, VNEGW, VNEGD },
>          eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
>          ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
> @@ -3199,8 +3201,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
>          a1 = 0;
>          break;
>      case INDEX_op_mul_vec:
> -        tcg_debug_assert(vece == MO_32 && have_isa_2_07);
> -        insn = VMULUWM;
> +        tcg_debug_assert((vece == MO_32 && have_isa_2_07) ||
> +                         (vece == MO_64 && have_isa_3_10));
> +        insn = mul_op[vece];

I think it would be ok to just index mul_op here, since the real isa check is
to be done elsewhere.

Missing a change to tcg_can_emit_vec_op to do that isa check, and allow
INDEX_op_mul_vec to be used for MO_64.

Missing a change to tcg_target_init to test for PPC_FEATURE2_ARCH_3_1.


r~
Lijun Pan June 25, 2020, 9:13 p.m. UTC | #2
> On Jun 25, 2020, at 1:25 PM, Richard Henderson <richard.henderson@linaro.org> wrote:
> 
> On 6/25/20 10:00 AM, Lijun Pan wrote:
>> vmulld: Vector Multiply Low Doubleword.
>> 
>> Signed-off-by: Lijun Pan <ljp@linux.ibm.com>
>> ---
>> v3: use tcg_gen_gvec_mul()
>> 
>> target/ppc/translate/vmx-impl.inc.c | 1 +
>> target/ppc/translate/vmx-ops.inc.c  | 4 ++++
> 
> This part looks fine.
> 
>> tcg/ppc/tcg-target.h                | 2 ++
>> tcg/ppc/tcg-target.inc.c            | 7 +++++--
> 
> This part must be a separate patch.
> 
> 
>> @@ -3149,6 +3150,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
>>     static const uint32_t
>>         add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
>>         sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
>> +        mul_op[4] = { 0, 0, VMULUWM, VMULLD },
>>         neg_op[4] = { 0, 0, VNEGW, VNEGD },
>>         eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
>>         ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
>> @@ -3199,8 +3201,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
>>         a1 = 0;
>>         break;
>>     case INDEX_op_mul_vec:
>> -        tcg_debug_assert(vece == MO_32 && have_isa_2_07);
>> -        insn = VMULUWM;
>> +        tcg_debug_assert((vece == MO_32 && have_isa_2_07) ||
>> +                         (vece == MO_64 && have_isa_3_10));
>> +        insn = mul_op[vece];
> 
> I think it would be ok to just index mul_op here, since the real isa check is
> to be done elsewhere.

Just keep "insn = mul_op[vece];"
and remove"        tcg_debug_assert((vece == MO_32 && have_isa_2_07) ||
                         (vece == MO_64 && have_isa_3_10));“?

> 
> Missing a change to tcg_can_emit_vec_op to do that isa check, and allow
> INDEX_op_mul_vec to be used for MO_64.

something like below?
"
@@ -3016,6 +3016,8 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
             return -1;
         case MO_32:
             return have_isa_2_07 ? 1 : -1;
+        case MO_64:
+            return have_isa_3_10 ? 1 : -1;
         }
"

> 
> Missing a change to tcg_target_init to test for PPC_FEATURE2_ARCH_3_1.

something like below?
@@ -3712,6 +3712,11 @@ static void tcg_target_init(TCGContext *s)
         have_isa = tcg_isa_3_00;
     }
 #endif
+#ifdef PPC_FEATURE2_ARCH_3_10
+    if (hwcap2 & PPC_FEATURE2_ARCH_3_10) {
+        have_isa = tcg_isa_3_10;
+    }
+#endif

+++ b/include/elf.h
@@ -554,6 +554,7 @@ typedef struct {
 #define PPC_FEATURE2_HTM_NOSC           0x01000000
 #define PPC_FEATURE2_ARCH_3_00          0x00800000
 #define PPC_FEATURE2_HAS_IEEE128        0x00400000
+#define PPC_FEATURE2_ARCH_3_10          0x00200000


Thanks,
Lijun
Richard Henderson June 26, 2020, 3:52 a.m. UTC | #3
On 6/25/20 2:13 PM, Lijun Pan wrote:
>>>     case INDEX_op_mul_vec:
>>> -        tcg_debug_assert(vece == MO_32 && have_isa_2_07);
>>> -        insn = VMULUWM;
>>> +        tcg_debug_assert((vece == MO_32 && have_isa_2_07) ||
>>> +                         (vece == MO_64 && have_isa_3_10));
>>> +        insn = mul_op[vece];
>>
>> I think it would be ok to just index mul_op here, since the real isa check is
>> to be done elsewhere.
> 
> Just keep "insn = mul_op[vece];"
> and remove"        tcg_debug_assert((vece == MO_32 && have_isa_2_07) ||
>                          (vece == MO_64 && have_isa_3_10));“?

Yes.

> @@ -3016,6 +3016,8 @@int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type,
> unsigned vece)
>              return -1;
>          case MO_32:
>              return have_isa_2_07 ? 1 : -1;
> +        case MO_64:
> +            return have_isa_3_10 ? 1 : -1;
>          }

Actually, just "return have_isa_3_10".

Returning 1 means that the opcode is supported directly.  Returning -1 means
that the opcode can be expanded by tcg_expand_vec_op.  Returning 0 means that
the tcg backend does not support the opcode at all.

> something like below?
> @@ -3712,6 +3712,11 @@static void tcg_target_init(TCGContext *s)
>          have_isa = tcg_isa_3_00;
>      }
>  #endif
> +#ifdef PPC_FEATURE2_ARCH_3_10
> +    if (hwcap2 & PPC_FEATURE2_ARCH_3_10) {
> +        have_isa = tcg_isa_3_10;
> +    }
> +#endif

Certainly this.

> @@ -554,6 +554,7 @@typedef struct {
>  #define PPC_FEATURE2_HTM_NOSC           0x01000000
>  #define PPC_FEATURE2_ARCH_3_00          0x00800000
>  #define PPC_FEATURE2_HAS_IEEE128        0x00400000
> +#define PPC_FEATURE2_ARCH_3_10          0x00200000

Of this I'm not sure.  I didn't even realize these defines were here in
include/elf.h.  For other tcg backends we get the defines from <sys/auxv.h>.

If we do want to update include/elf.h, it should be a separate patch.  CC'ing
Laurent for this.


r~
diff mbox series

Patch

diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c
index 6e79ffa650..8c89738552 100644
--- a/target/ppc/translate/vmx-impl.inc.c
+++ b/target/ppc/translate/vmx-impl.inc.c
@@ -807,6 +807,7 @@  GEN_VXFORM_DUAL(vmulouw, PPC_ALTIVEC, PPC_NONE,
 GEN_VXFORM(vmulosb, 4, 4);
 GEN_VXFORM(vmulosh, 4, 5);
 GEN_VXFORM(vmulosw, 4, 6);
+GEN_VXFORM_V(vmulld, MO_64, tcg_gen_gvec_mul, 4, 7);
 GEN_VXFORM(vmuleub, 4, 8);
 GEN_VXFORM(vmuleuh, 4, 9);
 GEN_VXFORM(vmuleuw, 4, 10);
diff --git a/target/ppc/translate/vmx-ops.inc.c b/target/ppc/translate/vmx-ops.inc.c
index 84e05fb827..b49787ac97 100644
--- a/target/ppc/translate/vmx-ops.inc.c
+++ b/target/ppc/translate/vmx-ops.inc.c
@@ -48,6 +48,9 @@  GEN_HANDLER_E(name, 0x04, opc2, opc3, inval, PPC_NONE, PPC2_ISA300)
 GEN_HANDLER_E_2(name, 0x04, opc2, opc3, opc4, 0x00000000, PPC_NONE,     \
                                                        PPC2_ISA300)
 
+#define GEN_VXFORM_310(name, opc2, opc3)                                \
+GEN_HANDLER_E(name, 0x04, opc2, opc3, 0x00000000, PPC_NONE, PPC2_ISA310)
+
 #define GEN_VXFORM_DUAL(name0, name1, opc2, opc3, type0, type1) \
 GEN_HANDLER_E(name0##_##name1, 0x4, opc2, opc3, 0x00000000, type0, type1)
 
@@ -104,6 +107,7 @@  GEN_VXFORM_DUAL(vmulouw, vmuluwm, 4, 2, PPC_ALTIVEC, PPC_NONE),
 GEN_VXFORM(vmulosb, 4, 4),
 GEN_VXFORM(vmulosh, 4, 5),
 GEN_VXFORM_207(vmulosw, 4, 6),
+GEN_VXFORM_310(vmulld, 4, 7),
 GEN_VXFORM(vmuleub, 4, 8),
 GEN_VXFORM(vmuleuh, 4, 9),
 GEN_VXFORM_207(vmuleuw, 4, 10),
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 4fa21f0e71..ff1249ef8e 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -63,6 +63,7 @@  typedef enum {
     tcg_isa_2_06,
     tcg_isa_2_07,
     tcg_isa_3_00,
+    tcg_isa_3_10,
 } TCGPowerISA;
 
 extern TCGPowerISA have_isa;
@@ -72,6 +73,7 @@  extern bool have_vsx;
 #define have_isa_2_06  (have_isa >= tcg_isa_2_06)
 #define have_isa_2_07  (have_isa >= tcg_isa_2_07)
 #define have_isa_3_00  (have_isa >= tcg_isa_3_00)
+#define have_isa_3_10  (have_isa >= tcg_isa_3_10)
 
 /* optional instructions automatically implemented */
 #define TCG_TARGET_HAS_ext8u_i32        0 /* andi */
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index ee1f9227c1..de0feaf7a8 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -564,6 +564,7 @@  static int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define VMULOUH    VX4(72)
 #define VMULOUW    VX4(136)       /* v2.07 */
 #define VMULUWM    VX4(137)       /* v2.07 */
+#define VMULLD     VX4(457)       /* v3.10 */
 #define VMSUMUHM   VX4(38)
 
 #define VMRGHB     VX4(12)
@@ -3149,6 +3150,7 @@  static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
     static const uint32_t
         add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
         sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
+        mul_op[4] = { 0, 0, VMULUWM, VMULLD },
         neg_op[4] = { 0, 0, VNEGW, VNEGD },
         eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
         ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
@@ -3199,8 +3201,9 @@  static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         a1 = 0;
         break;
     case INDEX_op_mul_vec:
-        tcg_debug_assert(vece == MO_32 && have_isa_2_07);
-        insn = VMULUWM;
+        tcg_debug_assert((vece == MO_32 && have_isa_2_07) ||
+                         (vece == MO_64 && have_isa_3_10));
+        insn = mul_op[vece];
         break;
     case INDEX_op_ssadd_vec:
         insn = ssadd_op[vece];