Message ID | 20200613042029.22321-4-ljp@linux.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add several Power ISA 3.1 32/64-bit vector instructions | expand |
On 6/12/20 9:20 PM, Lijun Pan wrote: > +#define VMULH_DO(name, op, element, cast_orig, cast_temp) \ > + void helper_vmulh##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ > + { \ > + int i; \ > + \ > + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ > + r->element[i] = (cast_orig)(((cast_temp)a->element[i] op \ > + (cast_temp)b->element[i]) >> 32); \ > + } \ > + } > +VMULH_DO(sw, *, s32, int32_t, int64_t) > +VMULH_DO(uw, *, u32, uint32_t, uint64_t) > +#undef VMULH_DO There's no point in calling the macro "VMUL" and then passing in "op" as a parameter. Just inline the multiply directly. Also, fix your indentation. r~
> On Jun 18, 2020, at 6:29 PM, Richard Henderson <richard.henderson@linaro.org> wrote: > > On 6/12/20 9:20 PM, Lijun Pan wrote: >> +#define VMULH_DO(name, op, element, cast_orig, cast_temp) \ >> + void helper_vmulh##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ >> + { \ >> + int i; \ >> + \ >> + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ >> + r->element[i] = (cast_orig)(((cast_temp)a->element[i] op \ >> + (cast_temp)b->element[i]) >> 32); \ >> + } \ >> + } >> +VMULH_DO(sw, *, s32, int32_t, int64_t) >> +VMULH_DO(uw, *, u32, uint32_t, uint64_t) >> +#undef VMULH_DO > > There's no point in calling the macro "VMUL" and then passing in "op" as a > parameter. Just inline the multiply directly. Do you mean writing two functions directly, void helper_vmulhsw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) { int i; for (i = 0; i < 4; i++) { r->s32[i] = (int32_t)(((int64_t)a->s32[i] * (int64_t)b->s32[i]) >> 32); } } void helper_vmulhuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) { int i; for (i = 0; i < 4; i++) { r->u32[i] = (uint32_t)(((uint64_t)a->u32[i] * (uint64_t)b->u32[i]) >> 32); } } Thanks, Lijun
On 6/18/20 10:37 PM, Lijun Pan wrote: > Do you mean writing two functions directly, > > void helper_vmulhsw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) > { > int i; > > for (i = 0; i < 4; i++) { > r->s32[i] = (int32_t)(((int64_t)a->s32[i] * (int64_t)b->s32[i]) >> 32); > } > } > > void helper_vmulhuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) > { > int i; > > for (i = 0; i < 4; i++) { > r->u32[i] = (uint32_t)(((uint64_t)a->u32[i] * (uint64_t)b->u32[i]) >> 32); > } > } That works for me. r~
diff --git a/target/ppc/helper.h b/target/ppc/helper.h index c3f087ccb3..6d4a3536eb 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -186,6 +186,8 @@ DEF_HELPER_3(vmulouh, void, avr, avr, avr) DEF_HELPER_3(vmulouw, void, avr, avr, avr) DEF_HELPER_3(vmuluwm, void, avr, avr, avr) DEF_HELPER_3(vmulld, void, avr, avr, avr) +DEF_HELPER_3(vmulhsw, void, avr, avr, avr) +DEF_HELPER_3(vmulhuw, void, avr, avr, avr) DEF_HELPER_3(vslo, void, avr, avr, avr) DEF_HELPER_3(vsro, void, avr, avr, avr) DEF_HELPER_3(vsrv, void, avr, avr, avr) diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index afbcdd05b4..4bb3b7e928 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -523,6 +523,20 @@ void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b) r->VsrD(0) = 0; } +#define VMULH_DO(name, op, element, cast_orig, cast_temp) \ + void helper_vmulh##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ + { \ + int i; \ + \ + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ + r->element[i] = (cast_orig)(((cast_temp)a->element[i] op \ + (cast_temp)b->element[i]) >> 32); \ + } \ + } +VMULH_DO(sw, *, s32, int32_t, int64_t) +VMULH_DO(uw, *, u32, uint32_t, uint64_t) +#undef VMULH_DO + #define VARITH_DO(name, op, element) \ void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ { \ diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c index 4ee1df48f2..2c35559c52 100644 --- a/target/ppc/translate/vmx-impl.inc.c +++ b/target/ppc/translate/vmx-impl.inc.c @@ -811,9 +811,15 @@ GEN_VXFORM(vmulld, 4, 7); GEN_VXFORM(vmuleub, 4, 8); GEN_VXFORM(vmuleuh, 4, 9); GEN_VXFORM(vmuleuw, 4, 10); +GEN_VXFORM(vmulhuw, 4, 10); +GEN_VXFORM_DUAL(vmuleuw, PPC_ALTIVEC, PPC_NONE, + vmulhuw, PPC_NONE, PPC2_ISA300); GEN_VXFORM(vmulesb, 4, 12); GEN_VXFORM(vmulesh, 4, 13); GEN_VXFORM(vmulesw, 4, 14); +GEN_VXFORM(vmulhsw, 4, 14); +GEN_VXFORM_DUAL(vmulesw, PPC_ALTIVEC, PPC_NONE, + vmulhsw, PPC_NONE, PPC2_ISA300); GEN_VXFORM_V(vslb, MO_8, tcg_gen_gvec_shlv, 
2, 4); GEN_VXFORM_V(vslh, MO_16, tcg_gen_gvec_shlv, 2, 5); GEN_VXFORM_V(vslw, MO_32, tcg_gen_gvec_shlv, 2, 6); diff --git a/target/ppc/translate/vmx-ops.inc.c b/target/ppc/translate/vmx-ops.inc.c index 499bed0a44..1d8238a718 100644 --- a/target/ppc/translate/vmx-ops.inc.c +++ b/target/ppc/translate/vmx-ops.inc.c @@ -107,10 +107,10 @@ GEN_VXFORM_207(vmulosw, 4, 6), GEN_VXFORM_300(vmulld, 4, 7), GEN_VXFORM(vmuleub, 4, 8), GEN_VXFORM(vmuleuh, 4, 9), -GEN_VXFORM_207(vmuleuw, 4, 10), +GEN_VXFORM_DUAL(vmuleuw, vmulhuw, 4, 10, PPC_ALTIVEC, PPC_NONE), GEN_VXFORM(vmulesb, 4, 12), GEN_VXFORM(vmulesh, 4, 13), -GEN_VXFORM_207(vmulesw, 4, 14), +GEN_VXFORM_DUAL(vmulesw, vmulhsw, 4, 14, PPC_ALTIVEC, PPC_NONE), GEN_VXFORM(vslb, 2, 4), GEN_VXFORM(vslh, 2, 5), GEN_VXFORM_DUAL(vslw, vrlwnm, 2, 6, PPC_ALTIVEC, PPC_NONE),
vmulhsw: Vector Multiply High Signed Word vmulhuw: Vector Multiply High Unsigned Word Signed-off-by: Lijun Pan <ljp@linux.ibm.com> --- target/ppc/helper.h | 2 ++ target/ppc/int_helper.c | 14 ++++++++++++++ target/ppc/translate/vmx-impl.inc.c | 6 ++++++ target/ppc/translate/vmx-ops.inc.c | 4 ++-- 4 files changed, 24 insertions(+), 2 deletions(-)