Message ID | 1480937130-24561-14-git-send-email-nikunj@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Dec 05, 2016 at 04:55:30PM +0530, Nikunj A Dadhania wrote: > From: Bharata B Rao <bharata@linux.vnet.ibm.com> > > xxperm: VSX Vector Permute > xxpermr: VSX Vector Permute Right-indexed > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> > Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> > --- > target-ppc/fpu_helper.c | 50 +++++++++++++++++++++++++++++++++++++ > target-ppc/helper.h | 2 ++ > target-ppc/translate/vsx-impl.inc.c | 2 ++ > target-ppc/translate/vsx-ops.inc.c | 2 ++ > 4 files changed, 56 insertions(+) > > diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c > index 3b867cf..be552c7 100644 > --- a/target-ppc/fpu_helper.c > +++ b/target-ppc/fpu_helper.c > @@ -2869,3 +2869,53 @@ uint64_t helper_xsrsp(CPUPPCState *env, uint64_t xb) > float_check_status(env); > return xt; > } > + > +static void vsr_copy_256(ppc_vsr_t *xa, ppc_vsr_t *xt, int8_t *src) > +{ > +#if defined(HOST_WORDS_BIGENDIAN) > + memcpy(src, xa, sizeof(*xa)); > + memcpy(src + 16, xt, sizeof(*xt)); > +#else > + memcpy(src, xt, sizeof(*xt)); > + memcpy(src + 16, xa, sizeof(*xa)); Is this right? I thought the order of the bytes within each word varied with the host endianness as well. > +#endif > +} > + > +static int8_t vsr_get_byte(int8_t *src, int bound, int idx) > +{ > + if (idx >= bound) { > + return 0xFF; > + } AFAICT you don't need this check. For both xxperm and xxpermr you're already masking the index to 5 bits, so it can't exceed 31. > +#if defined(HOST_WORDS_BIGENDIAN) > + return src[idx]; > +#else > + return src[bound - 1 - idx]; > +#endif > +} > + > +#define VSX_XXPERM(op, indexed) \ > +void helper_##op(CPUPPCState *env, uint32_t opcode) \ > +{ \ > + ppc_vsr_t xt, xa, pcv; \ > + int i, idx; \ > + int8_t src[32]; \ > + \ > + getVSR(xA(opcode), &xa, env); \ > + getVSR(xT(opcode), &xt, env); \ > + getVSR(xB(opcode), &pcv, env); \ > + \ > + vsr_copy_256(&xa, &xt, src); \ You have a double copy here AFAICT - first from the actual env structure to xt and xa, then to the src array. That seems like it would be good to avoid. It seems like it would nice in any case to avoid even the one copy. You'd need a temporary for the output of course and to copy that, but you should be able to combine indexed with host endianness to translate each index to retrieve directly from the VSR values in env. > + for (i = 0; i < 16; i++) { \ > + idx = pcv.VsrB(i) & 0x1F; \ > + if (indexed) { \ > + xt.VsrB(i) = vsr_get_byte(src, 32, 31 - idx); \ > + } else { \ > + xt.VsrB(i) = vsr_get_byte(src, 32, idx); \ > + } \ > + } \ > + putVSR(xT(opcode), &xt, env); \ > +} > + > +VSX_XXPERM(xxperm, 0) > +VSX_XXPERM(xxpermr, 1) > diff --git a/target-ppc/helper.h b/target-ppc/helper.h > index 9f812c8..399cf99 100644 > --- a/target-ppc/helper.h > +++ b/target-ppc/helper.h > @@ -538,6 +538,8 @@ DEF_HELPER_2(xvrspip, void, env, i32) > DEF_HELPER_2(xvrspiz, void, env, i32) > DEF_HELPER_4(xxextractuw, void, env, tl, tl, i32) > DEF_HELPER_4(xxinsertw, void, env, tl, tl, i32) > +DEF_HELPER_2(xxperm, void, env, i32) > +DEF_HELPER_2(xxpermr, void, env, i32) > > DEF_HELPER_2(efscfsi, i32, env, i32) > DEF_HELPER_2(efscfui, i32, env, i32) > diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c > index 77f098b..2ad152e 100644 > --- a/target-ppc/translate/vsx-impl.inc.c > +++ b/target-ppc/translate/vsx-impl.inc.c > @@ -914,6 +914,8 @@ GEN_VSX_HELPER_2(xvrspic, 0x16, 0x0A, 0, PPC2_VSX) > GEN_VSX_HELPER_2(xvrspim, 0x12, 0x0B, 0, PPC2_VSX) > GEN_VSX_HELPER_2(xvrspip, 0x12, 0x0A, 0, PPC2_VSX) > GEN_VSX_HELPER_2(xvrspiz, 0x12, 0x09, 0, PPC2_VSX) > +GEN_VSX_HELPER_2(xxperm, 0x08, 0x03, 0, PPC2_ISA300) > +GEN_VSX_HELPER_2(xxpermr, 0x08, 0x07, 0, PPC2_ISA300) > > static void gen_xxbrd(DisasContext *ctx) > { > diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c > index 42e83d2..93fb9b8 100644 > --- a/target-ppc/translate/vsx-ops.inc.c > +++ b/target-ppc/translate/vsx-ops.inc.c > @@ -275,6 +275,8 @@ VSX_LOGICAL(xxlnand, 0x8, 0x16, PPC2_VSX207), > VSX_LOGICAL(xxlorc, 0x8, 0x15, PPC2_VSX207), > GEN_XX3FORM(xxmrghw, 0x08, 0x02, PPC2_VSX), > GEN_XX3FORM(xxmrglw, 0x08, 0x06, PPC2_VSX), > +GEN_XX3FORM(xxperm, 0x08, 0x03, PPC2_ISA300), > +GEN_XX3FORM(xxpermr, 0x08, 0x07, PPC2_ISA300), > GEN_XX2FORM(xxspltw, 0x08, 0x0A, PPC2_VSX), > GEN_XX1FORM(xxspltib, 0x08, 0x0B, PPC2_ISA300), > GEN_XX3FORM_DM(xxsldwi, 0x08, 0x00),
On Tue, Dec 06, 2016 at 03:11:22PM +1100, David Gibson wrote: > On Mon, Dec 05, 2016 at 04:55:30PM +0530, Nikunj A Dadhania wrote: > > From: Bharata B Rao <bharata@linux.vnet.ibm.com> > > > > xxperm: VSX Vector Permute > > xxpermr: VSX Vector Permute Right-indexed > > > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> > > Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> > > --- > > target-ppc/fpu_helper.c | 50 +++++++++++++++++++++++++++++++++++++ > > target-ppc/helper.h | 2 ++ > > target-ppc/translate/vsx-impl.inc.c | 2 ++ > > target-ppc/translate/vsx-ops.inc.c | 2 ++ > > 4 files changed, 56 insertions(+) > > > > diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c > > index 3b867cf..be552c7 100644 > > --- a/target-ppc/fpu_helper.c > > +++ b/target-ppc/fpu_helper.c > > @@ -2869,3 +2869,53 @@ uint64_t helper_xsrsp(CPUPPCState *env, uint64_t xb) > > float_check_status(env); > > return xt; > > } > > + > > +static void vsr_copy_256(ppc_vsr_t *xa, ppc_vsr_t *xt, int8_t *src) > > +{ > > +#if defined(HOST_WORDS_BIGENDIAN) > > + memcpy(src, xa, sizeof(*xa)); > > + memcpy(src + 16, xt, sizeof(*xt)); > > +#else > > + memcpy(src, xt, sizeof(*xt)); > > + memcpy(src + 16, xa, sizeof(*xa)); > > Is this right? I thought the order of the bytes within each word > varied with the host endianness as well. Since we are already working with 2 16 byte vectors xa and xb here, I thought we don't have to worry about order of bytes within each vector, but instead can construct the 32 byte vector as above based on host endianness. > > > +#endif > > +} > > + > > +static int8_t vsr_get_byte(int8_t *src, int bound, int idx) > > +{ > > + if (idx >= bound) { > > + return 0xFF; > > + } > > AFAICT you don't need this check. For both xxperm and xxpermr you're > already masking the index to 5 bits, so it can't exceed 31. Was thinking of making it a generic API and hence had that boundary check but yes, no point for the check in the context of this instruction. > > > +#if defined(HOST_WORDS_BIGENDIAN) > > + return src[idx]; > > +#else > > + return src[bound - 1 - idx]; > > +#endif > > +} > > + > > +#define VSX_XXPERM(op, indexed) \ > > +void helper_##op(CPUPPCState *env, uint32_t opcode) \ > > +{ \ > > + ppc_vsr_t xt, xa, pcv; \ > > + int i, idx; \ > > + int8_t src[32]; \ > > + \ > > + getVSR(xA(opcode), &xa, env); \ > > + getVSR(xT(opcode), &xt, env); \ > > + getVSR(xB(opcode), &pcv, env); \ > > + \ > > + vsr_copy_256(&xa, &xt, src); \ > > You have a double copy here AFAICT - first from the actual env > structure to xt and xa, then to the src array. That seems like it > would be good to avoid. > > It seems like it would nice in any case to avoid even the one copy. > You'd need a temporary for the output of course and to copy that, but > you should be able to combine indexed with host endianness to > translate each index to retrieve directly from the VSR values in env. I am not sure it would be good to retrieve byte values directly from env as getVSR nicely abstracts out from which fields (env->[fpr, vsr, avr] the data is fetched based on the register specified in the opcode. I can reduce one copy though by not constructing a 32 byte vector (src) but instead retrieving the bytes directly from xa and xt based on the index. Regards, Bharata.
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c index 3b867cf..be552c7 100644 --- a/target-ppc/fpu_helper.c +++ b/target-ppc/fpu_helper.c @@ -2869,3 +2869,53 @@ uint64_t helper_xsrsp(CPUPPCState *env, uint64_t xb) float_check_status(env); return xt; } + +static void vsr_copy_256(ppc_vsr_t *xa, ppc_vsr_t *xt, int8_t *src) +{ +#if defined(HOST_WORDS_BIGENDIAN) + memcpy(src, xa, sizeof(*xa)); + memcpy(src + 16, xt, sizeof(*xt)); +#else + memcpy(src, xt, sizeof(*xt)); + memcpy(src + 16, xa, sizeof(*xa)); +#endif +} + +static int8_t vsr_get_byte(int8_t *src, int bound, int idx) +{ + if (idx >= bound) { + return 0xFF; + } +#if defined(HOST_WORDS_BIGENDIAN) + return src[idx]; +#else + return src[bound - 1 - idx]; +#endif +} + +#define VSX_XXPERM(op, indexed) \ +void helper_##op(CPUPPCState *env, uint32_t opcode) \ +{ \ + ppc_vsr_t xt, xa, pcv; \ + int i, idx; \ + int8_t src[32]; \ + \ + getVSR(xA(opcode), &xa, env); \ + getVSR(xT(opcode), &xt, env); \ + getVSR(xB(opcode), &pcv, env); \ + \ + vsr_copy_256(&xa, &xt, src); \ + \ + for (i = 0; i < 16; i++) { \ + idx = pcv.VsrB(i) & 0x1F; \ + if (indexed) { \ + xt.VsrB(i) = vsr_get_byte(src, 32, 31 - idx); \ + } else { \ + xt.VsrB(i) = vsr_get_byte(src, 32, idx); \ + } \ + } \ + putVSR(xT(opcode), &xt, env); \ +} + +VSX_XXPERM(xxperm, 0) +VSX_XXPERM(xxpermr, 1) diff --git a/target-ppc/helper.h b/target-ppc/helper.h index 9f812c8..399cf99 100644 --- a/target-ppc/helper.h +++ b/target-ppc/helper.h @@ -538,6 +538,8 @@ DEF_HELPER_2(xvrspip, void, env, i32) DEF_HELPER_2(xvrspiz, void, env, i32) DEF_HELPER_4(xxextractuw, void, env, tl, tl, i32) DEF_HELPER_4(xxinsertw, void, env, tl, tl, i32) +DEF_HELPER_2(xxperm, void, env, i32) +DEF_HELPER_2(xxpermr, void, env, i32) DEF_HELPER_2(efscfsi, i32, env, i32) DEF_HELPER_2(efscfui, i32, env, i32) diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c index 77f098b..2ad152e 100644 --- a/target-ppc/translate/vsx-impl.inc.c +++ b/target-ppc/translate/vsx-impl.inc.c @@ -914,6 +914,8 @@ GEN_VSX_HELPER_2(xvrspic, 0x16, 0x0A, 0, PPC2_VSX) GEN_VSX_HELPER_2(xvrspim, 0x12, 0x0B, 0, PPC2_VSX) GEN_VSX_HELPER_2(xvrspip, 0x12, 0x0A, 0, PPC2_VSX) GEN_VSX_HELPER_2(xvrspiz, 0x12, 0x09, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xxperm, 0x08, 0x03, 0, PPC2_ISA300) +GEN_VSX_HELPER_2(xxpermr, 0x08, 0x07, 0, PPC2_ISA300) static void gen_xxbrd(DisasContext *ctx) { diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c index 42e83d2..93fb9b8 100644 --- a/target-ppc/translate/vsx-ops.inc.c +++ b/target-ppc/translate/vsx-ops.inc.c @@ -275,6 +275,8 @@ VSX_LOGICAL(xxlnand, 0x8, 0x16, PPC2_VSX207), VSX_LOGICAL(xxlorc, 0x8, 0x15, PPC2_VSX207), GEN_XX3FORM(xxmrghw, 0x08, 0x02, PPC2_VSX), GEN_XX3FORM(xxmrglw, 0x08, 0x06, PPC2_VSX), +GEN_XX3FORM(xxperm, 0x08, 0x03, PPC2_ISA300), +GEN_XX3FORM(xxpermr, 0x08, 0x07, PPC2_ISA300), GEN_XX2FORM(xxspltw, 0x08, 0x0A, PPC2_VSX), GEN_XX1FORM(xxspltib, 0x08, 0x0B, PPC2_ISA300), GEN_XX3FORM_DM(xxsldwi, 0x08, 0x00),