Message ID | 1469561218-3067-6-git-send-email-nikunj@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, Jul 27, 2016 at 12:56:57AM +0530, Nikunj A Dadhania wrote: > From: Vivek Andrew Sha <vivekandrewsha@gmail.com> > > Adds Vector Shift Right Variable instruction. > > Signed-off-by: Vivek Andrew Sha <vivekandrewsha@gmail.com> > Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> > --- > target-ppc/helper.h | 1 + > target-ppc/int_helper.c | 17 +++++++++++++++++ > target-ppc/translate.c | 2 ++ > 3 files changed, 20 insertions(+) > > diff --git a/target-ppc/helper.h b/target-ppc/helper.h > index 9703f85..8eada2f 100644 > --- a/target-ppc/helper.h > +++ b/target-ppc/helper.h > @@ -211,6 +211,7 @@ DEF_HELPER_3(vslw, void, avr, avr, avr) > DEF_HELPER_3(vsld, void, avr, avr, avr) > DEF_HELPER_3(vslo, void, avr, avr, avr) > DEF_HELPER_3(vsro, void, avr, avr, avr) > +DEF_HELPER_3(vsrv, void, avr, avr, avr) > DEF_HELPER_3(vslv, void, avr, avr, avr) > DEF_HELPER_3(vaddcuw, void, avr, avr, avr) > DEF_HELPER_3(vsubcuw, void, avr, avr, avr) > diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c > index 412398f..f4776f0 100644 > --- a/target-ppc/int_helper.c > +++ b/target-ppc/int_helper.c > @@ -1722,6 +1722,23 @@ void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) > } > } > > +void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) > +{ > + int i; > + unsigned int shift, bytes, src[ARRAY_SIZE(r->u8) + 1]; > + > + src[0] = 0; > + for (i = 0; i < ARRAY_SIZE(r->u8); i++) { > + src[i + 1] = a->u8[i]; > + } > + > + for (i = 0; i < ARRAY_SIZE(r->u8); i++) { > + shift = b->u8[i] & 0x7; /* extract shift value */ > + bytes = (src[i] << 8) + src[i + 1]; /* extract adjacent bytes */ I think you should be able to construct bytes on the fly without pre-generating teh whole of src, as you already did for vslv. > + r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */ > + } > +} > + > void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) > { > int sh = shift & 0xf; > diff --git a/target-ppc/translate.c b/target-ppc/translate.c > index 473f21a..3382cd0 100644 > --- a/target-ppc/translate.c > +++ b/target-ppc/translate.c > @@ -7457,6 +7457,7 @@ GEN_VXFORM(vsraw, 2, 14); > GEN_VXFORM(vsrad, 2, 15); > GEN_VXFORM(vslo, 6, 16); > GEN_VXFORM(vsro, 6, 17); > +GEN_VXFORM(vsrv, 2, 28); > GEN_VXFORM(vslv, 2, 29); > GEN_VXFORM(vaddcuw, 0, 6); > GEN_VXFORM(vsubcuw, 0, 22); > @@ -10943,6 +10944,7 @@ GEN_VXFORM(vsraw, 2, 14), > GEN_VXFORM_207(vsrad, 2, 15), > GEN_VXFORM(vslo, 6, 16), > GEN_VXFORM(vsro, 6, 17), > +GEN_VXFORM(vsrv, 2, 28), > GEN_VXFORM(vslv, 2, 29), > GEN_VXFORM(vaddcuw, 0, 6), > GEN_VXFORM(vsubcuw, 0, 22),
David Gibson <david@gibson.dropbear.id.au> writes: > [ Unknown signature status ] > On Wed, Jul 27, 2016 at 12:56:57AM +0530, Nikunj A Dadhania wrote: >> From: Vivek Andrew Sha <vivekandrewsha@gmail.com> >> >> Adds Vector Shift Right Variable instruction. >> >> Signed-off-by: Vivek Andrew Sha <vivekandrewsha@gmail.com> >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> >> --- >> target-ppc/helper.h | 1 + >> target-ppc/int_helper.c | 17 +++++++++++++++++ >> target-ppc/translate.c | 2 ++ >> 3 files changed, 20 insertions(+) >> >> diff --git a/target-ppc/helper.h b/target-ppc/helper.h >> index 9703f85..8eada2f 100644 >> --- a/target-ppc/helper.h >> +++ b/target-ppc/helper.h >> @@ -211,6 +211,7 @@ DEF_HELPER_3(vslw, void, avr, avr, avr) >> DEF_HELPER_3(vsld, void, avr, avr, avr) >> DEF_HELPER_3(vslo, void, avr, avr, avr) >> DEF_HELPER_3(vsro, void, avr, avr, avr) >> +DEF_HELPER_3(vsrv, void, avr, avr, avr) >> DEF_HELPER_3(vslv, void, avr, avr, avr) >> DEF_HELPER_3(vaddcuw, void, avr, avr, avr) >> DEF_HELPER_3(vsubcuw, void, avr, avr, avr) >> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c >> index 412398f..f4776f0 100644 >> --- a/target-ppc/int_helper.c >> +++ b/target-ppc/int_helper.c >> @@ -1722,6 +1722,23 @@ void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) >> } >> } >> >> +void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) >> +{ >> + int i; >> + unsigned int shift, bytes, src[ARRAY_SIZE(r->u8) + 1]; >> + >> + src[0] = 0; >> + for (i = 0; i < ARRAY_SIZE(r->u8); i++) { >> + src[i + 1] = a->u8[i]; >> + } >> + >> + for (i = 0; i < ARRAY_SIZE(r->u8); i++) { >> + shift = b->u8[i] & 0x7; /* extract shift value */ >> + bytes = (src[i] << 8) + src[i + 1]; /* extract adjacent bytes */ > > I think you should be able to construct bytes on the fly without > pre-generating teh whole of src, as you already did for vslv. Had done that, but that introduces a bug like this, for eg: vslv vra,vra,vrb So modified vra->u8[i] is used during subsequent operation as input. Assuming I take care or special casing "0": bytes = ((vra->u8[i - 1] << 8) | (vra->u8[i])) vra->u8[i] = (bytes >> shift) & 0xFF; when i = 1, bytes will ((vra->u8[0] << 8) | (vra->u8[1])). But vra->u8[0], was changed in the previous operation. Thats the reason src[] is needed Regards Nikunj
On Wed, Jul 27, 2016 at 12:01:33PM +0530, Nikunj A Dadhania wrote: > David Gibson <david@gibson.dropbear.id.au> writes: > > > [ Unknown signature status ] > > On Wed, Jul 27, 2016 at 12:56:57AM +0530, Nikunj A Dadhania wrote: > >> From: Vivek Andrew Sha <vivekandrewsha@gmail.com> > >> > >> Adds Vector Shift Right Variable instruction. > >> > >> Signed-off-by: Vivek Andrew Sha <vivekandrewsha@gmail.com> > >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> > >> --- > >> target-ppc/helper.h | 1 + > >> target-ppc/int_helper.c | 17 +++++++++++++++++ > >> target-ppc/translate.c | 2 ++ > >> 3 files changed, 20 insertions(+) > >> > >> diff --git a/target-ppc/helper.h b/target-ppc/helper.h > >> index 9703f85..8eada2f 100644 > >> --- a/target-ppc/helper.h > >> +++ b/target-ppc/helper.h > >> @@ -211,6 +211,7 @@ DEF_HELPER_3(vslw, void, avr, avr, avr) > >> DEF_HELPER_3(vsld, void, avr, avr, avr) > >> DEF_HELPER_3(vslo, void, avr, avr, avr) > >> DEF_HELPER_3(vsro, void, avr, avr, avr) > >> +DEF_HELPER_3(vsrv, void, avr, avr, avr) > >> DEF_HELPER_3(vslv, void, avr, avr, avr) > >> DEF_HELPER_3(vaddcuw, void, avr, avr, avr) > >> DEF_HELPER_3(vsubcuw, void, avr, avr, avr) > >> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c > >> index 412398f..f4776f0 100644 > >> --- a/target-ppc/int_helper.c > >> +++ b/target-ppc/int_helper.c > >> @@ -1722,6 +1722,23 @@ void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) > >> } > >> } > >> > >> +void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) > >> +{ > >> + int i; > >> + unsigned int shift, bytes, src[ARRAY_SIZE(r->u8) + 1]; > >> + > >> + src[0] = 0; > >> + for (i = 0; i < ARRAY_SIZE(r->u8); i++) { > >> + src[i + 1] = a->u8[i]; > >> + } > >> + > >> + for (i = 0; i < ARRAY_SIZE(r->u8); i++) { > >> + shift = b->u8[i] & 0x7; /* extract shift value */ > >> + bytes = (src[i] << 8) + src[i + 1]; /* extract adjacent bytes */ > > > > I think you should be able to construct bytes on the fly without > > pre-generating teh whole of src, as you already did for vslv. > > Had done that, but that introduces a bug like this, for eg: > > vslv vra,vra,vrb > > So modified vra->u8[i] is used during subsequent operation as input. > > Assuming I take care or special casing "0": > > bytes = ((vra->u8[i - 1] << 8) | (vra->u8[i])) > vra->u8[i] = (bytes >> shift) & 0xFF; > > when i = 1, bytes will ((vra->u8[0] << 8) | (vra->u8[1])). But vra->u8[0], > was changed in the previous operation. Ah, good point. > Thats the reason src[] is needed It's probably possible to avoid generating all of src by instead generating the bytes one loop iteration ahead, but that sounds fiddly, so the current approach is fine for now.
David Gibson <david@gibson.dropbear.id.au> writes: > [ Unknown signature status ] > On Wed, Jul 27, 2016 at 12:01:33PM +0530, Nikunj A Dadhania wrote: >> David Gibson <david@gibson.dropbear.id.au> writes: >> >> > [ Unknown signature status ] >> > On Wed, Jul 27, 2016 at 12:56:57AM +0530, Nikunj A Dadhania wrote: >> >> From: Vivek Andrew Sha <vivekandrewsha@gmail.com> >> >> >> >> Adds Vector Shift Right Variable instruction. >> >> >> >> Signed-off-by: Vivek Andrew Sha <vivekandrewsha@gmail.com> >> >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> >> >> --- >> >> target-ppc/helper.h | 1 + >> >> target-ppc/int_helper.c | 17 +++++++++++++++++ >> >> target-ppc/translate.c | 2 ++ >> >> 3 files changed, 20 insertions(+) >> >> >> >> diff --git a/target-ppc/helper.h b/target-ppc/helper.h >> >> index 9703f85..8eada2f 100644 >> >> --- a/target-ppc/helper.h >> >> +++ b/target-ppc/helper.h >> >> @@ -211,6 +211,7 @@ DEF_HELPER_3(vslw, void, avr, avr, avr) >> >> DEF_HELPER_3(vsld, void, avr, avr, avr) >> >> DEF_HELPER_3(vslo, void, avr, avr, avr) >> >> DEF_HELPER_3(vsro, void, avr, avr, avr) >> >> +DEF_HELPER_3(vsrv, void, avr, avr, avr) >> >> DEF_HELPER_3(vslv, void, avr, avr, avr) >> >> DEF_HELPER_3(vaddcuw, void, avr, avr, avr) >> >> DEF_HELPER_3(vsubcuw, void, avr, avr, avr) >> >> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c >> >> index 412398f..f4776f0 100644 >> >> --- a/target-ppc/int_helper.c >> >> +++ b/target-ppc/int_helper.c >> >> @@ -1722,6 +1722,23 @@ void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) >> >> } >> >> } >> >> >> >> +void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) >> >> +{ >> >> + int i; >> >> + unsigned int shift, bytes, src[ARRAY_SIZE(r->u8) + 1]; >> >> + >> >> + src[0] = 0; >> >> + for (i = 0; i < ARRAY_SIZE(r->u8); i++) { >> >> + src[i + 1] = a->u8[i]; >> >> + } >> >> + >> >> + for (i = 0; i < ARRAY_SIZE(r->u8); i++) { >> >> + shift = b->u8[i] & 0x7; /* extract shift value */ >> >> + bytes = (src[i] << 8) + src[i + 1]; /* extract adjacent bytes */ >> > >> > I think you should be able to construct bytes on the fly without >> > pre-generating teh whole of src, as you already did for vslv. >> >> Had done that, but that introduces a bug like this, for eg: >> >> vslv vra,vra,vrb >> >> So modified vra->u8[i] is used during subsequent operation as input. >> >> Assuming I take care or special casing "0": >> >> bytes = ((vra->u8[i - 1] << 8) | (vra->u8[i])) >> vra->u8[i] = (bytes >> shift) & 0xFF; >> >> when i = 1, bytes will ((vra->u8[0] << 8) | (vra->u8[1])). But vra->u8[0], >> was changed in the previous operation. > > Ah, good point. > >> Thats the reason src[] is needed > > It's probably possible to avoid generating all of src by instead > generating the bytes one loop iteration ahead, but that sounds fiddly, > so the current approach is fine for now. Or do the operation in the reverse: starting from size to 0. Let me try that. Regards Nikunj
On Wed, Jul 27, 2016 at 12:27:27PM +0530, Nikunj A Dadhania wrote: > David Gibson <david@gibson.dropbear.id.au> writes: > > > [ Unknown signature status ] > > On Wed, Jul 27, 2016 at 12:01:33PM +0530, Nikunj A Dadhania wrote: > >> David Gibson <david@gibson.dropbear.id.au> writes: > >> > >> > [ Unknown signature status ] > >> > On Wed, Jul 27, 2016 at 12:56:57AM +0530, Nikunj A Dadhania wrote: > >> >> From: Vivek Andrew Sha <vivekandrewsha@gmail.com> > >> >> > >> >> Adds Vector Shift Right Variable instruction. > >> >> > >> >> Signed-off-by: Vivek Andrew Sha <vivekandrewsha@gmail.com> > >> >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> > >> >> --- > >> >> target-ppc/helper.h | 1 + > >> >> target-ppc/int_helper.c | 17 +++++++++++++++++ > >> >> target-ppc/translate.c | 2 ++ > >> >> 3 files changed, 20 insertions(+) > >> >> > >> >> diff --git a/target-ppc/helper.h b/target-ppc/helper.h > >> >> index 9703f85..8eada2f 100644 > >> >> --- a/target-ppc/helper.h > >> >> +++ b/target-ppc/helper.h > >> >> @@ -211,6 +211,7 @@ DEF_HELPER_3(vslw, void, avr, avr, avr) > >> >> DEF_HELPER_3(vsld, void, avr, avr, avr) > >> >> DEF_HELPER_3(vslo, void, avr, avr, avr) > >> >> DEF_HELPER_3(vsro, void, avr, avr, avr) > >> >> +DEF_HELPER_3(vsrv, void, avr, avr, avr) > >> >> DEF_HELPER_3(vslv, void, avr, avr, avr) > >> >> DEF_HELPER_3(vaddcuw, void, avr, avr, avr) > >> >> DEF_HELPER_3(vsubcuw, void, avr, avr, avr) > >> >> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c > >> >> index 412398f..f4776f0 100644 > >> >> --- a/target-ppc/int_helper.c > >> >> +++ b/target-ppc/int_helper.c > >> >> @@ -1722,6 +1722,23 @@ void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) > >> >> } > >> >> } > >> >> > >> >> +void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) > >> >> +{ > >> >> + int i; > >> >> + unsigned int shift, bytes, src[ARRAY_SIZE(r->u8) + 1]; > >> >> + > >> >> + src[0] = 0; > >> >> + for (i = 0; i < ARRAY_SIZE(r->u8); i++) { > >> >> + src[i + 1] = a->u8[i]; > >> >> + } > >> >> + > >> >> + for (i = 0; i < ARRAY_SIZE(r->u8); i++) { > >> >> + shift = b->u8[i] & 0x7; /* extract shift value */ > >> >> + bytes = (src[i] << 8) + src[i + 1]; /* extract adjacent bytes */ > >> > > >> > I think you should be able to construct bytes on the fly without > >> > pre-generating teh whole of src, as you already did for vslv. > >> > >> Had done that, but that introduces a bug like this, for eg: > >> > >> vslv vra,vra,vrb > >> > >> So modified vra->u8[i] is used during subsequent operation as input. > >> > >> Assuming I take care or special casing "0": > >> > >> bytes = ((vra->u8[i - 1] << 8) | (vra->u8[i])) > >> vra->u8[i] = (bytes >> shift) & 0xFF; > >> > >> when i = 1, bytes will ((vra->u8[0] << 8) | (vra->u8[1])). But vra->u8[0], > >> was changed in the previous operation. > > > > Ah, good point. > > > >> Thats the reason src[] is needed > > > > It's probably possible to avoid generating all of src by instead > > generating the bytes one loop iteration ahead, but that sounds fiddly, > > so the current approach is fine for now. > > Or do the operation in the reverse: starting from size to 0. Let me try > that. Good idea, that should work.
diff --git a/target-ppc/helper.h b/target-ppc/helper.h index 9703f85..8eada2f 100644 --- a/target-ppc/helper.h +++ b/target-ppc/helper.h @@ -211,6 +211,7 @@ DEF_HELPER_3(vslw, void, avr, avr, avr) DEF_HELPER_3(vsld, void, avr, avr, avr) DEF_HELPER_3(vslo, void, avr, avr, avr) DEF_HELPER_3(vsro, void, avr, avr, avr) +DEF_HELPER_3(vsrv, void, avr, avr, avr) DEF_HELPER_3(vslv, void, avr, avr, avr) DEF_HELPER_3(vaddcuw, void, avr, avr, avr) DEF_HELPER_3(vsubcuw, void, avr, avr, avr) diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index 412398f..f4776f0 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -1722,6 +1722,23 @@ void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) } } +void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int i; + unsigned int shift, bytes, src[ARRAY_SIZE(r->u8) + 1]; + + src[0] = 0; + for (i = 0; i < ARRAY_SIZE(r->u8); i++) { + src[i + 1] = a->u8[i]; + } + + for (i = 0; i < ARRAY_SIZE(r->u8); i++) { + shift = b->u8[i] & 0x7; /* extract shift value */ + bytes = (src[i] << 8) + src[i + 1]; /* extract adjacent bytes */ + r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */ + } +} + void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) { int sh = shift & 0xf; diff --git a/target-ppc/translate.c b/target-ppc/translate.c index 473f21a..3382cd0 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -7457,6 +7457,7 @@ GEN_VXFORM(vsraw, 2, 14); GEN_VXFORM(vsrad, 2, 15); GEN_VXFORM(vslo, 6, 16); GEN_VXFORM(vsro, 6, 17); +GEN_VXFORM(vsrv, 2, 28); GEN_VXFORM(vslv, 2, 29); GEN_VXFORM(vaddcuw, 0, 6); GEN_VXFORM(vsubcuw, 0, 22); @@ -10943,6 +10944,7 @@ GEN_VXFORM(vsraw, 2, 14), GEN_VXFORM_207(vsrad, 2, 15), GEN_VXFORM(vslo, 6, 16), GEN_VXFORM(vsro, 6, 17), +GEN_VXFORM(vsrv, 2, 28), GEN_VXFORM(vslv, 2, 29), GEN_VXFORM(vaddcuw, 0, 6), GEN_VXFORM(vsubcuw, 0, 22),