Message ID | 20201207044655.2312-4-zhiwei_liu@c-sky.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | target/arm bug fix | expand |
On 12/6/20 10:46 PM, LIU Zhiwei wrote: > For SIMD fcmla(by element), if the number of elements is less than > the number of elements within one segment,i.e. 4H arrangement, > we should not calculate the entire segment. > > Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> > --- > target/arm/vec_helper.c | 8 ++++++++ > 1 file changed, 8 insertions(+) > > diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c > index 7174030377..44b8165323 100644 > --- a/target/arm/vec_helper.c > +++ b/target/arm/vec_helper.c > @@ -544,6 +544,10 @@ void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm, > neg_real <<= 15; > neg_imag <<= 15; > > + /* Adjust eltspersegment for simd 4H */ > + if (eltspersegment > elements) { > + eltspersegment = elements; > + } Ok. Maybe better to fold this back to the initialization using MIN. > for (i = 0; i < elements; i += eltspersegment) { > float16 mr = m[H2(i + 2 * index + 0)]; > float16 mi = m[H2(i + 2 * index + 1)]; > @@ -610,6 +614,10 @@ void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm, > neg_real <<= 31; > neg_imag <<= 31; > > + /* Adjust eltspersegment for simd 4H */ > + if (eltspersegment > elements) { > + eltspersegment = elements; > + } Incorrect: this function only computes 4S. > for (i = 0; i < elements; i += eltspersegment) { > float32 mr = m[H4(i + 2 * index + 0)]; > float32 mi = m[H4(i + 2 * index + 1)]; > r~
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index 7174030377..44b8165323 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -544,6 +544,10 @@ void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm, neg_real <<= 15; neg_imag <<= 15; + /* Adjust eltspersegment for simd 4H */ + if (eltspersegment > elements) { + eltspersegment = elements; + } for (i = 0; i < elements; i += eltspersegment) { float16 mr = m[H2(i + 2 * index + 0)]; float16 mi = m[H2(i + 2 * index + 1)]; @@ -610,6 +614,10 @@ void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm, neg_real <<= 31; neg_imag <<= 31; + /* Adjust eltspersegment for simd 4H */ + if (eltspersegment > elements) { + eltspersegment = elements; + } for (i = 0; i < elements; i += eltspersegment) { float32 mr = m[H4(i + 2 * index + 0)]; float32 mi = m[H4(i + 2 * index + 1)];
For SIMD FCMLA (by element), if the number of elements is less than the number of elements within one segment, i.e. the 4H arrangement, we should not calculate the entire segment. Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> --- target/arm/vec_helper.c | 8 ++++++++ 1 file changed, 8 insertions(+)