diff mbox series

[3/4] target/arm: Fixup SIMD fcmla(by element) in 4H arrangement

Message ID 20201207044655.2312-4-zhiwei_liu@c-sky.com (mailing list archive)
State New, archived
Headers show
Series target/arm bug fix | expand

Commit Message

LIU Zhiwei Dec. 7, 2020, 4:46 a.m. UTC
For SIMD fcmla (by element), if the total number of elements in the vector
is less than the number of elements within one segment, i.e. the 4H
arrangement, we should not calculate the entire segment.

Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 target/arm/vec_helper.c | 8 ++++++++
 1 file changed, 8 insertions(+)

Comments

Richard Henderson Dec. 8, 2020, 9:04 p.m. UTC | #1
On 12/6/20 10:46 PM, LIU Zhiwei wrote:
> For SIMD fcmla(by element), if the number of elements is less than
> the number of elements within one segment,i.e. 4H arrangement,
> we should not calculate the entire segment.
> 
> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
> ---
>  target/arm/vec_helper.c | 8 ++++++++
>  1 file changed, 8 insertions(+)
> 
> diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
> index 7174030377..44b8165323 100644
> --- a/target/arm/vec_helper.c
> +++ b/target/arm/vec_helper.c
> @@ -544,6 +544,10 @@ void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm,
>      neg_real <<= 15;
>      neg_imag <<= 15;
>  
> +    /* Adjust eltspersegment for simd 4H */
> +    if (eltspersegment > elements) {
> +        eltspersegment = elements;
> +    }

Ok.  Maybe better to fold this back to the initialization using MIN.

>      for (i = 0; i < elements; i += eltspersegment) {
>          float16 mr = m[H2(i + 2 * index + 0)];
>          float16 mi = m[H2(i + 2 * index + 1)];
> @@ -610,6 +614,10 @@ void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm,
>      neg_real <<= 31;
>      neg_imag <<= 31;
>  
> +    /* Adjust eltspersegment for simd 4H */
> +    if (eltspersegment > elements) {
> +        eltspersegment = elements;
> +    }

Incorrect: this function only computes 4S.

>      for (i = 0; i < elements; i += eltspersegment) {
>          float32 mr = m[H4(i + 2 * index + 0)];
>          float32 mi = m[H4(i + 2 * index + 1)];
> 


r~
diff mbox series

Patch

diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index 7174030377..44b8165323 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -544,6 +544,10 @@  void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm,
     neg_real <<= 15;
     neg_imag <<= 15;
 
+    /* Adjust eltspersegment for simd 4H */
+    if (eltspersegment > elements) {
+        eltspersegment = elements;
+    }
     for (i = 0; i < elements; i += eltspersegment) {
         float16 mr = m[H2(i + 2 * index + 0)];
         float16 mi = m[H2(i + 2 * index + 1)];
@@ -610,6 +614,10 @@  void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm,
     neg_real <<= 31;
     neg_imag <<= 31;
 
+    /* Adjust eltspersegment for simd 4H */
+    if (eltspersegment > elements) {
+        eltspersegment = elements;
+    }
     for (i = 0; i < elements; i += eltspersegment) {
         float32 mr = m[H4(i + 2 * index + 0)];
         float32 mi = m[H4(i + 2 * index + 1)];