diff mbox series

[11/19] target/arm: Replace ARM_FEATURE_VFP4 with isar_feature_aa32_simdfmac

Message ID 20200214181547.21408-12-richard.henderson@linaro.org (mailing list archive)
State New, archived
Headers show
Series target/arm: vfp feature and decodetree cleanup | expand

Commit Message

Richard Henderson Feb. 14, 2020, 6:15 p.m. UTC
All remaining tests for VFP4 are for fused multiply-add insns.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/cpu.h               |  5 +++++
 target/arm/translate-vfp.inc.c | 12 ++++++++----
 target/arm/translate.c         |  2 +-
 3 files changed, 14 insertions(+), 5 deletions(-)

Comments

Peter Maydell Feb. 20, 2020, 4:37 p.m. UTC | #1
On Fri, 14 Feb 2020 at 18:16, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> All remaining tests for VFP4 are for fused multiply-add insns.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/cpu.h               |  5 +++++
>  target/arm/translate-vfp.inc.c | 12 ++++++++----
>  target/arm/translate.c         |  2 +-
>  3 files changed, 14 insertions(+), 5 deletions(-)
>
> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> index 4ff28418df..f27b8e35df 100644
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -3468,6 +3468,11 @@ static inline bool isar_feature_aa32_fp16_dpconv(const ARMISARegisters *id)
>      return FIELD_EX32(id->mvfr1, MVFR1, FPHP) > 1;
>  }
>
> +static inline bool isar_feature_aa32_simdfmac(const ARMISARegisters *id)
> +{
> +    return FIELD_EX32(id->mvfr1, MVFR1, SIMDFMAC) != 0;
> +}

This is tricky, because the SIMDFMAC register
field indicates "do we have fused-multiply-accumulate
for either VFP or Neon", so in a VFP-no-Neon core or
a Neon-no-VFP core it will be 1 but can't be used on its
own as a gate on "should this insn be present".

Currently in the part of arm_cpu_realize() which handles
the user having selected vfp=off and/or neon=off we
do allow (for AArch32 cores) both of those combinations.

trans_VFM_dp already tests aa32_fpdp_v2, so I think the
main thing we need to do is add a test on aa32_fpsp_v2 to
trans_VFM_sp.

We clear the SIMDFMAC field to 0 in the !has_neon condition,
and I think that should actually be in the !neon && !vfp part.

I propose to squash in the following and beef up the commit message:

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index f641478fc80..d4c73a20b6a 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -3506,6 +3506,13 @@ static inline bool
isar_feature_aa32_fp16_dpconv(const ARMISARegisters *id)
     return FIELD_EX32(id->mvfr1, MVFR1, FPHP) > 1;
 }

+/*
+ * Note that this ID register field covers both VFP and Neon FMAC,
+ * so should usually be tested in combination with some other
+ * check that confirms the presence of whichever of VFP or Neon is
+ * relevant, to avoid accidentally enabling a Neon feature on
+ * a VFP-no-Neon core or vice-versa.
+ */
 static inline bool isar_feature_aa32_simdfmac(const ARMISARegisters *id)
 {
     return FIELD_EX32(id->mvfr1, MVFR1, SIMDFMAC) != 0;
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index d5a75c265ac..95ada81ebae 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1510,7 +1510,6 @@ static void arm_cpu_realizefn(DeviceState *dev,
Error **errp)
         u = FIELD_DP32(u, MVFR1, SIMDINT, 0);
         u = FIELD_DP32(u, MVFR1, SIMDSP, 0);
         u = FIELD_DP32(u, MVFR1, SIMDHP, 0);
-        u = FIELD_DP32(u, MVFR1, SIMDFMAC, 0);
         cpu->isar.mvfr1 = u;

         u = cpu->isar.mvfr2;
@@ -1533,6 +1532,11 @@ static void arm_cpu_realizefn(DeviceState *dev,
Error **errp)
         u = cpu->isar.mvfr0;
         u = FIELD_DP32(u, MVFR0, SIMDREG, 0);
         cpu->isar.mvfr0 = u;
+
+        /* Despite the name, this field covers both VFP and Neon */
+        u = cpu->isar.mvfr1;
+        u = FIELD_DP32(u, MVFR1, SIMDFMAC, 0);
+        cpu->isar.mvfr1;
     }

     if (arm_feature(env, ARM_FEATURE_M) && !cpu->has_dsp) {
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index f6f7601fe2a..69052d840a4 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -1805,8 +1805,13 @@ static bool trans_VFM_sp(DisasContext *s, arg_VFM_sp *a)
      * Present in VFPv4 only.
      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
+     * Note that we can't rely on the SIMDFMAC check alone, because
+     * in a Neon-no-VFP core that ID register field will be non-zero.
      */
-    if (!dc_isar_feature(aa32_simdfmac, s)) {
+    if (!dc_isar_feature(aa32_simdfmac, s) ||
+        !dc_isar_feature(aa32_fpsp_v2, s)) {
+        return false;
+    }
         return false;
     }
     if (s->vec_len != 0 || s->vec_stride != 0) {


thanks
-- PMM
Peter Maydell Feb. 20, 2020, 4:41 p.m. UTC | #2
On Thu, 20 Feb 2020 at 16:37, Peter Maydell <peter.maydell@linaro.org> wrote:
>
> On Fri, 14 Feb 2020 at 18:16, Richard Henderson
> <richard.henderson@linaro.org> wrote:
> >
> > All remaining tests for VFP4 are for fused multiply-add insns.
> >
> > Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> > ---
> >  target/arm/cpu.h               |  5 +++++
> >  target/arm/translate-vfp.inc.c | 12 ++++++++----
> >  target/arm/translate.c         |  2 +-
> >  3 files changed, 14 insertions(+), 5 deletions(-)
> >
> > diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> > index 4ff28418df..f27b8e35df 100644
> > --- a/target/arm/cpu.h
> > +++ b/target/arm/cpu.h
> > @@ -3468,6 +3468,11 @@ static inline bool isar_feature_aa32_fp16_dpconv(const ARMISARegisters *id)
> >      return FIELD_EX32(id->mvfr1, MVFR1, FPHP) > 1;
> >  }
> >
> > +static inline bool isar_feature_aa32_simdfmac(const ARMISARegisters *id)
> > +{
> > +    return FIELD_EX32(id->mvfr1, MVFR1, SIMDFMAC) != 0;
> > +}
>
> This is tricky, because the SIMDFMAC register
> field indicates "do we have fused-multiply-accumulate
> for either VFP or Neon", so in a VFP-no-Neon core or
> a Neon-no-VFP core it will be 1 but can't be used on its
> own as a gate on "should this insn be present".
>
> Currently in the part of arm_cpu_realize() which handles
> the user having selected vfp=off and/or neon=off we
> do allow (for AArch32 cores) both of those combinations.
>
> trans_VFM_dp already tests aa32_fpdp_v2, so I think the
> main thing we need to do is add a test on aa32_fpsp_v2 to
> trans_VFM_sp.
>
> We clear the SIMDFMAC field to 0 in the !has_neon condition,
> and I think that should actually be in the !neon && !vfp part.
>
> I propose to squash in the following and beef up the commit message:
>

> +        /* Despite the name, this field covers both VFP and Neon */
> +        u = cpu->isar.mvfr1;
> +        u = FIELD_DP32(u, MVFR1, SIMDFMAC, 0);
> +        cpu->isar.mvfr1;

 ... "cpu->isar.mvfr1 = u;", obviously.


> -    if (!dc_isar_feature(aa32_simdfmac, s)) {
> +    if (!dc_isar_feature(aa32_simdfmac, s) ||
> +        !dc_isar_feature(aa32_fpsp_v2, s)) {
> +        return false;
> +    }

and not that extra "}".

-- PMM
Richard Henderson Feb. 20, 2020, 5:55 p.m. UTC | #3
On 2/20/20 8:37 AM, Peter Maydell wrote:
> This is tricky, because the SIMDFMAC register
> field indicates "do we have fused-multiply-accumulate
> for either VFP or Neon", so in a VFP-no-Neon core or
> a Neon-no-VFP core it will be 1 but can't be used on its
> own as a gate on "should this insn be present".
> 
> Currently in the part of arm_cpu_realize() which handles
> the user having selected vfp=off and/or neon=off we
> do allow (for AArch32 cores) both of those combinations.
> 
> trans_VFM_dp already tests aa32_fpdp_v2, so I think the
> main thing we need to do is add a test on aa32_fpsp_v2 to
> trans_VFM_sp.
> 
> We clear the SIMDFMAC field to 0 in the !has_neon condition,
> and I think that should actually be in the !neon && !vfp part.
> 
> I propose to squash in the following and beef up the commit message:

Good catch.  Makes sense.


r~
diff mbox series

Patch

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 4ff28418df..f27b8e35df 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -3468,6 +3468,11 @@  static inline bool isar_feature_aa32_fp16_dpconv(const ARMISARegisters *id)
     return FIELD_EX32(id->mvfr1, MVFR1, FPHP) > 1;
 }
 
+static inline bool isar_feature_aa32_simdfmac(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->mvfr1, MVFR1, SIMDFMAC) != 0;
+}
+
 static inline bool isar_feature_aa32_vsel(const ARMISARegisters *id)
 {
     return FIELD_EX32(id->mvfr2, MVFR2, FPMISC) >= 1;
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index 8913320259..f6f7601fe2 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -1806,8 +1806,10 @@  static bool trans_VFM_sp(DisasContext *s, arg_VFM_sp *a)
      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
      */
-    if (!arm_dc_feature(s, ARM_FEATURE_VFP4) ||
-        (s->vec_len != 0 || s->vec_stride != 0)) {
+    if (!dc_isar_feature(aa32_simdfmac, s)) {
+        return false;
+    }
+    if (s->vec_len != 0 || s->vec_stride != 0) {
         return false;
     }
 
@@ -1864,8 +1866,10 @@  static bool trans_VFM_dp(DisasContext *s, arg_VFM_dp *a)
      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
      */
-    if (!arm_dc_feature(s, ARM_FEATURE_VFP4) ||
-        (s->vec_len != 0 || s->vec_stride != 0)) {
+    if (!dc_isar_feature(aa32_simdfmac, s)) {
+        return false;
+    }
+    if (s->vec_len != 0 || s->vec_stride != 0) {
         return false;
     }
 
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 3b9bf13933..0da780102c 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -4877,7 +4877,7 @@  static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
             }
             break;
         case NEON_3R_VFM_VQRDMLSH:
-            if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
+            if (!dc_isar_feature(aa32_simdfmac, s)) {
                 return 1;
             }
             break;