diff mbox series

[RFC,20/65] target/riscv: rvv-0.9: update vext_max_elems() for load/store insns

Message ID 20200710104920.13550-21-frank.chang@sifive.com (mailing list archive)
State New, archived
Headers show
Series target/riscv: support vector extension v0.9 | expand

Commit Message

Frank Chang July 10, 2020, 10:48 a.m. UTC
From: Frank Chang <frank.chang@sifive.com>

Signed-off-by: Frank Chang <frank.chang@sifive.com>
---
 target/riscv/vector_helper.c | 118 ++++++++++++++++++++---------------
 1 file changed, 68 insertions(+), 50 deletions(-)
diff mbox series

Patch

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 39b9a462ab..2a006f956c 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -129,14 +129,32 @@  static inline uint32_t vext_vma(uint32_t desc)
 }
 
 /*
- * Get vector group length in bytes. Its range is [64, 2048].
- *
- * As simd_desc support at most 256, the max vlen is 512 bits.
- * So vlen in bytes is encoded as maxsz.
+ * Get the maximum number of elements that can be operated on.
  */
-static inline uint32_t vext_maxsz(uint32_t desc)
+static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz, bool is_ldst)
 {
-    return simd_maxsz(desc) << vext_lmul(desc);
+    /*
+     * As simd_desc supports at most 256, the max vlen is 512 bits,
+     * so vlen in bytes (vlenb) is encoded as maxsz.
+     */
+    uint32_t vlenb = simd_maxsz(desc);
+
+    if (is_ldst) {
+        /*
+         * Vector load/store instructions have the EEW encoded
+         * directly in the instructions. The maximum vector size is
+         * calculated with EMUL rather than LMUL.
+         */
+        uint32_t eew = esz << 3;
+        uint32_t sew = vext_sew(desc);
+        float flmul = vext_vflmul(desc);
+        float emul = (float)eew / sew * flmul;
+        uint32_t emul_r = emul < 1 ? 1 : emul;
+        return vlenb * emul_r / esz;
+    } else {
+        /* Return VLMAX */
+        return vlenb * vext_vflmul(desc) / esz;
+    }
 }
 
 /*
@@ -296,7 +314,7 @@  vext_ldst_stride(void *vd, void *v0, target_ulong base,
 {
     uint32_t i, k;
     uint32_t nf = vext_nf(desc);
-    uint32_t vlmax = vext_maxsz(desc) / esz;
+    uint32_t max_elems = vext_max_elems(desc, esz, true);
     uint32_t vta = vext_vta(desc);
 
     /* probe every access*/
@@ -314,15 +332,15 @@  vext_ldst_stride(void *vd, void *v0, target_ulong base,
         }
         while (k < nf) {
             target_ulong addr = base + stride * i + k * esz;
-            ldst_elem(env, addr, i + k * vlmax, vd, ra);
+            ldst_elem(env, addr, i + k * max_elems, vd, ra);
             k++;
         }
     }
     /* clear tail elements */
     if (clear_elem) {
         for (k = 0; k < nf; k++) {
-            clear_elem(vd, vta, env->vl + k * vlmax,
-                       env->vl * esz, vlmax * esz);
+            clear_elem(vd, vta, env->vl + k * max_elems,
+                       env->vl * esz, max_elems * esz);
         }
     }
 }
@@ -371,7 +389,7 @@  vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
 {
     uint32_t i, k;
     uint32_t nf = vext_nf(desc);
-    uint32_t vlmax = vext_maxsz(desc) / esz;
+    uint32_t max_elems = vext_max_elems(desc, esz, true);
     uint32_t vta = vext_vta(desc);
 
     /* probe every access */
@@ -381,15 +399,15 @@  vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
         k = 0;
         while (k < nf) {
             target_ulong addr = base + (i * nf + k) * esz;
-            ldst_elem(env, addr, i + k * vlmax, vd, ra);
+            ldst_elem(env, addr, i + k * max_elems, vd, ra);
             k++;
         }
     }
     /* clear tail elements */
     if (clear_elem) {
         for (k = 0; k < nf; k++) {
-            clear_elem(vd, vta, env->vl + k * vlmax,
-                       env->vl * esz, vlmax * esz);
+            clear_elem(vd, vta, env->vl + k * max_elems,
+                       env->vl * esz, max_elems * esz);
         }
     }
 }
@@ -472,7 +490,7 @@  vext_ldst_index(void *vd, void *v0, target_ulong base,
     uint32_t i, k;
     uint32_t nf = vext_nf(desc);
     uint32_t vm = vext_vm(desc);
-    uint32_t vlmax = vext_maxsz(desc) / esz;
+    uint32_t max_elems = vext_max_elems(desc, esz, true);
     uint32_t vta = vext_vta(desc);
 
     /* probe every access*/
@@ -491,15 +509,15 @@  vext_ldst_index(void *vd, void *v0, target_ulong base,
         }
         while (k < nf) {
             abi_ptr addr = get_index_addr(base, i, vs2) + k * esz;
-            ldst_elem(env, addr, i + k * vlmax, vd, ra);
+            ldst_elem(env, addr, i + k * max_elems, vd, ra);
             k++;
         }
     }
     /* clear tail elements */
     if (clear_elem) {
         for (k = 0; k < nf; k++) {
-            clear_elem(vd, vta, env->vl + k * vlmax,
-                       env->vl * esz, vlmax * esz);
+            clear_elem(vd, vta, env->vl + k * max_elems,
+                       env->vl * esz, max_elems * esz);
         }
     }
 }
@@ -570,7 +588,7 @@  vext_ldff(void *vd, void *v0, target_ulong base,
     uint32_t i, k, vl = 0;
     uint32_t nf = vext_nf(desc);
     uint32_t vm = vext_vm(desc);
-    uint32_t vlmax = vext_maxsz(desc) / esz;
+    uint32_t max_elems = vext_max_elems(desc, esz, true);
     uint32_t vta = vext_vta(desc);
     target_ulong addr, offset, remain;
 
@@ -622,7 +640,7 @@  ProbeSuccess:
         }
         while (k < nf) {
             target_ulong addr = base + (i * nf + k) * esz;
-            ldst_elem(env, addr, i + k * vlmax, vd, ra);
+            ldst_elem(env, addr, i + k * max_elems, vd, ra);
             k++;
         }
     }
@@ -631,8 +649,8 @@  ProbeSuccess:
         return;
     }
     for (k = 0; k < nf; k++) {
-        clear_elem(vd, vta, env->vl + k * vlmax,
-                   env->vl * esz, vlmax * esz);
+        clear_elem(vd, vta, env->vl + k * max_elems,
+                   env->vl * esz, max_elems * esz);
     }
 }
 
@@ -659,7 +677,7 @@  vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
 {
     uint32_t i, k;
     uint32_t nf = vext_nf(desc);
-    uint32_t vlmax = vext_maxsz(desc) / esz;
+    uint32_t max_elems = vext_max_elems(desc, esz, true);
 
     /* probe every access */
     probe_pages(env, base, env->vlenb * nf * esz, ra, access_type);
@@ -669,7 +687,7 @@  vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
         k = 0;
         while (k < nf) {
             target_ulong addr = base + (i * nf + k) * esz;
-            ldst_elem(env, addr, i + k * vlmax, vd, ra);
+            ldst_elem(env, addr, i + k * max_elems, vd, ra);
             k++;
         }
     }
@@ -812,7 +830,7 @@  vext_amo_noatomic(void *vs3, void *v0, target_ulong base,
     target_long addr;
     uint32_t wd = vext_wd(desc);
     uint32_t vm = vext_vm(desc);
-    uint32_t vlmax = vext_maxsz(desc) / esz;
+    uint32_t vlmax = vext_max_elems(desc, esz, false);
     uint32_t vta = vext_vta(desc);
 
     for (i = 0; i < env->vl; i++) {
@@ -983,7 +1001,7 @@  static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
                        uint32_t esz, uint32_t dsz,
                        opivv2_fn *fn, clear_fn *clearfn)
 {
-    uint32_t vlmax = vext_maxsz(desc) / esz;
+    uint32_t vlmax = vext_max_elems(desc, esz, false);
     uint32_t vm = vext_vm(desc);
     uint32_t vta = vext_vta(desc);
     uint32_t vl = env->vl;
@@ -995,7 +1013,7 @@  static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
         }
         fn(vd, vs1, vs2, i);
     }
-    clearfn(vd, vta, vl, vl * dsz,  vlmax * dsz);
+    clearfn(vd, vta, vl, vl * dsz, vlmax * dsz);
 }
 
 /* generate the helpers for OPIVV */
@@ -1048,7 +1066,7 @@  static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
                        uint32_t esz, uint32_t dsz,
                        opivx2_fn fn, clear_fn *clearfn)
 {
-    uint32_t vlmax = vext_maxsz(desc) / esz;
+    uint32_t vlmax = vext_max_elems(desc, esz, false);
     uint32_t vm = vext_vm(desc);
     uint32_t vta = vext_vta(desc);
     uint32_t vl = env->vl;
@@ -1060,7 +1078,7 @@  static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
         }
         fn(vd, s1, vs2, i);
     }
-    clearfn(vd, vta, vl, vl * dsz,  vlmax * dsz);
+    clearfn(vd, vta, vl, vl * dsz, vlmax * dsz);
 }
 
 /* generate the helpers for OPIVX */
@@ -1247,7 +1265,7 @@  void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
 {                                                             \
     uint32_t vl = env->vl;                                    \
     uint32_t esz = sizeof(ETYPE);                             \
-    uint32_t vlmax = vext_maxsz(desc) / esz;                  \
+    uint32_t vlmax = vext_max_elems(desc, esz, false);        \
     uint32_t vta = vext_vta(desc);                            \
     uint32_t i;                                               \
                                                               \
@@ -1277,7 +1295,7 @@  void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
 {                                                                        \
     uint32_t vl = env->vl;                                               \
     uint32_t esz = sizeof(ETYPE);                                        \
-    uint32_t vlmax = vext_maxsz(desc) / esz;                             \
+    uint32_t vlmax = vext_max_elems(desc, esz, false);                   \
     uint32_t vta = vext_vta(desc);                                       \
     uint32_t i;                                                          \
                                                                          \
@@ -1339,7 +1357,7 @@  void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
                   void *vs2, CPURISCVState *env, uint32_t desc) \
 {                                                               \
     uint32_t vl = env->vl;                                      \
-    uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);          \
+    uint32_t vlmax = vext_max_elems(desc, sizeof(ETYPE), false);\
     uint32_t i;                                                 \
                                                                 \
     for (i = 0; i < vl; i++) {                                  \
@@ -1427,7 +1445,7 @@  void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
     uint32_t vm = vext_vm(desc);                                          \
     uint32_t vl = env->vl;                                                \
     uint32_t esz = sizeof(TS1);                                           \
-    uint32_t vlmax = vext_maxsz(desc) / esz;                              \
+    uint32_t vlmax = vext_max_elems(desc, esz, false);                    \
     uint32_t vta = vext_vta(desc);                                        \
     uint32_t i;                                                           \
                                                                           \
@@ -1465,7 +1483,7 @@  void HELPER(NAME)(void *vd, void *v0, target_ulong s1,                \
     uint32_t vm = vext_vm(desc);                                      \
     uint32_t vl = env->vl;                                            \
     uint32_t esz = sizeof(TD);                                        \
-    uint32_t vlmax = vext_maxsz(desc) / esz;                          \
+    uint32_t vlmax = vext_max_elems(desc, esz, false);                \
     uint32_t vta = vext_vta(desc);                                    \
     uint32_t i;                                                       \
                                                                       \
@@ -2108,7 +2126,7 @@  void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env,           \
 {                                                                    \
     uint32_t vl = env->vl;                                           \
     uint32_t esz = sizeof(ETYPE);                                    \
-    uint32_t vlmax = vext_maxsz(desc) / esz;                         \
+    uint32_t vlmax = vext_max_elems(desc, esz, false);               \
     uint32_t vta = vext_vta(desc);                                   \
     uint32_t i;                                                      \
                                                                      \
@@ -2130,7 +2148,7 @@  void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env,         \
 {                                                                    \
     uint32_t vl = env->vl;                                           \
     uint32_t esz = sizeof(ETYPE);                                    \
-    uint32_t vlmax = vext_maxsz(desc) / esz;                         \
+    uint32_t vlmax = vext_max_elems(desc, esz, false);               \
     uint32_t vta = vext_vta(desc);                                   \
     uint32_t i;                                                      \
                                                                      \
@@ -2151,7 +2169,7 @@  void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
 {                                                                    \
     uint32_t vl = env->vl;                                           \
     uint32_t esz = sizeof(ETYPE);                                    \
-    uint32_t vlmax = vext_maxsz(desc) / esz;                         \
+    uint32_t vlmax = vext_max_elems(desc, esz, false);               \
     uint32_t vta = vext_vta(desc);                                   \
     uint32_t i;                                                      \
                                                                      \
@@ -2173,7 +2191,7 @@  void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
 {                                                                    \
     uint32_t vl = env->vl;                                           \
     uint32_t esz = sizeof(ETYPE);                                    \
-    uint32_t vlmax = vext_maxsz(desc) / esz;                         \
+    uint32_t vlmax = vext_max_elems(desc, esz, false);               \
     uint32_t vta = vext_vta(desc);                                   \
     uint32_t i;                                                      \
                                                                      \
@@ -2234,7 +2252,7 @@  vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
              uint32_t desc, uint32_t esz, uint32_t dsz,
              opivv2_rm_fn *fn, clear_fn *clearfn)
 {
-    uint32_t vlmax = vext_maxsz(desc) / esz;
+    uint32_t vlmax = vext_max_elems(desc, esz, false);
     uint32_t vm = vext_vm(desc);
     uint32_t vta = vext_vta(desc);
     uint32_t vl = env->vl;
@@ -2354,7 +2372,7 @@  vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
              uint32_t desc, uint32_t esz, uint32_t dsz,
              opivx2_rm_fn *fn, clear_fn *clearfn)
 {
-    uint32_t vlmax = vext_maxsz(desc) / esz;
+    uint32_t vlmax = vext_max_elems(desc, esz, false);
     uint32_t vm = vext_vm(desc);
     uint32_t vta = vext_vta(desc);
     uint32_t vl = env->vl;
@@ -3258,7 +3276,7 @@  void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                   void *vs2, CPURISCVState *env,          \
                   uint32_t desc)                          \
 {                                                         \
-    uint32_t vlmax = vext_maxsz(desc) / ESZ;              \
+    uint32_t vlmax = vext_max_elems(desc, ESZ, false);    \
     uint32_t vm = vext_vm(desc);                          \
     uint32_t vta = vext_vta(desc);                        \
     uint32_t vl = env->vl;                                \
@@ -3293,7 +3311,7 @@  void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
                   void *vs2, CPURISCVState *env,          \
                   uint32_t desc)                          \
 {                                                         \
-    uint32_t vlmax = vext_maxsz(desc) / ESZ;              \
+    uint32_t vlmax = vext_max_elems(desc, ESZ, false);    \
     uint32_t vm = vext_vm(desc);                          \
     uint32_t vta = vext_vta(desc);                        \
     uint32_t vl = env->vl;                                \
@@ -3864,7 +3882,7 @@  static void do_##NAME(void *vd, void *vs2, int i,      \
 void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
         CPURISCVState *env, uint32_t desc)             \
 {                                                      \
-    uint32_t vlmax = vext_maxsz(desc) / ESZ;           \
+    uint32_t vlmax = vext_max_elems(desc, ESZ, false); \
     uint32_t vm = vext_vm(desc);                       \
     uint32_t vta = vext_vta(desc);                     \
     uint32_t vl = env->vl;                             \
@@ -4041,7 +4059,7 @@  void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,       \
 {                                                                   \
     uint32_t vm = vext_vm(desc);                                    \
     uint32_t vl = env->vl;                                          \
-    uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);              \
+    uint32_t vlmax = vext_max_elems(desc, sizeof(ETYPE), false);    \
     uint32_t i;                                                     \
                                                                     \
     for (i = 0; i < vl; i++) {                                      \
@@ -4185,7 +4203,7 @@  static void do_##NAME(void *vd, void *vs2, int i)      \
 void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
                   CPURISCVState *env, uint32_t desc)   \
 {                                                      \
-    uint32_t vlmax = vext_maxsz(desc) / ESZ;           \
+    uint32_t vlmax = vext_max_elems(desc, ESZ, false); \
     uint32_t vm = vext_vm(desc);                       \
     uint32_t vta = vext_vta(desc);                     \
     uint32_t vl = env->vl;                             \
@@ -4272,7 +4290,7 @@  void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
     uint32_t vm = vext_vm(desc);                              \
     uint32_t vl = env->vl;                                    \
     uint32_t esz = sizeof(ETYPE);                             \
-    uint32_t vlmax = vext_maxsz(desc) / esz;                  \
+    uint32_t vlmax = vext_max_elems(desc, esz, false);        \
     uint32_t vta = vext_vta(desc);                            \
     uint32_t i;                                               \
                                                               \
@@ -4772,7 +4790,7 @@  GEN_VEXT_VID_V(vid_v_d, uint64_t, H8, clearq)
 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                   CPURISCVState *env, uint32_t desc)                      \
 {                                                                         \
-    uint32_t vlmax = env_archcpu(env)->cfg.vlen;                          \
+    uint32_t vlmax = vext_max_elems(desc, sizeof(ETYPE), false);          \
     uint32_t vm = vext_vm(desc);                                          \
     uint32_t vta = vext_vta(desc);                                        \
     uint32_t vl = env->vl;                                                \
@@ -4882,7 +4900,7 @@  GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8, clearq)
 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                   CPURISCVState *env, uint32_t desc)                      \
 {                                                                         \
-    uint32_t vlmax = env_archcpu(env)->cfg.vlen;                          \
+    uint32_t vlmax = vext_max_elems(desc, sizeof(ETYPE), false);          \
     uint32_t vm = vext_vm(desc);                                          \
     uint32_t vta = vext_vta(desc);                                        \
     uint32_t vl = env->vl;                                                \
@@ -4912,7 +4930,7 @@  GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, H8, clearq)
 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                   CPURISCVState *env, uint32_t desc)                      \
 {                                                                         \
-    uint32_t vlmax = env_archcpu(env)->cfg.vlen;                          \
+    uint32_t vlmax = vext_max_elems(desc, sizeof(ETYPE), false);          \
     uint32_t vm = vext_vm(desc);                                          \
     uint32_t vta = vext_vta(desc);                                        \
     uint32_t vl = env->vl;                                                \
@@ -4942,7 +4960,7 @@  GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8, clearq)
 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                   CPURISCVState *env, uint32_t desc)                      \
 {                                                                         \
-    uint32_t vlmax = env_archcpu(env)->cfg.vlen;                          \
+    uint32_t vlmax = vext_max_elems(desc, sizeof(ETYPE), false);          \
     uint32_t vta = vext_vta(desc);                                        \
     uint32_t vl = env->vl;                                                \
     uint32_t num = 0, i;                                                  \