diff mbox series

[v10,02/10] target/riscv: handle vstart >= vl in vext_set_tail_elems_1s()

Message ID 20240310115315.187283-3-dbarboza@ventanamicro.com (mailing list archive)
State New, archived
Headers show
Series riscv: set vstart_eq_zero on mark_vs_dirty | expand

Commit Message

Daniel Henrique Barboza March 10, 2024, 11:53 a.m. UTC
We're going to make changes that will require each helper to be
responsible for the 'vstart' management, i.e. we will drop the
'vstart < vl' assumption that helpers rely on today.

To do that we'll need to deal with how we're updating tail elements
first. We can't update them if vstart >= vl, but at this moment we're
not guarding for it.

We have the vext_set_tail_elems_1s() helper to update tail elements.
Change it to accept an 'env' pointer, where we can read both vstart and
vl, and make it a no-op if vstart >= vl. Note that callers will need to
set env->vstart = 0 *after* the helper from now on.

The exceptions are three helpers: vext_ldst_stride(), vext_ldst_us() and
vext_ldst_index(). They increment env->vstart during execution and will
end up with env->vstart = vl by the time the tail is updated. For
these cases only, do an early check and exit if vstart >= vl, and set
env->vstart = 0 before updating the tail.

For everyone else we'll do vext_set_tail_elems_1s() and then clear
env->vstart. This is the case of vext_ldff(), which is already using
vext_set_tail_elems_1s(), and will be the case for the rest after the
next patches.

Let's also simplify the API a little by removing the 'nf' argument since
it can be derived from 'desc'.

Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/riscv/vector_helper.c | 59 ++++++++++++++++++++++++++++++------
 1 file changed, 49 insertions(+), 10 deletions(-)
diff mbox series

Patch

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index ca79571ae2..a3b496b6e9 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -174,19 +174,32 @@  GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
 
-static void vext_set_tail_elems_1s(target_ulong vl, void *vd,
-                                   uint32_t desc, uint32_t nf,
-                                   uint32_t esz, uint32_t max_elems)
+/*
+ * This function is sensitive to env->vstart changes since
+ * it'll be a no-op if vstart >= vl. Do not clear env->vstart
+ * before calling it unless you're certain that vstart < vl.
+ */
+static void vext_set_tail_elems_1s(CPURISCVState *env, void *vd,
+                                   uint32_t desc, uint32_t esz,
+                                   uint32_t max_elems)
 {
     uint32_t vta = vext_vta(desc);
+    uint32_t nf = vext_nf(desc);
     int k;
 
-    if (vta == 0) {
+    /*
+     * Section 5.4 of the RVV spec mentions:
+     * "When vstart ≥ vl, there are no body elements, and no
+     *  elements are updated in any destination vector register
+     *  group, including that no tail elements are updated
+     *  with agnostic values."
+     */
+    if (vta == 0 || env->vstart >= env->vl) {
         return;
     }
 
     for (k = 0; k < nf; ++k) {
-        vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
+        vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
                           (k * max_elems + max_elems) * esz);
     }
 }
@@ -207,6 +220,11 @@  vext_ldst_stride(void *vd, void *v0, target_ulong base,
     uint32_t esz = 1 << log2_esz;
     uint32_t vma = vext_vma(desc);
 
+    if (env->vstart >= env->vl) {
+        env->vstart = 0;
+        return;
+    }
+
     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
         k = 0;
         while (k < nf) {
@@ -222,9 +240,13 @@  vext_ldst_stride(void *vd, void *v0, target_ulong base,
             k++;
         }
     }
+    /*
+     * Set vstart before tail update - vstart changed during
+     * execution and we already checked that vstart < vl.
+     */
     env->vstart = 0;
 
-    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
+    vext_set_tail_elems_1s(env, vd, desc, esz, max_elems);
 }
 
 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
@@ -272,6 +294,11 @@  vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
     uint32_t max_elems = vext_max_elems(desc, log2_esz);
     uint32_t esz = 1 << log2_esz;
 
+    if (env->vstart >= env->vl) {
+        env->vstart = 0;
+        return;
+    }
+
     /* load bytes from guest memory */
     for (i = env->vstart; i < evl; i++, env->vstart++) {
         k = 0;
@@ -281,9 +308,13 @@  vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             k++;
         }
     }
+    /*
+     * Set vstart before tail update - vstart changed during
+     * execution and we already checked that vstart < vl.
+     */
     env->vstart = 0;
 
-    vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
+    vext_set_tail_elems_1s(env, vd, desc, esz, max_elems);
 }
 
 /*
@@ -386,6 +417,11 @@  vext_ldst_index(void *vd, void *v0, target_ulong base,
     uint32_t esz = 1 << log2_esz;
     uint32_t vma = vext_vma(desc);
 
+    if (env->vstart >= env->vl) {
+        env->vstart = 0;
+        return;
+    }
+
     /* load bytes from guest memory */
     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
         k = 0;
@@ -402,9 +438,13 @@  vext_ldst_index(void *vd, void *v0, target_ulong base,
             k++;
         }
     }
+    /*
+     * Set vstart before tail update - vstart changed during
+     * execution and we already checked that vstart < vl.
+     */
     env->vstart = 0;
 
-    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
+    vext_set_tail_elems_1s(env, vd, desc, esz, max_elems);
 }
 
 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
@@ -532,9 +572,8 @@  ProbeSuccess:
             k++;
         }
     }
+    vext_set_tail_elems_1s(env, vd, desc, esz, max_elems);
     env->vstart = 0;
-
-    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
 }
 
 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \