@@ -129,14 +129,32 @@ static inline uint32_t vext_vma(uint32_t desc)
}
/*
- * Get vector group length in bytes. Its range is [64, 2048].
- *
- * As simd_desc support at most 256, the max vlen is 512 bits.
- * So vlen in bytes is encoded as maxsz.
+ * Get the maximum number of elements that can be operated on.
*/
-static inline uint32_t vext_maxsz(uint32_t desc)
+static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz, bool is_ldst)
{
- return simd_maxsz(desc) << vext_lmul(desc);
+ /*
+ * As simd_desc supports at most 256, the max vlen is 512 bits,
+ * so vlen in bytes (vlenb) is encoded as maxsz.
+ */
+ uint32_t vlenb = simd_maxsz(desc);
+
+ if (is_ldst) {
+ /*
+ * Vector load/store instructions have the EEW encoded
+ * directly in the instruction, so the maximum element count
+ * is computed with EMUL (clamped to at least 1), not LMUL.
+ */
+ uint32_t eew = esz << 3;
+ uint32_t sew = vext_sew(desc);
+ float flmul = vext_vflmul(desc);
+ float emul = (float)eew / sew * flmul;
+ uint32_t emul_r = emul < 1 ? 1 : emul;
+ return vlenb * emul_r / esz;
+ } else {
+ /* Return VLMAX, derived from the vflmul value rather than EMUL */
+ return vlenb * vext_vflmul(desc) / esz;
+ }
}
/*
@@ -296,7 +314,7 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
{
uint32_t i, k;
uint32_t nf = vext_nf(desc);
- uint32_t vlmax = vext_maxsz(desc) / esz;
+ uint32_t max_elems = vext_max_elems(desc, esz, true);
uint32_t vta = vext_vta(desc);
/* probe every access*/
@@ -314,15 +332,15 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
}
while (k < nf) {
target_ulong addr = base + stride * i + k * esz;
- ldst_elem(env, addr, i + k * vlmax, vd, ra);
+ ldst_elem(env, addr, i + k * max_elems, vd, ra);
k++;
}
}
/* clear tail elements */
if (clear_elem) {
for (k = 0; k < nf; k++) {
- clear_elem(vd, vta, env->vl + k * vlmax,
- env->vl * esz, vlmax * esz);
+ clear_elem(vd, vta, env->vl + k * max_elems,
+ env->vl * esz, max_elems * esz);
}
}
}
@@ -371,7 +389,7 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
{
uint32_t i, k;
uint32_t nf = vext_nf(desc);
- uint32_t vlmax = vext_maxsz(desc) / esz;
+ uint32_t max_elems = vext_max_elems(desc, esz, true);
uint32_t vta = vext_vta(desc);
/* probe every access */
@@ -381,15 +399,15 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
k = 0;
while (k < nf) {
target_ulong addr = base + (i * nf + k) * esz;
- ldst_elem(env, addr, i + k * vlmax, vd, ra);
+ ldst_elem(env, addr, i + k * max_elems, vd, ra);
k++;
}
}
/* clear tail elements */
if (clear_elem) {
for (k = 0; k < nf; k++) {
- clear_elem(vd, vta, env->vl + k * vlmax,
- env->vl * esz, vlmax * esz);
+ clear_elem(vd, vta, env->vl + k * max_elems,
+ env->vl * esz, max_elems * esz);
}
}
}
@@ -472,7 +490,7 @@ vext_ldst_index(void *vd, void *v0, target_ulong base,
uint32_t i, k;
uint32_t nf = vext_nf(desc);
uint32_t vm = vext_vm(desc);
- uint32_t vlmax = vext_maxsz(desc) / esz;
+ uint32_t max_elems = vext_max_elems(desc, esz, true);
uint32_t vta = vext_vta(desc);
/* probe every access*/
@@ -491,15 +509,15 @@ vext_ldst_index(void *vd, void *v0, target_ulong base,
}
while (k < nf) {
abi_ptr addr = get_index_addr(base, i, vs2) + k * esz;
- ldst_elem(env, addr, i + k * vlmax, vd, ra);
+ ldst_elem(env, addr, i + k * max_elems, vd, ra);
k++;
}
}
/* clear tail elements */
if (clear_elem) {
for (k = 0; k < nf; k++) {
- clear_elem(vd, vta, env->vl + k * vlmax,
- env->vl * esz, vlmax * esz);
+ clear_elem(vd, vta, env->vl + k * max_elems,
+ env->vl * esz, max_elems * esz);
}
}
}
@@ -570,7 +588,7 @@ vext_ldff(void *vd, void *v0, target_ulong base,
uint32_t i, k, vl = 0;
uint32_t nf = vext_nf(desc);
uint32_t vm = vext_vm(desc);
- uint32_t vlmax = vext_maxsz(desc) / esz;
+ uint32_t max_elems = vext_max_elems(desc, esz, true);
uint32_t vta = vext_vta(desc);
target_ulong addr, offset, remain;
@@ -622,7 +640,7 @@ ProbeSuccess:
}
while (k < nf) {
target_ulong addr = base + (i * nf + k) * esz;
- ldst_elem(env, addr, i + k * vlmax, vd, ra);
+ ldst_elem(env, addr, i + k * max_elems, vd, ra);
k++;
}
}
@@ -631,8 +649,8 @@ ProbeSuccess:
return;
}
for (k = 0; k < nf; k++) {
- clear_elem(vd, vta, env->vl + k * vlmax,
- env->vl * esz, vlmax * esz);
+ clear_elem(vd, vta, env->vl + k * max_elems,
+ env->vl * esz, max_elems * esz);
}
}
@@ -659,7 +677,7 @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
{
uint32_t i, k;
uint32_t nf = vext_nf(desc);
- uint32_t vlmax = vext_maxsz(desc) / esz;
+ uint32_t max_elems = vext_max_elems(desc, esz, true);
/* probe every access */
probe_pages(env, base, env->vlenb * nf * esz, ra, access_type);
@@ -669,7 +687,7 @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
k = 0;
while (k < nf) {
target_ulong addr = base + (i * nf + k) * esz;
- ldst_elem(env, addr, i + k * vlmax, vd, ra);
+ ldst_elem(env, addr, i + k * max_elems, vd, ra);
k++;
}
}
@@ -812,7 +830,7 @@ vext_amo_noatomic(void *vs3, void *v0, target_ulong base,
target_long addr;
uint32_t wd = vext_wd(desc);
uint32_t vm = vext_vm(desc);
- uint32_t vlmax = vext_maxsz(desc) / esz;
+ uint32_t vlmax = vext_max_elems(desc, esz, false);
uint32_t vta = vext_vta(desc);
for (i = 0; i < env->vl; i++) {
@@ -983,7 +1001,7 @@ static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
uint32_t esz, uint32_t dsz,
opivv2_fn *fn, clear_fn *clearfn)
{
- uint32_t vlmax = vext_maxsz(desc) / esz;
+ uint32_t vlmax = vext_max_elems(desc, esz, false);
uint32_t vm = vext_vm(desc);
uint32_t vta = vext_vta(desc);
uint32_t vl = env->vl;
@@ -995,7 +1013,7 @@ static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
}
fn(vd, vs1, vs2, i);
}
- clearfn(vd, vta, vl, vl * dsz, vlmax * dsz);
+ clearfn(vd, vta, vl, vl * dsz, vlmax * dsz);
}
/* generate the helpers for OPIVV */
@@ -1048,7 +1066,7 @@ static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
uint32_t esz, uint32_t dsz,
opivx2_fn fn, clear_fn *clearfn)
{
- uint32_t vlmax = vext_maxsz(desc) / esz;
+ uint32_t vlmax = vext_max_elems(desc, esz, false);
uint32_t vm = vext_vm(desc);
uint32_t vta = vext_vta(desc);
uint32_t vl = env->vl;
@@ -1060,7 +1078,7 @@ static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
}
fn(vd, s1, vs2, i);
}
- clearfn(vd, vta, vl, vl * dsz, vlmax * dsz);
+ clearfn(vd, vta, vl, vl * dsz, vlmax * dsz);
}
/* generate the helpers for OPIVX */
@@ -1247,7 +1265,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
{ \
uint32_t vl = env->vl; \
uint32_t esz = sizeof(ETYPE); \
- uint32_t vlmax = vext_maxsz(desc) / esz; \
+ uint32_t vlmax = vext_max_elems(desc, esz, false); \
uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
@@ -1277,7 +1295,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
{ \
uint32_t vl = env->vl; \
uint32_t esz = sizeof(ETYPE); \
- uint32_t vlmax = vext_maxsz(desc) / esz; \
+ uint32_t vlmax = vext_max_elems(desc, esz, false); \
uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
@@ -1339,7 +1357,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
uint32_t vl = env->vl; \
- uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \
+ uint32_t vlmax = vext_max_elems(desc, sizeof(ETYPE), false);\
uint32_t i; \
\
for (i = 0; i < vl; i++) { \
@@ -1427,7 +1445,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
uint32_t esz = sizeof(TS1); \
- uint32_t vlmax = vext_maxsz(desc) / esz; \
+ uint32_t vlmax = vext_max_elems(desc, esz, false); \
uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
@@ -1465,7 +1483,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
uint32_t esz = sizeof(TD); \
- uint32_t vlmax = vext_maxsz(desc) / esz; \
+ uint32_t vlmax = vext_max_elems(desc, esz, false); \
uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
@@ -2108,7 +2126,7 @@ void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
{ \
uint32_t vl = env->vl; \
uint32_t esz = sizeof(ETYPE); \
- uint32_t vlmax = vext_maxsz(desc) / esz; \
+ uint32_t vlmax = vext_max_elems(desc, esz, false); \
uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
@@ -2130,7 +2148,7 @@ void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
{ \
uint32_t vl = env->vl; \
uint32_t esz = sizeof(ETYPE); \
- uint32_t vlmax = vext_maxsz(desc) / esz; \
+ uint32_t vlmax = vext_max_elems(desc, esz, false); \
uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
@@ -2151,7 +2169,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
{ \
uint32_t vl = env->vl; \
uint32_t esz = sizeof(ETYPE); \
- uint32_t vlmax = vext_maxsz(desc) / esz; \
+ uint32_t vlmax = vext_max_elems(desc, esz, false); \
uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
@@ -2173,7 +2191,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
{ \
uint32_t vl = env->vl; \
uint32_t esz = sizeof(ETYPE); \
- uint32_t vlmax = vext_maxsz(desc) / esz; \
+ uint32_t vlmax = vext_max_elems(desc, esz, false); \
uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
@@ -2234,7 +2252,7 @@ vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
uint32_t desc, uint32_t esz, uint32_t dsz,
opivv2_rm_fn *fn, clear_fn *clearfn)
{
- uint32_t vlmax = vext_maxsz(desc) / esz;
+ uint32_t vlmax = vext_max_elems(desc, esz, false);
uint32_t vm = vext_vm(desc);
uint32_t vta = vext_vta(desc);
uint32_t vl = env->vl;
@@ -2354,7 +2372,7 @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
uint32_t desc, uint32_t esz, uint32_t dsz,
opivx2_rm_fn *fn, clear_fn *clearfn)
{
- uint32_t vlmax = vext_maxsz(desc) / esz;
+ uint32_t vlmax = vext_max_elems(desc, esz, false);
uint32_t vm = vext_vm(desc);
uint32_t vta = vext_vta(desc);
uint32_t vl = env->vl;
@@ -3258,7 +3276,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \
void *vs2, CPURISCVState *env, \
uint32_t desc) \
{ \
- uint32_t vlmax = vext_maxsz(desc) / ESZ; \
+ uint32_t vlmax = vext_max_elems(desc, ESZ, false); \
uint32_t vm = vext_vm(desc); \
uint32_t vta = vext_vta(desc); \
uint32_t vl = env->vl; \
@@ -3293,7 +3311,7 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
void *vs2, CPURISCVState *env, \
uint32_t desc) \
{ \
- uint32_t vlmax = vext_maxsz(desc) / ESZ; \
+ uint32_t vlmax = vext_max_elems(desc, ESZ, false); \
uint32_t vm = vext_vm(desc); \
uint32_t vta = vext_vta(desc); \
uint32_t vl = env->vl; \
@@ -3864,7 +3882,7 @@ static void do_##NAME(void *vd, void *vs2, int i, \
void HELPER(NAME)(void *vd, void *v0, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t vlmax = vext_maxsz(desc) / ESZ; \
+ uint32_t vlmax = vext_max_elems(desc, ESZ, false); \
uint32_t vm = vext_vm(desc); \
uint32_t vta = vext_vta(desc); \
uint32_t vl = env->vl; \
@@ -4041,7 +4059,7 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
{ \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
- uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \
+ uint32_t vlmax = vext_max_elems(desc, sizeof(ETYPE), false); \
uint32_t i; \
\
for (i = 0; i < vl; i++) { \
@@ -4185,7 +4203,7 @@ static void do_##NAME(void *vd, void *vs2, int i) \
void HELPER(NAME)(void *vd, void *v0, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t vlmax = vext_maxsz(desc) / ESZ; \
+ uint32_t vlmax = vext_max_elems(desc, ESZ, false); \
uint32_t vm = vext_vm(desc); \
uint32_t vta = vext_vta(desc); \
uint32_t vl = env->vl; \
@@ -4272,7 +4290,7 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
uint32_t esz = sizeof(ETYPE); \
- uint32_t vlmax = vext_maxsz(desc) / esz; \
+ uint32_t vlmax = vext_max_elems(desc, esz, false); \
uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
@@ -4772,7 +4790,7 @@ GEN_VEXT_VID_V(vid_v_d, uint64_t, H8, clearq)
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t vlmax = env_archcpu(env)->cfg.vlen; \
+ uint32_t vlmax = vext_max_elems(desc, sizeof(ETYPE), false); \
uint32_t vm = vext_vm(desc); \
uint32_t vta = vext_vta(desc); \
uint32_t vl = env->vl; \
@@ -4882,7 +4900,7 @@ GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8, clearq)
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t vlmax = env_archcpu(env)->cfg.vlen; \
+ uint32_t vlmax = vext_max_elems(desc, sizeof(ETYPE), false); \
uint32_t vm = vext_vm(desc); \
uint32_t vta = vext_vta(desc); \
uint32_t vl = env->vl; \
@@ -4912,7 +4930,7 @@ GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, H8, clearq)
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t vlmax = env_archcpu(env)->cfg.vlen; \
+ uint32_t vlmax = vext_max_elems(desc, sizeof(ETYPE), false); \
uint32_t vm = vext_vm(desc); \
uint32_t vta = vext_vta(desc); \
uint32_t vl = env->vl; \
@@ -4942,7 +4960,7 @@ GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8, clearq)
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- uint32_t vlmax = env_archcpu(env)->cfg.vlen; \
+ uint32_t vlmax = vext_max_elems(desc, sizeof(ETYPE), false); \
uint32_t vta = vext_vta(desc); \
uint32_t vl = env->vl; \
uint32_t num = 0, i; \