diff mbox series

[09/13] target/riscv: Adjust vector address with ol

Message ID 20211101100143.44356-10-zhiwei_liu@c-sky.com (mailing list archive)
State New, archived
Headers show
Series Support UXL field in xstatus. | expand

Commit Message

LIU Zhiwei Nov. 1, 2021, 10:01 a.m. UTC
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 target/riscv/insn_trans/trans_rvv.c.inc |  8 ++++
 target/riscv/internals.h                |  1 +
 target/riscv/vector_helper.c            | 54 +++++++++++++++++--------
 3 files changed, 46 insertions(+), 17 deletions(-)

Comments

Richard Henderson Nov. 1, 2021, 11:35 a.m. UTC | #1
On 11/1/21 6:01 AM, LIU Zhiwei wrote:
> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
> ---
>   target/riscv/insn_trans/trans_rvv.c.inc |  8 ++++
>   target/riscv/internals.h                |  1 +
>   target/riscv/vector_helper.c            | 54 +++++++++++++++++--------
>   3 files changed, 46 insertions(+), 17 deletions(-)
> 
> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
> index ed042f7bb9..5cd9b802df 100644
> --- a/target/riscv/insn_trans/trans_rvv.c.inc
> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
> @@ -233,6 +233,7 @@ static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq)
>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>       data = FIELD_DP32(data, VDATA, NF, a->nf);
> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>       return ldst_us_trans(a->rd, a->rs1, data, fn, s);
>   }
>   
> @@ -286,6 +287,7 @@ static bool st_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq)
>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>       data = FIELD_DP32(data, VDATA, NF, a->nf);
> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>       return ldst_us_trans(a->rd, a->rs1, data, fn, s);
>   }
>   
> @@ -365,6 +367,7 @@ static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t seq)
>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>       data = FIELD_DP32(data, VDATA, NF, a->nf);
> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>       return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>   }
>   
> @@ -404,6 +407,7 @@ static bool st_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t seq)
>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>       data = FIELD_DP32(data, VDATA, NF, a->nf);
> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>       fn =  fns[seq][s->sew];
>       if (fn == NULL) {
>           return false;
> @@ -490,6 +494,7 @@ static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t seq)
>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>       data = FIELD_DP32(data, VDATA, NF, a->nf);
> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>       return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>   }
>   
> @@ -542,6 +547,7 @@ static bool st_index_op(DisasContext *s, arg_rnfvm *a, uint8_t seq)
>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>       data = FIELD_DP32(data, VDATA, NF, a->nf);
> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>       return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>   }
>   
> @@ -617,6 +623,7 @@ static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq)
>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>       data = FIELD_DP32(data, VDATA, NF, a->nf);
> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>       return ldff_trans(a->rd, a->rs1, data, fn, s);
>   }
>   
> @@ -724,6 +731,7 @@ static bool amo_op(DisasContext *s, arg_rwdvm *a, uint8_t seq)
>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>       data = FIELD_DP32(data, VDATA, WD, a->wd);
> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>       return amo_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>   }
>   /*
> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
> index b15ad394bb..f74b8291e4 100644
> --- a/target/riscv/internals.h
> +++ b/target/riscv/internals.h
> @@ -27,6 +27,7 @@ FIELD(VDATA, VM, 8, 1)
>   FIELD(VDATA, LMUL, 9, 2)
>   FIELD(VDATA, NF, 11, 4)
>   FIELD(VDATA, WD, 11, 1)
> +FIELD(VDATA, OL, 15, 2)
>   
>   /* float point classify helpers */
>   target_ulong fclass_h(uint64_t frs1);
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index 535420ee66..451688c328 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -112,6 +112,11 @@ static uint32_t vext_wd(uint32_t desc)
>       return (simd_data(desc) >> 11) & 0x1;
>   }
>   
> +static inline uint32_t vext_ol(uint32_t desc)
> +{
> +    return FIELD_EX32(simd_data(desc), VDATA, OL);
> +}

XLEN not OLEN.

> @@ -123,6 +128,14 @@ static inline uint32_t vext_maxsz(uint32_t desc)
>       return simd_maxsz(desc) << vext_lmul(desc);
>   }
>   
> +static inline target_ulong adjust_addr(target_ulong addr, uint32_t olen)
> +{
> +    if (olen < TARGET_LONG_BITS) {
> +        addr &= UINT32_MAX;
> +    }
> +    return addr;
> +}

Here's where I'm unsure.  This looks a lot like the changes that are required to support 
pointer-masking in vectors, which Alexey said he was going to look at.

(1) Do we need to pass anything in VEXT at all?
     We do have CPURISCVState, so we could just use cpu_get_ml,
     which we would also need for env->mmte etc for pointer masking.

(2) Do we try to streamline the "normal" case with a simple bit in VEXT
     that indicates if the address needs modification at all?  I.e. the
     bit is set if UXLEN < TARGET_LONG_BITS or if PM_ENABLED?

(3) Do we try to streamline the computation by passing down composite
     mask and base parameters.  This way we don't need to do complex
     examination of ENV to determine execution mode, and instead always
     compute

        addr = (addr & mask) | base;

     where mask = -1, base = 0 for "normal" addressing, and when
     UXLEN == 32, mask <= UINT32_MAX.

(4) Do we in fact want to pre-compute these into known slots on ENV,
     so that we don't have to pass these around as separate parameters?
     We would adjust these values during PM CSR changes and when
     changing privilege levels.


r~
LIU Zhiwei Nov. 8, 2021, 9:28 a.m. UTC | #2
On 2021/11/1 下午7:35, Richard Henderson wrote:

> On 11/1/21 6:01 AM, LIU Zhiwei wrote:
>> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
>> ---
>>   target/riscv/insn_trans/trans_rvv.c.inc |  8 ++++
>>   target/riscv/internals.h                |  1 +
>>   target/riscv/vector_helper.c            | 54 +++++++++++++++++--------
>>   3 files changed, 46 insertions(+), 17 deletions(-)
>>
>> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
>> b/target/riscv/insn_trans/trans_rvv.c.inc
>> index ed042f7bb9..5cd9b802df 100644
>> --- a/target/riscv/insn_trans/trans_rvv.c.inc
>> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
>> @@ -233,6 +233,7 @@ static bool ld_us_op(DisasContext *s, arg_r2nfvm 
>> *a, uint8_t seq)
>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>       return ldst_us_trans(a->rd, a->rs1, data, fn, s);
>>   }
>>   @@ -286,6 +287,7 @@ static bool st_us_op(DisasContext *s, 
>> arg_r2nfvm *a, uint8_t seq)
>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>       return ldst_us_trans(a->rd, a->rs1, data, fn, s);
>>   }
>>   @@ -365,6 +367,7 @@ static bool ld_stride_op(DisasContext *s, 
>> arg_rnfvm *a, uint8_t seq)
>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>       return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>   }
>>   @@ -404,6 +407,7 @@ static bool st_stride_op(DisasContext *s, 
>> arg_rnfvm *a, uint8_t seq)
>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>       fn =  fns[seq][s->sew];
>>       if (fn == NULL) {
>>           return false;
>> @@ -490,6 +494,7 @@ static bool ld_index_op(DisasContext *s, 
>> arg_rnfvm *a, uint8_t seq)
>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>       return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>   }
>>   @@ -542,6 +547,7 @@ static bool st_index_op(DisasContext *s, 
>> arg_rnfvm *a, uint8_t seq)
>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>       return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>   }
>>   @@ -617,6 +623,7 @@ static bool ldff_op(DisasContext *s, arg_r2nfvm 
>> *a, uint8_t seq)
>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>       return ldff_trans(a->rd, a->rs1, data, fn, s);
>>   }
>>   @@ -724,6 +731,7 @@ static bool amo_op(DisasContext *s, arg_rwdvm 
>> *a, uint8_t seq)
>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>       data = FIELD_DP32(data, VDATA, WD, a->wd);
>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>       return amo_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>   }
>>   /*
>> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
>> index b15ad394bb..f74b8291e4 100644
>> --- a/target/riscv/internals.h
>> +++ b/target/riscv/internals.h
>> @@ -27,6 +27,7 @@ FIELD(VDATA, VM, 8, 1)
>>   FIELD(VDATA, LMUL, 9, 2)
>>   FIELD(VDATA, NF, 11, 4)
>>   FIELD(VDATA, WD, 11, 1)
>> +FIELD(VDATA, OL, 15, 2)
>>     /* float point classify helpers */
>>   target_ulong fclass_h(uint64_t frs1);
>> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
>> index 535420ee66..451688c328 100644
>> --- a/target/riscv/vector_helper.c
>> +++ b/target/riscv/vector_helper.c
>> @@ -112,6 +112,11 @@ static uint32_t vext_wd(uint32_t desc)
>>       return (simd_data(desc) >> 11) & 0x1;
>>   }
>>   +static inline uint32_t vext_ol(uint32_t desc)
>> +{
>> +    return FIELD_EX32(simd_data(desc), VDATA, OL);
>> +}
>
> XLEN not OLEN.
OK.
>
>> @@ -123,6 +128,14 @@ static inline uint32_t vext_maxsz(uint32_t desc)
>>       return simd_maxsz(desc) << vext_lmul(desc);
>>   }
>>   +static inline target_ulong adjust_addr(target_ulong addr, uint32_t 
>> olen)
>> +{
>> +    if (olen < TARGET_LONG_BITS) {
>> +        addr &= UINT32_MAX;
>> +    }
>> +    return addr;
>> +}
>
> Here's where I'm unsure.  This looks a lot like the changes that are 
> required to support pointer-masking in vectors, which Alexey said he 
> was going to look at.
>
> (1) Do we need to pass anything in VEXT at all?
>     We do have CPURISCVState, so we could just use cpu_get_ml,
Yes, we should use cpu_get_xl.
> which we would also need for env->mmte etc for pointer masking.

Do you mean env->mpmmask and env->mpmbase? I think yes, we should also 
adjust these register behaviors with xlen.

>
> (2) Do we try to streamline the "normal" case with a simple bit in VEXT
>     that indicates if the address needs modification at all?  I.e. the
>     bit is set if UXLEN < TARGET_LONG_BITS or if PM_ENABLED?
>
> (3) Do we try to streamline the computation by passing down composite
>     mask and base parameters.  This way we don't need to do complex
>     examination of ENV to determine execution mode, and instead always
>     compute
>
>        addr = (addr & mask) | base;
>
>     where mask = -1, base = 0 for "normal" addressing, and when
>     UXLEN == 32, mask <= UINT32_MAX.

Do you mean add env->pmmask and env->pmbase?

I can initialize them in riscv_tr_init_disas_context, such as by 
env->xpmmask & UINT32_MAX.

>
> (4) Do we in fact want to pre-compute these into known slots on ENV,
>     so that we don't have to pass these around as separate parameters?
>     We would adjust these values during PM CSR changes and when
>     changing privilege levels.
>
>
> r~
Richard Henderson Nov. 9, 2021, 6:37 a.m. UTC | #3
On 11/8/21 10:28 AM, LIU Zhiwei wrote:
> On 2021/11/1 下午7:35, Richard Henderson wrote:
> 
>> On 11/1/21 6:01 AM, LIU Zhiwei wrote:
>>> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
>>> ---
>>>   target/riscv/insn_trans/trans_rvv.c.inc |  8 ++++
>>>   target/riscv/internals.h                |  1 +
>>>   target/riscv/vector_helper.c            | 54 +++++++++++++++++--------
>>>   3 files changed, 46 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
>>> b/target/riscv/insn_trans/trans_rvv.c.inc
>>> index ed042f7bb9..5cd9b802df 100644
>>> --- a/target/riscv/insn_trans/trans_rvv.c.inc
>>> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
>>> @@ -233,6 +233,7 @@ static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq)
>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>       return ldst_us_trans(a->rd, a->rs1, data, fn, s);
>>>   }
>>>   @@ -286,6 +287,7 @@ static bool st_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq)
>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>       return ldst_us_trans(a->rd, a->rs1, data, fn, s);
>>>   }
>>>   @@ -365,6 +367,7 @@ static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t seq)
>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>       return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>>   }
>>>   @@ -404,6 +407,7 @@ static bool st_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t seq)
>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>       fn =  fns[seq][s->sew];
>>>       if (fn == NULL) {
>>>           return false;
>>> @@ -490,6 +494,7 @@ static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t seq)
>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>       return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>>   }
>>>   @@ -542,6 +547,7 @@ static bool st_index_op(DisasContext *s, arg_rnfvm *a, uint8_t seq)
>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>       return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>>   }
>>>   @@ -617,6 +623,7 @@ static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq)
>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>       return ldff_trans(a->rd, a->rs1, data, fn, s);
>>>   }
>>>   @@ -724,6 +731,7 @@ static bool amo_op(DisasContext *s, arg_rwdvm *a, uint8_t seq)
>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>       data = FIELD_DP32(data, VDATA, WD, a->wd);
>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>       return amo_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>>   }
>>>   /*
>>> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
>>> index b15ad394bb..f74b8291e4 100644
>>> --- a/target/riscv/internals.h
>>> +++ b/target/riscv/internals.h
>>> @@ -27,6 +27,7 @@ FIELD(VDATA, VM, 8, 1)
>>>   FIELD(VDATA, LMUL, 9, 2)
>>>   FIELD(VDATA, NF, 11, 4)
>>>   FIELD(VDATA, WD, 11, 1)
>>> +FIELD(VDATA, OL, 15, 2)
>>>     /* float point classify helpers */
>>>   target_ulong fclass_h(uint64_t frs1);
>>> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
>>> index 535420ee66..451688c328 100644
>>> --- a/target/riscv/vector_helper.c
>>> +++ b/target/riscv/vector_helper.c
>>> @@ -112,6 +112,11 @@ static uint32_t vext_wd(uint32_t desc)
>>>       return (simd_data(desc) >> 11) & 0x1;
>>>   }
>>>   +static inline uint32_t vext_ol(uint32_t desc)
>>> +{
>>> +    return FIELD_EX32(simd_data(desc), VDATA, OL);
>>> +}
>>
>> XLEN not OLEN.
> OK.
>>
>>> @@ -123,6 +128,14 @@ static inline uint32_t vext_maxsz(uint32_t desc)
>>>       return simd_maxsz(desc) << vext_lmul(desc);
>>>   }
>>>   +static inline target_ulong adjust_addr(target_ulong addr, uint32_t olen)
>>> +{
>>> +    if (olen < TARGET_LONG_BITS) {
>>> +        addr &= UINT32_MAX;
>>> +    }
>>> +    return addr;
>>> +}
>>
>> Here's where I'm unsure.  This looks a lot like the changes that are required to support 
>> pointer-masking in vectors, which Alexey said he was going to look at.
>>
>> (1) Do we need to pass anything in VEXT at all?
>>     We do have CPURISCVState, so we could just use cpu_get_ml,
> Yes, we should use cpu_get_xl.
>> which we would also need for env->mmte etc for pointer masking.
> 
> Do you mean env->mpmmask and env->mpmbase? I think yes, we should also adjust these 
> register behaviors with xlen.

I mean the set of [msu]pmmask and [msu]pmbase, selected as appropriate for the current 
execution mode.

>> (3) Do we try to streamline the computation by passing down composite
>>     mask and base parameters.  This way we don't need to do complex
>>     examination of ENV to determine execution mode, and instead always
>>     compute
>>
>>        addr = (addr & mask) | base;
>>
>>     where mask = -1, base = 0 for "normal" addressing, and when
>>     UXLEN == 32, mask <= UINT32_MAX.
> 
> Do you mean add env->pmmask and env->pmbase?
> 
> I can initialize them in riscv_tr_init_disas_context, such as by env->xpmmask & UINT32_MAX .
> 
>>
>> (4) Do we in fact want to pre-compute these into known slots on ENV,
>>     so that we don't have to pass these around as separate parameters?
>>     We would adjust these values during PM CSR changes and when
>>     changing privilege levels.
For option (3), I was suggesting a mask + base pair passed down from TCG-generated code.

For option (4), I was suggesting embedding a mask + base pair in env, which would be 
re-computed at every privilege level change, plus reset and vmload.

In both cases, the mask would be a combination of [msu]pmmask & (RV32 ? UINT32_MAX : 
UINT64_MAX), as you say.


r~
LIU Zhiwei Nov. 9, 2021, 8:04 a.m. UTC | #4
On 2021/11/9 下午2:37, Richard Henderson wrote:

> On 11/8/21 10:28 AM, LIU Zhiwei wrote:
>> On 2021/11/1 下午7:35, Richard Henderson wrote:
>>
>>> On 11/1/21 6:01 AM, LIU Zhiwei wrote:
>>>> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
>>>> ---
>>>>   target/riscv/insn_trans/trans_rvv.c.inc |  8 ++++
>>>>   target/riscv/internals.h                |  1 +
>>>>   target/riscv/vector_helper.c            | 54 
>>>> +++++++++++++++++--------
>>>>   3 files changed, 46 insertions(+), 17 deletions(-)
>>>>
>>>> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
>>>> b/target/riscv/insn_trans/trans_rvv.c.inc
>>>> index ed042f7bb9..5cd9b802df 100644
>>>> --- a/target/riscv/insn_trans/trans_rvv.c.inc
>>>> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
>>>> @@ -233,6 +233,7 @@ static bool ld_us_op(DisasContext *s, 
>>>> arg_r2nfvm *a, uint8_t seq)
>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>       return ldst_us_trans(a->rd, a->rs1, data, fn, s);
>>>>   }
>>>>   @@ -286,6 +287,7 @@ static bool st_us_op(DisasContext *s, 
>>>> arg_r2nfvm *a, uint8_t seq)
>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>       return ldst_us_trans(a->rd, a->rs1, data, fn, s);
>>>>   }
>>>>   @@ -365,6 +367,7 @@ static bool ld_stride_op(DisasContext *s, 
>>>> arg_rnfvm *a, uint8_t seq)
>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>       return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>>>   }
>>>>   @@ -404,6 +407,7 @@ static bool st_stride_op(DisasContext *s, 
>>>> arg_rnfvm *a, uint8_t seq)
>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>       fn =  fns[seq][s->sew];
>>>>       if (fn == NULL) {
>>>>           return false;
>>>> @@ -490,6 +494,7 @@ static bool ld_index_op(DisasContext *s, 
>>>> arg_rnfvm *a, uint8_t seq)
>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>       return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>>>   }
>>>>   @@ -542,6 +547,7 @@ static bool st_index_op(DisasContext *s, 
>>>> arg_rnfvm *a, uint8_t seq)
>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>       return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>>>   }
>>>>   @@ -617,6 +623,7 @@ static bool ldff_op(DisasContext *s, 
>>>> arg_r2nfvm *a, uint8_t seq)
>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>       return ldff_trans(a->rd, a->rs1, data, fn, s);
>>>>   }
>>>>   @@ -724,6 +731,7 @@ static bool amo_op(DisasContext *s, arg_rwdvm 
>>>> *a, uint8_t seq)
>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>       data = FIELD_DP32(data, VDATA, WD, a->wd);
>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>       return amo_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>>>   }
>>>>   /*
>>>> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
>>>> index b15ad394bb..f74b8291e4 100644
>>>> --- a/target/riscv/internals.h
>>>> +++ b/target/riscv/internals.h
>>>> @@ -27,6 +27,7 @@ FIELD(VDATA, VM, 8, 1)
>>>>   FIELD(VDATA, LMUL, 9, 2)
>>>>   FIELD(VDATA, NF, 11, 4)
>>>>   FIELD(VDATA, WD, 11, 1)
>>>> +FIELD(VDATA, OL, 15, 2)
>>>>     /* float point classify helpers */
>>>>   target_ulong fclass_h(uint64_t frs1);
>>>> diff --git a/target/riscv/vector_helper.c 
>>>> b/target/riscv/vector_helper.c
>>>> index 535420ee66..451688c328 100644
>>>> --- a/target/riscv/vector_helper.c
>>>> +++ b/target/riscv/vector_helper.c
>>>> @@ -112,6 +112,11 @@ static uint32_t vext_wd(uint32_t desc)
>>>>       return (simd_data(desc) >> 11) & 0x1;
>>>>   }
>>>>   +static inline uint32_t vext_ol(uint32_t desc)
>>>> +{
>>>> +    return FIELD_EX32(simd_data(desc), VDATA, OL);
>>>> +}
>>>
>>> XLEN not OLEN.
>> OK.
>>>
>>>> @@ -123,6 +128,14 @@ static inline uint32_t vext_maxsz(uint32_t desc)
>>>>       return simd_maxsz(desc) << vext_lmul(desc);
>>>>   }
>>>>   +static inline target_ulong adjust_addr(target_ulong addr, 
>>>> uint32_t olen)
>>>> +{
>>>> +    if (olen < TARGET_LONG_BITS) {
>>>> +        addr &= UINT32_MAX;
>>>> +    }
>>>> +    return addr;
>>>> +}
>>>
>>> Here's where I'm unsure.  This looks a lot like the changes that are 
>>> required to support pointer-masking in vectors, which Alexey said he 
>>> was going to look at.
>>>
>>> (1) Do we need to pass anything in VEXT at all?
>>>     We do have CPURISCVState, so we could just use cpu_get_ml,
>> Yes, we should use cpu_get_xl.
>>> which we would also need for env->mmte etc for pointer masking.
>>
>> Do you mean env->mpmmask and env->mpmbase? I think yes, we should 
>> also adjust these register behaviors with xlen.
>
> I mean the set of [msu]pmmask and [msu]pmbase, selected as appropriate 
> for the current execution mode.
>
>>> (3) Do we try to streamline the computation by passing down composite
>>>     mask and base parameters.  This way we don't need to do complex
>>>     examination of ENV to determine execution mode, and instead always
>>>     compute
>>>
>>>        addr = (addr & mask) | base;
>>>
>>>     where mask = -1, base = 0 for "normal" addressing, and when
>>>     UXLEN == 32, mask <= UINT32_MAX.
>>
>> Do you mean add env->pmmask and env->pmbase?
>>
>> I can initialize them in riscv_tr_init_disas_context, such as by 
>> env->xpmmask & UINT32_MAX .
>>
>>>
>>> (4) Do we in fact want to pre-compute these into known slots on ENV,
>>>     so that we don't have to pass these around as separate parameters?
>>>     We would adjust these values during PM CSR changes and when
>>>     changing privilege levels.
> For option (3), I was suggesting a mask + base pair passed down from 
> TCG-generated code.
>
> For option (4), I was suggesting embedding a mask + base pair in env, 
> which would be re-computed at every privilege level change, plus reset 
> and vmload.
>
> In both cases, the mask would be a combination of [msu]pmmask & (RV32 
> ? UINT32_MAX : UINT64_MAX), as you say.

We will calculate [msu]pmmask by csrrw, and we have ignored the high bits 
there.

Can we just use the [msu]pmmask?

Thanks,
Zhiwei

>
>
> r~
Richard Henderson Nov. 9, 2021, 8:18 a.m. UTC | #5
On 11/9/21 9:04 AM, LIU Zhiwei wrote:
> On 2021/11/9 下午2:37, Richard Henderson wrote:
> 
>> On 11/8/21 10:28 AM, LIU Zhiwei wrote:
>>> On 2021/11/1 下午7:35, Richard Henderson wrote:
>>>
>>>> On 11/1/21 6:01 AM, LIU Zhiwei wrote:
>>>>> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
>>>>> ---
>>>>>   target/riscv/insn_trans/trans_rvv.c.inc |  8 ++++
>>>>>   target/riscv/internals.h                |  1 +
>>>>>   target/riscv/vector_helper.c            | 54 +++++++++++++++++--------
>>>>>   3 files changed, 46 insertions(+), 17 deletions(-)
>>>>>
>>>>> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
>>>>> b/target/riscv/insn_trans/trans_rvv.c.inc
>>>>> index ed042f7bb9..5cd9b802df 100644
>>>>> --- a/target/riscv/insn_trans/trans_rvv.c.inc
>>>>> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
>>>>> @@ -233,6 +233,7 @@ static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq)
>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>       return ldst_us_trans(a->rd, a->rs1, data, fn, s);
>>>>>   }
>>>>>   @@ -286,6 +287,7 @@ static bool st_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq)
>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>       return ldst_us_trans(a->rd, a->rs1, data, fn, s);
>>>>>   }
>>>>>   @@ -365,6 +367,7 @@ static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t 
>>>>> seq)
>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>       return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>>>>   }
>>>>>   @@ -404,6 +407,7 @@ static bool st_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t 
>>>>> seq)
>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>       fn =  fns[seq][s->sew];
>>>>>       if (fn == NULL) {
>>>>>           return false;
>>>>> @@ -490,6 +494,7 @@ static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t seq)
>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>       return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>>>>   }
>>>>>   @@ -542,6 +547,7 @@ static bool st_index_op(DisasContext *s, arg_rnfvm *a, uint8_t 
>>>>> seq)
>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>       return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>>>>   }
>>>>>   @@ -617,6 +623,7 @@ static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq)
>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>       return ldff_trans(a->rd, a->rs1, data, fn, s);
>>>>>   }
>>>>>   @@ -724,6 +731,7 @@ static bool amo_op(DisasContext *s, arg_rwdvm *a, uint8_t seq)
>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>       data = FIELD_DP32(data, VDATA, WD, a->wd);
>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>       return amo_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>>>>   }
>>>>>   /*
>>>>> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
>>>>> index b15ad394bb..f74b8291e4 100644
>>>>> --- a/target/riscv/internals.h
>>>>> +++ b/target/riscv/internals.h
>>>>> @@ -27,6 +27,7 @@ FIELD(VDATA, VM, 8, 1)
>>>>>   FIELD(VDATA, LMUL, 9, 2)
>>>>>   FIELD(VDATA, NF, 11, 4)
>>>>>   FIELD(VDATA, WD, 11, 1)
>>>>> +FIELD(VDATA, OL, 15, 2)
>>>>>     /* float point classify helpers */
>>>>>   target_ulong fclass_h(uint64_t frs1);
>>>>> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
>>>>> index 535420ee66..451688c328 100644
>>>>> --- a/target/riscv/vector_helper.c
>>>>> +++ b/target/riscv/vector_helper.c
>>>>> @@ -112,6 +112,11 @@ static uint32_t vext_wd(uint32_t desc)
>>>>>       return (simd_data(desc) >> 11) & 0x1;
>>>>>   }
>>>>>   +static inline uint32_t vext_ol(uint32_t desc)
>>>>> +{
>>>>> +    return FIELD_EX32(simd_data(desc), VDATA, OL);
>>>>> +}
>>>>
>>>> XLEN not OLEN.
>>> OK.
>>>>
>>>>> @@ -123,6 +128,14 @@ static inline uint32_t vext_maxsz(uint32_t desc)
>>>>>       return simd_maxsz(desc) << vext_lmul(desc);
>>>>>   }
>>>>>   +static inline target_ulong adjust_addr(target_ulong addr, uint32_t olen)
>>>>> +{
>>>>> +    if (olen < TARGET_LONG_BITS) {
>>>>> +        addr &= UINT32_MAX;
>>>>> +    }
>>>>> +    return addr;
>>>>> +}
>>>>
>>>> Here's where I'm unsure.  This looks a lot like the changes that are required to 
>>>> support pointer-masking in vectors, which Alexey said he was going to look at.
>>>>
>>>> (1) Do we need to pass anything in VEXT at all?
>>>>     We do have CPURISCVState, so we could just use cpu_get_ml,
>>> Yes, we should use cpu_get_xl.
>>>> which we would also need for env->mmte etc for pointer masking.
>>>
>>> Do you mean env->mpmmask and env->mpmbase? I think yes, we should also adjust these 
>>> register behaviors with xlen.
>>
>> I mean the set of [msu]pmmask and [msu]pmbase, selected as appropriate for the current 
>> execution mode.
>>
>>>> (3) Do we try to streamline the computation by passing down composite
>>>>     mask and base parameters.  This way we don't need to do complex
>>>>     examination of ENV to determine execution mode, and instead always
>>>>     compute
>>>>
>>>>        addr = (addr & mask) | base;
>>>>
>>>>     where mask = -1, base = 0 for "normal" addressing, and when
>>>>     UXLEN == 32, mask <= UINT32_MAX.
>>>
>>> Do you mean add env->pmmask and env->pmbase?
>>>
>>> I can initialize them in riscv_tr_init_disas_context, such as by env->xpmmask & 
>>> UINT32_MAX .
>>>
>>>>
>>>> (4) Do we in fact want to pre-compute these into known slots on ENV,
>>>>     so that we don't have to pass these around as separate parameters?
>>>>     We would adjust these values during PM CSR changes and when
>>>>     changing privilege levels.
>> For option (3), I was suggesting a mask + base pair passed down from TCG-generated code.
>>
>> For option (4), I was suggesting embedding a mask + base pair in env, which would be 
>> re-computed at every privilege level change, plus reset and vmload.
>>
>> In both cases, the mask would be a combination of [msu]pmmask & (RV32 ? UINT32_MAX : 
>> UINT64_MAX), as you say.
> 
> We will calculate [msu]pmmask by  csrrw , and we have ignored high bits there.
> 
> Can we just use the [msu]pmmmask?

We could.  However:

In order to select [msu]pmmask, we have to look up the current cpu state.  In order to 
mask the high bits, we have to look up the current xl, which requires that we look up the 
current cpu state and then extract the xl from misa and mstatus.

All of which means that we're doing repeated lookups for every memory access.  I am 
suggesting that we either (3) compile those lookups into the generated code or (4) cache 
those lookups when state changes (csr writes and priv changes).


r~
LIU Zhiwei Nov. 9, 2021, 8:39 a.m. UTC | #6
On 2021/11/9 下午4:18, Richard Henderson wrote:
> On 11/9/21 9:04 AM, LIU Zhiwei wrote:
>> On 2021/11/9 下午2:37, Richard Henderson wrote:
>>
>>> On 11/8/21 10:28 AM, LIU Zhiwei wrote:
>>>> On 2021/11/1 下午7:35, Richard Henderson wrote:
>>>>
>>>>> On 11/1/21 6:01 AM, LIU Zhiwei wrote:
>>>>>> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
>>>>>> ---
>>>>>>   target/riscv/insn_trans/trans_rvv.c.inc |  8 ++++
>>>>>>   target/riscv/internals.h                |  1 +
>>>>>>   target/riscv/vector_helper.c            | 54 
>>>>>> +++++++++++++++++--------
>>>>>>   3 files changed, 46 insertions(+), 17 deletions(-)
>>>>>>
>>>>>> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
>>>>>> b/target/riscv/insn_trans/trans_rvv.c.inc
>>>>>> index ed042f7bb9..5cd9b802df 100644
>>>>>> --- a/target/riscv/insn_trans/trans_rvv.c.inc
>>>>>> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
>>>>>> @@ -233,6 +233,7 @@ static bool ld_us_op(DisasContext *s, 
>>>>>> arg_r2nfvm *a, uint8_t seq)
>>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>>       return ldst_us_trans(a->rd, a->rs1, data, fn, s);
>>>>>>   }
>>>>>>   @@ -286,6 +287,7 @@ static bool st_us_op(DisasContext *s, 
>>>>>> arg_r2nfvm *a, uint8_t seq)
>>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>>       return ldst_us_trans(a->rd, a->rs1, data, fn, s);
>>>>>>   }
>>>>>>   @@ -365,6 +367,7 @@ static bool ld_stride_op(DisasContext *s, 
>>>>>> arg_rnfvm *a, uint8_t seq)
>>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>>       return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>>>>>   }
>>>>>>   @@ -404,6 +407,7 @@ static bool st_stride_op(DisasContext *s, 
>>>>>> arg_rnfvm *a, uint8_t seq)
>>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>>       fn =  fns[seq][s->sew];
>>>>>>       if (fn == NULL) {
>>>>>>           return false;
>>>>>> @@ -490,6 +494,7 @@ static bool ld_index_op(DisasContext *s, 
>>>>>> arg_rnfvm *a, uint8_t seq)
>>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>>       return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>>>>>   }
>>>>>>   @@ -542,6 +547,7 @@ static bool st_index_op(DisasContext *s, 
>>>>>> arg_rnfvm *a, uint8_t seq)
>>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>>       return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>>>>>   }
>>>>>>   @@ -617,6 +623,7 @@ static bool ldff_op(DisasContext *s, 
>>>>>> arg_r2nfvm *a, uint8_t seq)
>>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>>       return ldff_trans(a->rd, a->rs1, data, fn, s);
>>>>>>   }
>>>>>>   @@ -724,6 +731,7 @@ static bool amo_op(DisasContext *s, 
>>>>>> arg_rwdvm *a, uint8_t seq)
>>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>>       data = FIELD_DP32(data, VDATA, WD, a->wd);
>>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>>       return amo_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>>>>>   }
>>>>>>   /*
>>>>>> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
>>>>>> index b15ad394bb..f74b8291e4 100644
>>>>>> --- a/target/riscv/internals.h
>>>>>> +++ b/target/riscv/internals.h
>>>>>> @@ -27,6 +27,7 @@ FIELD(VDATA, VM, 8, 1)
>>>>>>   FIELD(VDATA, LMUL, 9, 2)
>>>>>>   FIELD(VDATA, NF, 11, 4)
>>>>>>   FIELD(VDATA, WD, 11, 1)
>>>>>> +FIELD(VDATA, OL, 15, 2)
>>>>>>     /* float point classify helpers */
>>>>>>   target_ulong fclass_h(uint64_t frs1);
>>>>>> diff --git a/target/riscv/vector_helper.c 
>>>>>> b/target/riscv/vector_helper.c
>>>>>> index 535420ee66..451688c328 100644
>>>>>> --- a/target/riscv/vector_helper.c
>>>>>> +++ b/target/riscv/vector_helper.c
>>>>>> @@ -112,6 +112,11 @@ static uint32_t vext_wd(uint32_t desc)
>>>>>>       return (simd_data(desc) >> 11) & 0x1;
>>>>>>   }
>>>>>>   +static inline uint32_t vext_ol(uint32_t desc)
>>>>>> +{
>>>>>> +    return FIELD_EX32(simd_data(desc), VDATA, OL);
>>>>>> +}
>>>>>
>>>>> XLEN not OLEN.
>>>> OK.
>>>>>
>>>>>> @@ -123,6 +128,14 @@ static inline uint32_t vext_maxsz(uint32_t 
>>>>>> desc)
>>>>>>       return simd_maxsz(desc) << vext_lmul(desc);
>>>>>>   }
>>>>>>   +static inline target_ulong adjust_addr(target_ulong addr, 
>>>>>> uint32_t olen)
>>>>>> +{
>>>>>> +    if (olen < TARGET_LONG_BITS) {
>>>>>> +        addr &= UINT32_MAX;
>>>>>> +    }
>>>>>> +    return addr;
>>>>>> +}
>>>>>
>>>>> Here's where I'm unsure.  This looks a lot like the changes that 
>>>>> are required to support pointer-masking in vectors, which Alexey 
>>>>> said he was going to look at.
>>>>>
>>>>> (1) Do we need to pass anything in VEXT at all?
>>>>>     We do have CPURISCVState, so we could just use cpu_get_ml,
>>>> Yes, we should use cpu_get_xl.
>>>>> which we would also need for env->mmte etc for pointer masking.
>>>>
>>>> Do you mean env->mpmmask and env->mpmbase? I think yes, we should 
>>>> also adjust these register behaviors with xlen.
>>>
>>> I mean the set of [msu]pmmask and [msu]pmbase, selected as 
>>> appropriate for the current execution mode.
>>>
>>>>> (3) Do we try to streamline the computation by passing down composite
>>>>>     mask and base parameters.  This way we don't need to do complex
>>>>>     examination of ENV to determine execution mode, and instead 
>>>>> always
>>>>>     compute
>>>>>
>>>>>        addr = (addr & mask) | base;
>>>>>
>>>>>     where mask = -1, base = 0 for "normal" addressing, and when
>>>>>     UXLEN == 32, mask <= UINT32_MAX.
>>>>
>>>> Do you mean add env->pmmask and env->pmbase?
>>>>
>>>> I can initialize them in riscv_tr_init_disas_context, such as by 
>>>> env->xpmmask & UINT32_MAX .
>>>>
>>>>>
>>>>> (4) Do we in fact want to pre-compute these into known slots on ENV,
>>>>>     so that we don't have to pass these around as separate 
>>>>> parameters?
>>>>>     We would adjust these values during PM CSR changes and when
>>>>>     changing privilege levels.
>>> For option (3), I was suggesting a mask + base pair passed down from 
>>> TCG-generated code.
>>>
>>> For option (4), I was suggesting embedding a mask + base pair in 
>>> env, which would be re-computed at every privilege level change, 
>>> plus reset and vmload.
>>>
>>> In both cases, the mask would be a combination of [msu]pmmask & 
>>> (RV32 ? UINT32_MAX : UINT64_MAX), as you say.
>>
>> We will calculate [msu]pmmask by  csrrw , and we have ignored high 
>> bits there.
>>
>> Can we just use the [msu]pmmmask?
>
> We could.  However:
>
> In order to select [msu]pmmask, we have to look up the current cpu 
> state.  In order to mask the high bits, we have to look up the current 
> xl, which requires that we look up the current cpu state then extract 
> the xl from misa  and mstatus.
>
> All of which means that we're doing repeated lookups for every memory 
> access.  I am suggesting that we either (3) compile those lookups into 
> the generated code or (4) cache those lookups when state changes (csr 
> writes and priv changes).


Do you mean we should add this code to riscv_tr_init_disas_context

     if (ctx->pm_enabled) {
          switch (priv) {
          case PRV_M:
              env->mask = env->mpmmask;
              env->base = env->mpmbase;
              break;
          case PRV_S:
              env->mask = env->spmmask;
              env->base = env->spmbase;
              break;
          case PRV_U:
              env->mask = env->upmmask;
              env->base = env->upmbase;
              break;
          default:
              g_assert_not_reached();
          }
          ctx->pm_mask = pm_mask[priv];
          ctx->pm_base = pm_base[priv];
          ctx->need_mask = true; /* new flag for mask */

      } else if (get_xlen(ctx)  < TARGET_LONG_BITS) {
          env->mask = UINT32_MAX;
          env->base = 0;
          ctx->pm_mask = tcg_constant_tl(UINT32_MAX);
          ctx->pm_base = tcg_constant_tl(0);

         ctx->need_mask = true;

      } else {
	 env->mask = UINT64_MAX;
          env->base = 0;
      }

Thanks,
Zhiwei

>
>
> r~
LIU Zhiwei Nov. 9, 2021, 9:05 a.m. UTC | #7
On 2021/11/9 下午4:39, LIU Zhiwei wrote:
>
>
> On 2021/11/9 下午4:18, Richard Henderson wrote:
>> On 11/9/21 9:04 AM, LIU Zhiwei wrote:
>>> On 2021/11/9 下午2:37, Richard Henderson wrote:
>>>
>>>> On 11/8/21 10:28 AM, LIU Zhiwei wrote:
>>>>> On 2021/11/1 下午7:35, Richard Henderson wrote:
>>>>>
>>>>>> On 11/1/21 6:01 AM, LIU Zhiwei wrote:
>>>>>>> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
>>>>>>> ---
>>>>>>>   target/riscv/insn_trans/trans_rvv.c.inc |  8 ++++
>>>>>>>   target/riscv/internals.h                |  1 +
>>>>>>>   target/riscv/vector_helper.c            | 54 
>>>>>>> +++++++++++++++++--------
>>>>>>>   3 files changed, 46 insertions(+), 17 deletions(-)
>>>>>>>
>>>>>>> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
>>>>>>> b/target/riscv/insn_trans/trans_rvv.c.inc
>>>>>>> index ed042f7bb9..5cd9b802df 100644
>>>>>>> --- a/target/riscv/insn_trans/trans_rvv.c.inc
>>>>>>> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
>>>>>>> @@ -233,6 +233,7 @@ static bool ld_us_op(DisasContext *s, 
>>>>>>> arg_r2nfvm *a, uint8_t seq)
>>>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>>>       return ldst_us_trans(a->rd, a->rs1, data, fn, s);
>>>>>>>   }
>>>>>>>   @@ -286,6 +287,7 @@ static bool st_us_op(DisasContext *s, 
>>>>>>> arg_r2nfvm *a, uint8_t seq)
>>>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>>>       return ldst_us_trans(a->rd, a->rs1, data, fn, s);
>>>>>>>   }
>>>>>>>   @@ -365,6 +367,7 @@ static bool ld_stride_op(DisasContext *s, 
>>>>>>> arg_rnfvm *a, uint8_t seq)
>>>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>>>       return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>>>>>>   }
>>>>>>>   @@ -404,6 +407,7 @@ static bool st_stride_op(DisasContext *s, 
>>>>>>> arg_rnfvm *a, uint8_t seq)
>>>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>>>       fn =  fns[seq][s->sew];
>>>>>>>       if (fn == NULL) {
>>>>>>>           return false;
>>>>>>> @@ -490,6 +494,7 @@ static bool ld_index_op(DisasContext *s, 
>>>>>>> arg_rnfvm *a, uint8_t seq)
>>>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>>>       return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>>>>>>   }
>>>>>>>   @@ -542,6 +547,7 @@ static bool st_index_op(DisasContext *s, 
>>>>>>> arg_rnfvm *a, uint8_t seq)
>>>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>>>       return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>>>>>>   }
>>>>>>>   @@ -617,6 +623,7 @@ static bool ldff_op(DisasContext *s, 
>>>>>>> arg_r2nfvm *a, uint8_t seq)
>>>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>>>       data = FIELD_DP32(data, VDATA, NF, a->nf);
>>>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>>>       return ldff_trans(a->rd, a->rs1, data, fn, s);
>>>>>>>   }
>>>>>>>   @@ -724,6 +731,7 @@ static bool amo_op(DisasContext *s, 
>>>>>>> arg_rwdvm *a, uint8_t seq)
>>>>>>>       data = FIELD_DP32(data, VDATA, VM, a->vm);
>>>>>>>       data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
>>>>>>>       data = FIELD_DP32(data, VDATA, WD, a->wd);
>>>>>>> +    data = FIELD_DP32(data, VDATA, OL, s->ol);
>>>>>>>       return amo_trans(a->rd, a->rs1, a->rs2, data, fn, s);
>>>>>>>   }
>>>>>>>   /*
>>>>>>> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
>>>>>>> index b15ad394bb..f74b8291e4 100644
>>>>>>> --- a/target/riscv/internals.h
>>>>>>> +++ b/target/riscv/internals.h
>>>>>>> @@ -27,6 +27,7 @@ FIELD(VDATA, VM, 8, 1)
>>>>>>>   FIELD(VDATA, LMUL, 9, 2)
>>>>>>>   FIELD(VDATA, NF, 11, 4)
>>>>>>>   FIELD(VDATA, WD, 11, 1)
>>>>>>> +FIELD(VDATA, OL, 15, 2)
>>>>>>>     /* float point classify helpers */
>>>>>>>   target_ulong fclass_h(uint64_t frs1);
>>>>>>> diff --git a/target/riscv/vector_helper.c 
>>>>>>> b/target/riscv/vector_helper.c
>>>>>>> index 535420ee66..451688c328 100644
>>>>>>> --- a/target/riscv/vector_helper.c
>>>>>>> +++ b/target/riscv/vector_helper.c
>>>>>>> @@ -112,6 +112,11 @@ static uint32_t vext_wd(uint32_t desc)
>>>>>>>       return (simd_data(desc) >> 11) & 0x1;
>>>>>>>   }
>>>>>>>   +static inline uint32_t vext_ol(uint32_t desc)
>>>>>>> +{
>>>>>>> +    return FIELD_EX32(simd_data(desc), VDATA, OL);
>>>>>>> +}
>>>>>>
>>>>>> XLEN not OLEN.
>>>>> OK.
>>>>>>
>>>>>>> @@ -123,6 +128,14 @@ static inline uint32_t vext_maxsz(uint32_t 
>>>>>>> desc)
>>>>>>>       return simd_maxsz(desc) << vext_lmul(desc);
>>>>>>>   }
>>>>>>>   +static inline target_ulong adjust_addr(target_ulong addr, 
>>>>>>> uint32_t olen)
>>>>>>> +{
>>>>>>> +    if (olen < TARGET_LONG_BITS) {
>>>>>>> +        addr &= UINT32_MAX;
>>>>>>> +    }
>>>>>>> +    return addr;
>>>>>>> +}
>>>>>>
>>>>>> Here's where I'm unsure.  This looks a lot like the changes that 
>>>>>> are required to support pointer-masking in vectors, which Alexey 
>>>>>> said he was going to look at.
>>>>>>
>>>>>> (1) Do we need to pass anything in VEXT at all?
>>>>>>     We do have CPURISCVState, so we could just use cpu_get_ml,
>>>>> Yes, we should use cpu_get_xl.
>>>>>> which we would also need for env->mmte etc for pointer masking.
>>>>>
>>>>> Do you mean env->mpmmask and env->mpmbase? I think yes, we should 
>>>>> also adjust these register behaviors with xlen.
>>>>
>>>> I mean the set of [msu]pmmask and [msu]pmbase, selected as 
>>>> appropriate for the current execution mode.
>>>>
>>>>>> (3) Do we try to streamline the computation by passing down 
>>>>>> composite
>>>>>>     mask and base parameters.  This way we don't need to do complex
>>>>>>     examination of ENV to determine execution mode, and instead 
>>>>>> always
>>>>>>     compute
>>>>>>
>>>>>>        addr = (addr & mask) | base;
>>>>>>
>>>>>>     where mask = -1, base = 0 for "normal" addressing, and when
>>>>>>     UXLEN == 32, mask <= UINT32_MAX.
>>>>>
>>>>> Do you mean add env->pmmask and env->pmbase?
>>>>>
>>>>> I can initialize them in riscv_tr_init_disas_context, such as by 
>>>>> env->xpmmask & UINT32_MAX .
>>>>>
>>>>>>
>>>>>> (4) Do we in fact want to pre-compute these into known slots on ENV,
>>>>>>     so that we don't have to pass these around as separate 
>>>>>> parameters?
>>>>>>     We would adjust these values during PM CSR changes and when
>>>>>>     changing privilege levels.
>>>> For option (3), I was suggesting a mask + base pair passed down 
>>>> from TCG-generated code.
>>>>
>>>> For option (4), I was suggesting embedding a mask + base pair in 
>>>> env, which would be re-computed at every privilege level change, 
>>>> plus reset and vmload.
>>>>
>>>> In both cases, the mask would be a combination of [msu]pmmask & 
>>>> (RV32 ? UINT32_MAX : UINT64_MAX), as you say.
>>>
>>> We will calculate [msu]pmmask by  csrrw , and we have ignored high 
>>> bits there.
>>>
>>> Can we just use the [msu]pmmmask?
>>
>> We could.  However:
>>
>> In order to select [msu]pmmask, we have to look up the current cpu 
>> state.  In order to mask the high bits, we have to look up the 
>> current xl, which requires that we look up the current cpu state then 
>> extract the xl from misa  and mstatus.
>>
>> All of which means that we're doing repeated lookups for every memory 
>> access.  I am suggesting that we either (3) compile those lookups 
>> into the generated code or (4) cache those lookups when state changes 
>> (csr writes and priv changes).
>
>
> Do you mean we should add this code to riscv_tr_init_disas_context
>
>      if (ctx->pm_enabled) {
>           switch (priv) {
>           case PRV_M:
>               env->mask = env->mpmmask;
>               env->base = env->mpmbase;
>               break;
>           case PRV_S:
>               env->mask = env->spmmask;
>               env->base = env->spmbase;
>               break;
>           case PRV_U:
>               env->mask = env->upmmask;
>               env->base = env->upmbase;
>               break;
>           default:
>               g_assert_not_reached();
>           }
>           ctx->pm_mask = pm_mask[priv];
>           ctx->pm_base = pm_base[priv];
>           ctx->need_mask = true; /* new flag for mask */
>       } else if (get_xlen(ctx)  < TARGET_LONG_BITS) {
>           env->mask = UINT32_MAX;
>           env->base = 0;
>           ctx->pm_mask = tcg_constant_tl(UINT32_MAX);
>           ctx->pm_base = tcg_constant_tl(0);
>          ctx->need_mask = true;
>       } else {
> 	 env->mask = UINT64_MAX;
>           env->base = 0;
>       }

I think the code is wrong; perhaps we should modify write_mpmmask instead:
env->mask = env->mpmmask = value;

Zhiwei

> Thanks,
> Zhiwei
>>
>>
>> r~
Richard Henderson Nov. 9, 2021, 9:25 a.m. UTC | #8
On 11/9/21 10:05 AM, LIU Zhiwei wrote:
>> Do you mean we should add this code to riscv_tr_init_disas_context
>>
>>      if (ctx->pm_enabled) {
>>           switch (priv) {
>>           case PRV_M:
>>               env->mask = env->mpmmask;
>>               env->base = env->mpmbase;
>>               break;
>>           case PRV_S:
>>               env->mask = env->spmmask;
>>               env->base = env->spmbase;
>>               break;
>>           case PRV_U:
>>               env->mask = env->upmmask;
>>               env->base = env->upmbase;
>>               break;
>>           default:
>>               g_assert_not_reached();
>>           }
>>           ctx->pm_mask = pm_mask[priv];
>>           ctx->pm_base = pm_base[priv];
>>           ctx->need_mask = true; /* new flag for mask */
>>       } else if (get_xlen(ctx)  < TARGET_LONG_BITS) {
>>           env->mask = UINT32_MAX;
>>           env->base = 0;

Certainly we cannot modify env in riscv_tr_init_disas_context.

>>           ctx->pm_mask = tcg_constant_tl(UINT32_MAX);
>>           ctx->pm_base = tcg_constant_tl(0);
>>          ctx->need_mask = true;
>>       } else {
>> 	 env->mask = UINT64_MAX;
>>           env->base = 0;
>>       }
> 
> I think the code is wrong, perhaps we should modify the write_mpmmask
> env->mask = env->mpmmask = value;

Something like that, yes.  However, env->mask must be set based on env->priv, etc; you 
can't just assign the same as mpmmask.

Then you also need to update env->mask in a hook like you created in patch 11 to switch 
context (though I would call it from helper_mret and helper_sret directly, and not create 
a new call from tcg).  Then you need to call the hook as well on exception entry, reset, 
and vmstate_riscv_cpu.post_load.


r~
diff mbox series

Patch

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index ed042f7bb9..5cd9b802df 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -233,6 +233,7 @@  static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq)
     data = FIELD_DP32(data, VDATA, VM, a->vm);
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
     data = FIELD_DP32(data, VDATA, NF, a->nf);
+    data = FIELD_DP32(data, VDATA, OL, s->ol);
     return ldst_us_trans(a->rd, a->rs1, data, fn, s);
 }
 
@@ -286,6 +287,7 @@  static bool st_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq)
     data = FIELD_DP32(data, VDATA, VM, a->vm);
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
     data = FIELD_DP32(data, VDATA, NF, a->nf);
+    data = FIELD_DP32(data, VDATA, OL, s->ol);
     return ldst_us_trans(a->rd, a->rs1, data, fn, s);
 }
 
@@ -365,6 +367,7 @@  static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t seq)
     data = FIELD_DP32(data, VDATA, VM, a->vm);
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
     data = FIELD_DP32(data, VDATA, NF, a->nf);
+    data = FIELD_DP32(data, VDATA, OL, s->ol);
     return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s);
 }
 
@@ -404,6 +407,7 @@  static bool st_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t seq)
     data = FIELD_DP32(data, VDATA, VM, a->vm);
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
     data = FIELD_DP32(data, VDATA, NF, a->nf);
+    data = FIELD_DP32(data, VDATA, OL, s->ol);
     fn =  fns[seq][s->sew];
     if (fn == NULL) {
         return false;
@@ -490,6 +494,7 @@  static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t seq)
     data = FIELD_DP32(data, VDATA, VM, a->vm);
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
     data = FIELD_DP32(data, VDATA, NF, a->nf);
+    data = FIELD_DP32(data, VDATA, OL, s->ol);
     return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
 }
 
@@ -542,6 +547,7 @@  static bool st_index_op(DisasContext *s, arg_rnfvm *a, uint8_t seq)
     data = FIELD_DP32(data, VDATA, VM, a->vm);
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
     data = FIELD_DP32(data, VDATA, NF, a->nf);
+    data = FIELD_DP32(data, VDATA, OL, s->ol);
     return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
 }
 
@@ -617,6 +623,7 @@  static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq)
     data = FIELD_DP32(data, VDATA, VM, a->vm);
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
     data = FIELD_DP32(data, VDATA, NF, a->nf);
+    data = FIELD_DP32(data, VDATA, OL, s->ol);
     return ldff_trans(a->rd, a->rs1, data, fn, s);
 }
 
@@ -724,6 +731,7 @@  static bool amo_op(DisasContext *s, arg_rwdvm *a, uint8_t seq)
     data = FIELD_DP32(data, VDATA, VM, a->vm);
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
     data = FIELD_DP32(data, VDATA, WD, a->wd);
+    data = FIELD_DP32(data, VDATA, OL, s->ol);
     return amo_trans(a->rd, a->rs1, a->rs2, data, fn, s);
 }
 /*
diff --git a/target/riscv/internals.h b/target/riscv/internals.h
index b15ad394bb..f74b8291e4 100644
--- a/target/riscv/internals.h
+++ b/target/riscv/internals.h
@@ -27,6 +27,7 @@  FIELD(VDATA, VM, 8, 1)
 FIELD(VDATA, LMUL, 9, 2)
 FIELD(VDATA, NF, 11, 4)
 FIELD(VDATA, WD, 11, 1)
+FIELD(VDATA, OL, 15, 2)
 
 /* float point classify helpers */
 target_ulong fclass_h(uint64_t frs1);
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 535420ee66..451688c328 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -112,6 +112,11 @@  static uint32_t vext_wd(uint32_t desc)
     return (simd_data(desc) >> 11) & 0x1;
 }
 
+static inline uint32_t vext_ol(uint32_t desc)
+{
+    return FIELD_EX32(simd_data(desc), VDATA, OL);
+}
+
 /*
  * Get vector group length in bytes. Its range is [64, 2048].
  *
@@ -123,6 +128,14 @@  static inline uint32_t vext_maxsz(uint32_t desc)
     return simd_maxsz(desc) << vext_lmul(desc);
 }
 
+static inline target_ulong adjust_addr(target_ulong addr, uint32_t olen)
+{
+    if (olen < TARGET_LONG_BITS) {
+        addr &= UINT32_MAX;
+    }
+    return addr;
+}
+
 /*
  * This function checks watchpoint before real load operation.
  *
@@ -135,17 +148,17 @@  static inline uint32_t vext_maxsz(uint32_t desc)
  */
 static void probe_pages(CPURISCVState *env, target_ulong addr,
                         target_ulong len, uintptr_t ra,
-                        MMUAccessType access_type)
+                        MMUAccessType access_type, uint32_t olen)
 {
     target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
     target_ulong curlen = MIN(pagelen, len);
 
-    probe_access(env, addr, curlen, access_type,
+    probe_access(env, adjust_addr(addr, olen), curlen, access_type,
                  cpu_mmu_index(env, false), ra);
     if (len > curlen) {
         addr += curlen;
         curlen = len - curlen;
-        probe_access(env, addr, curlen, access_type,
+        probe_access(env, adjust_addr(addr, olen), curlen, access_type,
                      cpu_mmu_index(env, false), ra);
     }
 }
@@ -290,13 +303,14 @@  vext_ldst_stride(void *vd, void *v0, target_ulong base,
     uint32_t nf = vext_nf(desc);
     uint32_t mlen = vext_mlen(desc);
     uint32_t vlmax = vext_maxsz(desc) / esz;
+    uint32_t olen = 16 << vext_ol(desc);
 
     /* probe every access*/
     for (i = 0; i < env->vl; i++) {
         if (!vm && !vext_elem_mask(v0, mlen, i)) {
             continue;
         }
-        probe_pages(env, base + stride * i, nf * msz, ra, access_type);
+        probe_pages(env, base + stride * i, nf * msz, ra, access_type, olen);
     }
     /* do real access */
     for (i = 0; i < env->vl; i++) {
@@ -306,7 +320,7 @@  vext_ldst_stride(void *vd, void *v0, target_ulong base,
         }
         while (k < nf) {
             target_ulong addr = base + stride * i + k * msz;
-            ldst_elem(env, addr, i + k * vlmax, vd, ra);
+            ldst_elem(env, adjust_addr(addr, olen), i + k * vlmax, vd, ra);
             k++;
         }
     }
@@ -391,15 +405,16 @@  vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
     uint32_t i, k;
     uint32_t nf = vext_nf(desc);
     uint32_t vlmax = vext_maxsz(desc) / esz;
+    uint32_t olen = 16 << vext_ol(desc);
 
     /* probe every access */
-    probe_pages(env, base, env->vl * nf * msz, ra, access_type);
+    probe_pages(env, base, env->vl * nf * msz, ra, access_type, olen);
     /* load bytes from guest memory */
     for (i = 0; i < env->vl; i++) {
         k = 0;
         while (k < nf) {
             target_ulong addr = base + (i * nf + k) * msz;
-            ldst_elem(env, addr, i + k * vlmax, vd, ra);
+            ldst_elem(env, adjust_addr(addr, olen), i + k * vlmax, vd, ra);
             k++;
         }
     }
@@ -519,6 +534,7 @@  vext_ldst_index(void *vd, void *v0, target_ulong base,
     uint32_t vm = vext_vm(desc);
     uint32_t mlen = vext_mlen(desc);
     uint32_t vlmax = vext_maxsz(desc) / esz;
+    uint32_t olen = 16 << vext_ol(desc);
 
     /* probe every access*/
     for (i = 0; i < env->vl; i++) {
@@ -526,7 +542,7 @@  vext_ldst_index(void *vd, void *v0, target_ulong base,
             continue;
         }
         probe_pages(env, get_index_addr(base, i, vs2), nf * msz, ra,
-                    access_type);
+                    access_type, olen);
     }
     /* load bytes from guest memory */
     for (i = 0; i < env->vl; i++) {
@@ -536,7 +552,7 @@  vext_ldst_index(void *vd, void *v0, target_ulong base,
         }
         while (k < nf) {
             abi_ptr addr = get_index_addr(base, i, vs2) + k * msz;
-            ldst_elem(env, addr, i + k * vlmax, vd, ra);
+            ldst_elem(env, adjust_addr(addr, olen), i + k * vlmax, vd, ra);
             k++;
         }
     }
@@ -619,6 +635,7 @@  vext_ldff(void *vd, void *v0, target_ulong base,
     uint32_t nf = vext_nf(desc);
     uint32_t vm = vext_vm(desc);
     uint32_t vlmax = vext_maxsz(desc) / esz;
+    uint32_t olen = 16 << vext_ol(desc);
     target_ulong addr, offset, remain;
 
     /* probe every access*/
@@ -626,9 +643,9 @@  vext_ldff(void *vd, void *v0, target_ulong base,
         if (!vm && !vext_elem_mask(v0, mlen, i)) {
             continue;
         }
-        addr = base + nf * i * msz;
+        addr = adjust_addr(base + nf * i * msz, olen);
         if (i == 0) {
-            probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
+            probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD, olen);
         } else {
             /* if it triggers an exception, no need to check watchpoint */
             remain = nf * msz;
@@ -643,7 +660,7 @@  vext_ldff(void *vd, void *v0, target_ulong base,
                         goto ProbeSuccess;
                     }
 #else
-                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
+                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD, olen);
 #endif
                 } else {
                     vl = i;
@@ -653,7 +670,7 @@  vext_ldff(void *vd, void *v0, target_ulong base,
                     break;
                 }
                 remain -= offset;
-                addr += offset;
+                addr = adjust_addr(addr + offset, olen);
             }
         }
     }
@@ -669,7 +686,7 @@  ProbeSuccess:
         }
         while (k < nf) {
             target_ulong addr = base + (i * nf + k) * msz;
-            ldst_elem(env, addr, i + k * vlmax, vd, ra);
+            ldst_elem(env, adjust_addr(addr, olen), i + k * vlmax, vd, ra);
             k++;
         }
     }
@@ -795,20 +812,23 @@  vext_amo_noatomic(void *vs3, void *v0, target_ulong base,
     uint32_t vm = vext_vm(desc);
     uint32_t mlen = vext_mlen(desc);
     uint32_t vlmax = vext_maxsz(desc) / esz;
+    uint32_t olen = 16 << vext_ol(desc);
 
     for (i = 0; i < env->vl; i++) {
         if (!vm && !vext_elem_mask(v0, mlen, i)) {
             continue;
         }
-        probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_LOAD);
-        probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_STORE);
+        probe_pages(env, get_index_addr(base, i, vs2), msz, ra,
+                    MMU_DATA_LOAD, olen);
+        probe_pages(env, get_index_addr(base, i, vs2), msz, ra,
+                    MMU_DATA_STORE, olen);
     }
     for (i = 0; i < env->vl; i++) {
         if (!vm && !vext_elem_mask(v0, mlen, i)) {
             continue;
         }
         addr = get_index_addr(base, i, vs2);
-        noatomic_op(vs3, addr, wd, i, env, ra);
+        noatomic_op(vs3, adjust_addr(addr, olen), wd, i, env, ra);
     }
     clear_elem(vs3, env->vl, env->vl * esz, vlmax * esz);
 }