diff mbox series

[v2,01/17] RISC-V: add vfp field in CPURISCVState

Message ID 1568183141-67641-2-git-send-email-zhiwei_liu@c-sky.com (mailing list archive)
State New, archived
Headers show
Series RISC-V: support vector extension | expand

Commit Message

LIU Zhiwei Sept. 11, 2019, 6:25 a.m. UTC
From: LIU Zhiwei <zhiwei_liu@c-sky.com>

Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 target/riscv/cpu.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

Comments

Chih-Min Chao Sept. 11, 2019, 2:51 p.m. UTC | #1
On Wed, Sep 11, 2019 at 2:35 PM liuzhiwei <zhiwei_liu@c-sky.com> wrote:

> From: LIU Zhiwei <zhiwei_liu@c-sky.com>
>
> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
> ---
>  target/riscv/cpu.h | 28 ++++++++++++++++++++++++++++
>  1 file changed, 28 insertions(+)
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 0adb307..c992b1d 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -93,9 +93,37 @@ typedef struct CPURISCVState CPURISCVState;
>
>  #include "pmp.h"
>
> +#define VLEN 128
> +#define VUNIT(x) (VLEN / x)
> +
>  struct CPURISCVState {
>      target_ulong gpr[32];
>      uint64_t fpr[32]; /* assume both F and D extensions */
> +
> +    /* vector coprocessor state.  */
> +    struct {
> +        union VECTOR {
> +            float64  f64[VUNIT(64)];
> +            float32  f32[VUNIT(32)];
> +            float16  f16[VUNIT(16)];
> +            uint64_t u64[VUNIT(64)];
> +            int64_t  s64[VUNIT(64)];
> +            uint32_t u32[VUNIT(32)];
> +            int32_t  s32[VUNIT(32)];
> +            uint16_t u16[VUNIT(16)];
> +            int16_t  s16[VUNIT(16)];
> +            uint8_t  u8[VUNIT(8)];
> +            int8_t   s8[VUNIT(8)];
> +        } vreg[32];
> +        target_ulong vxrm;
> +        target_ulong vxsat;
> +        target_ulong vl;
> +        target_ulong vstart;
> +        target_ulong vtype;
> +        float_status fp_status;
> +    } vfp;
> +
> +    bool         foflag;
>      target_ulong pc;
>      target_ulong load_res;
>      target_ulong load_val;
> --
> 2.7.4
>
>
Could  the VLEN be configurable in cpu initialization but not fixed in
compilation phase ?
Take the integer element as example  and the difference should be the
stride of vfp.vreg[x] isn't continuous

    struct {
        union VECTOR {
            uint64_t *u64;
            uint16_t *u16;
            uint8_t  *u8;
        } vreg[32];
    } vfp;

   initialization

    int vlen = 256;  //parameter from cpu command line option
    int elem = vlen / 8;
    int size = elem * 32;

    uint8_t *mem = malloc(size)
    for (int idx = 0; idx < 32; ++idx) {
        vfp.vreg[idx].u64 = (void *)&mem[idx * elem];
        vfp.vreg[idx].u32 = (void *)&mem[idx * elem];
        vfp.vreg[idx].u16 = (void *)&mem[idx * elem];
   }

  chihmin
Richard Henderson Sept. 11, 2019, 10:32 p.m. UTC | #2
On 9/11/19 2:25 AM, liuzhiwei wrote:
>      uint64_t fpr[32]; /* assume both F and D extensions */
> +
> +    /* vector coprocessor state.  */
> +    struct {
> +        union VECTOR {
> +            float64  f64[VUNIT(64)];
> +            float32  f32[VUNIT(32)];
> +            float16  f16[VUNIT(16)];
> +            uint64_t u64[VUNIT(64)];
> +            int64_t  s64[VUNIT(64)];
> +            uint32_t u32[VUNIT(32)];
> +            int32_t  s32[VUNIT(32)];
> +            uint16_t u16[VUNIT(16)];
> +            int16_t  s16[VUNIT(16)];
> +            uint8_t  u8[VUNIT(8)];
> +            int8_t   s8[VUNIT(8)];
> +        } vreg[32];
> +        target_ulong vxrm;
> +        target_ulong vxsat;
> +        target_ulong vl;
> +        target_ulong vstart;
> +        target_ulong vtype;
> +        float_status fp_status;
> +    } vfp;

Is there a good reason why you're putting all of these into a sub-structure?
And more, a sub-structure whose name, vfp, looks like it is copied from ARM?

Why are the vxrm, vxsat, vl, vstart, vtype fields sized target_ulong?  I would
think that most could be uint32_t.  Although I suppose frm is also target_ulong
and need not be...

Why are you adding a new fp_status field?  The new vector floating point
instructions set the exact same fflags exception bits as normal fp instructions.


r~
Richard Henderson Sept. 11, 2019, 10:39 p.m. UTC | #3
On 9/11/19 10:51 AM, Chih-Min Chao wrote:
> Could  the VLEN be configurable in cpu initialization but not fixed in
> compilation phase ?
> Take the integer element as example  and the difference should be the
> stride of vfp.vreg[x] isn't continuous

Do you really want an unbounded amount of vector register storage?

>     uint8_t *mem = malloc(size)
>     for (int idx = 0; idx < 32; ++idx) {
>         vfp.vreg[idx].u64 = (void *)&mem[idx * elem];
>         vfp.vreg[idx].u32 = (void *)&mem[idx * elem];
>         vfp.vreg[idx].u16 = (void *)&mem[idx * elem];
>    }

This isn't adjusting the stride of the elements.  And in any case this would
have to be re-adjusted for every vsetvl.


r~
Chih-Min Chao Sept. 12, 2019, 2:53 p.m. UTC | #4
On Thu, Sep 12, 2019 at 6:39 AM Richard Henderson <
richard.henderson@linaro.org> wrote:

> On 9/11/19 10:51 AM, Chih-Min Chao wrote:
> > Could  the VLEN be configurable in cpu initialization but not fixed in
> > compilation phase ?
> > Take the integer element as example  and the difference should be the
> > stride of vfp.vreg[x] isn't continuous
>
> Do you really want an unbounded amount of vector register storage?


 Hi Richard,

VLEN is implementation-defined parameter and the only limitation on spec is
that it must be power of 2.
What I prefer is the value could be adjustable in runtime.

>


> >     uint8_t *mem = malloc(size)
> >     for (int idx = 0; idx < 32; ++idx) {
> >         vfp.vreg[idx].u64 = (void *)&mem[idx * elem];
> >         vfp.vreg[idx].u32 = (void *)&mem[idx * elem];
> >         vfp.vreg[idx].u16 = (void *)&mem[idx * elem];
> >    }
>
> This isn't adjusting the stride of the elements.  And in any case this
> would
> have to be re-adjusted for every vsetvl.
>
>  Not sure about the relation with vsetvl. Could you provide an example ?

Chih-Min

>
> r~
>
Richard Henderson Sept. 12, 2019, 3:06 p.m. UTC | #5
On 9/12/19 10:53 AM, Chih-Min Chao wrote:
> 
> 
> On Thu, Sep 12, 2019 at 6:39 AM Richard Henderson <richard.henderson@linaro.org
> <mailto:richard.henderson@linaro.org>> wrote:
> 
>     On 9/11/19 10:51 AM, Chih-Min Chao wrote:
>     > Could  the VLEN be configurable in cpu initialization but not fixed in
>     > compilation phase ?
>     > Take the integer element as example  and the difference should be the
>     > stride of vfp.vreg[x] isn't continuous
> 
>     Do you really want an unbounded amount of vector register storage?
> 
> 
>  Hi Richard,
> 
> VLEN is implementation-defined parameter and the only limitation on spec is
> that it must be power of 2.
> What I prefer is the value could be adjustable in runtime.

Ok, fine, I suppose.  I'll let a risc-v maintainer opine on whether there
should be some sanity check on the bounds of VLEN.  If you really do have an
unbounded vlen, you'll need to consider carefully how you want to manage migration.

>     >     uint8_t *mem = malloc(size)
>     >     for (int idx = 0; idx < 32; ++idx) {
>     >         vfp.vreg[idx].u64 = (void *)&mem[idx * elem];
>     >         vfp.vreg[idx].u32 = (void *)&mem[idx * elem];
>     >         vfp.vreg[idx].u16 = (void *)&mem[idx * elem];
>     >    }
> 
>     This isn't adjusting the stride of the elements.  And in any case this would
>     have to be re-adjusted for every vsetvl.
> 
>  Not sure about the relation with vsetvl. Could you provide an example ?

Well, I think it's merely a matter of there's no point having so many different
pointers into the block of memory that provides the backing storage.  I've
asserted elsewhere in the thread that we shouldn't have an array of 32
"registers" anyway.


r~
LIU Zhiwei Sept. 17, 2019, 8:09 a.m. UTC | #6
On 2019/9/11 下午10:51, Chih-Min Chao wrote:
>
>
> On Wed, Sep 11, 2019 at 2:35 PM liuzhiwei <zhiwei_liu@c-sky.com 
> <mailto:zhiwei_liu@c-sky.com>> wrote:
>
>     From: LIU Zhiwei <zhiwei_liu@c-sky.com <mailto:zhiwei_liu@c-sky.com>>
>
>     Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com
>     <mailto:zhiwei_liu@c-sky.com>>
>     ---
>      target/riscv/cpu.h | 28 ++++++++++++++++++++++++++++
>      1 file changed, 28 insertions(+)
>
>     diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
>     index 0adb307..c992b1d 100644
>     --- a/target/riscv/cpu.h
>     +++ b/target/riscv/cpu.h
>     @@ -93,9 +93,37 @@ typedef struct CPURISCVState CPURISCVState;
>
>      #include "pmp.h"
>
>     +#define VLEN 128
>     +#define VUNIT(x) (VLEN / x)
>     +
>      struct CPURISCVState {
>          target_ulong gpr[32];
>          uint64_t fpr[32]; /* assume both F and D extensions */
>     +
>     +    /* vector coprocessor state.  */
>     +    struct {
>     +        union VECTOR {
>     +            float64  f64[VUNIT(64)];
>     +            float32  f32[VUNIT(32)];
>     +            float16  f16[VUNIT(16)];
>     +            uint64_t u64[VUNIT(64)];
>     +            int64_t  s64[VUNIT(64)];
>     +            uint32_t u32[VUNIT(32)];
>     +            int32_t  s32[VUNIT(32)];
>     +            uint16_t u16[VUNIT(16)];
>     +            int16_t  s16[VUNIT(16)];
>     +            uint8_t  u8[VUNIT(8)];
>     +            int8_t   s8[VUNIT(8)];
>     +        } vreg[32];
>     +        target_ulong vxrm;
>     +        target_ulong vxsat;
>     +        target_ulong vl;
>     +        target_ulong vstart;
>     +        target_ulong vtype;
>     +        float_status fp_status;
>     +    } vfp;
>     +
>     +    bool         foflag;
>          target_ulong pc;
>          target_ulong load_res;
>          target_ulong load_val;
>     -- 
>     2.7.4
>
>
> Could  the VLEN be configurable in cpu initialization but not fixed in 
> compilation phase ?

Yes,  it's important that VLEN is configurable to support different 
types of cpu.

> Take the integer element as example  and the difference should be the 
> stride of vfp.vreg[x] isn't continuous
>
>     struct {
>         union VECTOR {
>             uint64_t *u64;
>             uint16_t *u16;
>             uint8_t  *u8;
>         } vreg[32];
>     } vfp;
>
>    initialization
>     int vlen = 256;  //parameter from cpu command line option
>     int elem = vlen / 8;
>     int size = elem * 32;
>
>     uint8_t *mem = malloc(size)
>     for (int idx = 0; idx < 32; ++idx) {
>         vfp.vreg[idx].u64 = (void *)&mem[idx * elem];
>         vfp.vreg[idx].u32 = (void *)&mem[idx * elem];
>         vfp.vreg[idx].u16 = (void *)&mem[idx * elem];
>    }
>
>   chihmin

It's a good idea. I will accept it.

Thanks for review.

Zhiwei
diff mbox series

Patch

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 0adb307..c992b1d 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -93,9 +93,37 @@  typedef struct CPURISCVState CPURISCVState;
 
 #include "pmp.h"
 
+#define VLEN 128
+#define VUNIT(x) (VLEN / x)
+
 struct CPURISCVState {
     target_ulong gpr[32];
     uint64_t fpr[32]; /* assume both F and D extensions */
+
+    /* vector coprocessor state.  */
+    struct {
+        union VECTOR {
+            float64  f64[VUNIT(64)];
+            float32  f32[VUNIT(32)];
+            float16  f16[VUNIT(16)];
+            uint64_t u64[VUNIT(64)];
+            int64_t  s64[VUNIT(64)];
+            uint32_t u32[VUNIT(32)];
+            int32_t  s32[VUNIT(32)];
+            uint16_t u16[VUNIT(16)];
+            int16_t  s16[VUNIT(16)];
+            uint8_t  u8[VUNIT(8)];
+            int8_t   s8[VUNIT(8)];
+        } vreg[32];
+        target_ulong vxrm;
+        target_ulong vxsat;
+        target_ulong vl;
+        target_ulong vstart;
+        target_ulong vtype;
+        float_status fp_status;
+    } vfp;
+
+    bool         foflag;
     target_ulong pc;
     target_ulong load_res;
     target_ulong load_val;