
[v5,54/60] target/riscv: integer extract instruction

Message ID 20200312145900.2054-55-zhiwei_liu@c-sky.com (mailing list archive)
State New, archived
Series target/riscv: support vector extension v0.7.1

Commit Message

LIU Zhiwei March 12, 2020, 2:58 p.m. UTC
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 target/riscv/helper.h                   |  5 ++++
 target/riscv/insn32.decode              |  1 +
 target/riscv/insn_trans/trans_rvv.inc.c | 33 +++++++++++++++++++++++++
 target/riscv/vector_helper.c            | 20 +++++++++++++++
 4 files changed, 59 insertions(+)

Comments

Richard Henderson March 15, 2020, 2:53 a.m. UTC | #1
On 3/12/20 7:58 AM, LIU Zhiwei wrote:
> +static bool trans_vext_x_v(DisasContext *s, arg_r *a)
> +{
> +    if (vext_check_isa_ill(s, RVV)) {
> +        TCGv_ptr src2;
> +        TCGv dest, src1;
> +        gen_helper_vext_x_v fns[4] = {
> +            gen_helper_vext_x_v_b, gen_helper_vext_x_v_h,
> +            gen_helper_vext_x_v_w, gen_helper_vext_x_v_d
> +        };
> +
> +        dest = tcg_temp_new();
> +        src1 = tcg_temp_new();
> +        src2 = tcg_temp_new_ptr();
> +
> +        gen_get_gpr(src1, a->rs1);
> +        tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, a->rs2));
> +
> +        fns[s->sew](dest, src2, src1, cpu_env);
> +        gen_set_gpr(a->rd, dest);
> +
> +        tcg_temp_free(dest);
> +        tcg_temp_free(src1);
> +        tcg_temp_free_ptr(src2);
> +        return true;
> +    }
> +    return false;
> +}

This entire operation can be performed inline easily.

static void extract_element(TCGv dest, TCGv_ptr base,
                            int ofs, int sew)
{
    switch (sew) {
    case MO_8:
        tcg_gen_ld8u_tl(dest, base, ofs);
        break;
    case MO_16:
        tcg_gen_ld16u_tl(dest, base, ofs);
        break;
    default:
        tcg_gen_ld32u_tl(dest, base, ofs);
        break;
#if TARGET_LONG_BITS == 64
    case MO_64:
        tcg_gen_ld_i64(dest, base, ofs);
        break;
#endif
    }
}

static bool trans_vext_x_v(DisasContext *s, arg_r *a)
{
...
    if (a->rs1 == 0) {
        /* Special case vmv.x.s rd, vs2. */
        do_extract(dest, cpu_env,
                   vreg_ofs(s, a->rs2), s->sew);
    } else {
        int vlen = s->vlen >> (3 + s->sew);
        TCGv_i32 ofs = tcg_temp_new_i32();
        TCGv_ptr  base = tcg_temp_new_ptr();
        TCGv t_vlen, t_zero;

        /* Mask the index to the length so that we do
           not produce an out-of-range load. */
        tcg_gen_trunc_tl_i32(ofs, cpu_gpr[a->rs1]);
        tcg_gen_andi_i32(ofs, ofs, vlen - 1);

        /* Convert the index to an offset.  */
        tcg_gen_shli_i32(ofs, ofs, s->sew);

        /* Convert the index to a pointer. */
        tcg_gen_extu_i32_ptr(base, ofs);
        tcg_gen_add_ptr(base, base, cpu_env);

        /* Perform the load. */
        do_extract(dest, base,
                   vreg_ofs(s, a->rs2), s->sew);
        tcg_temp_free_ptr(base);
        tcg_temp_free_i32(ofs);

        /* Flush out-of-range indexing to zero.  */
        t_vlen = tcg_const_tl(vlen);
        t_zero = tcg_const_tl(0);
        tcg_gen_movcond_tl(TCG_COND_LTU, dest, cpu_gpr[a->rs1],
                           t_vlen, dest, t_zero);
        tcg_temp_free(t_vlen);
        tcg_temp_free(t_zero);
    }


r~
LIU Zhiwei March 15, 2020, 5:15 a.m. UTC | #2
On 2020/3/15 10:53, Richard Henderson wrote:
> On 3/12/20 7:58 AM, LIU Zhiwei wrote:
>> +static bool trans_vext_x_v(DisasContext *s, arg_r *a)
>> +{
>> +    if (vext_check_isa_ill(s, RVV)) {
>> +        TCGv_ptr src2;
>> +        TCGv dest, src1;
>> +        gen_helper_vext_x_v fns[4] = {
>> +            gen_helper_vext_x_v_b, gen_helper_vext_x_v_h,
>> +            gen_helper_vext_x_v_w, gen_helper_vext_x_v_d
>> +        };
>> +
>> +        dest = tcg_temp_new();
>> +        src1 = tcg_temp_new();
>> +        src2 = tcg_temp_new_ptr();
>> +
>> +        gen_get_gpr(src1, a->rs1);
>> +        tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, a->rs2));
>> +
>> +        fns[s->sew](dest, src2, src1, cpu_env);
>> +        gen_set_gpr(a->rd, dest);
>> +
>> +        tcg_temp_free(dest);
>> +        tcg_temp_free(src1);
>> +        tcg_temp_free_ptr(src2);
>> +        return true;
>> +    }
>> +    return false;
>> +}
> This entire operation can be performed inline easily.
>
> static void extract_element(TCGv dest, TCGv_ptr base,
>                              int ofs, int sew)
> {
>      switch (sew) {
>      case MO_8:
>          tcg_gen_ld8u_tl(dest, base, ofs);
>          break;
>      case MO_16:
>          tcg_gen_ld16u_tl(dest, base, ofs);
>          break;
>      default:
>          tcg_gen_ld32u_tl(dest, base, ofs);
>          break;
> #if TARGET_LONG_BITS == 64
>      case MO_64:
>          tcg_gen_ld_i64(dest, base, ofs);
>          break;
> #endif
>      }
> }
>
> static bool trans_vext_x_v(DisasContext *s, arg_r *a)
> {
> ...
>      if (a->rs1 == 0) {
>          /* Special case vmv.x.s rd, vs2. */
>          do_extract(dest, cpu_env,
>                     vreg_ofs(s, a->rs2), s->sew);
>      } else {
>          int vlen = s->vlen >> (3 + s->sew);
>          TCGv_i32 ofs = tcg_temp_new_i32();
>          TCGv_ptr  base = tcg_temp_new_ptr();
>          TCGv t_vlen, t_zero;
>
>          /* Mask the index to the length so that we do
>             not produce an out-of-range load. */
>          tcg_gen_trunc_tl_i32(ofs, cpu_gpr[a->rs1]);
>          tcg_gen_andi_i32(ofs, ofs, vlen - 1);
>
>          /* Convert the index to an offset.  */
>          tcg_gen_shli_i32(ofs, ofs, s->sew);

On a big-endian host, should I convert the index first, before this statement?

#ifdef HOST_WORDS_BIGENDIAN
static void convert_idx(TCGv_i32 idx, int sew)
{
     switch (sew) {
     case MO_8:
         tcg_gen_xori_i32(idx, idx, 7);
         break;
     case MO_16:
         tcg_gen_xori_i32(idx, idx, 3);
         break;
     case MO_32:
         tcg_gen_xori_i32(idx, idx, 1);
         break;
     default:
         break;
     }
}
#endif


When converting the index to an offset, call this function first:

#ifdef HOST_WORDS_BIGENDIAN
     convert_idx(ofs, s->sew)
#endif
/* Convert the index to an offset.  */
tcg_gen_shli_i32(ofs, ofs, s->sew)

Zhiwei
>          /* Convert the index to a pointer. */
>          tcg_gen_extu_i32_ptr(base, ofs);
>          tcg_gen_add_ptr(base, base, cpu_env);
>
>          /* Perform the load. */
>          do_extract(dest, base,
>                     vreg_ofs(s, a->rs2), s->sew);
>          tcg_temp_free_ptr(base);
>          tcg_temp_free_i32(ofs);
>
>          /* Flush out-of-range indexing to zero.  */
>          t_vlen = tcg_const_tl(vlen);
>          t_zero = tcg_const_tl(0);
>          tcg_gen_movcond_tl(TCG_COND_LTU, dest, cpu_gpr[a->rs1],
>                             t_vlen, dest, t_zero);
>          tcg_temp_free(t_vlen);
>          tcg_temp_free(t_zero);
>      }
>
> r~
Richard Henderson March 15, 2020, 5:21 a.m. UTC | #3
On 3/14/20 10:15 PM, LIU Zhiwei wrote:
> 
> 
> On 2020/3/15 10:53, Richard Henderson wrote:
>> On 3/12/20 7:58 AM, LIU Zhiwei wrote:
>>> +static bool trans_vext_x_v(DisasContext *s, arg_r *a)
>>> +{
>>> +    if (vext_check_isa_ill(s, RVV)) {
>>> +        TCGv_ptr src2;
>>> +        TCGv dest, src1;
>>> +        gen_helper_vext_x_v fns[4] = {
>>> +            gen_helper_vext_x_v_b, gen_helper_vext_x_v_h,
>>> +            gen_helper_vext_x_v_w, gen_helper_vext_x_v_d
>>> +        };
>>> +
>>> +        dest = tcg_temp_new();
>>> +        src1 = tcg_temp_new();
>>> +        src2 = tcg_temp_new_ptr();
>>> +
>>> +        gen_get_gpr(src1, a->rs1);
>>> +        tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, a->rs2));
>>> +
>>> +        fns[s->sew](dest, src2, src1, cpu_env);
>>> +        gen_set_gpr(a->rd, dest);
>>> +
>>> +        tcg_temp_free(dest);
>>> +        tcg_temp_free(src1);
>>> +        tcg_temp_free_ptr(src2);
>>> +        return true;
>>> +    }
>>> +    return false;
>>> +}
>> This entire operation can be performed inline easily.
>>
>> static void extract_element(TCGv dest, TCGv_ptr base,
>>                              int ofs, int sew)
>> {
>>      switch (sew) {
>>      case MO_8:
>>          tcg_gen_ld8u_tl(dest, base, ofs);
>>          break;
>>      case MO_16:
>>          tcg_gen_ld16u_tl(dest, base, ofs);
>>          break;
>>      default:
>>          tcg_gen_ld32u_tl(dest, base, ofs);
>>          break;
>> #if TARGET_LONG_BITS == 64
>>      case MO_64:
>>          tcg_gen_ld_i64(dest, base, ofs);
>>          break;
>> #endif
>>      }
>> }
>>
>> static bool trans_vext_x_v(DisasContext *s, arg_r *a)
>> {
>> ...
>>      if (a->rs1 == 0) {
>>          /* Special case vmv.x.s rd, vs2. */
>>          do_extract(dest, cpu_env,
>>                     vreg_ofs(s, a->rs2), s->sew);
>>      } else {
>>          int vlen = s->vlen >> (3 + s->sew);
>>          TCGv_i32 ofs = tcg_temp_new_i32();
>>          TCGv_ptr  base = tcg_temp_new_ptr();
>>          TCGv t_vlen, t_zero;
>>
>>          /* Mask the index to the length so that we do
>>             not produce an out-of-range load. */
>>          tcg_gen_trunc_tl_i32(ofs, cpu_gpr[a->rs1]);
>>          tcg_gen_andi_i32(ofs, ofs, vlen - 1);
>>
>>          /* Convert the index to an offset.  */
>>          tcg_gen_shli_i32(ofs, ofs, s->sew);
> 
> On a big-endian host, should I convert the index first, before this statement?
> 
> #ifdef HOST_WORDS_BIGENDIAN
> static void convert_idx(TCGv_i32 idx, int sew)
> {
>     switch (sew) {
>     case MO_8:
>         tcg_gen_xori_i32(idx, idx, 7);
>         break;
>     case MO_16:
>         tcg_gen_xori_i32(idx, idx, 3);
>         break;
>     case MO_32:
>         tcg_gen_xori_i32(idx, idx, 1);
>         break;
>     default:
>         break;
>     }
> }
> #endif
> 
> 
> When converting the index to an offset, call this function first:
> 
> #ifdef HOST_WORDS_BIGENDIAN
>     convert_idx(ofs, s->sew)
> #endif

Yes, I forgot about endian adjust.

I would say

static void endian_adjust(TCGv_i32 ofs, int sew)
{
#ifdef HOST_WORDS_BIGENDIAN
    tcg_gen_xori_i32(ofs, ofs, 7 >> sew);
#endif
}

so that you don't need the ifdef at the use site.
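A minimal sketch of how the call site from the earlier trans_vext_x_v() sketch
might then read, reusing the ofs/sew/vlen variables assumed there:

    /* Mask the index to the element count, as before. */
    tcg_gen_trunc_tl_i32(ofs, cpu_gpr[a->rs1]);
    tcg_gen_andi_i32(ofs, ofs, vlen - 1);

    /* Account for host element ordering; no-op on little-endian hosts. */
    endian_adjust(ofs, s->sew);

    /* Convert the index to an offset.  */
    tcg_gen_shli_i32(ofs, ofs, s->sew);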


r~

Patch

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index e3f2970221..d94347a9a5 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1101,3 +1101,8 @@  DEF_HELPER_4(vid_v_b, void, ptr, ptr, env, i32)
 DEF_HELPER_4(vid_v_h, void, ptr, ptr, env, i32)
 DEF_HELPER_4(vid_v_w, void, ptr, ptr, env, i32)
 DEF_HELPER_4(vid_v_d, void, ptr, ptr, env, i32)
+
+DEF_HELPER_3(vext_x_v_b, tl, ptr, tl, env)
+DEF_HELPER_3(vext_x_v_h, tl, ptr, tl, env)
+DEF_HELPER_3(vext_x_v_w, tl, ptr, tl, env)
+DEF_HELPER_3(vext_x_v_d, tl, ptr, tl, env)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 1504059415..c26a186d6a 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -555,6 +555,7 @@  vmsif_m         010110 . ..... 00011 010 ..... 1010111 @r2_vm
 vmsof_m         010110 . ..... 00010 010 ..... 1010111 @r2_vm
 viota_m         010110 . ..... 10000 010 ..... 1010111 @r2_vm
 vid_v           010110 . 00000 10001 010 ..... 1010111 @r1_vm
+vext_x_v        001100 1 ..... ..... 010 ..... 1010111 @r
 
 vsetvli         0 ........... ..... 111 ..... 1010111  @r2_zimm
 vsetvl          1000000 ..... ..... 111 ..... 1010111  @r
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
index 1ff72a6406..46651dfb10 100644
--- a/target/riscv/insn_trans/trans_rvv.inc.c
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
@@ -2210,3 +2210,36 @@  static bool trans_vid_v(DisasContext *s, arg_vid_v *a)
     }
     return false;
 }
+
+/*
+ *** Vector Permutation Instructions
+ */
+/* Integer Extract Instruction */
+typedef void (* gen_helper_vext_x_v)(TCGv, TCGv_ptr, TCGv, TCGv_env);
+static bool trans_vext_x_v(DisasContext *s, arg_r *a)
+{
+    if (vext_check_isa_ill(s, RVV)) {
+        TCGv_ptr src2;
+        TCGv dest, src1;
+        gen_helper_vext_x_v fns[4] = {
+            gen_helper_vext_x_v_b, gen_helper_vext_x_v_h,
+            gen_helper_vext_x_v_w, gen_helper_vext_x_v_d
+        };
+
+        dest = tcg_temp_new();
+        src1 = tcg_temp_new();
+        src2 = tcg_temp_new_ptr();
+
+        gen_get_gpr(src1, a->rs1);
+        tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, a->rs2));
+
+        fns[s->sew](dest, src2, src1, cpu_env);
+        gen_set_gpr(a->rd, dest);
+
+        tcg_temp_free(dest);
+        tcg_temp_free(src1);
+        tcg_temp_free_ptr(src2);
+        return true;
+    }
+    return false;
+}
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index ff3b60e9c8..8704ee120f 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -4440,3 +4440,23 @@  GEN_VEXT_VID_V(vid_v_b, uint8_t, H1, clearb)
 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2, clearh)
 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4, clearl)
 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8, clearq)
+
+/*
+ *** Vector Permutation Instructions
+ */
+/* Integer Extract Instruction */
+#define GEN_VEXT_X_V(NAME, ETYPE, H)                    \
+target_ulong HELPER(NAME)(void *vs2, target_ulong s1,   \
+        CPURISCVState *env)                             \
+{                                                       \
+    uint32_t vlen = env_archcpu(env)->cfg.vlen / 8;     \
+                                                        \
+    if (s1 >= vlen / sizeof(ETYPE)) {                   \
+        return 0;                                       \
+    }                                                   \
+    return *((ETYPE *)vs2 + H(s1));                     \
+}
+GEN_VEXT_X_V(vext_x_v_b, uint8_t, H1)
+GEN_VEXT_X_V(vext_x_v_h, uint16_t, H2)
+GEN_VEXT_X_V(vext_x_v_w, uint32_t, H4)
+GEN_VEXT_X_V(vext_x_v_d, uint64_t, H8)
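For reference, the H1/H2/H4/H8 arguments are the host-endian element-index
helpers that earlier patches in this series define near the top of
vector_helper.c; roughly (illustrative sketch, not part of this patch):

#ifdef HOST_WORDS_BIGENDIAN
/* Elements are stored swizzled within each 64-bit host word,
 * so adjust the element index according to the element size. */
#define H1(x)   ((x) ^ 7)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif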