diff mbox series

[v4,36/48] target/loongarch: Implement xvbitclr xvbitset xvbitrev

Message ID 20230830084902.2113960-37-gaosong@loongson.cn (mailing list archive)
State New, archived
Headers show
Series Add LoongArch LASX instructions | expand

Commit Message

Song Gao Aug. 30, 2023, 8:48 a.m. UTC
This patch includes:
- XVBITCLR[I].{B/H/W/D};
- XVBITSET[I].{B/H/W/D};
- XVBITREV[I].{B/H/W/D}.

Signed-off-by: Song Gao <gaosong@loongson.cn>
---
 target/loongarch/vec.h                       |  4 ++
 target/loongarch/insns.decode                | 27 +++++++++++
 target/loongarch/disas.c                     | 25 ++++++++++
 target/loongarch/vec_helper.c                | 48 ++++++++++----------
 target/loongarch/insn_trans/trans_lasx.c.inc | 27 +++++++++++
 5 files changed, 106 insertions(+), 25 deletions(-)

Comments

Richard Henderson Aug. 30, 2023, 11:30 p.m. UTC | #1
On 8/30/23 01:48, Song Gao wrote:
> This patch includes:
> - XVBITCLR[I].{B/H/W/D};
> - XVBITSET[I].{B/H/W/D};
> - XVBITREV[I].{B/H/W/D}.
> 
> Signed-off-by: Song Gao <gaosong@loongson.cn>
> ---
>   target/loongarch/vec.h                       |  4 ++
>   target/loongarch/insns.decode                | 27 +++++++++++
>   target/loongarch/disas.c                     | 25 ++++++++++
>   target/loongarch/vec_helper.c                | 48 ++++++++++----------
>   target/loongarch/insn_trans/trans_lasx.c.inc | 27 +++++++++++
>   5 files changed, 106 insertions(+), 25 deletions(-)
> 
> diff --git a/target/loongarch/vec.h b/target/loongarch/vec.h
> index 4497cd4a6d..aae70f9de9 100644
> --- a/target/loongarch/vec.h
> +++ b/target/loongarch/vec.h
> @@ -85,4 +85,8 @@
>   #define DO_CLZ_W(N)  (clz32(N))
>   #define DO_CLZ_D(N)  (clz64(N))
>   
> +#define DO_BITCLR(a, bit) (a & ~(1ull << bit))
> +#define DO_BITSET(a, bit) (a | 1ull << bit)
> +#define DO_BITREV(a, bit) (a ^ (1ull << bit))
> +


Aside from this movement,

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~
diff mbox series

Patch

diff --git a/target/loongarch/vec.h b/target/loongarch/vec.h
index 4497cd4a6d..aae70f9de9 100644
--- a/target/loongarch/vec.h
+++ b/target/loongarch/vec.h
@@ -85,4 +85,8 @@ 
 #define DO_CLZ_W(N)  (clz32(N))
 #define DO_CLZ_D(N)  (clz64(N))
 
+#define DO_BITCLR(a, bit) (a & ~(1ull << bit))
+#define DO_BITSET(a, bit) (a | 1ull << bit)
+#define DO_BITREV(a, bit) (a ^ (1ull << bit))
+
 #endif /* LOONGARCH_VEC_H */
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index d683c6a6ab..cb6db8002a 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1784,6 +1784,33 @@  xvpcnt_h         0111 01101001 11000 01001 ..... .....    @vv
 xvpcnt_w         0111 01101001 11000 01010 ..... .....    @vv
 xvpcnt_d         0111 01101001 11000 01011 ..... .....    @vv
 
+xvbitclr_b       0111 01010000 11000 ..... ..... .....    @vvv
+xvbitclr_h       0111 01010000 11001 ..... ..... .....    @vvv
+xvbitclr_w       0111 01010000 11010 ..... ..... .....    @vvv
+xvbitclr_d       0111 01010000 11011 ..... ..... .....    @vvv
+xvbitclri_b      0111 01110001 00000 01 ... ..... .....   @vv_ui3
+xvbitclri_h      0111 01110001 00000 1 .... ..... .....   @vv_ui4
+xvbitclri_w      0111 01110001 00001 ..... ..... .....    @vv_ui5
+xvbitclri_d      0111 01110001 0001 ...... ..... .....    @vv_ui6
+
+xvbitset_b       0111 01010000 11100 ..... ..... .....    @vvv
+xvbitset_h       0111 01010000 11101 ..... ..... .....    @vvv
+xvbitset_w       0111 01010000 11110 ..... ..... .....    @vvv
+xvbitset_d       0111 01010000 11111 ..... ..... .....    @vvv
+xvbitseti_b      0111 01110001 01000 01 ... ..... .....   @vv_ui3
+xvbitseti_h      0111 01110001 01000 1 .... ..... .....   @vv_ui4
+xvbitseti_w      0111 01110001 01001 ..... ..... .....    @vv_ui5
+xvbitseti_d      0111 01110001 0101 ...... ..... .....    @vv_ui6
+
+xvbitrev_b       0111 01010001 00000 ..... ..... .....    @vvv
+xvbitrev_h       0111 01010001 00001 ..... ..... .....    @vvv
+xvbitrev_w       0111 01010001 00010 ..... ..... .....    @vvv
+xvbitrev_d       0111 01010001 00011 ..... ..... .....    @vvv
+xvbitrevi_b      0111 01110001 10000 01 ... ..... .....   @vv_ui3
+xvbitrevi_h      0111 01110001 10000 1 .... ..... .....   @vv_ui4
+xvbitrevi_w      0111 01110001 10001 ..... ..... .....    @vv_ui5
+xvbitrevi_d      0111 01110001 1001 ...... ..... .....    @vv_ui6
+
 xvreplgr2vr_b    0111 01101001 11110 00000 ..... .....    @vr
 xvreplgr2vr_h    0111 01101001 11110 00001 ..... .....    @vr
 xvreplgr2vr_w    0111 01101001 11110 00010 ..... .....    @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 9e31f9bbbc..dad9243fd7 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -2210,6 +2210,31 @@  INSN_LASX(xvpcnt_h,          vv)
 INSN_LASX(xvpcnt_w,          vv)
 INSN_LASX(xvpcnt_d,          vv)
 
+INSN_LASX(xvbitclr_b,        vvv)
+INSN_LASX(xvbitclr_h,        vvv)
+INSN_LASX(xvbitclr_w,        vvv)
+INSN_LASX(xvbitclr_d,        vvv)
+INSN_LASX(xvbitclri_b,       vv_i)
+INSN_LASX(xvbitclri_h,       vv_i)
+INSN_LASX(xvbitclri_w,       vv_i)
+INSN_LASX(xvbitclri_d,       vv_i)
+INSN_LASX(xvbitset_b,        vvv)
+INSN_LASX(xvbitset_h,        vvv)
+INSN_LASX(xvbitset_w,        vvv)
+INSN_LASX(xvbitset_d,        vvv)
+INSN_LASX(xvbitseti_b,       vv_i)
+INSN_LASX(xvbitseti_h,       vv_i)
+INSN_LASX(xvbitseti_w,       vv_i)
+INSN_LASX(xvbitseti_d,       vv_i)
+INSN_LASX(xvbitrev_b,        vvv)
+INSN_LASX(xvbitrev_h,        vvv)
+INSN_LASX(xvbitrev_w,        vvv)
+INSN_LASX(xvbitrev_d,        vvv)
+INSN_LASX(xvbitrevi_b,       vv_i)
+INSN_LASX(xvbitrevi_h,       vv_i)
+INSN_LASX(xvbitrevi_w,       vv_i)
+INSN_LASX(xvbitrevi_d,       vv_i)
+
 INSN_LASX(xvreplgr2vr_b,     vr)
 INSN_LASX(xvreplgr2vr_h,     vr)
 INSN_LASX(xvreplgr2vr_w,     vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 9c2b52fd7d..03b42dc887 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -2195,21 +2195,18 @@  VPCNT(vpcnt_h, 16, UH, ctpop16)
 VPCNT(vpcnt_w, 32, UW, ctpop32)
 VPCNT(vpcnt_d, 64, UD, ctpop64)
 
-#define DO_BITCLR(a, bit) (a & ~(1ull << bit))
-#define DO_BITSET(a, bit) (a | 1ull << bit)
-#define DO_BITREV(a, bit) (a ^ (1ull << bit))
-
-#define DO_BIT(NAME, BIT, E, DO_OP)                         \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
-{                                                           \
-    int i;                                                  \
-    VReg *Vd = (VReg *)vd;                                  \
-    VReg *Vj = (VReg *)vj;                                  \
-    VReg *Vk = (VReg *)vk;                                  \
-                                                            \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                     \
-        Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)%BIT);           \
-    }                                                       \
+#define DO_BIT(NAME, BIT, E, DO_OP)                            \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{                                                              \
+    int i;                                                     \
+    VReg *Vd = (VReg *)vd;                                     \
+    VReg *Vj = (VReg *)vj;                                     \
+    VReg *Vk = (VReg *)vk;                                     \
+    int oprsz = simd_oprsz(desc);                              \
+                                                               \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                  \
+        Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i) % BIT);            \
+    }                                                          \
 }
 
 DO_BIT(vbitclr_b, 8, UB, DO_BITCLR)
@@ -2225,16 +2222,17 @@  DO_BIT(vbitrev_h, 16, UH, DO_BITREV)
 DO_BIT(vbitrev_w, 32, UW, DO_BITREV)
 DO_BIT(vbitrev_d, 64, UD, DO_BITREV)
 
-#define DO_BITI(NAME, BIT, E, DO_OP)                            \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \
-{                                                               \
-    int i;                                                      \
-    VReg *Vd = (VReg *)vd;                                      \
-    VReg *Vj = (VReg *)vj;                                      \
-                                                                \
-    for (i = 0; i < LSX_LEN/BIT; i++) {                         \
-        Vd->E(i) = DO_OP(Vj->E(i), imm);                        \
-    }                                                           \
+#define DO_BITI(NAME, BIT, E, DO_OP)                               \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{                                                                  \
+    int i;                                                         \
+    VReg *Vd = (VReg *)vd;                                         \
+    VReg *Vj = (VReg *)vj;                                         \
+    int oprsz = simd_oprsz(desc);                                  \
+                                                                   \
+    for (i = 0; i < oprsz / (BIT / 8); i++) {                      \
+        Vd->E(i) = DO_OP(Vj->E(i), imm);                           \
+    }                                                              \
 }
 
 DO_BITI(vbitclri_b, 8, UB, DO_BITCLR)
diff --git a/target/loongarch/insn_trans/trans_lasx.c.inc b/target/loongarch/insn_trans/trans_lasx.c.inc
index 2a24de178d..92c6506e04 100644
--- a/target/loongarch/insn_trans/trans_lasx.c.inc
+++ b/target/loongarch/insn_trans/trans_lasx.c.inc
@@ -533,6 +533,33 @@  TRANS(xvpcnt_h, LASX, gen_vv, 32, gen_helper_vpcnt_h)
 TRANS(xvpcnt_w, LASX, gen_vv, 32, gen_helper_vpcnt_w)
 TRANS(xvpcnt_d, LASX, gen_vv, 32, gen_helper_vpcnt_d)
 
+TRANS(xvbitclr_b, LASX, gvec_vvv, 32, MO_8, do_vbitclr)
+TRANS(xvbitclr_h, LASX, gvec_vvv, 32, MO_16, do_vbitclr)
+TRANS(xvbitclr_w, LASX, gvec_vvv, 32, MO_32, do_vbitclr)
+TRANS(xvbitclr_d, LASX, gvec_vvv, 32, MO_64, do_vbitclr)
+TRANS(xvbitclri_b, LASX, gvec_vv_i, 32, MO_8, do_vbitclri)
+TRANS(xvbitclri_h, LASX, gvec_vv_i, 32, MO_16, do_vbitclri)
+TRANS(xvbitclri_w, LASX, gvec_vv_i, 32, MO_32, do_vbitclri)
+TRANS(xvbitclri_d, LASX, gvec_vv_i, 32, MO_64, do_vbitclri)
+
+TRANS(xvbitset_b, LASX, gvec_vvv, 32, MO_8, do_vbitset)
+TRANS(xvbitset_h, LASX, gvec_vvv, 32, MO_16, do_vbitset)
+TRANS(xvbitset_w, LASX, gvec_vvv, 32, MO_32, do_vbitset)
+TRANS(xvbitset_d, LASX, gvec_vvv, 32, MO_64, do_vbitset)
+TRANS(xvbitseti_b, LASX, gvec_vv_i, 32, MO_8, do_vbitseti)
+TRANS(xvbitseti_h, LASX, gvec_vv_i, 32, MO_16, do_vbitseti)
+TRANS(xvbitseti_w, LASX, gvec_vv_i, 32, MO_32, do_vbitseti)
+TRANS(xvbitseti_d, LASX, gvec_vv_i, 32, MO_64, do_vbitseti)
+
+TRANS(xvbitrev_b, LASX, gvec_vvv, 32, MO_8, do_vbitrev)
+TRANS(xvbitrev_h, LASX, gvec_vvv, 32, MO_16, do_vbitrev)
+TRANS(xvbitrev_w, LASX, gvec_vvv, 32, MO_32, do_vbitrev)
+TRANS(xvbitrev_d, LASX, gvec_vvv, 32, MO_64, do_vbitrev)
+TRANS(xvbitrevi_b, LASX, gvec_vv_i, 32, MO_8, do_vbitrevi)
+TRANS(xvbitrevi_h, LASX, gvec_vv_i, 32, MO_16, do_vbitrevi)
+TRANS(xvbitrevi_w, LASX, gvec_vv_i, 32, MO_32, do_vbitrevi)
+TRANS(xvbitrevi_d, LASX, gvec_vv_i, 32, MO_64, do_vbitrevi)
+
 TRANS(xvreplgr2vr_b, LASX, gvec_dup, 32, MO_8)
 TRANS(xvreplgr2vr_h, LASX, gvec_dup, 32, MO_16)
 TRANS(xvreplgr2vr_w, LASX, gvec_dup, 32, MO_32)