[v1,07/15] tcg/riscv: Implement vector mov/dup{m/i}

Message ID 20240813113436.831-8-zhiwei_liu@linux.alibaba.com
State New, archived
Series tcg/riscv: Add support for vector

Commit Message

LIU Zhiwei Aug. 13, 2024, 11:34 a.m. UTC
From: TANG Tiancheng <tangtiancheng.ttc@alibaba-inc.com>

Signed-off-by: TANG Tiancheng <tangtiancheng.ttc@alibaba-inc.com>
Reviewed-by: Liu Zhiwei <zhiwei_liu@linux.alibaba.com>
---
 tcg/riscv/tcg-target.c.inc | 43 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

Comments

Richard Henderson Aug. 14, 2024, 9:11 a.m. UTC | #1
On 8/13/24 21:34, LIU Zhiwei wrote:
> @@ -641,6 +645,13 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
>       case TCG_TYPE_I64:
>           tcg_out_opc_imm(s, OPC_ADDI, ret, arg, 0);
>           break;
> +    case TCG_TYPE_V64:
> +    case TCG_TYPE_V128:
> +    case TCG_TYPE_V256:
> +        tcg_debug_assert(ret > TCG_REG_V0 && arg > TCG_REG_V0);
> +        tcg_target_set_vec_config(s, type, prev_vece);
> +        tcg_out_opc_vv(s, OPC_VMV_V_V, ret, TCG_REG_V0, arg, true);

I suggest these asserts be in tcg_out_opc_*
That way you don't need to replicate to all uses.
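A minimal sketch of that centralization, assuming the tcg_out_opc_vv signature quoted above and the standard RVV operand layout (vd at bit 7, vs1 at bit 15, vs2 at bit 20, vm at bit 25). Since vmv.v.v passes TCG_REG_V0 in its unused vs2 field, the emitter can only assert that each operand is a vector register at all, not that it avoids the mask register:

/*
 * Sketch only: assert "operand is a vector register" once, inside the
 * emitter, instead of at every call site.  The enum-to-field
 * conversion (vd - TCG_REG_V0) is an assumption about how the series
 * numbers its vector registers.
 */
static void tcg_out_opc_vv(TCGContext *s, RISCVInsn opc,
                           TCGReg vd, TCGReg vs2, TCGReg vs1, bool vm)
{
    tcg_debug_assert(vd >= TCG_REG_V0);
    tcg_debug_assert(vs2 >= TCG_REG_V0);
    tcg_debug_assert(vs1 >= TCG_REG_V0);
    tcg_out32(s, opc | (vd - TCG_REG_V0) << 7
                     | (vs1 - TCG_REG_V0) << 15
                     | (vs2 - TCG_REG_V0) << 20
                     | (vm ? 1u : 0) << 25);
}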

> +static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
> +                                   TCGReg dst, TCGReg src)

Oh, please drop all of the inline markup, from all patches.
Let the compiler decide.

> +static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
> +                                    TCGReg dst, TCGReg base, intptr_t offset)
> +{
> +    tcg_out_ld(s, TCG_TYPE_REG, TCG_REG_TMP0, base, offset);
> +    return tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
> +}

Is this really better than using strided load with rs2 = r0?
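For reference, a strided load whose stride register is x0 reads the same address into every element, i.e. a broadcast straight from memory. A rough sketch of that alternative, under the assumption of OPC_VLSE*_V opcode constants and a tcg_out_opc_ldst_vec helper, neither of which appears in this version of the series:

/*
 * Sketch only: vlse<eew>.v dst, (addr), x0 broadcasts one element
 * from memory into every element of dst, avoiding the scalar
 * temporary and the vmv.v.x.
 */
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset)
{
    static const RISCVInsn vlse[4] = {          /* assumed names */
        OPC_VLSE8_V, OPC_VLSE16_V, OPC_VLSE32_V, OPC_VLSE64_V
    };

    /* Assumes offset fits in simm12; larger offsets need movi + add. */
    tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP0, base, offset);
    tcg_target_set_vec_config(s, type, vece);
    tcg_out_opc_ldst_vec(s, vlse[vece], dst, TCG_REG_TMP0, TCG_REG_ZERO);
    return true;
}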


r~
LIU Zhiwei Aug. 15, 2024, 10:49 a.m. UTC | #2
On 2024/8/14 17:11, Richard Henderson wrote:
> On 8/13/24 21:34, LIU Zhiwei wrote:
>> @@ -641,6 +645,13 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
>>       case TCG_TYPE_I64:
>>           tcg_out_opc_imm(s, OPC_ADDI, ret, arg, 0);
>>           break;
>> +    case TCG_TYPE_V64:
>> +    case TCG_TYPE_V128:
>> +    case TCG_TYPE_V256:
>> +        tcg_debug_assert(ret > TCG_REG_V0 && arg > TCG_REG_V0);
>> +        tcg_target_set_vec_config(s, type, prev_vece);
>> +        tcg_out_opc_vv(s, OPC_VMV_V_V, ret, TCG_REG_V0, arg, true);
>
> I suggest these asserts be in tcg_out_opc_*
> That way you don't need to replicate to all uses.
OK.
>
>> +static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
>> +                                   TCGReg dst, TCGReg src)
>
> Oh, please drop all of the inline markup, from all patches.
> Let the compiler decide.
>
OK.
>> +static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
>> +                                    TCGReg dst, TCGReg base, intptr_t offset)
>> +{
>> +    tcg_out_ld(s, TCG_TYPE_REG, TCG_REG_TMP0, base, offset);
>> +    return tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
>> +}
>
> Is this really better than using strided load with rs2 = r0?

It depends. On our test board, it is.

Thanks,
Zhiwei

>
>
> r~
Richard Henderson Aug. 20, 2024, 9 a.m. UTC | #3
On 8/13/24 21:34, LIU Zhiwei wrote:
> +    case TCG_TYPE_V64:
> +    case TCG_TYPE_V128:
> +    case TCG_TYPE_V256:
> +        tcg_debug_assert(ret > TCG_REG_V0 && arg > TCG_REG_V0);
> +        tcg_target_set_vec_config(s, type, prev_vece);
> +        tcg_out_opc_vv(s, OPC_VMV_V_V, ret, TCG_REG_V0, arg, true);
> +        break;

Is it worth using whole register move (vmvNr.v) for the appropriate VLEN?
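For context, vmv<nr>r.v copies nr whole vector registers (nr = 1, 2, 4 or 8) independent of the current vtype, so a register-to-register move could skip the vsetvli entirely whenever the type spans whole registers at the configured VLEN. A rough sketch, reusing the tcg_out_opc_vi helper quoted above with an OPC_VMVNR_V constant that is an assumption, not part of this series:

/*
 * Sketch only: vmv<nr>r.v lives in the OPIVI encoding space with
 * funct6 = 0b100111 and vm = 1, carrying (nr - 1) in the simm5 field.
 * It moves whole registers regardless of vtype, so no vsetvli is
 * required beforehand.
 */
static void tcg_out_vmv_nr_r(TCGContext *s, int nr, TCGReg vd, TCGReg vs2)
{
    tcg_debug_assert(nr == 1 || nr == 2 || nr == 4 || nr == 8);
    tcg_out_opc_vi(s, OPC_VMVNR_V, vd, vs2, nr - 1, true);
}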


r~
LIU Zhiwei Aug. 20, 2024, 9:26 a.m. UTC | #4
On 2024/8/20 17:00, Richard Henderson wrote:
> On 8/13/24 21:34, LIU Zhiwei wrote:
>> +    case TCG_TYPE_V64:
>> +    case TCG_TYPE_V128:
>> +    case TCG_TYPE_V256:
>> +        tcg_debug_assert(ret > TCG_REG_V0 && arg > TCG_REG_V0);
>> +        tcg_target_set_vec_config(s, type, prev_vece);
>> +        tcg_out_opc_vv(s, OPC_VMV_V_V, ret, TCG_REG_V0, arg, true);
>> +        break;
>
> Is it worth using whole register move (vmvNr.v) for the appropriate VLEN?

Yes. We will use vmvNr.v in the next version. Thanks for your suggestion.

Zhiwei

>
>
> r~

Patch

diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index f17d679d71..f60913e805 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -288,6 +288,10 @@ typedef enum {
     OPC_VSE16_V = 0x5027 | V_SUMOP,
     OPC_VSE32_V = 0x6027 | V_SUMOP,
     OPC_VSE64_V = 0x7027 | V_SUMOP,
+
+    OPC_VMV_V_V = 0x5e000057 | V_OPIVV,
+    OPC_VMV_V_I = 0x5e000057 | V_OPIVI,
+    OPC_VMV_V_X = 0x5e000057 | V_OPIVX,
 } RISCVInsn;
 
 /*
@@ -641,6 +645,13 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
     case TCG_TYPE_I64:
         tcg_out_opc_imm(s, OPC_ADDI, ret, arg, 0);
         break;
+    case TCG_TYPE_V64:
+    case TCG_TYPE_V128:
+    case TCG_TYPE_V256:
+        tcg_debug_assert(ret > TCG_REG_V0 && arg > TCG_REG_V0);
+        tcg_target_set_vec_config(s, type, prev_vece);
+        tcg_out_opc_vv(s, OPC_VMV_V_V, ret, TCG_REG_V0, arg, true);
+        break;
     default:
         g_assert_not_reached();
     }
@@ -977,6 +988,33 @@ static void tcg_out_addsub2(TCGContext *s,
     }
 }
 
+static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
+                                   TCGReg dst, TCGReg src)
+{
+    tcg_target_set_vec_config(s, type, vece);
+    tcg_out_opc_vx(s, OPC_VMV_V_X, dst, TCG_REG_V0, src, true);
+    return true;
+}
+
+static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
+                                    TCGReg dst, TCGReg base, intptr_t offset)
+{
+    tcg_out_ld(s, TCG_TYPE_REG, TCG_REG_TMP0, base, offset);
+    return tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
+}
+
+static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
+                                    TCGReg dst, int64_t arg)
+{
+    if (arg < 16 && arg >= -16) {
+        tcg_target_set_vec_config(s, type, vece);
+        tcg_out_opc_vi(s, OPC_VMV_V_I, dst, TCG_REG_V0, arg, true);
+        return;
+    }
+    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, arg);
+    tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
+}
+
 static const struct {
     RISCVInsn op;
     bool swap;
@@ -2111,6 +2149,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         tcg_target_set_vec_config(s, type, vece);
     }
     switch (opc) {
+    case INDEX_op_dupm_vec:
+        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
+        break;
     case INDEX_op_ld_vec:
         tcg_out_ld(s, type, a0, a1, a2);
         break;
@@ -2282,6 +2323,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 
     case INDEX_op_st_vec:
         return C_O0_I2(v, r);
+    case INDEX_op_dup_vec:
+    case INDEX_op_dupm_vec:
     case INDEX_op_ld_vec:
         return C_O1_I1(v, r);
     default: