diff mbox series

[v2,07/22] target/loongarch: Add fixed point arithmetic instruction translation

Message ID 1626861198-6133-8-git-send-email-gaosong@loongson.cn (mailing list archive)
State New, archived
Headers show
Series Add LoongArch linux-user emulation support | expand

Commit Message

Song Gao July 21, 2021, 9:53 a.m. UTC
This patch implements fixed-point arithmetic instruction translation.

This includes:
- ADD.{W/D}, SUB.{W/D}
- ADDI.{W/D}, ADDU16I.D
- ALSL.{W[U]/D}
- LU12I.W, LU32I.D, LU52I.D
- SLT[U], SLT[U]I
- PCADDI, PCADDU12I, PCADDU18I, PCALAU12I
- AND, OR, NOR, XOR, ANDN, ORN
- MUL.{W/D}, MULH.{W[U]/D[U]}
- MULW.D.W[U]
- DIV.{W[U]/D[U]}, MOD.{W[U]/D[U]}
- ANDI, ORI, XORI

Signed-off-by: Song Gao <gaosong@loongson.cn>
---
 target/loongarch/insns.decode |   89 ++++
 target/loongarch/trans.inc.c  | 1090 +++++++++++++++++++++++++++++++++++++++++
 target/loongarch/translate.c  |   12 +
 target/loongarch/translate.h  |    1 +
 4 files changed, 1192 insertions(+)
 create mode 100644 target/loongarch/insns.decode
 create mode 100644 target/loongarch/trans.inc.c

Comments

Philippe Mathieu-Daudé July 21, 2021, 5:38 p.m. UTC | #1
On 7/21/21 11:53 AM, Song Gao wrote:
> This patch implement fixed point arithemtic instruction translation.
> 
> This includes:
> - ADD.{W/D}, SUB.{W/D}
> - ADDI.{W/D}, ADDU16ID
> - ALSL.{W[U]/D}
> - LU12I.W, LU32I.D LU52I.D
> - SLT[U], SLT[U]I
> - PCADDI, PCADDU12I, PCADDU18I, PCALAU12I
> - AND, OR, NOR, XOR, ANDN, ORN
> - MUL.{W/D}, MULH.{W[U]/D[U]}
> - MULW.D.W[U]
> - DIV.{W[U]/D[U]}, MOD.{W[U]/D[U]}
> - ANDI, ORI, XORI
> 
> Signed-off-by: Song Gao <gaosong@loongson.cn>
> ---
>  target/loongarch/insns.decode |   89 ++++
>  target/loongarch/trans.inc.c  | 1090 +++++++++++++++++++++++++++++++++++++++++
>  target/loongarch/translate.c  |   12 +
>  target/loongarch/translate.h  |    1 +
>  4 files changed, 1192 insertions(+)
>  create mode 100644 target/loongarch/insns.decode
>  create mode 100644 target/loongarch/trans.inc.c

Please don't include all .inc.c in one big translate.c...

> diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
> index 531f7e1..b60bdc2 100644
> --- a/target/loongarch/translate.c
> +++ b/target/loongarch/translate.c
> @@ -57,6 +57,15 @@ void gen_load_gpr(TCGv t, int reg)
>      }
>  }
>  
> +TCGv get_gpr(int regno)
> +{
> +    if (regno == 0) {
> +        return tcg_constant_tl(0);
> +    } else {
> +        return cpu_gpr[regno];
> +    }
> +}
> +
>  static inline void gen_save_pc(target_ulong pc)

... expose this one ...

>  {
>      tcg_gen_movi_tl(cpu_PC, pc);
> @@ -287,6 +296,9 @@ static bool loongarch_tr_breakpoint_check(DisasContextBase *dcbase,
>      return true;
>  }
>  
> +#include "decode-insns.c.inc"

... and move this include to "trans.c".

> +#include "trans.inc.c"

removing this include.

>  static void loongarch_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
>  {
>      CPULoongArchState *env = cs->env_ptr;
> diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h
> index 333c3bf..ef4d4e7 100644
> --- a/target/loongarch/translate.h
> +++ b/target/loongarch/translate.h
> @@ -35,6 +35,7 @@ void check_fpu_enabled(DisasContext *ctx);
>  
>  void gen_base_offset_addr(TCGv addr, int base, int offset);
>  void gen_load_gpr(TCGv t, int reg);
> +TCGv get_gpr(int regno);
>  void gen_load_fpr32(TCGv_i32 t, int reg);
>  void gen_load_fpr64(TCGv_i64 t, int reg);
>  void gen_store_fpr32(TCGv_i32 t, int reg);
>
Philippe Mathieu-Daudé July 21, 2021, 5:49 p.m. UTC | #2
On 7/21/21 7:38 PM, Philippe Mathieu-Daudé wrote:
> On 7/21/21 11:53 AM, Song Gao wrote:
>> This patch implement fixed point arithemtic instruction translation.

Typo arithmetic.

>>
>> This includes:
>> - ADD.{W/D}, SUB.{W/D}
>> - ADDI.{W/D}, ADDU16ID
>> - ALSL.{W[U]/D}
>> - LU12I.W, LU32I.D LU52I.D
>> - SLT[U], SLT[U]I
>> - PCADDI, PCADDU12I, PCADDU18I, PCALAU12I
>> - AND, OR, NOR, XOR, ANDN, ORN
>> - MUL.{W/D}, MULH.{W[U]/D[U]}
>> - MULW.D.W[U]
>> - DIV.{W[U]/D[U]}, MOD.{W[U]/D[U]}
>> - ANDI, ORI, XORI
>>
>> Signed-off-by: Song Gao <gaosong@loongson.cn>
>> ---
>>  target/loongarch/insns.decode |   89 ++++
>>  target/loongarch/trans.inc.c  | 1090 +++++++++++++++++++++++++++++++++++++++++
>>  target/loongarch/translate.c  |   12 +
>>  target/loongarch/translate.h  |    1 +
>>  4 files changed, 1192 insertions(+)
>>  create mode 100644 target/loongarch/insns.decode
>>  create mode 100644 target/loongarch/trans.inc.c
> 
> Please don't include all .inc.c in one big translate.c...
> 
>> diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
>> index 531f7e1..b60bdc2 100644
>> --- a/target/loongarch/translate.c
>> +++ b/target/loongarch/translate.c
>> @@ -57,6 +57,15 @@ void gen_load_gpr(TCGv t, int reg)
>>      }
>>  }
>>  
>> +TCGv get_gpr(int regno)
>> +{
>> +    if (regno == 0) {
>> +        return tcg_constant_tl(0);
>> +    } else {
>> +        return cpu_gpr[regno];
>> +    }
>> +}
>> +
>>  static inline void gen_save_pc(target_ulong pc)
> 
> ... expose this one ...
> 
>>  {
>>      tcg_gen_movi_tl(cpu_PC, pc);
>> @@ -287,6 +296,9 @@ static bool loongarch_tr_breakpoint_check(DisasContextBase *dcbase,
>>      return true;
>>  }
>>  
>> +#include "decode-insns.c.inc"
> 
> ... and move this include to "trans.c".

Since you have the luck of adding a new architecture, you could
start cleanly from scratch and add groups of instructions, so
this patch would add "trans_arithmetic.c", etc. in the series.

>> +#include "trans.inc.c"
> 
> removing this include.
> 
>>  static void loongarch_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
>>  {
>>      CPULoongArchState *env = cs->env_ptr;
>> diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h
>> index 333c3bf..ef4d4e7 100644
>> --- a/target/loongarch/translate.h
>> +++ b/target/loongarch/translate.h
>> @@ -35,6 +35,7 @@ void check_fpu_enabled(DisasContext *ctx);
>>  
>>  void gen_base_offset_addr(TCGv addr, int base, int offset);
>>  void gen_load_gpr(TCGv t, int reg);
>> +TCGv get_gpr(int regno);
>>  void gen_load_fpr32(TCGv_i32 t, int reg);
>>  void gen_load_fpr64(TCGv_i64 t, int reg);
>>  void gen_store_fpr32(TCGv_i32 t, int reg);
>>
> 
>
Song Gao July 22, 2021, 7:41 a.m. UTC | #3
Hi, Philippe,

On 07/22/2021 01:49 AM, Philippe Mathieu-Daudé wrote:
> On 7/21/21 7:38 PM, Philippe Mathieu-Daudé wrote:
>> On 7/21/21 11:53 AM, Song Gao wrote:
>>> This patch implement fixed point arithemtic instruction translation.
> 
> Typo arithmetic.
> 
>>>
>>> This includes:
>>> - ADD.{W/D}, SUB.{W/D}
>>> - ADDI.{W/D}, ADDU16ID
>>> - ALSL.{W[U]/D}
>>> - LU12I.W, LU32I.D LU52I.D
>>> - SLT[U], SLT[U]I
>>> - PCADDI, PCADDU12I, PCADDU18I, PCALAU12I
>>> - AND, OR, NOR, XOR, ANDN, ORN
>>> - MUL.{W/D}, MULH.{W[U]/D[U]}
>>> - MULW.D.W[U]
>>> - DIV.{W[U]/D[U]}, MOD.{W[U]/D[U]}
>>> - ANDI, ORI, XORI
>>>
>>> Signed-off-by: Song Gao <gaosong@loongson.cn>
>>> ---
>>>  target/loongarch/insns.decode |   89 ++++
>>>  target/loongarch/trans.inc.c  | 1090 +++++++++++++++++++++++++++++++++++++++++
>>>  target/loongarch/translate.c  |   12 +
>>>  target/loongarch/translate.h  |    1 +
>>>  4 files changed, 1192 insertions(+)
>>>  create mode 100644 target/loongarch/insns.decode
>>>  create mode 100644 target/loongarch/trans.inc.c
>>
>> Please don't include all .inc.c in one big translate.c...
>>
>>> diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
>>> index 531f7e1..b60bdc2 100644
>>> --- a/target/loongarch/translate.c
>>> +++ b/target/loongarch/translate.c
>>> @@ -57,6 +57,15 @@ void gen_load_gpr(TCGv t, int reg)
>>>      }
>>>  }
>>>  
>>> +TCGv get_gpr(int regno)
>>> +{
>>> +    if (regno == 0) {
>>> +        return tcg_constant_tl(0);
>>> +    } else {
>>> +        return cpu_gpr[regno];
>>> +    }
>>> +}
>>> +
>>>  static inline void gen_save_pc(target_ulong pc)
>>
>> ... expose this one ...
>>
>>>  {
>>>      tcg_gen_movi_tl(cpu_PC, pc);
>>> @@ -287,6 +296,9 @@ static bool loongarch_tr_breakpoint_check(DisasContextBase *dcbase,
>>>      return true;
>>>  }
>>>  
>>> +#include "decode-insns.c.inc"
>>
>> ... and move this include to "trans.c".
> 
> Since you have the luck to add a new architecture, you could
> start cleanly from scratch and add group of instructions, so
> this patch would add "trans_arithmetic.c", etc.. in the series.
> 

Got it. The file trans.inc.c seems too big ...

Thanks,
Song Gao
Richard Henderson July 23, 2021, 12:46 a.m. UTC | #4
On 7/20/21 11:53 PM, Song Gao wrote:
> +/* Fixed point arithmetic operation instruction translation */
> +static bool trans_add_w(DisasContext *ctx, arg_add_w *a)
> +{
> +    TCGv Rd = cpu_gpr[a->rd];
> +    TCGv Rj = cpu_gpr[a->rj];
> +    TCGv Rk = cpu_gpr[a->rk];
> +
> +    if (a->rd == 0) {
> +        /* Nop */
> +        return true;
> +    }
> +
> +    if (a->rj != 0 && a->rk != 0) {
> +        tcg_gen_add_tl(Rd, Rj, Rk);
> +        tcg_gen_ext32s_tl(Rd, Rd);
> +    } else if (a->rj == 0 && a->rk != 0) {
> +        tcg_gen_mov_tl(Rd, Rk);
> +    } else if (a->rj != 0 && a->rk == 0) {
> +        tcg_gen_mov_tl(Rd, Rj);
> +    } else {
> +        tcg_gen_movi_tl(Rd, 0);
> +    }
> +
> +    return true;
> +}

Do not do all of this "if reg(n) zero" testing.

Use a common function to perform the gpr lookup, and a small callback function for the 
operation.  Often, the callback function already exists within include/tcg/tcg-op.h.

Please see my riscv cleanup patch set I referenced vs patch 6.

> +static bool trans_orn(DisasContext *ctx, arg_orn *a)
> +{
> +    TCGv Rd = cpu_gpr[a->rd];
> +    TCGv Rj = cpu_gpr[a->rj];
> +
> +    if (a->rd == 0) {
> +        /* Nop */
> +        return true;
> +    }
> +
> +    TCGv t0 = tcg_temp_new();
> +    gen_load_gpr(t0, a->rk);
> +
> +    tcg_gen_not_tl(t0, t0);
> +    tcg_gen_or_tl(Rd, Rj, t0);

tcg_gen_orc_tl.

> +static bool trans_andn(DisasContext *ctx, arg_andn *a)
> +{
> +    TCGv Rd = cpu_gpr[a->rd];
> +    TCGv Rj = cpu_gpr[a->rj];
> +
> +    if (a->rd == 0) {
> +        /* Nop */
> +        return true;
> +    }
> +
> +    TCGv t0 = tcg_temp_new();
> +    gen_load_gpr(t0, a->rk);
> +
> +    tcg_gen_not_tl(t0, t0);
> +    tcg_gen_and_tl(Rd, Rj, t0);

tcg_gen_andc_tl.

> +static bool trans_mul_d(DisasContext *ctx, arg_mul_d *a)
> +{
> +    TCGv t0, t1;
> +    TCGv Rd = cpu_gpr[a->rd];
> +
> +    if (a->rd == 0) {
> +        /* Nop */
> +        return true;
> +    }
> +
> +    t0 = get_gpr(a->rj);
> +    t1 = get_gpr(a->rk);
> +
> +    check_loongarch_64(ctx);

Architecture checks go first, before you've decided the operation is a nop.

> +static bool trans_mulh_d(DisasContext *ctx, arg_mulh_d *a)
> +{
> +    TCGv t0, t1, t2;
> +    TCGv Rd = cpu_gpr[a->rd];
> +
> +    if (a->rd == 0) {
> +        /* Nop */
> +        return true;
> +    }
> +
> +    t0 = get_gpr(a->rj);
> +    t1 = get_gpr(a->rk);
> +    t2 = tcg_temp_new();
> +
> +    check_loongarch_64(ctx);
> +    tcg_gen_muls2_i64(t2, Rd, t0, t1);

If you actually supported LA32, you'd notice this doesn't compile.  Are you planning to 
support LA32 in the future?

> +static bool trans_lu32i_d(DisasContext *ctx, arg_lu32i_d *a)
> +{
> +    TCGv_i64 t0, t1;
> +    TCGv Rd = cpu_gpr[a->rd];
> +
> +    if (a->rd == 0) {
> +        /* Nop */
> +        return true;
> +    }
> +
> +    t0 = tcg_temp_new_i64();
> +    t1 = tcg_temp_new_i64();
> +
> +    tcg_gen_movi_tl(t0, a->si20);
> +    tcg_gen_concat_tl_i64(t1, Rd, t0);
> +    tcg_gen_mov_tl(Rd, t1);

Hmm.  Better as

   tcg_gen_deposit_tl(Rd, Rd, tcg_constant_tl(a->si20), 32, 32);

> +static bool trans_lu52i_d(DisasContext *ctx, arg_lu52i_d *a)
> +{
> +    TCGv t0, t1;
> +    TCGv Rd = cpu_gpr[a->rd];
> +
> +    if (a->rd == 0) {
> +        /* Nop */
> +        return true;
> +    }
> +
> +    t0 = tcg_temp_new();
> +    t1 = tcg_temp_new();
> +
> +    gen_load_gpr(t1, a->rj);
> +
> +    tcg_gen_movi_tl(t0, a->si12);
> +    tcg_gen_shli_tl(t0, t0, 52);
> +    tcg_gen_andi_tl(t1, t1, 0xfffffffffffffU);
> +    tcg_gen_or_tl(Rd, t0, t1);

Definitely better as

   tcg_gen_deposit_tl(Rd, Rd, tcg_constant_tl(a->si12), 52, 12);

> +static bool trans_addi_w(DisasContext *ctx, arg_addi_w *a)
> +{
> +    TCGv Rd = cpu_gpr[a->rd];
> +    TCGv Rj = cpu_gpr[a->rj];
> +    target_ulong uimm = (target_long)(a->si12);
> +
> +    if (a->rd == 0) {
> +        /* Nop */
> +        return true;
> +    }
> +
> +    if (a->rj != 0) {
> +        tcg_gen_addi_tl(Rd, Rj, uimm);
> +        tcg_gen_ext32s_tl(Rd, Rd);
> +    } else {
> +        tcg_gen_movi_tl(Rd, uimm);
> +    }
> +
> +    return true;
> +}

Again, there should be a common function for all of the two-register-immediate operations. 
  The callback here is exactly the same as for trans_add_w.

> +static bool trans_xori(DisasContext *ctx, arg_xori *a)
> +{
> +    TCGv Rd = cpu_gpr[a->rd];
> +    TCGv Rj = cpu_gpr[a->rj];
> +
> +    target_ulong uimm = (uint16_t)(a->ui12);

You shouldn't need these sorts of casts.


r~
Song Gao July 26, 2021, 11:56 a.m. UTC | #5
Hi, Richard.

On 07/23/2021 08:46 AM, Richard Henderson wrote:
> On 7/20/21 11:53 PM, Song Gao wrote:
>> +/* Fixed point arithmetic operation instruction translation */
>> +static bool trans_add_w(DisasContext *ctx, arg_add_w *a)
>> +{
>> +    TCGv Rd = cpu_gpr[a->rd];
>> +    TCGv Rj = cpu_gpr[a->rj];
>> +    TCGv Rk = cpu_gpr[a->rk];
>> +
>> +    if (a->rd == 0) {
>> +        /* Nop */
>> +        return true;
>> +    }
>> +
>> +    if (a->rj != 0 && a->rk != 0) {
>> +        tcg_gen_add_tl(Rd, Rj, Rk);
>> +        tcg_gen_ext32s_tl(Rd, Rd);
>> +    } else if (a->rj == 0 && a->rk != 0) {
>> +        tcg_gen_mov_tl(Rd, Rk);
>> +    } else if (a->rj != 0 && a->rk == 0) {
>> +        tcg_gen_mov_tl(Rd, Rj);
>> +    } else {
>> +        tcg_gen_movi_tl(Rd, 0);
>> +    }
>> +
>> +    return true;
>> +}
> 
> Do not do all of this "if reg(n) zero" testing.
> 
> Use a common function to perform the gpr lookup, and a small callback function for the operation.  Often, the callback function already exists within include/tcg/tcg-op.h.
> 
> Please see my riscv cleanup patch set I referenced vs patch 6.

I am not sure whether the 'riscv cleanup' patches are the ones at:
  
   https://patchew.org/QEMU/20210709042608.883256-1-richard.henderson@linaro.org 

It seems that gpr_dst/gpr_src are the common functions that perform the gpr lookup. Is that right?


> 
>> +static bool trans_orn(DisasContext *ctx, arg_orn *a)
>> +{
>> +    TCGv Rd = cpu_gpr[a->rd];
>> +    TCGv Rj = cpu_gpr[a->rj];
>> +
>> +    if (a->rd == 0) {
>> +        /* Nop */
>> +        return true;
>> +    }
>> +
>> +    TCGv t0 = tcg_temp_new();
>> +    gen_load_gpr(t0, a->rk);
>> +
>> +    tcg_gen_not_tl(t0, t0);
>> +    tcg_gen_or_tl(Rd, Rj, t0);
> 
> tcg_gen_orc_tl.
> 
OK.
>> +static bool trans_andn(DisasContext *ctx, arg_andn *a)
>> +{
>> +    TCGv Rd = cpu_gpr[a->rd];
>> +    TCGv Rj = cpu_gpr[a->rj];
>> +
>> +    if (a->rd == 0) {
>> +        /* Nop */
>> +        return true;
>> +    }
>> +
>> +    TCGv t0 = tcg_temp_new();
>> +    gen_load_gpr(t0, a->rk);
>> +
>> +    tcg_gen_not_tl(t0, t0);
>> +    tcg_gen_and_tl(Rd, Rj, t0);
> 
> tcg_gen_andc_tl.
> 
OK.

>> +static bool trans_mul_d(DisasContext *ctx, arg_mul_d *a)
>> +{
>> +    TCGv t0, t1;
>> +    TCGv Rd = cpu_gpr[a->rd];
>> +
>> +    if (a->rd == 0) {
>> +        /* Nop */
>> +        return true;
>> +    }
>> +
>> +    t0 = get_gpr(a->rj);
>> +    t1 = get_gpr(a->rk);
>> +
>> +    check_loongarch_64(ctx);
> 
> Architecture checks go first, before you've decided the operation is a nop.
> 
OK.

>> +static bool trans_mulh_d(DisasContext *ctx, arg_mulh_d *a)
>> +{
>> +    TCGv t0, t1, t2;
>> +    TCGv Rd = cpu_gpr[a->rd];
>> +
>> +    if (a->rd == 0) {
>> +        /* Nop */
>> +        return true;
>> +    }
>> +
>> +    t0 = get_gpr(a->rj);
>> +    t1 = get_gpr(a->rk);
>> +    t2 = tcg_temp_new();
>> +
>> +    check_loongarch_64(ctx);
>> +    tcg_gen_muls2_i64(t2, Rd, t0, t1);
> 
> If you actually supported LA32, you'd notice this doesn't compile.  Are you planning to support LA32 in the future?
> 
No. 
>> +static bool trans_lu32i_d(DisasContext *ctx, arg_lu32i_d *a)
>> +{
>> +    TCGv_i64 t0, t1;
>> +    TCGv Rd = cpu_gpr[a->rd];
>> +
>> +    if (a->rd == 0) {
>> +        /* Nop */
>> +        return true;
>> +    }
>> +
>> +    t0 = tcg_temp_new_i64();
>> +    t1 = tcg_temp_new_i64();
>> +
>> +    tcg_gen_movi_tl(t0, a->si20);
>> +    tcg_gen_concat_tl_i64(t1, Rd, t0);
>> +    tcg_gen_mov_tl(Rd, t1);
> 
> Hmm.  Better as
> 
>   tcg_gen_deposit_tl(Rd, Rd, tcg_constant_tl(a->si20), 32, 32);
>
OK.
>> +static bool trans_lu52i_d(DisasContext *ctx, arg_lu52i_d *a)
>> +{
>> +    TCGv t0, t1;
>> +    TCGv Rd = cpu_gpr[a->rd];
>> +
>> +    if (a->rd == 0) {
>> +        /* Nop */
>> +        return true;
>> +    }
>> +
>> +    t0 = tcg_temp_new();
>> +    t1 = tcg_temp_new();
>> +
>> +    gen_load_gpr(t1, a->rj);
>> +
>> +    tcg_gen_movi_tl(t0, a->si12);
>> +    tcg_gen_shli_tl(t0, t0, 52);
>> +    tcg_gen_andi_tl(t1, t1, 0xfffffffffffffU);
>> +    tcg_gen_or_tl(Rd, t0, t1);
> 
> Definitely better as
> 
>   tcg_gen_deposit_tl(Rd, Rd, tcg_constant_tl(a->si12), 52, 12);
> 
OK.
>> +static bool trans_addi_w(DisasContext *ctx, arg_addi_w *a)
>> +{
>> +    TCGv Rd = cpu_gpr[a->rd];
>> +    TCGv Rj = cpu_gpr[a->rj];
>> +    target_ulong uimm = (target_long)(a->si12);
>> +
>> +    if (a->rd == 0) {
>> +        /* Nop */
>> +        return true;
>> +    }
>> +
>> +    if (a->rj != 0) {
>> +        tcg_gen_addi_tl(Rd, Rj, uimm);
>> +        tcg_gen_ext32s_tl(Rd, Rd);
>> +    } else {
>> +        tcg_gen_movi_tl(Rd, uimm);
>> +    }
>> +
>> +    return true;
>> +}
> 
> Again, there should be a common function for all of the two-register-immediate operations.  The callback here is exactly the same as for trans_add_w.
> 
OK.
>> +static bool trans_xori(DisasContext *ctx, arg_xori *a)
>> +{
>> +    TCGv Rd = cpu_gpr[a->rd];
>> +    TCGv Rj = cpu_gpr[a->rj];
>> +
>> +    target_ulong uimm = (uint16_t)(a->ui12);
> 
> You shouldn't need these sorts of casts.
> 
OK. 

Thank you for your kind help.

Thanks
Song Gao
Richard Henderson July 26, 2021, 3:53 p.m. UTC | #6
On 7/26/21 1:56 AM, Song Gao wrote:
> Hi, Richard.
> 
> On 07/23/2021 08:46 AM, Richard Henderson wrote:
>> On 7/20/21 11:53 PM, Song Gao wrote:
>>> +/* Fixed point arithmetic operation instruction translation */
>>> +static bool trans_add_w(DisasContext *ctx, arg_add_w *a)
>>> +{
>>> +    TCGv Rd = cpu_gpr[a->rd];
>>> +    TCGv Rj = cpu_gpr[a->rj];
>>> +    TCGv Rk = cpu_gpr[a->rk];
>>> +
>>> +    if (a->rd == 0) {
>>> +        /* Nop */
>>> +        return true;
>>> +    }
>>> +
>>> +    if (a->rj != 0 && a->rk != 0) {
>>> +        tcg_gen_add_tl(Rd, Rj, Rk);
>>> +        tcg_gen_ext32s_tl(Rd, Rd);
>>> +    } else if (a->rj == 0 && a->rk != 0) {
>>> +        tcg_gen_mov_tl(Rd, Rk);
>>> +    } else if (a->rj != 0 && a->rk == 0) {
>>> +        tcg_gen_mov_tl(Rd, Rj);
>>> +    } else {
>>> +        tcg_gen_movi_tl(Rd, 0);
>>> +    }
>>> +
>>> +    return true;
>>> +}
>>
>> Do not do all of this "if reg(n) zero" testing.
>>
>> Use a common function to perform the gpr lookup, and a small callback function for the operation.  Often, the callback function already exists within include/tcg/tcg-op.h.
>>
>> Please see my riscv cleanup patch set I referenced vs patch 6.
> 
> I am not sure  that 'riscv cleanup' patchs at:
>    
>     https://patchew.org/QEMU/20210709042608.883256-1-richard.henderson@linaro.org
> 
> It seems that  gpr_dst/gpr_src are common function to perform the gpr lookup. is that right?

More than that.  The gen_arith() function, for example, performs all of the bookkeeping 
for a binary operation.

For example,

/*
 * Shared translator for three-register (rd, rj, rk) operations.
 *
 * Obtains the destination through gpr_dst() and both sources through
 * gpr_src() (helpers not shown in this message), then hands the three
 * values to the per-instruction callback @func as (dest, src1, src2).
 * Always returns true.
 */
static bool gen_arith(DisasContext *ctx, arg_fmt_rdrjrk *a,
                       void (*func)(TCGv, TCGv, TCGv))
{
    TCGv dest = gpr_dst(ctx, a->rd);
    TCGv src1 = gpr_src(ctx, a->rj);
    TCGv src2 = gpr_src(ctx, a->rk);

     func(dest, src1, src2);
     return true;
}

/*
 * Define trans_<NAME>(ctx, a) as a one-line wrapper that forwards to
 * FUNC(ctx, a, <remaining macro arguments>) and returns its result.
 */
#define TRANS(NAME, FUNC, ...) \
     static bool trans_##NAME(DisasContext *ctx, arg_##NAME *a) \
     { return FUNC(ctx, a, __VA_ARGS__); }

/*
 * Callback for a 32-bit add: dest = src1 + src2, then the low 32 bits of
 * dest are sign-extended back into dest.
 */
static void gen_add_w(TCGv dest, TCGv src1, TCGv src2)
{
     tcg_gen_add_tl(dest, src1, src2);
     tcg_gen_ext32s_tl(dest, dest);
}

TRANS(add_w, gen_arith, gen_add_w)
TRANS(add_d, gen_arith, tcg_gen_add_tl)


r~
Song Gao July 27, 2021, 1:51 a.m. UTC | #7
Hi, Richard.

On 07/26/2021 11:53 PM, Richard Henderson wrote:
> On 7/26/21 1:56 AM, Song Gao wrote:
>> Hi, Richard.
>>
>> On 07/23/2021 08:46 AM, Richard Henderson wrote:
>>> On 7/20/21 11:53 PM, Song Gao wrote:
>>>> +/* Fixed point arithmetic operation instruction translation */
>>>> +static bool trans_add_w(DisasContext *ctx, arg_add_w *a)
>>>> +{
>>>> +    TCGv Rd = cpu_gpr[a->rd];
>>>> +    TCGv Rj = cpu_gpr[a->rj];
>>>> +    TCGv Rk = cpu_gpr[a->rk];
>>>> +
>>>> +    if (a->rd == 0) {
>>>> +        /* Nop */
>>>> +        return true;
>>>> +    }
>>>> +
>>>> +    if (a->rj != 0 && a->rk != 0) {
>>>> +        tcg_gen_add_tl(Rd, Rj, Rk);
>>>> +        tcg_gen_ext32s_tl(Rd, Rd);
>>>> +    } else if (a->rj == 0 && a->rk != 0) {
>>>> +        tcg_gen_mov_tl(Rd, Rk);
>>>> +    } else if (a->rj != 0 && a->rk == 0) {
>>>> +        tcg_gen_mov_tl(Rd, Rj);
>>>> +    } else {
>>>> +        tcg_gen_movi_tl(Rd, 0);
>>>> +    }
>>>> +
>>>> +    return true;
>>>> +}
>>>
>>> Do not do all of this "if reg(n) zero" testing.
>>>
>>> Use a common function to perform the gpr lookup, and a small callback function for the operation.  Often, the callback function already exists within include/tcg/tcg-op.h.
>>>
>>> Please see my riscv cleanup patch set I referenced vs patch 6.
>>
>> I am not sure  that 'riscv cleanup' patchs at:
>>        https://patchew.org/QEMU/20210709042608.883256-1-richard.henderson@linaro.org
>>
>> It seems that  gpr_dst/gpr_src are common function to perform the gpr lookup. is that right?
> 
> More than that.  The gen_arith() function, for example, performs all of the bookkeeping for a binary operation.
> 
> For example,
> 
> static bool gen_arith(DisasContext *ctx, arg_fmt_rdrjrk *a,
>                       void (*func)(TCGv, TCGv, TCGv))
> {
>    TCGv dest = gpr_dst(ctx, a->rd);
>    TCGv src1 = gpr_src(ctx, a->rj);
>    TCGv src2 = gpr_src(ctx, a->rk);
> 
>     func(dest, src1, src2);
>     return true;
> }
> 
> #define TRANS(NAME, FUNC, ...) \
>     static bool trans_##NAME(DisasContext *ctx, arg_##NAME *a) \
>     { return FUNC(ctx, a, __VA_ARGS__); }
> 
> static void gen_add_w(TCGv dest, TCGv src1, TCGv src2)
> {
>     tcg_gen_add_tl(dest, src1, src2);
>     tcg_gen_ext32s_tl(dest, dest);
> }
> 
> TRANS(add_w, gen_arith, gen_add_w)
> TRANS(add_d, gen_arith, tcg_gen_add_tl)
> 
> 
OK

Again, thank you for your kind help.

Thanks
Song Gao.
diff mbox series

Patch

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
new file mode 100644
index 0000000..1e0b755
--- /dev/null
+++ b/target/loongarch/insns.decode
@@ -0,0 +1,89 @@ 
+#
+# LoongArch instruction decode definitions.
+#
+# Copyright (c) 2021 Loongson Technology Corporation Limited
+#
+# SPDX-License-Identifier: LGPL-2.1+
+#
+
+#
+# Fields
+#
+%rd      0:5
+%rj      5:5
+%rk      10:5
+%sa2     15:2
+%si12    10:s12
+%ui12    10:12
+%si16    10:s16
+%si20    5:s20
+
+#
+# Argument sets
+#
+&fmt_rdrjrk         rd rj rk
+&fmt_rdrjsi12       rd rj si12
+&fmt_rdrjrksa2      rd rj rk sa2
+&fmt_rdrjsi16       rd rj si16
+&fmt_rdrjui12       rd rj ui12
+&fmt_rdsi20         rd si20
+
+#
+# Formats
+#
+@fmt_rdrjrk          .... ........ ..... ..... ..... .....    &fmt_rdrjrk         %rd %rj %rk
+@fmt_rdrjsi12        .... ...... ............ ..... .....     &fmt_rdrjsi12       %rd %rj %si12
+@fmt_rdrjui12        .... ...... ............ ..... .....     &fmt_rdrjui12       %rd %rj %ui12
+@fmt_rdrjrksa2       .... ........ ... .. ..... ..... .....   &fmt_rdrjrksa2      %rd %rj %rk %sa2
+@fmt_rdrjsi16        .... .. ................ ..... .....     &fmt_rdrjsi16       %rd %rj %si16
+@fmt_rdsi20          .... ... .................... .....      &fmt_rdsi20         %rd %si20
+
+#
+# Fixed point arithmetic operation instruction
+#
+add_w            0000 00000001 00000 ..... ..... .....    @fmt_rdrjrk
+add_d            0000 00000001 00001 ..... ..... .....    @fmt_rdrjrk
+sub_w            0000 00000001 00010 ..... ..... .....    @fmt_rdrjrk
+sub_d            0000 00000001 00011 ..... ..... .....    @fmt_rdrjrk
+slt              0000 00000001 00100 ..... ..... .....    @fmt_rdrjrk
+sltu             0000 00000001 00101 ..... ..... .....    @fmt_rdrjrk
+slti             0000 001000 ............ ..... .....     @fmt_rdrjsi12
+sltui            0000 001001 ............ ..... .....     @fmt_rdrjsi12
+nor              0000 00000001 01000 ..... ..... .....    @fmt_rdrjrk
+and              0000 00000001 01001 ..... ..... .....    @fmt_rdrjrk
+or               0000 00000001 01010 ..... ..... .....    @fmt_rdrjrk
+xor              0000 00000001 01011 ..... ..... .....    @fmt_rdrjrk
+orn              0000 00000001 01100 ..... ..... .....    @fmt_rdrjrk
+andn             0000 00000001 01101 ..... ..... .....    @fmt_rdrjrk
+mul_w            0000 00000001 11000 ..... ..... .....    @fmt_rdrjrk
+mulh_w           0000 00000001 11001 ..... ..... .....    @fmt_rdrjrk
+mulh_wu          0000 00000001 11010 ..... ..... .....    @fmt_rdrjrk
+mul_d            0000 00000001 11011 ..... ..... .....    @fmt_rdrjrk
+mulh_d           0000 00000001 11100 ..... ..... .....    @fmt_rdrjrk
+mulh_du          0000 00000001 11101 ..... ..... .....    @fmt_rdrjrk
+mulw_d_w         0000 00000001 11110 ..... ..... .....    @fmt_rdrjrk
+mulw_d_wu        0000 00000001 11111 ..... ..... .....    @fmt_rdrjrk
+div_w            0000 00000010 00000 ..... ..... .....    @fmt_rdrjrk
+mod_w            0000 00000010 00001 ..... ..... .....    @fmt_rdrjrk
+div_wu           0000 00000010 00010 ..... ..... .....    @fmt_rdrjrk
+mod_wu           0000 00000010 00011 ..... ..... .....    @fmt_rdrjrk
+div_d            0000 00000010 00100 ..... ..... .....    @fmt_rdrjrk
+mod_d            0000 00000010 00101 ..... ..... .....    @fmt_rdrjrk
+div_du           0000 00000010 00110 ..... ..... .....    @fmt_rdrjrk
+mod_du           0000 00000010 00111 ..... ..... .....    @fmt_rdrjrk
+alsl_w           0000 00000000 010 .. ..... ..... .....   @fmt_rdrjrksa2
+alsl_wu          0000 00000000 011 .. ..... ..... .....   @fmt_rdrjrksa2
+alsl_d           0000 00000010 110 .. ..... ..... .....   @fmt_rdrjrksa2
+lu12i_w          0001 010 .................... .....      @fmt_rdsi20
+lu32i_d          0001 011 .................... .....      @fmt_rdsi20
+lu52i_d          0000 001100 ............ ..... .....     @fmt_rdrjsi12
+pcaddi           0001 100 .................... .....      @fmt_rdsi20
+pcalau12i        0001 101 .................... .....      @fmt_rdsi20
+pcaddu12i        0001 110 .................... .....      @fmt_rdsi20
+pcaddu18i        0001 111 .................... .....      @fmt_rdsi20
+addi_w           0000 001010 ............ ..... .....     @fmt_rdrjsi12
+addi_d           0000 001011 ............ ..... .....     @fmt_rdrjsi12
+addu16i_d        0001 00 ................ ..... .....     @fmt_rdrjsi16
+andi             0000 001101 ............ ..... .....     @fmt_rdrjui12
+ori              0000 001110 ............ ..... .....     @fmt_rdrjui12
+xori             0000 001111 ............ ..... .....     @fmt_rdrjui12
diff --git a/target/loongarch/trans.inc.c b/target/loongarch/trans.inc.c
new file mode 100644
index 0000000..8faef62
--- /dev/null
+++ b/target/loongarch/trans.inc.c
@@ -0,0 +1,1090 @@ 
+/*
+ * LoongArch translate functions
+ *
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ *
+ * SPDX-License-Identifier: LGPL-2.1+
+ */
+
+/* Fixed point arithmetic operation instruction translation */
+/*
+ * ADD.W rd, rj, rk
+ *
+ * Emits rd = sign_extend_32(rj + rk).  A destination of r0 emits nothing.
+ *
+ * Both sources are fetched through get_gpr(), so r0 reads as the constant
+ * zero and the 32-bit sign extension is applied on every path, including
+ * the case where one source is r0 and the other register holds a value
+ * whose upper 32 bits are not a sign extension of bit 31.
+ *
+ * Always returns true (the instruction was translated).
+ */
+static bool trans_add_w(DisasContext *ctx, arg_add_w *a)
+{
+    TCGv dest, src1, src2;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    dest = cpu_gpr[a->rd];
+    src1 = get_gpr(a->rj);
+    src2 = get_gpr(a->rk);
+
+    tcg_gen_add_tl(dest, src1, src2);
+    tcg_gen_ext32s_tl(dest, dest);
+
+    return true;
+}
+
+/*
+ * ADD.D rd, rj, rk: rd = rj + rk on full-width registers.
+ *
+ * A destination of r0 emits nothing.  A source of r0 is folded at
+ * translation time (x + 0 becomes a move, 0 + 0 becomes the constant 0).
+ * Always returns true.
+ */
+static bool trans_add_d(DisasContext *ctx, arg_add_d *a)
+{
+    const bool rj_is_zero = (a->rj == 0);
+    const bool rk_is_zero = (a->rk == 0);
+    TCGv dest;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+    dest = cpu_gpr[a->rd];
+
+    /* NOTE(review): check_loongarch_64() is defined outside this file. */
+    check_loongarch_64(ctx);
+    if (rj_is_zero && rk_is_zero) {
+        tcg_gen_movi_tl(dest, 0);
+    } else if (rj_is_zero) {
+        tcg_gen_mov_tl(dest, cpu_gpr[a->rk]);
+    } else if (rk_is_zero) {
+        tcg_gen_mov_tl(dest, cpu_gpr[a->rj]);
+    } else {
+        tcg_gen_add_tl(dest, cpu_gpr[a->rj], cpu_gpr[a->rk]);
+    }
+
+    return true;
+}
+
+/*
+ * SUB.W rd, rj, rk
+ *
+ * Emits rd = sign_extend_32(rj - rk).  A destination of r0 emits nothing.
+ *
+ * Both sources are fetched through get_gpr(), so r0 reads as the constant
+ * zero and the 32-bit sign extension is applied on every path, including
+ * "rj - r0" where rj may hold a value whose upper 32 bits are not a sign
+ * extension of bit 31.
+ *
+ * Always returns true (the instruction was translated).
+ */
+static bool trans_sub_w(DisasContext *ctx, arg_sub_w *a)
+{
+    TCGv dest, src1, src2;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    dest = cpu_gpr[a->rd];
+    src1 = get_gpr(a->rj);
+    src2 = get_gpr(a->rk);
+
+    tcg_gen_sub_tl(dest, src1, src2);
+    tcg_gen_ext32s_tl(dest, dest);
+
+    return true;
+}
+
+/*
+ * SUB.D rd, rj, rk: rd = rj - rk on full-width registers.
+ *
+ * A destination of r0 emits nothing.  A source of r0 is folded at
+ * translation time: 0 - rk becomes a negate, rj - 0 a move, 0 - 0 the
+ * constant 0.  Always returns true.
+ */
+static bool trans_sub_d(DisasContext *ctx, arg_sub_d *a)
+{
+    const bool rj_is_zero = (a->rj == 0);
+    const bool rk_is_zero = (a->rk == 0);
+    TCGv dest;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+    dest = cpu_gpr[a->rd];
+
+    /* NOTE(review): check_loongarch_64() is defined outside this file. */
+    check_loongarch_64(ctx);
+    if (rj_is_zero && rk_is_zero) {
+        tcg_gen_movi_tl(dest, 0);
+    } else if (rj_is_zero) {
+        tcg_gen_neg_tl(dest, cpu_gpr[a->rk]);
+    } else if (rk_is_zero) {
+        tcg_gen_mov_tl(dest, cpu_gpr[a->rj]);
+    } else {
+        tcg_gen_sub_tl(dest, cpu_gpr[a->rj], cpu_gpr[a->rk]);
+    }
+
+    return true;
+}
+
+/*
+ * SLT rd, rj, rk: rd = 1 if rj < rk under a signed comparison, else 0.
+ * Sources come from get_gpr(), so r0 reads as zero.  A destination of r0
+ * emits nothing.  Always returns true.
+ */
+static bool trans_slt(DisasContext *ctx, arg_slt *a)
+{
+    if (a->rd != 0) {
+        TCGv lhs = get_gpr(a->rj);
+        TCGv rhs = get_gpr(a->rk);
+
+        tcg_gen_setcond_tl(TCG_COND_LT, cpu_gpr[a->rd], lhs, rhs);
+    }
+
+    return true;
+}
+
+/*
+ * SLTU rd, rj, rk: rd = 1 if rj < rk under an unsigned comparison, else 0.
+ * Sources come from get_gpr(), so r0 reads as zero.  A destination of r0
+ * emits nothing.  Always returns true.
+ */
+static bool trans_sltu(DisasContext *ctx, arg_sltu *a)
+{
+    if (a->rd != 0) {
+        TCGv lhs = get_gpr(a->rj);
+        TCGv rhs = get_gpr(a->rk);
+
+        tcg_gen_setcond_tl(TCG_COND_LTU, cpu_gpr[a->rd], lhs, rhs);
+    }
+
+    return true;
+}
+
+/*
+ * SLTI rd, rj, si12: rd = 1 if rj < si12 under a signed comparison, else 0.
+ * The immediate is widened to target_long before the comparison.  A
+ * destination of r0 emits nothing.  Always returns true.
+ */
+static bool trans_slti(DisasContext *ctx, arg_slti *a)
+{
+    const target_ulong imm = (target_long)(a->si12);
+    TCGv src;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    src = get_gpr(a->rj);
+    tcg_gen_setcondi_tl(TCG_COND_LT, cpu_gpr[a->rd], src, imm);
+
+    return true;
+}
+
+/*
+ * SLTUI rd, rj, si12: rd = 1 if rj < si12 under an unsigned comparison,
+ * else 0.  The immediate is first widened as a signed value to target_long
+ * and then compared as unsigned.  A destination of r0 emits nothing.
+ * Always returns true.
+ */
+static bool trans_sltui(DisasContext *ctx, arg_sltui *a)
+{
+    const target_ulong imm = (target_long)(a->si12);
+    TCGv src;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    src = get_gpr(a->rj);
+    tcg_gen_setcondi_tl(TCG_COND_LTU, cpu_gpr[a->rd], src, imm);
+
+    return true;
+}
+
+/*
+ * NOR rd, rj, rk: rd = ~(rj | rk).
+ *
+ * A destination of r0 emits nothing.  A source of r0 is folded at
+ * translation time: with one zero source the result is the complement of
+ * the other register, with two it is the all-ones constant.
+ * Always returns true.
+ */
+static bool trans_nor(DisasContext *ctx, arg_nor *a)
+{
+    const bool rj_is_zero = (a->rj == 0);
+    const bool rk_is_zero = (a->rk == 0);
+    TCGv dest;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+    dest = cpu_gpr[a->rd];
+
+    if (rj_is_zero && rk_is_zero) {
+        tcg_gen_movi_tl(dest, ~(target_ulong)0);
+    } else if (rj_is_zero) {
+        tcg_gen_not_tl(dest, cpu_gpr[a->rk]);
+    } else if (rk_is_zero) {
+        tcg_gen_not_tl(dest, cpu_gpr[a->rj]);
+    } else {
+        tcg_gen_nor_tl(dest, cpu_gpr[a->rj], cpu_gpr[a->rk]);
+    }
+
+    return true;
+}
+
+/*
+ * AND rd, rj, rk: rd = rj & rk.
+ *
+ * A destination of r0 emits nothing.  If either source is r0 the result is
+ * the constant 0, so no register is read.  Always returns true.
+ */
+static bool trans_and(DisasContext *ctx, arg_and *a)
+{
+    const bool both_nonzero = (a->rj != 0) && (a->rk != 0);
+    TCGv dest;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+    dest = cpu_gpr[a->rd];
+
+    if (likely(both_nonzero)) {
+        tcg_gen_and_tl(dest, cpu_gpr[a->rj], cpu_gpr[a->rk]);
+    } else {
+        tcg_gen_movi_tl(dest, 0);
+    }
+
+    return true;
+}
+
+/*
+ * OR rd, rj, rk: rd = rj | rk.
+ *
+ * A destination of r0 emits nothing.  A source of r0 is folded at
+ * translation time: x | 0 becomes a move, 0 | 0 the constant 0.
+ * Always returns true.
+ */
+static bool trans_or(DisasContext *ctx, arg_or *a)
+{
+    TCGv dest;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+    dest = cpu_gpr[a->rd];
+
+    if (likely(a->rj != 0 && a->rk != 0)) {
+        tcg_gen_or_tl(dest, cpu_gpr[a->rj], cpu_gpr[a->rk]);
+    } else if (a->rk != 0) {
+        /* Only rj is r0. */
+        tcg_gen_mov_tl(dest, cpu_gpr[a->rk]);
+    } else if (a->rj != 0) {
+        /* Only rk is r0. */
+        tcg_gen_mov_tl(dest, cpu_gpr[a->rj]);
+    } else {
+        tcg_gen_movi_tl(dest, 0);
+    }
+
+    return true;
+}
+
+/*
+ * XOR rd, rj, rk: rd = rj ^ rk.
+ *
+ * A destination of r0 emits nothing.  A source of r0 is folded at
+ * translation time: x ^ 0 becomes a move, 0 ^ 0 the constant 0.
+ * Always returns true.
+ */
+static bool trans_xor(DisasContext *ctx, arg_xor *a)
+{
+    TCGv dest;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+    dest = cpu_gpr[a->rd];
+
+    if (likely(a->rj != 0 && a->rk != 0)) {
+        tcg_gen_xor_tl(dest, cpu_gpr[a->rj], cpu_gpr[a->rk]);
+    } else if (a->rk != 0) {
+        /* Only rj is r0. */
+        tcg_gen_mov_tl(dest, cpu_gpr[a->rk]);
+    } else if (a->rj != 0) {
+        /* Only rk is r0. */
+        tcg_gen_mov_tl(dest, cpu_gpr[a->rj]);
+    } else {
+        tcg_gen_movi_tl(dest, 0);
+    }
+
+    return true;
+}
+
+/*
+ * ORN rd, rj, rk: rd = rj | ~rk.
+ *
+ * A destination of r0 emits nothing.  Both sources are fetched through
+ * get_gpr() so that r0 reads as the constant zero rather than through
+ * cpu_gpr[0], matching how the other translators in this file treat r0.
+ * The complement and the OR are emitted as a single "orc" op, so no
+ * scratch temporary is needed.
+ *
+ * Always returns true (the instruction was translated).
+ */
+static bool trans_orn(DisasContext *ctx, arg_orn *a)
+{
+    TCGv src1, src2;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    src1 = get_gpr(a->rj);
+    src2 = get_gpr(a->rk);
+
+    /* orc: dest = src1 | ~src2 */
+    tcg_gen_orc_tl(cpu_gpr[a->rd], src1, src2);
+
+    return true;
+}
+
+/*
+ * ANDN rd, rj, rk: rd = rj & ~rk.
+ *
+ * A destination of r0 emits nothing.  Both sources are fetched through
+ * get_gpr() so that r0 reads as the constant zero rather than through
+ * cpu_gpr[0], matching how the other translators in this file treat r0.
+ * The complement and the AND are emitted as a single "andc" op, so no
+ * scratch temporary is needed.
+ *
+ * Always returns true (the instruction was translated).
+ */
+static bool trans_andn(DisasContext *ctx, arg_andn *a)
+{
+    TCGv src1, src2;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    src1 = get_gpr(a->rj);
+    src2 = get_gpr(a->rk);
+
+    /* andc: dest = src1 & ~src2 */
+    tcg_gen_andc_tl(cpu_gpr[a->rd], src1, src2);
+
+    return true;
+}
+
+/*
+ * MUL.W rd, rj, rk: multiply the low 32 bits of rj and rk and write the
+ * low 32 bits of the product, sign-extended, to rd.  A destination of r0
+ * emits nothing.  Always returns true.
+ */
+static bool trans_mul_w(DisasContext *ctx, arg_mul_w *a)
+{
+    TCGv src1, src2;
+    TCGv_i32 lhs32, rhs32;
+
+    if (a->rd != 0) {
+        src1 = get_gpr(a->rj);
+        src2 = get_gpr(a->rk);
+        lhs32 = tcg_temp_new_i32();
+        rhs32 = tcg_temp_new_i32();
+
+        /* Work on 32-bit truncations of the sources. */
+        tcg_gen_trunc_tl_i32(lhs32, src1);
+        tcg_gen_trunc_tl_i32(rhs32, src2);
+        tcg_gen_mul_i32(lhs32, lhs32, rhs32);
+        tcg_gen_ext_i32_tl(cpu_gpr[a->rd], lhs32);
+
+        tcg_temp_free_i32(lhs32);
+        tcg_temp_free_i32(rhs32);
+    }
+
+    return true;
+}
+
+/*
+ * MULH.W rd, rj, rk: signed 32x32 multiply of the low halves of rj and rk;
+ * the high 32 bits of the 64-bit product are written, sign-extended, to rd.
+ * A destination of r0 emits nothing.  Always returns true.
+ */
+static bool trans_mulh_w(DisasContext *ctx, arg_mulh_w *a)
+{
+    TCGv src1, src2;
+    TCGv_i32 lo32, hi32;
+
+    if (a->rd != 0) {
+        src1 = get_gpr(a->rj);
+        src2 = get_gpr(a->rk);
+        lo32 = tcg_temp_new_i32();
+        hi32 = tcg_temp_new_i32();
+
+        tcg_gen_trunc_tl_i32(lo32, src1);
+        tcg_gen_trunc_tl_i32(hi32, src2);
+        /* muls2 leaves the low word in lo32 and the high word in hi32. */
+        tcg_gen_muls2_i32(lo32, hi32, lo32, hi32);
+        tcg_gen_ext_i32_tl(cpu_gpr[a->rd], hi32);
+
+        tcg_temp_free_i32(lo32);
+        tcg_temp_free_i32(hi32);
+    }
+
+    return true;
+}
+
+/*
+ * MULH.WU rd, rj, rk: unsigned 32x32 multiply of the low halves of rj and
+ * rk; the high 32 bits of the 64-bit product are written to rd through
+ * tcg_gen_ext_i32_tl (i.e. sign-extended from bit 31).
+ * A destination of r0 emits nothing.  Always returns true.
+ */
+static bool trans_mulh_wu(DisasContext *ctx, arg_mulh_wu *a)
+{
+    TCGv src1, src2;
+    TCGv_i32 lo32, hi32;
+
+    if (a->rd != 0) {
+        src1 = get_gpr(a->rj);
+        src2 = get_gpr(a->rk);
+        lo32 = tcg_temp_new_i32();
+        hi32 = tcg_temp_new_i32();
+
+        tcg_gen_trunc_tl_i32(lo32, src1);
+        tcg_gen_trunc_tl_i32(hi32, src2);
+        /* mulu2 leaves the low word in lo32 and the high word in hi32. */
+        tcg_gen_mulu2_i32(lo32, hi32, lo32, hi32);
+        tcg_gen_ext_i32_tl(cpu_gpr[a->rd], hi32);
+
+        tcg_temp_free_i32(lo32);
+        tcg_temp_free_i32(hi32);
+    }
+
+    return true;
+}
+
+/*
+ * MUL.D rd, rj, rk: rd = low 64 bits of rj * rk.  Sources come from
+ * get_gpr(), so r0 reads as zero.  A destination of r0 emits nothing.
+ * Always returns true.
+ */
+static bool trans_mul_d(DisasContext *ctx, arg_mul_d *a)
+{
+    TCGv src1, src2;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    src1 = get_gpr(a->rj);
+    src2 = get_gpr(a->rk);
+
+    /* NOTE(review): check_loongarch_64() is defined outside this file. */
+    check_loongarch_64(ctx);
+    tcg_gen_mul_i64(cpu_gpr[a->rd], src1, src2);
+
+    return true;
+}
+
+/*
+ * MULH.D rd, rj, rk: signed 64x64 multiply; the high 64 bits of the
+ * 128-bit product go to rd and the low half is discarded.
+ * A destination of r0 emits nothing.  Always returns true.
+ */
+static bool trans_mulh_d(DisasContext *ctx, arg_mulh_d *a)
+{
+    TCGv src1, src2, low_unused;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    src1 = get_gpr(a->rj);
+    src2 = get_gpr(a->rk);
+    low_unused = tcg_temp_new();
+
+    /* NOTE(review): check_loongarch_64() is defined outside this file. */
+    check_loongarch_64(ctx);
+    /* muls2 writes (low, high); only the high half is architectural. */
+    tcg_gen_muls2_i64(low_unused, cpu_gpr[a->rd], src1, src2);
+
+    tcg_temp_free(low_unused);
+
+    return true;
+}
+
+/*
+ * MULH.DU rd, rj, rk: unsigned 64x64 multiply; the high 64 bits of the
+ * 128-bit product go to rd and the low half is discarded.
+ * A destination of r0 emits nothing.  Always returns true.
+ */
+static bool trans_mulh_du(DisasContext *ctx, arg_mulh_du *a)
+{
+    TCGv src1, src2, low_unused;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    src1 = get_gpr(a->rj);
+    src2 = get_gpr(a->rk);
+    low_unused = tcg_temp_new();
+
+    /* NOTE(review): check_loongarch_64() is defined outside this file. */
+    check_loongarch_64(ctx);
+    /* mulu2 writes (low, high); only the high half is architectural. */
+    tcg_gen_mulu2_i64(low_unused, cpu_gpr[a->rd], src1, src2);
+
+    tcg_temp_free(low_unused);
+
+    return true;
+}
+
+/*
+ * MULW.D.W rd, rj, rk: sign-extend the low 32 bits of rj and rk to 64 bits
+ * and write their 64-bit product to rd.  A destination of r0 emits nothing.
+ * Always returns true.
+ */
+static bool trans_mulw_d_w(DisasContext *ctx, arg_mulw_d_w *a)
+{
+    TCGv_i64 lhs, rhs, product;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    lhs = tcg_temp_new_i64();
+    rhs = tcg_temp_new_i64();
+    product = tcg_temp_new_i64();
+
+    gen_load_gpr(lhs, a->rj);
+    gen_load_gpr(rhs, a->rk);
+
+    /* Widen both 32-bit operands as signed values before multiplying. */
+    tcg_gen_ext32s_i64(lhs, lhs);
+    tcg_gen_ext32s_i64(rhs, rhs);
+    tcg_gen_mul_i64(product, lhs, rhs);
+    tcg_gen_mov_tl(cpu_gpr[a->rd], product);
+
+    tcg_temp_free_i64(lhs);
+    tcg_temp_free_i64(rhs);
+    tcg_temp_free_i64(product);
+
+    return true;
+}
+
+/*
+ * MULW.D.WU rd, rj, rk: zero-extend the low 32 bits of rj and rk to 64
+ * bits and write their 64-bit product to rd.  A destination of r0 emits
+ * nothing.  Always returns true.
+ */
+static bool trans_mulw_d_wu(DisasContext *ctx, arg_mulw_d_wu *a)
+{
+    TCGv_i64 lhs, rhs, product;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    lhs = tcg_temp_new_i64();
+    rhs = tcg_temp_new_i64();
+    product = tcg_temp_new_i64();
+
+    gen_load_gpr(lhs, a->rj);
+    gen_load_gpr(rhs, a->rk);
+
+    /* Widen both 32-bit operands as unsigned values before multiplying. */
+    tcg_gen_ext32u_i64(lhs, lhs);
+    tcg_gen_ext32u_i64(rhs, rhs);
+    tcg_gen_mul_i64(product, lhs, rhs);
+    tcg_gen_mov_tl(cpu_gpr[a->rd], product);
+
+    tcg_temp_free_i64(lhs);
+    tcg_temp_free_i64(rhs);
+    tcg_temp_free_i64(product);
+
+    return true;
+}
+
+/*
+ * DIV.W rd, rj, rk: signed 32-bit division; the quotient is sign-extended
+ * from 32 bits into rd.  A destination of r0 emits nothing.
+ *
+ * Both operands are copied into temporaries and sign-extended from 32 bits.
+ * If the divisor is 0, or the operands are INT_MIN and -1, the divisor
+ * temporary is replaced by 1 before the division op is emitted (presumably
+ * so the host-side divide cannot fault).  The guest registers rj and rk
+ * are not modified.  Always returns true.
+ */
+static bool trans_div_w(DisasContext *ctx, arg_div_w *a)
+{
+    TCGv dividend, divisor, special, scratch;
+    TCGv dest = cpu_gpr[a->rd];
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    dividend = tcg_temp_new();
+    divisor = tcg_temp_new();
+    special = tcg_temp_new();
+    scratch = tcg_temp_new();
+
+    gen_load_gpr(dividend, a->rj);
+    gen_load_gpr(divisor, a->rk);
+
+    tcg_gen_ext32s_tl(dividend, dividend);
+    tcg_gen_ext32s_tl(divisor, divisor);
+
+    /* special = (dividend == INT_MIN && divisor == -1) || divisor == 0 */
+    tcg_gen_setcondi_tl(TCG_COND_EQ, special, dividend, INT_MIN);
+    tcg_gen_setcondi_tl(TCG_COND_EQ, scratch, divisor, -1);
+    tcg_gen_and_tl(special, special, scratch);
+    tcg_gen_setcondi_tl(TCG_COND_EQ, scratch, divisor, 0);
+    tcg_gen_or_tl(special, special, scratch);
+
+    /* divisor = special ? 1 : divisor  (special is 0 or 1) */
+    tcg_gen_movi_tl(scratch, 0);
+    tcg_gen_movcond_tl(TCG_COND_NE, divisor, special, scratch,
+                       special, divisor);
+
+    tcg_gen_div_tl(dest, dividend, divisor);
+    tcg_gen_ext32s_tl(dest, dest);
+
+    tcg_temp_free(dividend);
+    tcg_temp_free(divisor);
+    tcg_temp_free(special);
+    tcg_temp_free(scratch);
+
+    return true;
+}
+
+/*
+ * MOD.W rd, rj, rk: signed 32-bit remainder; the result is sign-extended
+ * from 32 bits into rd.  A destination of r0 emits nothing.
+ *
+ * Both operands are copied into temporaries and sign-extended from 32 bits.
+ * If the divisor is 0, or the operands are INT_MIN and -1, the divisor
+ * temporary is replaced by 1 before the remainder op is emitted (presumably
+ * so the host-side divide cannot fault).  The guest registers rj and rk
+ * are not modified.  Always returns true.
+ */
+static bool trans_mod_w(DisasContext *ctx, arg_mod_w *a)
+{
+    TCGv dividend, divisor, special, scratch;
+    TCGv dest = cpu_gpr[a->rd];
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    dividend = tcg_temp_new();
+    divisor = tcg_temp_new();
+    special = tcg_temp_new();
+    scratch = tcg_temp_new();
+
+    gen_load_gpr(dividend, a->rj);
+    gen_load_gpr(divisor, a->rk);
+
+    tcg_gen_ext32s_tl(dividend, dividend);
+    tcg_gen_ext32s_tl(divisor, divisor);
+
+    /* special = (dividend == INT_MIN && divisor == -1) || divisor == 0 */
+    tcg_gen_setcondi_tl(TCG_COND_EQ, special, dividend, INT_MIN);
+    tcg_gen_setcondi_tl(TCG_COND_EQ, scratch, divisor, -1);
+    tcg_gen_and_tl(special, special, scratch);
+    tcg_gen_setcondi_tl(TCG_COND_EQ, scratch, divisor, 0);
+    tcg_gen_or_tl(special, special, scratch);
+
+    /* divisor = special ? 1 : divisor  (special is 0 or 1) */
+    tcg_gen_movi_tl(scratch, 0);
+    tcg_gen_movcond_tl(TCG_COND_NE, divisor, special, scratch,
+                       special, divisor);
+
+    tcg_gen_rem_tl(dest, dividend, divisor);
+    tcg_gen_ext32s_tl(dest, dest);
+
+    tcg_temp_free(dividend);
+    tcg_temp_free(divisor);
+    tcg_temp_free(special);
+    tcg_temp_free(scratch);
+
+    return true;
+}
+
+/*
+ * DIV.WU rd, rj, rk: unsigned 32-bit division; the quotient is
+ * sign-extended from 32 bits into rd.  A destination of r0 emits nothing.
+ *
+ * Both operands are copied into temporaries and zero-extended from 32 bits.
+ * A zero divisor is replaced by 1 in the temporary before the division op
+ * is emitted.  The guest registers rj and rk are not modified.
+ * Always returns true.
+ */
+static bool trans_div_wu(DisasContext *ctx, arg_div_wu *a)
+{
+    TCGv dividend, divisor, zero, one;
+    TCGv dest = cpu_gpr[a->rd];
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    dividend = tcg_temp_new();
+    divisor = tcg_temp_new();
+    zero = tcg_const_tl(0);
+    one = tcg_const_tl(1);
+
+    gen_load_gpr(dividend, a->rj);
+    gen_load_gpr(divisor, a->rk);
+
+    tcg_gen_ext32u_tl(dividend, dividend);
+    tcg_gen_ext32u_tl(divisor, divisor);
+    /* divisor = (divisor == 0) ? 1 : divisor */
+    tcg_gen_movcond_tl(TCG_COND_EQ, divisor, divisor, zero, one, divisor);
+    tcg_gen_divu_tl(dest, dividend, divisor);
+    tcg_gen_ext32s_tl(dest, dest);
+
+    tcg_temp_free(dividend);
+    tcg_temp_free(divisor);
+    tcg_temp_free(zero);
+    tcg_temp_free(one);
+
+    return true;
+}
+
+/*
+ * MOD.WU rd, rj, rk: unsigned 32-bit remainder; the result is
+ * sign-extended from 32 bits into rd.  A destination of r0 emits nothing.
+ *
+ * Both operands are copied into temporaries and zero-extended from 32 bits.
+ * A zero divisor is replaced by 1 in the temporary before the remainder op
+ * is emitted.  The guest registers rj and rk are not modified.
+ * Always returns true.
+ */
+static bool trans_mod_wu(DisasContext *ctx, arg_mod_wu *a)
+{
+    TCGv dividend, divisor, zero, one;
+    TCGv dest = cpu_gpr[a->rd];
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    dividend = tcg_temp_new();
+    divisor = tcg_temp_new();
+    zero = tcg_const_tl(0);
+    one = tcg_const_tl(1);
+
+    gen_load_gpr(dividend, a->rj);
+    gen_load_gpr(divisor, a->rk);
+
+    tcg_gen_ext32u_tl(dividend, dividend);
+    tcg_gen_ext32u_tl(divisor, divisor);
+    /* divisor = (divisor == 0) ? 1 : divisor */
+    tcg_gen_movcond_tl(TCG_COND_EQ, divisor, divisor, zero, one, divisor);
+    tcg_gen_remu_tl(dest, dividend, divisor);
+    tcg_gen_ext32s_tl(dest, dest);
+
+    tcg_temp_free(dividend);
+    tcg_temp_free(divisor);
+    tcg_temp_free(zero);
+    tcg_temp_free(one);
+
+    return true;
+}
+
+/*
+ * DIV.D rd, rj, rk: signed 64-bit division, quotient written to rd.
+ * A destination of r0 emits nothing.
+ *
+ * If the divisor is 0, or the operands are the most negative 64-bit value
+ * and -1, the divisor is replaced by 1 before the division op is emitted
+ * (presumably so the host-side divide cannot fault).
+ *
+ * The divisor is copied into a private temporary first.  get_gpr() hands
+ * back either the live guest register or a read-only constant for r0, and
+ * the divisor fix-up must not write to either: doing so would change the
+ * guest-visible value of rk as a side effect of the division.
+ *
+ * Always returns true (the instruction was translated).
+ */
+static bool trans_div_d(DisasContext *ctx, arg_div_d *a)
+{
+    TCGv dividend, divisor, special, scratch;
+    TCGv Rd = cpu_gpr[a->rd];
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    dividend = get_gpr(a->rj);
+    divisor = tcg_temp_new();
+    special = tcg_temp_new();
+    scratch = tcg_temp_new();
+
+    check_loongarch_64(ctx);
+    gen_load_gpr(divisor, a->rk);
+
+    /*
+     * special = (dividend == INT64_MIN && divisor == -1) || divisor == 0
+     * The minimum is built from an unsigned shift; shifting a negative
+     * value left is undefined in ISO C.
+     */
+    tcg_gen_setcondi_tl(TCG_COND_EQ, special, dividend,
+                        (target_long)(1ULL << 63));
+    tcg_gen_setcondi_tl(TCG_COND_EQ, scratch, divisor, -1LL);
+    tcg_gen_and_tl(special, special, scratch);
+    tcg_gen_setcondi_tl(TCG_COND_EQ, scratch, divisor, 0);
+    tcg_gen_or_tl(special, special, scratch);
+
+    /* divisor = special ? 1 : divisor  (special is 0 or 1) */
+    tcg_gen_movi_tl(scratch, 0);
+    tcg_gen_movcond_tl(TCG_COND_NE, divisor, special, scratch,
+                       special, divisor);
+    tcg_gen_div_tl(Rd, dividend, divisor);
+
+    tcg_temp_free(divisor);
+    tcg_temp_free(special);
+    tcg_temp_free(scratch);
+
+    return true;
+}
+
+/*
+ * MOD.D rd, rj, rk: signed 64-bit remainder, written to rd.
+ * A destination of r0 emits nothing.
+ *
+ * If the divisor is 0, or the operands are the most negative 64-bit value
+ * and -1, the divisor is replaced by 1 before the remainder op is emitted
+ * (presumably so the host-side divide cannot fault).
+ *
+ * The divisor is copied into a private temporary first.  get_gpr() hands
+ * back either the live guest register or a read-only constant for r0, and
+ * the divisor fix-up must not write to either: doing so would change the
+ * guest-visible value of rk as a side effect of the operation.
+ *
+ * Always returns true (the instruction was translated).
+ */
+static bool trans_mod_d(DisasContext *ctx, arg_mod_d *a)
+{
+    TCGv dividend, divisor, special, scratch;
+    TCGv Rd = cpu_gpr[a->rd];
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    dividend = get_gpr(a->rj);
+    divisor = tcg_temp_new();
+    special = tcg_temp_new();
+    scratch = tcg_temp_new();
+
+    check_loongarch_64(ctx);
+    gen_load_gpr(divisor, a->rk);
+
+    /*
+     * special = (dividend == INT64_MIN && divisor == -1) || divisor == 0
+     * The minimum is built from an unsigned shift; shifting a negative
+     * value left is undefined in ISO C.
+     */
+    tcg_gen_setcondi_tl(TCG_COND_EQ, special, dividend,
+                        (target_long)(1ULL << 63));
+    tcg_gen_setcondi_tl(TCG_COND_EQ, scratch, divisor, -1LL);
+    tcg_gen_and_tl(special, special, scratch);
+    tcg_gen_setcondi_tl(TCG_COND_EQ, scratch, divisor, 0);
+    tcg_gen_or_tl(special, special, scratch);
+
+    /* divisor = special ? 1 : divisor  (special is 0 or 1) */
+    tcg_gen_movi_tl(scratch, 0);
+    tcg_gen_movcond_tl(TCG_COND_NE, divisor, special, scratch,
+                       special, divisor);
+    tcg_gen_rem_tl(Rd, dividend, divisor);
+
+    tcg_temp_free(divisor);
+    tcg_temp_free(special);
+    tcg_temp_free(scratch);
+
+    return true;
+}
+
+/*
+ * DIV.DU rd, rj, rk: unsigned 64-bit division, quotient written to rd.
+ * A destination of r0 emits nothing.
+ *
+ * A zero divisor is replaced by 1 before the division op is emitted.  The
+ * replacement is done on a private copy of rk: get_gpr() hands back either
+ * the live guest register or a read-only constant for r0, and neither may
+ * be used as the destination of the fix-up.
+ *
+ * Always returns true (the instruction was translated).
+ */
+static bool trans_div_du(DisasContext *ctx, arg_div_du *a)
+{
+    TCGv dividend, divisor, zero, one;
+    TCGv Rd = cpu_gpr[a->rd];
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    dividend = get_gpr(a->rj);
+    divisor = tcg_temp_new();
+    zero = tcg_const_tl(0);
+    one = tcg_const_tl(1);
+
+    check_loongarch_64(ctx);
+    gen_load_gpr(divisor, a->rk);
+    /* divisor = (divisor == 0) ? 1 : divisor */
+    tcg_gen_movcond_tl(TCG_COND_EQ, divisor, divisor, zero, one, divisor);
+    tcg_gen_divu_i64(Rd, dividend, divisor);
+
+    tcg_temp_free(divisor);
+    tcg_temp_free(zero);
+    tcg_temp_free(one);
+
+    return true;
+}
+
+/*
+ * MOD.DU rd, rj, rk: unsigned 64-bit remainder, written to rd.
+ * A destination of r0 emits nothing.
+ *
+ * A zero divisor is replaced by 1 before the remainder op is emitted.  The
+ * replacement is done on a private copy of rk: get_gpr() hands back either
+ * the live guest register or a read-only constant for r0, and neither may
+ * be used as the destination of the fix-up.
+ *
+ * Always returns true (the instruction was translated).
+ */
+static bool trans_mod_du(DisasContext *ctx, arg_mod_du *a)
+{
+    TCGv dividend, divisor, zero, one;
+    TCGv Rd = cpu_gpr[a->rd];
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    dividend = get_gpr(a->rj);
+    divisor = tcg_temp_new();
+    zero = tcg_const_tl(0);
+    one = tcg_const_tl(1);
+
+    check_loongarch_64(ctx);
+    gen_load_gpr(divisor, a->rk);
+    /* divisor = (divisor == 0) ? 1 : divisor */
+    tcg_gen_movcond_tl(TCG_COND_EQ, divisor, divisor, zero, one, divisor);
+    tcg_gen_remu_i64(Rd, dividend, divisor);
+
+    tcg_temp_free(divisor);
+    tcg_temp_free(zero);
+    tcg_temp_free(one);
+
+    return true;
+}
+
+/*
+ * ALSL.W rd, rj, rk, sa2: rd = sign_extend_32((rj << (sa2 + 1)) + rk).
+ * A destination of r0 emits nothing.  Always returns true.
+ */
+static bool trans_alsl_w(DisasContext *ctx, arg_alsl_w *a)
+{
+    TCGv shifted, addend;
+    TCGv dest = cpu_gpr[a->rd];
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    shifted = tcg_temp_new();
+    addend = get_gpr(a->rk);
+
+    gen_load_gpr(shifted, a->rj);
+
+    /* The encoded field holds the shift amount minus one. */
+    tcg_gen_shli_tl(shifted, shifted, a->sa2 + 1);
+    tcg_gen_add_tl(dest, shifted, addend);
+    tcg_gen_ext32s_tl(dest, dest);
+
+    tcg_temp_free(shifted);
+
+    return true;
+}
+
+/*
+ * ALSL.WU rd, rj, rk, sa2: rd = zero_extend_32((rj << (sa2 + 1)) + rk).
+ * A destination of r0 emits nothing.  Always returns true.
+ */
+static bool trans_alsl_wu(DisasContext *ctx, arg_alsl_wu *a)
+{
+    TCGv sum, addend;
+    TCGv dest = cpu_gpr[a->rd];
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    sum = tcg_temp_new();
+    addend = get_gpr(a->rk);
+
+    gen_load_gpr(sum, a->rj);
+
+    /* The encoded field holds the shift amount minus one. */
+    tcg_gen_shli_tl(sum, sum, a->sa2 + 1);
+    tcg_gen_add_tl(sum, sum, addend);
+    tcg_gen_ext32u_tl(dest, sum);
+
+    tcg_temp_free(sum);
+
+    return true;
+}
+
+/*
+ * ALSL.D rd, rj, rk, sa2: rd = (rj << (sa2 + 1)) + rk on full-width
+ * registers.  A destination of r0 emits nothing.  Always returns true.
+ */
+static bool trans_alsl_d(DisasContext *ctx, arg_alsl_d *a)
+{
+    TCGv shifted, addend;
+    TCGv dest = cpu_gpr[a->rd];
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    shifted = tcg_temp_new();
+    addend = get_gpr(a->rk);
+
+    gen_load_gpr(shifted, a->rj);
+
+    /* NOTE(review): check_loongarch_64() is defined outside this file. */
+    check_loongarch_64(ctx);
+    /* The encoded field holds the shift amount minus one. */
+    tcg_gen_shli_tl(shifted, shifted, a->sa2 + 1);
+    tcg_gen_add_tl(dest, shifted, addend);
+
+    tcg_temp_free(shifted);
+
+    return true;
+}
+
+/*
+ * LU12I.W rd, si20: load the immediate shifted left by 12 bits into rd.
+ * A destination of r0 emits nothing.  Always returns true.
+ */
+static bool trans_lu12i_w(DisasContext *ctx, arg_lu12i_w *a)
+{
+    if (a->rd != 0) {
+        tcg_gen_movi_tl(cpu_gpr[a->rd], a->si20 << 12);
+    }
+
+    return true;
+}
+
+/*
+ * LU32I.D rd, si20: build a new rd from the current rd and the immediate
+ * using tcg_gen_concat_tl_i64(), with rd supplying the low part and the
+ * immediate the high part.  A destination of r0 emits nothing.
+ * Always returns true.
+ */
+static bool trans_lu32i_d(DisasContext *ctx, arg_lu32i_d *a)
+{
+    TCGv_i64 upper, merged;
+    TCGv dest = cpu_gpr[a->rd];
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    upper = tcg_temp_new_i64();
+    merged = tcg_temp_new_i64();
+
+    tcg_gen_movi_tl(upper, a->si20);
+    /* merged = { upper, dest }: dest is the low operand. */
+    tcg_gen_concat_tl_i64(merged, dest, upper);
+    tcg_gen_mov_tl(dest, merged);
+
+    tcg_temp_free(upper);
+    tcg_temp_free(merged);
+
+    return true;
+}
+
+/*
+ * LU52I.D rd, rj, si12: rd = (si12 << 52) | (rj & 0xfffffffffffff), i.e.
+ * the immediate supplies bits [63:52] and rj supplies bits [51:0].
+ * A destination of r0 emits nothing.  Always returns true.
+ */
+static bool trans_lu52i_d(DisasContext *ctx, arg_lu52i_d *a)
+{
+    TCGv upper, lower;
+    TCGv dest = cpu_gpr[a->rd];
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    upper = tcg_temp_new();
+    lower = tcg_temp_new();
+
+    gen_load_gpr(lower, a->rj);
+
+    tcg_gen_movi_tl(upper, a->si12);
+    tcg_gen_shli_tl(upper, upper, 52);
+    /* Keep only the low 52 bits of rj. */
+    tcg_gen_andi_tl(lower, lower, 0xfffffffffffffU);
+    tcg_gen_or_tl(dest, upper, lower);
+
+    tcg_temp_free(upper);
+    tcg_temp_free(lower);
+
+    return true;
+}
+
+/*
+ * PCADDI rd, si20: rd = pc + (si20 << 2), where pc is ctx->base.pc_next.
+ * The sum is computed at translation time and loaded as a constant.
+ * A destination of r0 emits nothing.  Always returns true.
+ */
+static bool trans_pcaddi(DisasContext *ctx, arg_pcaddi *a)
+{
+    target_ulong cur_pc, offset;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    cur_pc = ctx->base.pc_next;
+    offset = a->si20 << 2;
+    tcg_gen_movi_tl(cpu_gpr[a->rd], cur_pc + offset);
+
+    return true;
+}
+
+/*
+ * PCALAU12I rd, si20: rd = (pc + (si20 << 12)) with the low 12 bits
+ * cleared, where pc is ctx->base.pc_next.  The value is computed at
+ * translation time and loaded as a constant.
+ * A destination of r0 emits nothing.  Always returns true.
+ */
+static bool trans_pcalau12i(DisasContext *ctx, arg_pcalau12i *a)
+{
+    target_ulong cur_pc, offset, addr;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    cur_pc = ctx->base.pc_next;
+    offset = a->si20 << 12;
+    addr = cur_pc + offset;
+    /* Align the result down to a 4 KiB boundary. */
+    addr &= ~(target_ulong)0xfff;
+    tcg_gen_movi_tl(cpu_gpr[a->rd], addr);
+
+    return true;
+}
+
+/*
+ * PCADDU12I rd, si20: rd = pc + (si20 << 12), where pc is
+ * ctx->base.pc_next.  The sum is computed at translation time and loaded
+ * as a constant.  A destination of r0 emits nothing.  Always returns true.
+ */
+static bool trans_pcaddu12i(DisasContext *ctx, arg_pcaddu12i *a)
+{
+    target_ulong cur_pc, offset;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    cur_pc = ctx->base.pc_next;
+    offset = a->si20 << 12;
+    tcg_gen_movi_tl(cpu_gpr[a->rd], cur_pc + offset);
+
+    return true;
+}
+
+/*
+ * PCADDU18I rd, si20: rd = pc + (si20 << 18), where pc is
+ * ctx->base.pc_next.  The immediate is widened to target_ulong before the
+ * shift so that the shifted value is not truncated to int.  The sum is
+ * computed at translation time and loaded as a constant.
+ * A destination of r0 emits nothing.  Always returns true.
+ */
+static bool trans_pcaddu18i(DisasContext *ctx, arg_pcaddu18i *a)
+{
+    target_ulong cur_pc, offset;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+
+    cur_pc = ctx->base.pc_next;
+    offset = (target_ulong)(a->si20) << 18;
+    tcg_gen_movi_tl(cpu_gpr[a->rd], cur_pc + offset);
+
+    return true;
+}
+
+/*
+ * ADDI.W rd, rj, si12: rd = sign_extend_32(rj + si12).
+ *
+ * A destination of r0 emits nothing.  When rj is r0 the widened immediate
+ * is loaded directly.  Always returns true.
+ */
+static bool trans_addi_w(DisasContext *ctx, arg_addi_w *a)
+{
+    const target_ulong imm = (target_long)(a->si12);
+    TCGv dest;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+    dest = cpu_gpr[a->rd];
+
+    if (a->rj == 0) {
+        tcg_gen_movi_tl(dest, imm);
+    } else {
+        tcg_gen_addi_tl(dest, cpu_gpr[a->rj], imm);
+        tcg_gen_ext32s_tl(dest, dest);
+    }
+
+    return true;
+}
+
+/*
+ * ADDI.D rd, rj, si12: rd = rj + si12 on full-width registers.
+ *
+ * A destination of r0 emits nothing.  When rj is r0 the widened immediate
+ * is loaded directly.  Always returns true.
+ */
+static bool trans_addi_d(DisasContext *ctx, arg_addi_d *a)
+{
+    const target_ulong imm = (target_long)(a->si12);
+    TCGv dest;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+    dest = cpu_gpr[a->rd];
+
+    /* NOTE(review): check_loongarch_64() is defined outside this file. */
+    check_loongarch_64(ctx);
+    if (a->rj == 0) {
+        tcg_gen_movi_tl(dest, imm);
+    } else {
+        tcg_gen_addi_tl(dest, cpu_gpr[a->rj], imm);
+    }
+
+    return true;
+}
+
+/*
+ * ADDU16I.D rd, rj, si16: rd = rj + (si16 << 16).
+ *
+ * A destination of r0 emits nothing.  When rj is r0 the shifted immediate
+ * is loaded directly.  Always returns true.
+ */
+static bool trans_addu16i_d(DisasContext *ctx, arg_addu16i_d *a)
+{
+    TCGv dest;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+    dest = cpu_gpr[a->rd];
+
+    if (a->rj == 0) {
+        tcg_gen_movi_tl(dest, a->si16 << 16);
+    } else {
+        tcg_gen_addi_tl(dest, cpu_gpr[a->rj], a->si16 << 16);
+    }
+    return true;
+}
+
+/*
+ * ANDI rd, rj, ui12: rd = rj & ui12, with the immediate taken as an
+ * unsigned value.
+ *
+ * A destination of r0 emits nothing.  When rj is r0 the result is the
+ * constant 0.  Always returns true.
+ */
+static bool trans_andi(DisasContext *ctx, arg_andi *a)
+{
+    const target_ulong mask = (uint16_t)(a->ui12);
+    TCGv dest;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+    dest = cpu_gpr[a->rd];
+
+    if (likely(a->rj != 0)) {
+        tcg_gen_andi_tl(dest, cpu_gpr[a->rj], mask);
+    } else {
+        tcg_gen_movi_tl(dest, 0);
+    }
+
+    return true;
+}
+
+/*
+ * ORI rd, rj, ui12: rd = rj | ui12, with the immediate taken as an
+ * unsigned value.
+ *
+ * A destination of r0 emits nothing.  When rj is r0 the immediate is
+ * loaded directly.  Always returns true.
+ */
+static bool trans_ori(DisasContext *ctx, arg_ori *a)
+{
+    const target_ulong bits = (uint16_t)(a->ui12);
+    TCGv dest;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+    dest = cpu_gpr[a->rd];
+
+    if (a->rj == 0) {
+        tcg_gen_movi_tl(dest, bits);
+    } else {
+        tcg_gen_ori_tl(dest, cpu_gpr[a->rj], bits);
+    }
+
+    return true;
+}
+
+/*
+ * XORI rd, rj, ui12: rd = rj ^ ui12, with the immediate taken as an
+ * unsigned value.
+ *
+ * A destination of r0 emits nothing.  When rj is r0 the immediate is
+ * loaded directly.  Always returns true.
+ */
+static bool trans_xori(DisasContext *ctx, arg_xori *a)
+{
+    const target_ulong bits = (uint16_t)(a->ui12);
+    TCGv dest;
+
+    if (a->rd == 0) {
+        /* Nop */
+        return true;
+    }
+    dest = cpu_gpr[a->rd];
+
+    if (likely(a->rj != 0)) {
+        tcg_gen_xori_tl(dest, cpu_gpr[a->rj], bits);
+    } else {
+        tcg_gen_movi_tl(dest, bits);
+    }
+
+    return true;
+}
diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
index 531f7e1..b60bdc2 100644
--- a/target/loongarch/translate.c
+++ b/target/loongarch/translate.c
@@ -57,6 +57,15 @@  void gen_load_gpr(TCGv t, int reg)
     }
 }
 
+/* Source operand for GPR regno; register 0 yields the constant zero. */
+TCGv get_gpr(int regno)
+{
+    if (regno != 0) {
+        return cpu_gpr[regno];
+    }
+    return tcg_constant_tl(0);
+}
+
 static inline void gen_save_pc(target_ulong pc)
 {
     tcg_gen_movi_tl(cpu_PC, pc);
@@ -287,6 +296,9 @@  static bool loongarch_tr_breakpoint_check(DisasContextBase *dcbase,
     return true;
 }
 
+#include "decode-insns.c.inc"
+#include "trans.inc.c"
+
 static void loongarch_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
 {
     CPULoongArchState *env = cs->env_ptr;
diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h
index 333c3bf..ef4d4e7 100644
--- a/target/loongarch/translate.h
+++ b/target/loongarch/translate.h
@@ -35,6 +35,7 @@  void check_fpu_enabled(DisasContext *ctx);
 
 void gen_base_offset_addr(TCGv addr, int base, int offset);
 void gen_load_gpr(TCGv t, int reg);
+TCGv get_gpr(int regno);
 void gen_load_fpr32(TCGv_i32 t, int reg);
 void gen_load_fpr64(TCGv_i64 t, int reg);
 void gen_store_fpr32(TCGv_i32 t, int reg);