diff mbox series

[18/26] target/mips: Extract Loongson SIMD translation routines

Message ID 20201120210844.2625602-19-f4bug@amsat.org (mailing list archive)
State New, archived
Headers show
Series target/mips: Explode 60% of the 32K-lines translate.c | expand

Commit Message

Philippe Mathieu-Daudé Nov. 20, 2020, 9:08 p.m. UTC
LoongSIMD (formerly LoongMMI in Loongson 2E/F) is the
128-bit SIMD extension from the LoongISA.

Extract 600 lines of translation routines to
'vendor-loong-simd_translate.c.inc'.

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
---
 target/mips/translate.c                       | 598 +----------------
 target/mips/vendor-loong-simd_translate.c.inc | 611 ++++++++++++++++++
 2 files changed, 612 insertions(+), 597 deletions(-)
 create mode 100644 target/mips/vendor-loong-simd_translate.c.inc

Comments

Philippe Mathieu-Daudé Nov. 21, 2020, 2:30 p.m. UTC | #1
On 11/20/20 10:08 PM, Philippe Mathieu-Daudé wrote:
> LoongSIMD (formerly LoongMMI in Loongson 2E/F) is the
> 128-bit SIMD extension from the LoongISA.
> 
> Extract 600 lines of translation routines to
> 'vendor-loong-simd_translate.c.inc'.
> 
> Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
> ---
>  target/mips/translate.c                       | 598 +----------------
>  target/mips/vendor-loong-simd_translate.c.inc | 611 ++++++++++++++++++
>  2 files changed, 612 insertions(+), 597 deletions(-)
>  create mode 100644 target/mips/vendor-loong-simd_translate.c.inc
> 
> diff --git a/target/mips/translate.c b/target/mips/translate.c
> index b01a16e9da4..105a104bb0c 100644
> --- a/target/mips/translate.c
> +++ b/target/mips/translate.c
> @@ -330,19 +330,6 @@ enum {
>      OPC_MUL      = 0x02 | OPC_SPECIAL2,
>      OPC_MSUB     = 0x04 | OPC_SPECIAL2,
>      OPC_MSUBU    = 0x05 | OPC_SPECIAL2,
> -    /* Loongson 2F */
> -    OPC_MULT_G_2F   = 0x10 | OPC_SPECIAL2,
> -    OPC_DMULT_G_2F  = 0x11 | OPC_SPECIAL2,
> -    OPC_MULTU_G_2F  = 0x12 | OPC_SPECIAL2,
> -    OPC_DMULTU_G_2F = 0x13 | OPC_SPECIAL2,
> -    OPC_DIV_G_2F    = 0x14 | OPC_SPECIAL2,
> -    OPC_DDIV_G_2F   = 0x15 | OPC_SPECIAL2,
> -    OPC_DIVU_G_2F   = 0x16 | OPC_SPECIAL2,
> -    OPC_DDIVU_G_2F  = 0x17 | OPC_SPECIAL2,
> -    OPC_MOD_G_2F    = 0x1c | OPC_SPECIAL2,
> -    OPC_DMOD_G_2F   = 0x1d | OPC_SPECIAL2,
> -    OPC_MODU_G_2F   = 0x1e | OPC_SPECIAL2,
> -    OPC_DMODU_G_2F  = 0x1f | OPC_SPECIAL2,
>      /* Misc */
>      OPC_CLZ      = 0x20 | OPC_SPECIAL2,
>      OPC_CLO      = 0x21 | OPC_SPECIAL2,
> @@ -4569,590 +4556,6 @@ static void gen_cl(DisasContext *ctx, uint32_t opc,
>      }
>  }
>  
> -/* Godson integer instructions */
> -static void gen_loongson_integer(DisasContext *ctx, uint32_t opc,
> -                                 int rd, int rs, int rt)
> -{

This isn't part of the LoongSIMD ...

[...]
> -}
> -
> -/* Loongson multimedia instructions */
> -static void gen_loongson_multimedia(DisasContext *ctx, int rd, int rs, int rt)
> -{
[...]

... but this part is.
Richard Henderson Nov. 21, 2020, 8:09 p.m. UTC | #2
On 11/21/20 6:30 AM, Philippe Mathieu-Daudé wrote:
>> -/* Godson integer instructions */
>> -static void gen_loongson_integer(DisasContext *ctx, uint32_t opc,
>> -                                 int rd, int rs, int rt)
>> -{
> 
> This isn't part of the LoongSIMD ...
> 
> [...]
>> -}
>> -
>> -/* Loongson multimedia instructions */
>> -static void gen_loongson_multimedia(DisasContext *ctx, int rd, int rs, int rt)
>> -{
> [...]
> 
> ... but this part is.
> 

With gen_loongson_integer not moved,
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

r~
diff mbox series

Patch

diff --git a/target/mips/translate.c b/target/mips/translate.c
index b01a16e9da4..105a104bb0c 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -330,19 +330,6 @@  enum {
     OPC_MUL      = 0x02 | OPC_SPECIAL2,
     OPC_MSUB     = 0x04 | OPC_SPECIAL2,
     OPC_MSUBU    = 0x05 | OPC_SPECIAL2,
-    /* Loongson 2F */
-    OPC_MULT_G_2F   = 0x10 | OPC_SPECIAL2,
-    OPC_DMULT_G_2F  = 0x11 | OPC_SPECIAL2,
-    OPC_MULTU_G_2F  = 0x12 | OPC_SPECIAL2,
-    OPC_DMULTU_G_2F = 0x13 | OPC_SPECIAL2,
-    OPC_DIV_G_2F    = 0x14 | OPC_SPECIAL2,
-    OPC_DDIV_G_2F   = 0x15 | OPC_SPECIAL2,
-    OPC_DIVU_G_2F   = 0x16 | OPC_SPECIAL2,
-    OPC_DDIVU_G_2F  = 0x17 | OPC_SPECIAL2,
-    OPC_MOD_G_2F    = 0x1c | OPC_SPECIAL2,
-    OPC_DMOD_G_2F   = 0x1d | OPC_SPECIAL2,
-    OPC_MODU_G_2F   = 0x1e | OPC_SPECIAL2,
-    OPC_DMODU_G_2F  = 0x1f | OPC_SPECIAL2,
     /* Misc */
     OPC_CLZ      = 0x20 | OPC_SPECIAL2,
     OPC_CLO      = 0x21 | OPC_SPECIAL2,
@@ -4569,590 +4556,6 @@  static void gen_cl(DisasContext *ctx, uint32_t opc,
     }
 }
 
-/* Godson integer instructions */
-static void gen_loongson_integer(DisasContext *ctx, uint32_t opc,
-                                 int rd, int rs, int rt)
-{
-    TCGv t0, t1;
-
-    if (rd == 0) {
-        /* Treat as NOP. */
-        return;
-    }
-
-    switch (opc) {
-    case OPC_MULT_G_2E:
-    case OPC_MULT_G_2F:
-    case OPC_MULTU_G_2E:
-    case OPC_MULTU_G_2F:
-#if defined(TARGET_MIPS64)
-    case OPC_DMULT_G_2E:
-    case OPC_DMULT_G_2F:
-    case OPC_DMULTU_G_2E:
-    case OPC_DMULTU_G_2F:
-#endif
-        t0 = tcg_temp_new();
-        t1 = tcg_temp_new();
-        break;
-    default:
-        t0 = tcg_temp_local_new();
-        t1 = tcg_temp_local_new();
-        break;
-    }
-
-    gen_load_gpr(t0, rs);
-    gen_load_gpr(t1, rt);
-
-    switch (opc) {
-    case OPC_MULT_G_2E:
-    case OPC_MULT_G_2F:
-        tcg_gen_mul_tl(cpu_gpr[rd], t0, t1);
-        tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
-        break;
-    case OPC_MULTU_G_2E:
-    case OPC_MULTU_G_2F:
-        tcg_gen_ext32u_tl(t0, t0);
-        tcg_gen_ext32u_tl(t1, t1);
-        tcg_gen_mul_tl(cpu_gpr[rd], t0, t1);
-        tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
-        break;
-    case OPC_DIV_G_2E:
-    case OPC_DIV_G_2F:
-        {
-            TCGLabel *l1 = gen_new_label();
-            TCGLabel *l2 = gen_new_label();
-            TCGLabel *l3 = gen_new_label();
-            tcg_gen_ext32s_tl(t0, t0);
-            tcg_gen_ext32s_tl(t1, t1);
-            tcg_gen_brcondi_tl(TCG_COND_NE, t1, 0, l1);
-            tcg_gen_movi_tl(cpu_gpr[rd], 0);
-            tcg_gen_br(l3);
-            gen_set_label(l1);
-            tcg_gen_brcondi_tl(TCG_COND_NE, t0, INT_MIN, l2);
-            tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1, l2);
-            tcg_gen_mov_tl(cpu_gpr[rd], t0);
-            tcg_gen_br(l3);
-            gen_set_label(l2);
-            tcg_gen_div_tl(cpu_gpr[rd], t0, t1);
-            tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
-            gen_set_label(l3);
-        }
-        break;
-    case OPC_DIVU_G_2E:
-    case OPC_DIVU_G_2F:
-        {
-            TCGLabel *l1 = gen_new_label();
-            TCGLabel *l2 = gen_new_label();
-            tcg_gen_ext32u_tl(t0, t0);
-            tcg_gen_ext32u_tl(t1, t1);
-            tcg_gen_brcondi_tl(TCG_COND_NE, t1, 0, l1);
-            tcg_gen_movi_tl(cpu_gpr[rd], 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_divu_tl(cpu_gpr[rd], t0, t1);
-            tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
-            gen_set_label(l2);
-        }
-        break;
-    case OPC_MOD_G_2E:
-    case OPC_MOD_G_2F:
-        {
-            TCGLabel *l1 = gen_new_label();
-            TCGLabel *l2 = gen_new_label();
-            TCGLabel *l3 = gen_new_label();
-            tcg_gen_ext32u_tl(t0, t0);
-            tcg_gen_ext32u_tl(t1, t1);
-            tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
-            tcg_gen_brcondi_tl(TCG_COND_NE, t0, INT_MIN, l2);
-            tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1, l2);
-            gen_set_label(l1);
-            tcg_gen_movi_tl(cpu_gpr[rd], 0);
-            tcg_gen_br(l3);
-            gen_set_label(l2);
-            tcg_gen_rem_tl(cpu_gpr[rd], t0, t1);
-            tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
-            gen_set_label(l3);
-        }
-        break;
-    case OPC_MODU_G_2E:
-    case OPC_MODU_G_2F:
-        {
-            TCGLabel *l1 = gen_new_label();
-            TCGLabel *l2 = gen_new_label();
-            tcg_gen_ext32u_tl(t0, t0);
-            tcg_gen_ext32u_tl(t1, t1);
-            tcg_gen_brcondi_tl(TCG_COND_NE, t1, 0, l1);
-            tcg_gen_movi_tl(cpu_gpr[rd], 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_remu_tl(cpu_gpr[rd], t0, t1);
-            tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
-            gen_set_label(l2);
-        }
-        break;
-#if defined(TARGET_MIPS64)
-    case OPC_DMULT_G_2E:
-    case OPC_DMULT_G_2F:
-        tcg_gen_mul_tl(cpu_gpr[rd], t0, t1);
-        break;
-    case OPC_DMULTU_G_2E:
-    case OPC_DMULTU_G_2F:
-        tcg_gen_mul_tl(cpu_gpr[rd], t0, t1);
-        break;
-    case OPC_DDIV_G_2E:
-    case OPC_DDIV_G_2F:
-        {
-            TCGLabel *l1 = gen_new_label();
-            TCGLabel *l2 = gen_new_label();
-            TCGLabel *l3 = gen_new_label();
-            tcg_gen_brcondi_tl(TCG_COND_NE, t1, 0, l1);
-            tcg_gen_movi_tl(cpu_gpr[rd], 0);
-            tcg_gen_br(l3);
-            gen_set_label(l1);
-            tcg_gen_brcondi_tl(TCG_COND_NE, t0, -1LL << 63, l2);
-            tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1LL, l2);
-            tcg_gen_mov_tl(cpu_gpr[rd], t0);
-            tcg_gen_br(l3);
-            gen_set_label(l2);
-            tcg_gen_div_tl(cpu_gpr[rd], t0, t1);
-            gen_set_label(l3);
-        }
-        break;
-    case OPC_DDIVU_G_2E:
-    case OPC_DDIVU_G_2F:
-        {
-            TCGLabel *l1 = gen_new_label();
-            TCGLabel *l2 = gen_new_label();
-            tcg_gen_brcondi_tl(TCG_COND_NE, t1, 0, l1);
-            tcg_gen_movi_tl(cpu_gpr[rd], 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_divu_tl(cpu_gpr[rd], t0, t1);
-            gen_set_label(l2);
-        }
-        break;
-    case OPC_DMOD_G_2E:
-    case OPC_DMOD_G_2F:
-        {
-            TCGLabel *l1 = gen_new_label();
-            TCGLabel *l2 = gen_new_label();
-            TCGLabel *l3 = gen_new_label();
-            tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
-            tcg_gen_brcondi_tl(TCG_COND_NE, t0, -1LL << 63, l2);
-            tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1LL, l2);
-            gen_set_label(l1);
-            tcg_gen_movi_tl(cpu_gpr[rd], 0);
-            tcg_gen_br(l3);
-            gen_set_label(l2);
-            tcg_gen_rem_tl(cpu_gpr[rd], t0, t1);
-            gen_set_label(l3);
-        }
-        break;
-    case OPC_DMODU_G_2E:
-    case OPC_DMODU_G_2F:
-        {
-            TCGLabel *l1 = gen_new_label();
-            TCGLabel *l2 = gen_new_label();
-            tcg_gen_brcondi_tl(TCG_COND_NE, t1, 0, l1);
-            tcg_gen_movi_tl(cpu_gpr[rd], 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_remu_tl(cpu_gpr[rd], t0, t1);
-            gen_set_label(l2);
-        }
-        break;
-#endif
-    }
-
-    tcg_temp_free(t0);
-    tcg_temp_free(t1);
-}
-
-/* Loongson multimedia instructions */
-static void gen_loongson_multimedia(DisasContext *ctx, int rd, int rs, int rt)
-{
-    uint32_t opc, shift_max;
-    TCGv_i64 t0, t1;
-    TCGCond cond;
-
-    opc = MASK_LMMI(ctx->opcode);
-    switch (opc) {
-    case OPC_ADD_CP2:
-    case OPC_SUB_CP2:
-    case OPC_DADD_CP2:
-    case OPC_DSUB_CP2:
-        t0 = tcg_temp_local_new_i64();
-        t1 = tcg_temp_local_new_i64();
-        break;
-    default:
-        t0 = tcg_temp_new_i64();
-        t1 = tcg_temp_new_i64();
-        break;
-    }
-
-    check_cp1_enabled(ctx);
-    gen_load_fpr64(ctx, t0, rs);
-    gen_load_fpr64(ctx, t1, rt);
-
-    switch (opc) {
-    case OPC_PADDSH:
-        gen_helper_paddsh(t0, t0, t1);
-        break;
-    case OPC_PADDUSH:
-        gen_helper_paddush(t0, t0, t1);
-        break;
-    case OPC_PADDH:
-        gen_helper_paddh(t0, t0, t1);
-        break;
-    case OPC_PADDW:
-        gen_helper_paddw(t0, t0, t1);
-        break;
-    case OPC_PADDSB:
-        gen_helper_paddsb(t0, t0, t1);
-        break;
-    case OPC_PADDUSB:
-        gen_helper_paddusb(t0, t0, t1);
-        break;
-    case OPC_PADDB:
-        gen_helper_paddb(t0, t0, t1);
-        break;
-
-    case OPC_PSUBSH:
-        gen_helper_psubsh(t0, t0, t1);
-        break;
-    case OPC_PSUBUSH:
-        gen_helper_psubush(t0, t0, t1);
-        break;
-    case OPC_PSUBH:
-        gen_helper_psubh(t0, t0, t1);
-        break;
-    case OPC_PSUBW:
-        gen_helper_psubw(t0, t0, t1);
-        break;
-    case OPC_PSUBSB:
-        gen_helper_psubsb(t0, t0, t1);
-        break;
-    case OPC_PSUBUSB:
-        gen_helper_psubusb(t0, t0, t1);
-        break;
-    case OPC_PSUBB:
-        gen_helper_psubb(t0, t0, t1);
-        break;
-
-    case OPC_PSHUFH:
-        gen_helper_pshufh(t0, t0, t1);
-        break;
-    case OPC_PACKSSWH:
-        gen_helper_packsswh(t0, t0, t1);
-        break;
-    case OPC_PACKSSHB:
-        gen_helper_packsshb(t0, t0, t1);
-        break;
-    case OPC_PACKUSHB:
-        gen_helper_packushb(t0, t0, t1);
-        break;
-
-    case OPC_PUNPCKLHW:
-        gen_helper_punpcklhw(t0, t0, t1);
-        break;
-    case OPC_PUNPCKHHW:
-        gen_helper_punpckhhw(t0, t0, t1);
-        break;
-    case OPC_PUNPCKLBH:
-        gen_helper_punpcklbh(t0, t0, t1);
-        break;
-    case OPC_PUNPCKHBH:
-        gen_helper_punpckhbh(t0, t0, t1);
-        break;
-    case OPC_PUNPCKLWD:
-        gen_helper_punpcklwd(t0, t0, t1);
-        break;
-    case OPC_PUNPCKHWD:
-        gen_helper_punpckhwd(t0, t0, t1);
-        break;
-
-    case OPC_PAVGH:
-        gen_helper_pavgh(t0, t0, t1);
-        break;
-    case OPC_PAVGB:
-        gen_helper_pavgb(t0, t0, t1);
-        break;
-    case OPC_PMAXSH:
-        gen_helper_pmaxsh(t0, t0, t1);
-        break;
-    case OPC_PMINSH:
-        gen_helper_pminsh(t0, t0, t1);
-        break;
-    case OPC_PMAXUB:
-        gen_helper_pmaxub(t0, t0, t1);
-        break;
-    case OPC_PMINUB:
-        gen_helper_pminub(t0, t0, t1);
-        break;
-
-    case OPC_PCMPEQW:
-        gen_helper_pcmpeqw(t0, t0, t1);
-        break;
-    case OPC_PCMPGTW:
-        gen_helper_pcmpgtw(t0, t0, t1);
-        break;
-    case OPC_PCMPEQH:
-        gen_helper_pcmpeqh(t0, t0, t1);
-        break;
-    case OPC_PCMPGTH:
-        gen_helper_pcmpgth(t0, t0, t1);
-        break;
-    case OPC_PCMPEQB:
-        gen_helper_pcmpeqb(t0, t0, t1);
-        break;
-    case OPC_PCMPGTB:
-        gen_helper_pcmpgtb(t0, t0, t1);
-        break;
-
-    case OPC_PSLLW:
-        gen_helper_psllw(t0, t0, t1);
-        break;
-    case OPC_PSLLH:
-        gen_helper_psllh(t0, t0, t1);
-        break;
-    case OPC_PSRLW:
-        gen_helper_psrlw(t0, t0, t1);
-        break;
-    case OPC_PSRLH:
-        gen_helper_psrlh(t0, t0, t1);
-        break;
-    case OPC_PSRAW:
-        gen_helper_psraw(t0, t0, t1);
-        break;
-    case OPC_PSRAH:
-        gen_helper_psrah(t0, t0, t1);
-        break;
-
-    case OPC_PMULLH:
-        gen_helper_pmullh(t0, t0, t1);
-        break;
-    case OPC_PMULHH:
-        gen_helper_pmulhh(t0, t0, t1);
-        break;
-    case OPC_PMULHUH:
-        gen_helper_pmulhuh(t0, t0, t1);
-        break;
-    case OPC_PMADDHW:
-        gen_helper_pmaddhw(t0, t0, t1);
-        break;
-
-    case OPC_PASUBUB:
-        gen_helper_pasubub(t0, t0, t1);
-        break;
-    case OPC_BIADD:
-        gen_helper_biadd(t0, t0);
-        break;
-    case OPC_PMOVMSKB:
-        gen_helper_pmovmskb(t0, t0);
-        break;
-
-    case OPC_PADDD:
-        tcg_gen_add_i64(t0, t0, t1);
-        break;
-    case OPC_PSUBD:
-        tcg_gen_sub_i64(t0, t0, t1);
-        break;
-    case OPC_XOR_CP2:
-        tcg_gen_xor_i64(t0, t0, t1);
-        break;
-    case OPC_NOR_CP2:
-        tcg_gen_nor_i64(t0, t0, t1);
-        break;
-    case OPC_AND_CP2:
-        tcg_gen_and_i64(t0, t0, t1);
-        break;
-    case OPC_OR_CP2:
-        tcg_gen_or_i64(t0, t0, t1);
-        break;
-
-    case OPC_PANDN:
-        tcg_gen_andc_i64(t0, t1, t0);
-        break;
-
-    case OPC_PINSRH_0:
-        tcg_gen_deposit_i64(t0, t0, t1, 0, 16);
-        break;
-    case OPC_PINSRH_1:
-        tcg_gen_deposit_i64(t0, t0, t1, 16, 16);
-        break;
-    case OPC_PINSRH_2:
-        tcg_gen_deposit_i64(t0, t0, t1, 32, 16);
-        break;
-    case OPC_PINSRH_3:
-        tcg_gen_deposit_i64(t0, t0, t1, 48, 16);
-        break;
-
-    case OPC_PEXTRH:
-        tcg_gen_andi_i64(t1, t1, 3);
-        tcg_gen_shli_i64(t1, t1, 4);
-        tcg_gen_shr_i64(t0, t0, t1);
-        tcg_gen_ext16u_i64(t0, t0);
-        break;
-
-    case OPC_ADDU_CP2:
-        tcg_gen_add_i64(t0, t0, t1);
-        tcg_gen_ext32s_i64(t0, t0);
-        break;
-    case OPC_SUBU_CP2:
-        tcg_gen_sub_i64(t0, t0, t1);
-        tcg_gen_ext32s_i64(t0, t0);
-        break;
-
-    case OPC_SLL_CP2:
-        shift_max = 32;
-        goto do_shift;
-    case OPC_SRL_CP2:
-        shift_max = 32;
-        goto do_shift;
-    case OPC_SRA_CP2:
-        shift_max = 32;
-        goto do_shift;
-    case OPC_DSLL_CP2:
-        shift_max = 64;
-        goto do_shift;
-    case OPC_DSRL_CP2:
-        shift_max = 64;
-        goto do_shift;
-    case OPC_DSRA_CP2:
-        shift_max = 64;
-        goto do_shift;
-    do_shift:
-        /* Make sure shift count isn't TCG undefined behaviour.  */
-        tcg_gen_andi_i64(t1, t1, shift_max - 1);
-
-        switch (opc) {
-        case OPC_SLL_CP2:
-        case OPC_DSLL_CP2:
-            tcg_gen_shl_i64(t0, t0, t1);
-            break;
-        case OPC_SRA_CP2:
-        case OPC_DSRA_CP2:
-            /*
-             * Since SRA is UndefinedResult without sign-extended inputs,
-             * we can treat SRA and DSRA the same.
-             */
-            tcg_gen_sar_i64(t0, t0, t1);
-            break;
-        case OPC_SRL_CP2:
-            /* We want to shift in zeros for SRL; zero-extend first.  */
-            tcg_gen_ext32u_i64(t0, t0);
-            /* FALLTHRU */
-        case OPC_DSRL_CP2:
-            tcg_gen_shr_i64(t0, t0, t1);
-            break;
-        }
-
-        if (shift_max == 32) {
-            tcg_gen_ext32s_i64(t0, t0);
-        }
-
-        /* Shifts larger than MAX produce zero.  */
-        tcg_gen_setcondi_i64(TCG_COND_LTU, t1, t1, shift_max);
-        tcg_gen_neg_i64(t1, t1);
-        tcg_gen_and_i64(t0, t0, t1);
-        break;
-
-    case OPC_ADD_CP2:
-    case OPC_DADD_CP2:
-        {
-            TCGv_i64 t2 = tcg_temp_new_i64();
-            TCGLabel *lab = gen_new_label();
-
-            tcg_gen_mov_i64(t2, t0);
-            tcg_gen_add_i64(t0, t1, t2);
-            if (opc == OPC_ADD_CP2) {
-                tcg_gen_ext32s_i64(t0, t0);
-            }
-            tcg_gen_xor_i64(t1, t1, t2);
-            tcg_gen_xor_i64(t2, t2, t0);
-            tcg_gen_andc_i64(t1, t2, t1);
-            tcg_temp_free_i64(t2);
-            tcg_gen_brcondi_i64(TCG_COND_GE, t1, 0, lab);
-            generate_exception(ctx, EXCP_OVERFLOW);
-            gen_set_label(lab);
-            break;
-        }
-
-    case OPC_SUB_CP2:
-    case OPC_DSUB_CP2:
-        {
-            TCGv_i64 t2 = tcg_temp_new_i64();
-            TCGLabel *lab = gen_new_label();
-
-            tcg_gen_mov_i64(t2, t0);
-            tcg_gen_sub_i64(t0, t1, t2);
-            if (opc == OPC_SUB_CP2) {
-                tcg_gen_ext32s_i64(t0, t0);
-            }
-            tcg_gen_xor_i64(t1, t1, t2);
-            tcg_gen_xor_i64(t2, t2, t0);
-            tcg_gen_and_i64(t1, t1, t2);
-            tcg_temp_free_i64(t2);
-            tcg_gen_brcondi_i64(TCG_COND_GE, t1, 0, lab);
-            generate_exception(ctx, EXCP_OVERFLOW);
-            gen_set_label(lab);
-            break;
-        }
-
-    case OPC_PMULUW:
-        tcg_gen_ext32u_i64(t0, t0);
-        tcg_gen_ext32u_i64(t1, t1);
-        tcg_gen_mul_i64(t0, t0, t1);
-        break;
-
-    case OPC_SEQU_CP2:
-    case OPC_SEQ_CP2:
-        cond = TCG_COND_EQ;
-        goto do_cc_cond;
-        break;
-    case OPC_SLTU_CP2:
-        cond = TCG_COND_LTU;
-        goto do_cc_cond;
-        break;
-    case OPC_SLT_CP2:
-        cond = TCG_COND_LT;
-        goto do_cc_cond;
-        break;
-    case OPC_SLEU_CP2:
-        cond = TCG_COND_LEU;
-        goto do_cc_cond;
-        break;
-    case OPC_SLE_CP2:
-        cond = TCG_COND_LE;
-    do_cc_cond:
-        {
-            int cc = (ctx->opcode >> 8) & 0x7;
-            TCGv_i64 t64 = tcg_temp_new_i64();
-            TCGv_i32 t32 = tcg_temp_new_i32();
-
-            tcg_gen_setcond_i64(cond, t64, t0, t1);
-            tcg_gen_extrl_i64_i32(t32, t64);
-            tcg_gen_deposit_i32(fpu_fcr31, fpu_fcr31, t32,
-                                get_fp_bit(cc), 1);
-
-            tcg_temp_free_i32(t32);
-            tcg_temp_free_i64(t64);
-        }
-        goto no_rd;
-        break;
-    default:
-        MIPS_INVAL("loongson_cp2");
-        generate_exception_end(ctx, EXCP_RI);
-        return;
-    }
-
-    gen_store_fpr64(ctx, t0, rd);
-
-no_rd:
-    tcg_temp_free_i64(t0);
-    tcg_temp_free_i64(t1);
-}
-
 static void gen_loongson_lswc2(DisasContext *ctx, int rt,
                                int rs, int rd)
 {
@@ -12939,6 +12342,7 @@  out:
 #include "mod-mips-dsp_translate.c.inc"
 
 #include "vendor-vr54xx_translate.c.inc"
+#include "vendor-loong-simd_translate.c.inc"
 
 static void decode_opc_special_r6(CPUMIPSState *env, DisasContext *ctx)
 {
diff --git a/target/mips/vendor-loong-simd_translate.c.inc b/target/mips/vendor-loong-simd_translate.c.inc
new file mode 100644
index 00000000000..6ea7e9a9b82
--- /dev/null
+++ b/target/mips/vendor-loong-simd_translate.c.inc
@@ -0,0 +1,611 @@ 
+/*
+ *  Loongson SIMD translation routines.
+ *
+ *  Copyright (c) 2004-2005 Jocelyn Mayer
+ *  Copyright (c) 2006 Marius Groeger (FPU operations)
+ *  Copyright (c) 2006 Thiemo Seufer (MIPS32R2 support)
+ *  Copyright (c) 2009 CodeSourcery (MIPS16 and microMIPS support)
+ *  Copyright (c) 2011 Richard Henderson <rth@twiddle.net>
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+enum {
+    /* Loongson 2F */
+    OPC_MULT_G_2F   = 0x10 | OPC_SPECIAL2,
+    OPC_DMULT_G_2F  = 0x11 | OPC_SPECIAL2,
+    OPC_MULTU_G_2F  = 0x12 | OPC_SPECIAL2,
+    OPC_DMULTU_G_2F = 0x13 | OPC_SPECIAL2,
+    OPC_DIV_G_2F    = 0x14 | OPC_SPECIAL2,
+    OPC_DDIV_G_2F   = 0x15 | OPC_SPECIAL2,
+    OPC_DIVU_G_2F   = 0x16 | OPC_SPECIAL2,
+    OPC_DDIVU_G_2F  = 0x17 | OPC_SPECIAL2,
+    OPC_MOD_G_2F    = 0x1c | OPC_SPECIAL2,
+    OPC_DMOD_G_2F   = 0x1d | OPC_SPECIAL2,
+    OPC_MODU_G_2F   = 0x1e | OPC_SPECIAL2,
+    OPC_DMODU_G_2F  = 0x1f | OPC_SPECIAL2,
+};
+
+/* Godson integer instructions */
+static void gen_loongson_integer(DisasContext *ctx, uint32_t opc,
+                                 int rd, int rs, int rt)
+{
+    TCGv t0, t1;
+
+    if (rd == 0) {
+        /* Treat as NOP. */
+        return;
+    }
+
+    switch (opc) {
+    case OPC_MULT_G_2E:
+    case OPC_MULT_G_2F:
+    case OPC_MULTU_G_2E:
+    case OPC_MULTU_G_2F:
+#if defined(TARGET_MIPS64)
+    case OPC_DMULT_G_2E:
+    case OPC_DMULT_G_2F:
+    case OPC_DMULTU_G_2E:
+    case OPC_DMULTU_G_2F:
+#endif
+        t0 = tcg_temp_new();
+        t1 = tcg_temp_new();
+        break;
+    default:
+        t0 = tcg_temp_local_new();
+        t1 = tcg_temp_local_new();
+        break;
+    }
+
+    gen_load_gpr(t0, rs);
+    gen_load_gpr(t1, rt);
+
+    switch (opc) {
+    case OPC_MULT_G_2E:
+    case OPC_MULT_G_2F:
+        tcg_gen_mul_tl(cpu_gpr[rd], t0, t1);
+        tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
+        break;
+    case OPC_MULTU_G_2E:
+    case OPC_MULTU_G_2F:
+        tcg_gen_ext32u_tl(t0, t0);
+        tcg_gen_ext32u_tl(t1, t1);
+        tcg_gen_mul_tl(cpu_gpr[rd], t0, t1);
+        tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
+        break;
+    case OPC_DIV_G_2E:
+    case OPC_DIV_G_2F:
+        {
+            TCGLabel *l1 = gen_new_label();
+            TCGLabel *l2 = gen_new_label();
+            TCGLabel *l3 = gen_new_label();
+            tcg_gen_ext32s_tl(t0, t0);
+            tcg_gen_ext32s_tl(t1, t1);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, 0, l1);
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);
+            tcg_gen_br(l3);
+            gen_set_label(l1);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t0, INT_MIN, l2);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1, l2);
+            tcg_gen_mov_tl(cpu_gpr[rd], t0);
+            tcg_gen_br(l3);
+            gen_set_label(l2);
+            tcg_gen_div_tl(cpu_gpr[rd], t0, t1);
+            tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
+            gen_set_label(l3);
+        }
+        break;
+    case OPC_DIVU_G_2E:
+    case OPC_DIVU_G_2F:
+        {
+            TCGLabel *l1 = gen_new_label();
+            TCGLabel *l2 = gen_new_label();
+            tcg_gen_ext32u_tl(t0, t0);
+            tcg_gen_ext32u_tl(t1, t1);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, 0, l1);
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);
+            tcg_gen_br(l2);
+            gen_set_label(l1);
+            tcg_gen_divu_tl(cpu_gpr[rd], t0, t1);
+            tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
+            gen_set_label(l2);
+        }
+        break;
+    case OPC_MOD_G_2E:
+    case OPC_MOD_G_2F:
+        {
+            TCGLabel *l1 = gen_new_label();
+            TCGLabel *l2 = gen_new_label();
+            TCGLabel *l3 = gen_new_label();
+            tcg_gen_ext32u_tl(t0, t0);
+            tcg_gen_ext32u_tl(t1, t1);
+            tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t0, INT_MIN, l2);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1, l2);
+            gen_set_label(l1);
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);
+            tcg_gen_br(l3);
+            gen_set_label(l2);
+            tcg_gen_rem_tl(cpu_gpr[rd], t0, t1);
+            tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
+            gen_set_label(l3);
+        }
+        break;
+    case OPC_MODU_G_2E:
+    case OPC_MODU_G_2F:
+        {
+            TCGLabel *l1 = gen_new_label();
+            TCGLabel *l2 = gen_new_label();
+            tcg_gen_ext32u_tl(t0, t0);
+            tcg_gen_ext32u_tl(t1, t1);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, 0, l1);
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);
+            tcg_gen_br(l2);
+            gen_set_label(l1);
+            tcg_gen_remu_tl(cpu_gpr[rd], t0, t1);
+            tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
+            gen_set_label(l2);
+        }
+        break;
+#if defined(TARGET_MIPS64)
+    case OPC_DMULT_G_2E:
+    case OPC_DMULT_G_2F:
+        tcg_gen_mul_tl(cpu_gpr[rd], t0, t1);
+        break;
+    case OPC_DMULTU_G_2E:
+    case OPC_DMULTU_G_2F:
+        tcg_gen_mul_tl(cpu_gpr[rd], t0, t1);
+        break;
+    case OPC_DDIV_G_2E:
+    case OPC_DDIV_G_2F:
+        {
+            TCGLabel *l1 = gen_new_label();
+            TCGLabel *l2 = gen_new_label();
+            TCGLabel *l3 = gen_new_label();
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, 0, l1);
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);
+            tcg_gen_br(l3);
+            gen_set_label(l1);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t0, -1LL << 63, l2);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1LL, l2);
+            tcg_gen_mov_tl(cpu_gpr[rd], t0);
+            tcg_gen_br(l3);
+            gen_set_label(l2);
+            tcg_gen_div_tl(cpu_gpr[rd], t0, t1);
+            gen_set_label(l3);
+        }
+        break;
+    case OPC_DDIVU_G_2E:
+    case OPC_DDIVU_G_2F:
+        {
+            TCGLabel *l1 = gen_new_label();
+            TCGLabel *l2 = gen_new_label();
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, 0, l1);
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);
+            tcg_gen_br(l2);
+            gen_set_label(l1);
+            tcg_gen_divu_tl(cpu_gpr[rd], t0, t1);
+            gen_set_label(l2);
+        }
+        break;
+    case OPC_DMOD_G_2E:
+    case OPC_DMOD_G_2F:
+        {
+            TCGLabel *l1 = gen_new_label();
+            TCGLabel *l2 = gen_new_label();
+            TCGLabel *l3 = gen_new_label();
+            tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t0, -1LL << 63, l2);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1LL, l2);
+            gen_set_label(l1);
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);
+            tcg_gen_br(l3);
+            gen_set_label(l2);
+            tcg_gen_rem_tl(cpu_gpr[rd], t0, t1);
+            gen_set_label(l3);
+        }
+        break;
+    case OPC_DMODU_G_2E:
+    case OPC_DMODU_G_2F:
+        {
+            TCGLabel *l1 = gen_new_label();
+            TCGLabel *l2 = gen_new_label();
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, 0, l1);
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);
+            tcg_gen_br(l2);
+            gen_set_label(l1);
+            tcg_gen_remu_tl(cpu_gpr[rd], t0, t1);
+            gen_set_label(l2);
+        }
+        break;
+#endif
+    }
+
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+}
+
+/* Loongson multimedia instructions */
+static void gen_loongson_multimedia(DisasContext *ctx, int rd, int rs, int rt)
+{
+    uint32_t opc, shift_max;
+    TCGv_i64 t0, t1;
+    TCGCond cond;
+
+    opc = MASK_LMMI(ctx->opcode);
+    switch (opc) {
+    case OPC_ADD_CP2:
+    case OPC_SUB_CP2:
+    case OPC_DADD_CP2:
+    case OPC_DSUB_CP2:
+        t0 = tcg_temp_local_new_i64();
+        t1 = tcg_temp_local_new_i64();
+        break;
+    default:
+        t0 = tcg_temp_new_i64();
+        t1 = tcg_temp_new_i64();
+        break;
+    }
+
+    check_cp1_enabled(ctx);
+    gen_load_fpr64(ctx, t0, rs);
+    gen_load_fpr64(ctx, t1, rt);
+
+    switch (opc) {
+    case OPC_PADDSH:
+        gen_helper_paddsh(t0, t0, t1);
+        break;
+    case OPC_PADDUSH:
+        gen_helper_paddush(t0, t0, t1);
+        break;
+    case OPC_PADDH:
+        gen_helper_paddh(t0, t0, t1);
+        break;
+    case OPC_PADDW:
+        gen_helper_paddw(t0, t0, t1);
+        break;
+    case OPC_PADDSB:
+        gen_helper_paddsb(t0, t0, t1);
+        break;
+    case OPC_PADDUSB:
+        gen_helper_paddusb(t0, t0, t1);
+        break;
+    case OPC_PADDB:
+        gen_helper_paddb(t0, t0, t1);
+        break;
+
+    case OPC_PSUBSH:
+        gen_helper_psubsh(t0, t0, t1);
+        break;
+    case OPC_PSUBUSH:
+        gen_helper_psubush(t0, t0, t1);
+        break;
+    case OPC_PSUBH:
+        gen_helper_psubh(t0, t0, t1);
+        break;
+    case OPC_PSUBW:
+        gen_helper_psubw(t0, t0, t1);
+        break;
+    case OPC_PSUBSB:
+        gen_helper_psubsb(t0, t0, t1);
+        break;
+    case OPC_PSUBUSB:
+        gen_helper_psubusb(t0, t0, t1);
+        break;
+    case OPC_PSUBB:
+        gen_helper_psubb(t0, t0, t1);
+        break;
+
+    case OPC_PSHUFH:
+        gen_helper_pshufh(t0, t0, t1);
+        break;
+    case OPC_PACKSSWH:
+        gen_helper_packsswh(t0, t0, t1);
+        break;
+    case OPC_PACKSSHB:
+        gen_helper_packsshb(t0, t0, t1);
+        break;
+    case OPC_PACKUSHB:
+        gen_helper_packushb(t0, t0, t1);
+        break;
+
+    case OPC_PUNPCKLHW:
+        gen_helper_punpcklhw(t0, t0, t1);
+        break;
+    case OPC_PUNPCKHHW:
+        gen_helper_punpckhhw(t0, t0, t1);
+        break;
+    case OPC_PUNPCKLBH:
+        gen_helper_punpcklbh(t0, t0, t1);
+        break;
+    case OPC_PUNPCKHBH:
+        gen_helper_punpckhbh(t0, t0, t1);
+        break;
+    case OPC_PUNPCKLWD:
+        gen_helper_punpcklwd(t0, t0, t1);
+        break;
+    case OPC_PUNPCKHWD:
+        gen_helper_punpckhwd(t0, t0, t1);
+        break;
+
+    case OPC_PAVGH:
+        gen_helper_pavgh(t0, t0, t1);
+        break;
+    case OPC_PAVGB:
+        gen_helper_pavgb(t0, t0, t1);
+        break;
+    case OPC_PMAXSH:
+        gen_helper_pmaxsh(t0, t0, t1);
+        break;
+    case OPC_PMINSH:
+        gen_helper_pminsh(t0, t0, t1);
+        break;
+    case OPC_PMAXUB:
+        gen_helper_pmaxub(t0, t0, t1);
+        break;
+    case OPC_PMINUB:
+        gen_helper_pminub(t0, t0, t1);
+        break;
+
+    case OPC_PCMPEQW:
+        gen_helper_pcmpeqw(t0, t0, t1);
+        break;
+    case OPC_PCMPGTW:
+        gen_helper_pcmpgtw(t0, t0, t1);
+        break;
+    case OPC_PCMPEQH:
+        gen_helper_pcmpeqh(t0, t0, t1);
+        break;
+    case OPC_PCMPGTH:
+        gen_helper_pcmpgth(t0, t0, t1);
+        break;
+    case OPC_PCMPEQB:
+        gen_helper_pcmpeqb(t0, t0, t1);
+        break;
+    case OPC_PCMPGTB:
+        gen_helper_pcmpgtb(t0, t0, t1);
+        break;
+
+    case OPC_PSLLW:
+        gen_helper_psllw(t0, t0, t1);
+        break;
+    case OPC_PSLLH:
+        gen_helper_psllh(t0, t0, t1);
+        break;
+    case OPC_PSRLW:
+        gen_helper_psrlw(t0, t0, t1);
+        break;
+    case OPC_PSRLH:
+        gen_helper_psrlh(t0, t0, t1);
+        break;
+    case OPC_PSRAW:
+        gen_helper_psraw(t0, t0, t1);
+        break;
+    case OPC_PSRAH:
+        gen_helper_psrah(t0, t0, t1);
+        break;
+
+    case OPC_PMULLH:
+        gen_helper_pmullh(t0, t0, t1);
+        break;
+    case OPC_PMULHH:
+        gen_helper_pmulhh(t0, t0, t1);
+        break;
+    case OPC_PMULHUH:
+        gen_helper_pmulhuh(t0, t0, t1);
+        break;
+    case OPC_PMADDHW:
+        gen_helper_pmaddhw(t0, t0, t1);
+        break;
+
+    case OPC_PASUBUB:
+        gen_helper_pasubub(t0, t0, t1);
+        break;
+    case OPC_BIADD:
+        gen_helper_biadd(t0, t0);
+        break;
+    case OPC_PMOVMSKB:
+        gen_helper_pmovmskb(t0, t0);
+        break;
+
+    case OPC_PADDD:
+        tcg_gen_add_i64(t0, t0, t1);
+        break;
+    case OPC_PSUBD:
+        tcg_gen_sub_i64(t0, t0, t1);
+        break;
+    case OPC_XOR_CP2:
+        tcg_gen_xor_i64(t0, t0, t1);
+        break;
+    case OPC_NOR_CP2:
+        tcg_gen_nor_i64(t0, t0, t1);
+        break;
+    case OPC_AND_CP2:
+        tcg_gen_and_i64(t0, t0, t1);
+        break;
+    case OPC_OR_CP2:
+        tcg_gen_or_i64(t0, t0, t1);
+        break;
+
+    case OPC_PANDN:
+        tcg_gen_andc_i64(t0, t1, t0);
+        break;
+
+    case OPC_PINSRH_0:
+        tcg_gen_deposit_i64(t0, t0, t1, 0, 16);
+        break;
+    case OPC_PINSRH_1:
+        tcg_gen_deposit_i64(t0, t0, t1, 16, 16);
+        break;
+    case OPC_PINSRH_2:
+        tcg_gen_deposit_i64(t0, t0, t1, 32, 16);
+        break;
+    case OPC_PINSRH_3:
+        tcg_gen_deposit_i64(t0, t0, t1, 48, 16);
+        break;
+
+    case OPC_PEXTRH:
+        tcg_gen_andi_i64(t1, t1, 3);
+        tcg_gen_shli_i64(t1, t1, 4);
+        tcg_gen_shr_i64(t0, t0, t1);
+        tcg_gen_ext16u_i64(t0, t0);
+        break;
+
+    case OPC_ADDU_CP2:
+        tcg_gen_add_i64(t0, t0, t1);
+        tcg_gen_ext32s_i64(t0, t0);
+        break;
+    case OPC_SUBU_CP2:
+        tcg_gen_sub_i64(t0, t0, t1);
+        tcg_gen_ext32s_i64(t0, t0);
+        break;
+
+    case OPC_SLL_CP2:
+        shift_max = 32;
+        goto do_shift;
+    case OPC_SRL_CP2:
+        shift_max = 32;
+        goto do_shift;
+    case OPC_SRA_CP2:
+        shift_max = 32;
+        goto do_shift;
+    case OPC_DSLL_CP2:
+        shift_max = 64;
+        goto do_shift;
+    case OPC_DSRL_CP2:
+        shift_max = 64;
+        goto do_shift;
+    case OPC_DSRA_CP2:
+        shift_max = 64;
+        goto do_shift;
+    do_shift:
+        /* Make sure shift count isn't TCG undefined behaviour.  */
+        tcg_gen_andi_i64(t1, t1, shift_max - 1);
+
+        switch (opc) {
+        case OPC_SLL_CP2:
+        case OPC_DSLL_CP2:
+            tcg_gen_shl_i64(t0, t0, t1);
+            break;
+        case OPC_SRA_CP2:
+        case OPC_DSRA_CP2:
+            /*
+             * Since SRA is UndefinedResult without sign-extended inputs,
+             * we can treat SRA and DSRA the same.
+             */
+            tcg_gen_sar_i64(t0, t0, t1);
+            break;
+        case OPC_SRL_CP2:
+            /* We want to shift in zeros for SRL; zero-extend first.  */
+            tcg_gen_ext32u_i64(t0, t0);
+            /* FALLTHRU */
+        case OPC_DSRL_CP2:
+            tcg_gen_shr_i64(t0, t0, t1);
+            break;
+        }
+
+        if (shift_max == 32) {
+            tcg_gen_ext32s_i64(t0, t0);
+        }
+
+        /* Shifts larger than MAX produce zero.  */
+        tcg_gen_setcondi_i64(TCG_COND_LTU, t1, t1, shift_max);
+        tcg_gen_neg_i64(t1, t1);
+        tcg_gen_and_i64(t0, t0, t1);
+        break;
+
+    case OPC_ADD_CP2:
+    case OPC_DADD_CP2:
+        {
+            TCGv_i64 t2 = tcg_temp_new_i64();
+            TCGLabel *lab = gen_new_label();
+
+            tcg_gen_mov_i64(t2, t0);
+            tcg_gen_add_i64(t0, t1, t2);
+            if (opc == OPC_ADD_CP2) {
+                tcg_gen_ext32s_i64(t0, t0);
+            }
+            tcg_gen_xor_i64(t1, t1, t2);
+            tcg_gen_xor_i64(t2, t2, t0);
+            tcg_gen_andc_i64(t1, t2, t1);
+            tcg_temp_free_i64(t2);
+            tcg_gen_brcondi_i64(TCG_COND_GE, t1, 0, lab);
+            generate_exception(ctx, EXCP_OVERFLOW);
+            gen_set_label(lab);
+            break;
+        }
+
+    case OPC_SUB_CP2:
+    case OPC_DSUB_CP2:
+        {
+            TCGv_i64 t2 = tcg_temp_new_i64();
+            TCGLabel *lab = gen_new_label();
+
+            tcg_gen_mov_i64(t2, t0);
+            tcg_gen_sub_i64(t0, t1, t2);
+            if (opc == OPC_SUB_CP2) {
+                tcg_gen_ext32s_i64(t0, t0);
+            }
+            tcg_gen_xor_i64(t1, t1, t2);
+            tcg_gen_xor_i64(t2, t2, t0);
+            tcg_gen_and_i64(t1, t1, t2);
+            tcg_temp_free_i64(t2);
+            tcg_gen_brcondi_i64(TCG_COND_GE, t1, 0, lab);
+            generate_exception(ctx, EXCP_OVERFLOW);
+            gen_set_label(lab);
+            break;
+        }
+
+    case OPC_PMULUW:
+        tcg_gen_ext32u_i64(t0, t0);
+        tcg_gen_ext32u_i64(t1, t1);
+        tcg_gen_mul_i64(t0, t0, t1);
+        break;
+
+    case OPC_SEQU_CP2:
+    case OPC_SEQ_CP2:
+        cond = TCG_COND_EQ;
+        goto do_cc_cond;
+        break;
+    case OPC_SLTU_CP2:
+        cond = TCG_COND_LTU;
+        goto do_cc_cond;
+        break;
+    case OPC_SLT_CP2:
+        cond = TCG_COND_LT;
+        goto do_cc_cond;
+        break;
+    case OPC_SLEU_CP2:
+        cond = TCG_COND_LEU;
+        goto do_cc_cond;
+        break;
+    case OPC_SLE_CP2:
+        cond = TCG_COND_LE;
+    do_cc_cond:
+        {
+            int cc = (ctx->opcode >> 8) & 0x7;
+            TCGv_i64 t64 = tcg_temp_new_i64();
+            TCGv_i32 t32 = tcg_temp_new_i32();
+
+            tcg_gen_setcond_i64(cond, t64, t0, t1);
+            tcg_gen_extrl_i64_i32(t32, t64);
+            tcg_gen_deposit_i32(fpu_fcr31, fpu_fcr31, t32,
+                                get_fp_bit(cc), 1);
+
+            tcg_temp_free_i32(t32);
+            tcg_temp_free_i64(t64);
+        }
+        goto no_rd;
+        break;
+    default:
+        MIPS_INVAL("loongson_cp2");
+        generate_exception_end(ctx, EXCP_RI);
+        return;
+    }
+
+    gen_store_fpr64(ctx, t0, rd);
+
+no_rd:
+    tcg_temp_free_i64(t0);
+    tcg_temp_free_i64(t1);
+}