diff mbox series

[15/33] target/ppc: Implement Vector Insert from GPR using GPR index insns

Message ID 20211021194547.672988-16-matheus.ferst@eldorado.org.br (mailing list archive)
State New, archived
Headers show
Series PowerISA v3.1 instruction batch | expand

Commit Message

Matheus K. Ferst Oct. 21, 2021, 7:45 p.m. UTC
From: Matheus Ferst <matheus.ferst@eldorado.org.br>

Implements the following PowerISA v3.1 instructions:
vinsblx: Vector Insert Byte from GPR using GPR-specified Left-Index
vinshlx: Vector Insert Halfword from GPR using GPR-specified Left-Index
vinswlx: Vector Insert Word from GPR using GPR-specified Left-Index
vinsdlx: Vector Insert Doubleword from GPR using GPR-specified
         Left-Index
vinsbrx: Vector Insert Byte from GPR using GPR-specified Right-Index
vinshrx: Vector Insert Halfword from GPR using GPR-specified
         Right-Index
vinswrx: Vector Insert Word from GPR using GPR-specified Right-Index
vinsdrx: Vector Insert Doubleword from GPR using GPR-specified
         Right-Index

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
---
 target/ppc/helper.h                 |  4 +++
 target/ppc/insn32.decode            |  9 +++++
 target/ppc/int_helper.c             | 30 ++++++++++++++++
 target/ppc/translate/vmx-impl.c.inc | 55 +++++++++++++++++++++++++++++
 4 files changed, 98 insertions(+)

Comments

Richard Henderson Oct. 23, 2021, 4:37 a.m. UTC | #1
On 10/21/21 12:45 PM, matheus.ferst@eldorado.org.br wrote:
> +#if defined(HOST_WORDS_BIGENDIAN)
> +#define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->VsrB(IDX))
> +#else
> +#define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->VsrB(IDX) - (SIZE) + 1)
> +#endif

This is a bit confusing.  There's host adjustment in VsrB *and* here.

> +#define VINSX(SUFFIX, TYPE) \
> +void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t,       \
> +                                         uint64_t val, target_ulong index)     \
> +{                                                                              \
> +    const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE);                       \
> +    target_long idx = index;                                                   \
> +                                                                               \
> +    if (idx < 0 || idx > maxidx) {                                             \
> +        char c = idx < 0 ? 'R' : 'L';                                          \
> +        idx =  idx < 0 ? sizeof(TYPE) - idx : idx;                             \
> +        qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for VINS" #SUFFIX "%cX"  \
> +                      " at 0x" TARGET_FMT_lx ", RA = " TARGET_FMT_ld " > %d\n",\
> +                      c, env->nip, idx, maxidx);                               \

nip is not up to date.

> +    } else {                                                                   \
> +        *(TYPE *)ELEM_ADDR(t, idx, sizeof(TYPE)) = (TYPE)val;                  \

This is a potentially misaligned store.  You need st*_he_p. 
 
 



r~
Richard Henderson Oct. 23, 2021, 4:40 a.m. UTC | #2
On 10/21/21 12:45 PM, matheus.ferst@eldorado.org.br wrote:
> +#if defined(TARGET_PPC64)
> +    return do_vinsx(ctx, a->vrt, size, right, cpu_gpr[a->vra], cpu_gpr[a->vrb],
> +                    gen_helper);
> +#else
> +    bool ok;
> +    TCGv_i64 val;
> +
> +    val = tcg_temp_new_i64();
> +    tcg_gen_extu_tl_i64(val, cpu_gpr[a->vrb]);
> +
> +    ok = do_vinsx(ctx, a->vrt, size, right, cpu_gpr[a->vra], val, gen_helper);
> +
> +    tcg_temp_free_i64(val);
> +    return ok;
> +#endif

Oh, and what's all this?

Either this isn't defined for !PPC64 at all, or you should just use target_ulong and not 
do any ifdeffing at all.

r~
BALATON Zoltan Oct. 23, 2021, 10:12 a.m. UTC | #3
On Fri, 22 Oct 2021, Richard Henderson wrote:
> On 10/21/21 12:45 PM, matheus.ferst@eldorado.org.br wrote:
>> +#if defined(TARGET_PPC64)
>> +    return do_vinsx(ctx, a->vrt, size, right, cpu_gpr[a->vra], 
>> cpu_gpr[a->vrb],
>> +                    gen_helper);
>> +#else
>> +    bool ok;
>> +    TCGv_i64 val;
>> +
>> +    val = tcg_temp_new_i64();
>> +    tcg_gen_extu_tl_i64(val, cpu_gpr[a->vrb]);
>> +
>> +    ok = do_vinsx(ctx, a->vrt, size, right, cpu_gpr[a->vra], val, 
>> gen_helper);
>> +
>> +    tcg_temp_free_i64(val);
>> +    return ok;
>> +#endif
>
> Oh, and what's all this?
>
> Either this isn't defined for !PPC64 at all, or you should just use 
> target_ulong and not do any ifdeffing at all.

You mentioning target_ulong reminded me a question I had. Currently we 
have qemu-system-ppc and qemu-system-ppc64 but the latter includes all 
machines of the former too so you could run for example sam460ex with 
qemu-system-ppc64 (except mac99 which behaves differently based on which 
executable it's part of but you could use mac99 -cpu G4 with 
qemu-system-ppc64 as well). But isn't target_ulong different in these 
executables and could that cause a problem with this? I've always used 
qemu-system-ppc for 32 bit machines but we could have one just executable 
for all machines if there's no need for both.

Regards,
BALATON Zoltan
Richard Henderson Oct. 23, 2021, 6:36 p.m. UTC | #4
On 10/23/21 3:12 AM, BALATON Zoltan wrote:
> You mentioning target_ulong reminded me a question I had. Currently we have 
> qemu-system-ppc and qemu-system-ppc64 but the latter includes all machines of the former 
> too so you could run for example sam460ex with qemu-system-ppc64 (except mac99 which 
> behaves differently based on which executable it's part of but you could use mac99 -cpu G4 
> with qemu-system-ppc64 as well). But isn't target_ulong different in these executables and 
> could that cause a problem with this? I've always used qemu-system-ppc for 32 bit machines 
> but we could have one just executable for all machines if there's no need for both.

Yes, we can, and probably should, have one executable for all PPC system emulation.  RISCV 
is actively working toward that, and I think it would be fairly easy for ARM and x86 to 
follow.

It's something relatively easy to do that reduces the size of the test matrix.


r~
BALATON Zoltan Oct. 23, 2021, 8:02 p.m. UTC | #5
On Sat, 23 Oct 2021, Richard Henderson wrote:
> On 10/23/21 3:12 AM, BALATON Zoltan wrote:
>> You mentioning target_ulong reminded me a question I had. Currently we have 
>> qemu-system-ppc and qemu-system-ppc64 but the latter includes all machines 
>> of the former too so you could run for example sam460ex with 
>> qemu-system-ppc64 (except mac99 which behaves differently based on which 
>> executable it's part of but you could use mac99 -cpu G4 with 
>> qemu-system-ppc64 as well). But isn't target_ulong different in these 
>> executables and could that cause a problem with this? I've always used 
>> qemu-system-ppc for 32 bit machines but we could have one just executable 
>> for all machines if there's no need for both.
>
> Yes, we can, and probably should, have one executable for all PPC system 
> emulation.  RISCV is actively working toward that, and I think it would be 
> fairly easy for ARM and x86 to follow.
>
> It's something relatively easy to do that reduces the size of the test 
> matrix.

So may question was not if it's possible but if having target_ulong 
different from what we had in qemu-system-ppc could cause any problems? I 
have no experience running 32-bit guests with qemu-system-ppc64 but 
previously when this came up one difference pointed out was that 
target_ulong would change if I remember the discussion correctly, but 
nobody now if that could be a problem.

Regards,
BALATON Zoltan
Richard Henderson Oct. 23, 2021, 8:09 p.m. UTC | #6
On 10/23/21 1:02 PM, BALATON Zoltan wrote:
> So may question was not if it's possible but if having target_ulong different from what we 
> had in qemu-system-ppc could cause any problems? I have no experience running 32-bit 
> guests with qemu-system-ppc64 but previously when this came up one difference pointed out 
> was that target_ulong would change if I remember the discussion correctly, but nobody now 
> if that could be a problem.

It shouldn't be a problem.  We take care of NARROW_MODE, so that you can boot a ppc64 
guest kernel, and then run ppc32 user binaries under that.

If you do find a bug under those conditions, report it.

r~
Matheus K. Ferst Oct. 26, 2021, 2:33 p.m. UTC | #7
On 23/10/2021 01:40, Richard Henderson wrote:
> [E-MAIL EXTERNO] Não clique em links ou abra anexos, a menos que você 
> possa confirmar o remetente e saber que o conteúdo é seguro. Em caso de 
> e-mail suspeito entre imediatamente em contato com o DTI.
> 
> On 10/21/21 12:45 PM, matheus.ferst@eldorado.org.br wrote:
>> +#if defined(TARGET_PPC64)
>> +    return do_vinsx(ctx, a->vrt, size, right, cpu_gpr[a->vra], 
>> cpu_gpr[a->vrb],
>> +                    gen_helper);
>> +#else
>> +    bool ok;
>> +    TCGv_i64 val;
>> +
>> +    val = tcg_temp_new_i64();
>> +    tcg_gen_extu_tl_i64(val, cpu_gpr[a->vrb]);
>> +
>> +    ok = do_vinsx(ctx, a->vrt, size, right, cpu_gpr[a->vra], val, 
>> gen_helper);
>> +
>> +    tcg_temp_free_i64(val);
>> +    return ok;
>> +#endif
> 
> Oh, and what's all this?
> 
> Either this isn't defined for !PPC64 at all, or you should just use 
> target_ulong and not
> do any ifdeffing at all.
> 
> r~

The helper receives i64 because it's also used by Vector Insert From VSR 
in patch 17. We can drop the ifdef and always tcg_gen_extu_tl_i64 though.
diff mbox series

Patch

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 86715c491e..45c74b540f 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -230,6 +230,10 @@  DEF_HELPER_3(vinsertb, void, avr, avr, i32)
 DEF_HELPER_3(vinserth, void, avr, avr, i32)
 DEF_HELPER_3(vinsertw, void, avr, avr, i32)
 DEF_HELPER_3(vinsertd, void, avr, avr, i32)
+DEF_HELPER_4(VINSBLX, void, env, avr, i64, tl)
+DEF_HELPER_4(VINSHLX, void, env, avr, i64, tl)
+DEF_HELPER_4(VINSWLX, void, env, avr, i64, tl)
+DEF_HELPER_4(VINSDLX, void, env, avr, i64, tl)
 DEF_HELPER_2(vextsb2w, void, avr, avr)
 DEF_HELPER_2(vextsh2w, void, avr, avr)
 DEF_HELPER_2(vextsb2d, void, avr, avr)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 257b11113d..b794424496 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -344,5 +344,14 @@  VPEXTD          000100 ..... ..... ..... 10110001101    @VX
 
 ## Vector Permute and Formatting Instruction
 
+VINSBLX         000100 ..... ..... ..... 01000001111    @VX
+VINSBRX         000100 ..... ..... ..... 01100001111    @VX
+VINSHLX         000100 ..... ..... ..... 01001001111    @VX
+VINSHRX         000100 ..... ..... ..... 01101001111    @VX
+VINSWLX         000100 ..... ..... ..... 01010001111    @VX
+VINSWRX         000100 ..... ..... ..... 01110001111    @VX
+VINSDLX         000100 ..... ..... ..... 01011001111    @VX
+VINSDRX         000100 ..... ..... ..... 01111001111    @VX
+
 VSLDBI          000100 ..... ..... ..... 00 ... 010110  @VN
 VSRDBI          000100 ..... ..... ..... 01 ... 010110  @VN
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index d90a397bca..63263dd912 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -1666,6 +1666,36 @@  VINSERT(h, u16)
 VINSERT(w, u32)
 VINSERT(d, u64)
 #undef VINSERT
+
+#if defined(HOST_WORDS_BIGENDIAN)
+#define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->VsrB(IDX))
+#else
+#define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->VsrB(IDX) - (SIZE) + 1)
+#endif
+
+#define VINSX(SUFFIX, TYPE) \
+void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t,       \
+                                         uint64_t val, target_ulong index)     \
+{                                                                              \
+    const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE);                       \
+    target_long idx = index;                                                   \
+                                                                               \
+    if (idx < 0 || idx > maxidx) {                                             \
+        char c = idx < 0 ? 'R' : 'L';                                          \
+        idx =  idx < 0 ? sizeof(TYPE) - idx : idx;                             \
+        qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for VINS" #SUFFIX "%cX"  \
+                      " at 0x" TARGET_FMT_lx ", RA = " TARGET_FMT_ld " > %d\n",\
+                      c, env->nip, idx, maxidx);                               \
+    } else {                                                                   \
+        *(TYPE *)ELEM_ADDR(t, idx, sizeof(TYPE)) = (TYPE)val;                  \
+    }                                                                          \
+}
+VINSX(B, uint8_t)
+VINSX(H, uint16_t)
+VINSX(W, uint32_t)
+VINSX(D, uint64_t)
+#undef ELEM_ADDR
+#undef VINSX
 #if defined(HOST_WORDS_BIGENDIAN)
 #define VEXTRACT(suffix, element)                                            \
     void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index e19793f295..0c5f0dcf32 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -1238,6 +1238,61 @@  GEN_VXFORM_DUAL(vspltish, PPC_ALTIVEC, PPC_NONE,
 GEN_VXFORM_DUAL(vspltisw, PPC_ALTIVEC, PPC_NONE,
                 vinsertw, PPC_NONE, PPC2_ISA300);
 
+static bool do_vinsx(DisasContext *ctx, int vrt, int size, bool right, TCGv ra,
+            TCGv_i64 rb, void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv))
+{
+    TCGv_ptr t;
+    TCGv idx;
+
+    t = gen_avr_ptr(vrt);
+    idx = tcg_temp_new();
+
+    tcg_gen_andi_tl(idx, ra, 0xF);
+    if (right) {
+        tcg_gen_subfi_tl(idx, 16 - size, idx);
+    }
+
+    gen_helper(cpu_env, t, rb, idx);
+
+    tcg_temp_free_ptr(t);
+    tcg_temp_free(idx);
+
+    return true;
+}
+
+static bool do_vinsx_VX(DisasContext *ctx, arg_VX *a, int size, bool right,
+                        void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv))
+{
+    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
+    REQUIRE_VECTOR(ctx);
+
+#if defined(TARGET_PPC64)
+    return do_vinsx(ctx, a->vrt, size, right, cpu_gpr[a->vra], cpu_gpr[a->vrb],
+                    gen_helper);
+#else
+    bool ok;
+    TCGv_i64 val;
+
+    val = tcg_temp_new_i64();
+    tcg_gen_extu_tl_i64(val, cpu_gpr[a->vrb]);
+
+    ok = do_vinsx(ctx, a->vrt, size, right, cpu_gpr[a->vra], val, gen_helper);
+
+    tcg_temp_free_i64(val);
+    return ok;
+#endif
+}
+
+TRANS(VINSBLX, do_vinsx_VX, 1, false, gen_helper_VINSBLX)
+TRANS(VINSHLX, do_vinsx_VX, 2, false, gen_helper_VINSHLX)
+TRANS(VINSWLX, do_vinsx_VX, 4, false, gen_helper_VINSWLX)
+TRANS(VINSDLX, do_vinsx_VX, 8, false, gen_helper_VINSDLX)
+
+TRANS(VINSBRX, do_vinsx_VX, 1, true, gen_helper_VINSBLX)
+TRANS(VINSHRX, do_vinsx_VX, 2, true, gen_helper_VINSHLX)
+TRANS(VINSWRX, do_vinsx_VX, 4, true, gen_helper_VINSWLX)
+TRANS(VINSDRX, do_vinsx_VX, 8, true, gen_helper_VINSDLX)
+
 static void gen_vsldoi(DisasContext *ctx)
 {
     TCGv_ptr ra, rb, rd;