diff mbox series

[1/6] target/ppc: add byte-reverse br[dwh] instructions

Message ID 20200613042029.22321-2-ljp@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series Add several Power ISA 3.1 32/64-bit vector instructions | expand

Commit Message

Lijun Pan June 13, 2020, 4:20 a.m. UTC
POWER ISA 3.1 introduces following byte-reverse instructions:
brd: Byte-Reverse Doubleword X-form
brw: Byte-Reverse Word X-form
brh: Byte-Reverse Halfword X-form

Signed-off-by: Lijun Pan <ljp@linux.ibm.com>
---
 target/ppc/translate.c | 62 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

Comments

Richard Henderson June 18, 2020, 11:19 p.m. UTC | #1
On 6/12/20 9:20 PM, Lijun Pan wrote:
> POWER ISA 3.1 introduces following byte-reverse instructions:
> brd: Byte-Reverse Doubleword X-form
> brw: Byte-Reverse Word X-form
> brh: Byte-Reverse Halfword X-form
> 
> Signed-off-by: Lijun Pan <ljp@linux.ibm.com>
> ---
>  target/ppc/translate.c | 62 ++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 62 insertions(+)
> 
> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> index 4ce3d664b5..2d48fbc8db 100644
> --- a/target/ppc/translate.c
> +++ b/target/ppc/translate.c
> @@ -6971,7 +6971,69 @@ static void gen_dform3D(DisasContext *ctx)
>      return gen_invalid(ctx);
>  }
>  
> +/* brd */
> +static void gen_brd(DisasContext *ctx)
> +{
> +	TCGv_i64 temp = tcg_temp_new_i64();
> +
> +	tcg_gen_bswap64_i64(temp, cpu_gpr[rS(ctx->opcode)]);
> +	tcg_gen_st_i64(temp, cpu_env, offsetof(CPUPPCState, gpr[rA(ctx->opcode)]));


The store is wrong.  You cannot modify storage behind a tcg global variable
like that.  This should just be

    tcg_gen_bswap64_i64(cpu_gpr[rA(ctx->opcode)],
                        cpu_gpr[rS(ctx->opcode)]);

Is this code is within an ifdef for TARGET_PPC64?
If not, then this will break the 32-bit qemu-system-ppc build.
Are you sure you have built and tested all configurations?


> +/* brw */
> +static void gen_brw(DisasContext *ctx)
> +{
> +	TCGv_i64 temp = tcg_temp_new_i64();
> +	TCGv_i64 lsb = tcg_temp_new_i64();
> +	TCGv_i64 msb = tcg_temp_new_i64();
> +
> +	tcg_gen_movi_i64(lsb, 0x00000000ffffffffull);
> +	tcg_gen_and_i64(temp, lsb, cpu_gpr[rS(ctx->opcode)]);
> +	tcg_gen_bswap32_i64(lsb, temp);
> +	
> +	tcg_gen_shri_i64(msb, cpu_gpr[rS(ctx->opcode)], 32);
> +	tcg_gen_bswap32_i64(temp, msb);
> +	tcg_gen_shli_i64(msb, temp, 32);
> +	
> +	tcg_gen_or_i64(temp, lsb, msb);
> +
> +	tcg_gen_st_i64(temp, cpu_env, offsetof(CPUPPCState, gpr[rA(ctx->opcode)]));

Again, the store is wrong.

In addition, this can be computed as

    tcg_gen_bswap64_i64(dest, source);
    tcg_gen_rotli_i64(dest, dest, 32);

> +static void gen_brh(DisasContext *ctx)
> +{
> +	TCGv_i64 temp = tcg_temp_new_i64();
> +	TCGv_i64 t0 = tcg_temp_new_i64();
> +	TCGv_i64 t1 = tcg_temp_new_i64();
> +	TCGv_i64 t2 = tcg_temp_new_i64();
> +	TCGv_i64 t3 = tcg_temp_new_i64();
> +
> +	tcg_gen_movi_i64(t0, 0x00ff00ff00ff00ffull);
> +	tcg_gen_shri_i64(t1, cpu_gpr[rS(ctx->opcode)], 8);
> +	tcg_gen_and_i64(t2, t1, t0);
> +	tcg_gen_and_i64(t1, cpu_gpr[rS(ctx->opcode)], t0);
> +	tcg_gen_shli_i64(t1, t1, 8);
> +	tcg_gen_or_i64(temp, t1, t2);
> +	tcg_gen_st_i64(temp, cpu_env, offsetof(CPUPPCState, gpr[rA(ctx->opcode)]));

Again, the store is wrong.


r~
Lijun Pan June 19, 2020, 5:24 a.m. UTC | #2
> On Jun 18, 2020, at 6:19 PM, Richard Henderson <richard.henderson@linaro.org> wrote:
> 
> On 6/12/20 9:20 PM, Lijun Pan wrote:
>> POWER ISA 3.1 introduces following byte-reverse instructions:
>> brd: Byte-Reverse Doubleword X-form
>> brw: Byte-Reverse Word X-form
>> brh: Byte-Reverse Halfword X-form
>> 
>> Signed-off-by: Lijun Pan <ljp@linux.ibm.com>
>> ---
>> target/ppc/translate.c | 62 ++++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 62 insertions(+)
>> 
>> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
>> index 4ce3d664b5..2d48fbc8db 100644
>> --- a/target/ppc/translate.c
>> +++ b/target/ppc/translate.c
>> @@ -6971,7 +6971,69 @@ static void gen_dform3D(DisasContext *ctx)
>>     return gen_invalid(ctx);
>> }
>> 
>> +/* brd */
>> +static void gen_brd(DisasContext *ctx)
>> +{
>> +	TCGv_i64 temp = tcg_temp_new_i64();
>> +
>> +	tcg_gen_bswap64_i64(temp, cpu_gpr[rS(ctx->opcode)]);
>> +	tcg_gen_st_i64(temp, cpu_env, offsetof(CPUPPCState, gpr[rA(ctx->opcode)]));
> 
> 
> The store is wrong.  You cannot modify storage behind a tcg global variable
> like that.  This should just be
> 
>    tcg_gen_bswap64_i64(cpu_gpr[rA(ctx->opcode)],
>                        cpu_gpr[rS(ctx->opcode)]);

Why can’t I retrieve the offset via “offsetof(CPUPPCState, gpr[rA(ctx->opcode)])”?
I would like to learn more.

> Is this code is within an ifdef for TARGET_PPC64?

I will change it to 
+#if defined(TARGET_PPC64)
+GEN_HANDLER_E(brd, 0x1F, 0x1B, 0x05, 0x0000F801, PPC_NONE, PPC2_ISA300),
+GEN_HANDLER_E(brw, 0x1F, 0x1B, 0x04, 0x0000F801, PPC_NONE, PPC2_ISA300),
+GEN_HANDLER_E(brh, 0x1F, 0x1B, 0x06, 0x0000F801, PPC_NONE, PPC2_ISA300),
+#endif

> If not, then this will break the 32-bit qemu-system-ppc build.
> Are you sure you have built and tested all configurations?
> 
> 
>> +/* brw */
>> +static void gen_brw(DisasContext *ctx)
>> +{
>> +	TCGv_i64 temp = tcg_temp_new_i64();
>> +	TCGv_i64 lsb = tcg_temp_new_i64();
>> +	TCGv_i64 msb = tcg_temp_new_i64();
>> +
>> +	tcg_gen_movi_i64(lsb, 0x00000000ffffffffull);
>> +	tcg_gen_and_i64(temp, lsb, cpu_gpr[rS(ctx->opcode)]);
>> +	tcg_gen_bswap32_i64(lsb, temp);
>> +	
>> +	tcg_gen_shri_i64(msb, cpu_gpr[rS(ctx->opcode)], 32);
>> +	tcg_gen_bswap32_i64(temp, msb);
>> +	tcg_gen_shli_i64(msb, temp, 32);
>> +	
>> +	tcg_gen_or_i64(temp, lsb, msb);
>> +
>> +	tcg_gen_st_i64(temp, cpu_env, offsetof(CPUPPCState, gpr[rA(ctx->opcode)]));
> 
> Again, the store is wrong.
> 
> In addition, this can be computed as
> 
>    tcg_gen_bswap64_i64(dest, source);
>    tcg_gen_rotli_i64(dest, dest, 32);
> 
>> +static void gen_brh(DisasContext *ctx)
>> +{
>> +	TCGv_i64 temp = tcg_temp_new_i64();
>> +	TCGv_i64 t0 = tcg_temp_new_i64();
>> +	TCGv_i64 t1 = tcg_temp_new_i64();
>> +	TCGv_i64 t2 = tcg_temp_new_i64();
>> +	TCGv_i64 t3 = tcg_temp_new_i64();
>> +
>> +	tcg_gen_movi_i64(t0, 0x00ff00ff00ff00ffull);
>> +	tcg_gen_shri_i64(t1, cpu_gpr[rS(ctx->opcode)], 8);
>> +	tcg_gen_and_i64(t2, t1, t0);
>> +	tcg_gen_and_i64(t1, cpu_gpr[rS(ctx->opcode)], t0);
>> +	tcg_gen_shli_i64(t1, t1, 8);
>> +	tcg_gen_or_i64(temp, t1, t2);
>> +	tcg_gen_st_i64(temp, cpu_env, offsetof(CPUPPCState, gpr[rA(ctx->opcode)]));
> 
> Again, the store is wrong.
> 
> 
> r~
>
Richard Henderson June 19, 2020, 9:08 p.m. UTC | #3
On 6/18/20 10:24 PM, Lijun Pan wrote:
> Why can’t I retrieve the offset via “offsetof(CPUPPCState,gpr[rA(ctx->opcode)])”?
> I would like to learn more.

The TCG compiler makes some simplifying assumptions in order to make it faster.
 One of them is that global temporaries cannot be modified via direct loads and
stores, so we do not have to check for that overlap during compilation.

I thought that was documented in tcg/README, but I can't find it.


r~
diff mbox series

Patch

diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 4ce3d664b5..2d48fbc8db 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -6971,7 +6971,69 @@  static void gen_dform3D(DisasContext *ctx)
     return gen_invalid(ctx);
 }
 
+/* brd */
+static void gen_brd(DisasContext *ctx)
+{
+	TCGv_i64 temp = tcg_temp_new_i64();
+
+	tcg_gen_bswap64_i64(temp, cpu_gpr[rS(ctx->opcode)]);
+	tcg_gen_st_i64(temp, cpu_env, offsetof(CPUPPCState, gpr[rA(ctx->opcode)]));
+
+	tcg_temp_free_i64(temp);
+}
+
+/* brw */
+static void gen_brw(DisasContext *ctx)
+{
+	TCGv_i64 temp = tcg_temp_new_i64();
+	TCGv_i64 lsb = tcg_temp_new_i64();
+	TCGv_i64 msb = tcg_temp_new_i64();
+
+	tcg_gen_movi_i64(lsb, 0x00000000ffffffffull);
+	tcg_gen_and_i64(temp, lsb, cpu_gpr[rS(ctx->opcode)]);
+	tcg_gen_bswap32_i64(lsb, temp);
+	
+	tcg_gen_shri_i64(msb, cpu_gpr[rS(ctx->opcode)], 32);
+	tcg_gen_bswap32_i64(temp, msb);
+	tcg_gen_shli_i64(msb, temp, 32);
+	
+	tcg_gen_or_i64(temp, lsb, msb);
+
+	tcg_gen_st_i64(temp, cpu_env, offsetof(CPUPPCState, gpr[rA(ctx->opcode)]));
+
+	tcg_temp_free_i64(temp);
+	tcg_temp_free_i64(lsb);
+	tcg_temp_free_i64(msb);
+}
+
+/* brh */
+static void gen_brh(DisasContext *ctx)
+{
+	TCGv_i64 temp = tcg_temp_new_i64();
+	TCGv_i64 t0 = tcg_temp_new_i64();
+	TCGv_i64 t1 = tcg_temp_new_i64();
+	TCGv_i64 t2 = tcg_temp_new_i64();
+	TCGv_i64 t3 = tcg_temp_new_i64();
+
+	tcg_gen_movi_i64(t0, 0x00ff00ff00ff00ffull);
+	tcg_gen_shri_i64(t1, cpu_gpr[rS(ctx->opcode)], 8);
+	tcg_gen_and_i64(t2, t1, t0);
+	tcg_gen_and_i64(t1, cpu_gpr[rS(ctx->opcode)], t0);
+	tcg_gen_shli_i64(t1, t1, 8);
+	tcg_gen_or_i64(temp, t1, t2);
+	tcg_gen_st_i64(temp, cpu_env, offsetof(CPUPPCState, gpr[rA(ctx->opcode)]));
+
+	tcg_temp_free_i64(temp);
+	tcg_temp_free_i64(t0);
+	tcg_temp_free_i64(t1);
+	tcg_temp_free_i64(t2);
+	tcg_temp_free_i64(t3);
+}
+
 static opcode_t opcodes[] = {
+GEN_HANDLER_E(brd, 0x1F, 0x1B, 0x05, 0x0000F801, PPC_NONE, PPC2_ISA300),
+GEN_HANDLER_E(brw, 0x1F, 0x1B, 0x04, 0x0000F801, PPC_NONE, PPC2_ISA300),
+GEN_HANDLER_E(brh, 0x1F, 0x1B, 0x06, 0x0000F801, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER(invalid, 0x00, 0x00, 0x00, 0xFFFFFFFF, PPC_NONE),
 GEN_HANDLER(cmp, 0x1F, 0x00, 0x00, 0x00400000, PPC_INTEGER),
 GEN_HANDLER(cmpi, 0x0B, 0xFF, 0xFF, 0x00400000, PPC_INTEGER),