Message ID | 1473662506-27441-16-git-send-email-nikunj@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Sep 12, 2016 at 12:11:44PM +0530, Nikunj A Dadhania wrote: > lxvb16x: Load VSX Vector Byte*16 > lxvh8x: Load VSX Vector Halfword*8 > > Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> > --- > target-ppc/helper.h | 1 + > target-ppc/mem_helper.c | 6 ++++ > target-ppc/translate/vsx-impl.inc.c | 57 +++++++++++++++++++++++++++++++++++++ > target-ppc/translate/vsx-ops.inc.c | 2 ++ > 4 files changed, 66 insertions(+) > > diff --git a/target-ppc/helper.h b/target-ppc/helper.h > index 1bbeac4..6de0db7 100644 > --- a/target-ppc/helper.h > +++ b/target-ppc/helper.h > @@ -298,6 +298,7 @@ DEF_HELPER_3(lvebx, void, env, avr, tl) > DEF_HELPER_3(lvehx, void, env, avr, tl) > DEF_HELPER_3(lvewx, void, env, avr, tl) > DEF_HELPER_1(bswap32x2, i64, i64) > +DEF_HELPER_1(bswap16x4, i64, i64) > DEF_HELPER_3(stvebx, void, env, avr, tl) > DEF_HELPER_3(stvehx, void, env, avr, tl) > DEF_HELPER_3(stvewx, void, env, avr, tl) > diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c > index a56051a..608803f 100644 > --- a/target-ppc/mem_helper.c > +++ b/target-ppc/mem_helper.c > @@ -290,6 +290,12 @@ uint64_t helper_bswap32x2(uint64_t x) > return deposit64((x >> 32), 32, 32, (x)); > } > > +uint64_t helper_bswap16x4(uint64_t x) > +{ > + uint64_t m = 0x00ff00ff00ff00ffull; > + return ((x & m) << 8) | ((x >> 8) & m); > +} This doesn't seem to match the bswap32x2 function above. bswap32x2 just swaps the two 32-bit words in the 64-bit word. This one swaps the bytes in each individual 16 bit work in the 64-bit word. I suspect the bswap32x2 is wrong, which would explain why the previous patch didn't seem to make sense. > + > #undef HI_IDX > #undef LO_IDX > > diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c > index e3374df..caa6660 100644 > --- a/target-ppc/translate/vsx-impl.inc.c > +++ b/target-ppc/translate/vsx-impl.inc.c > @@ -108,6 +108,63 @@ static void gen_lxvw4x(DisasContext *ctx) > tcg_temp_free(EA); > } > > +static void gen_lxvb16x(DisasContext *ctx) > +{ > + TCGv EA; > + TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode)); > + TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode)); > + > + if (unlikely(!ctx->vsx_enabled)) { > + gen_exception(ctx, POWERPC_EXCP_VSXU); > + return; > + } > + gen_set_access_type(ctx, ACCESS_INT); > + EA = tcg_temp_new(); > + gen_addr_reg_index(ctx, EA); > + if (ctx->le_mode) { > + tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEQ); > + tcg_gen_addi_tl(EA, EA, 8); > + tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ); > + } else { > + tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_LEQ); > + gen_helper_bswap32x2(xth, xth); I really don't understand how a bswap32x2 helps here, either as it's defined now, or as I suspect it should be defined by analogy with bswap16x4. > + tcg_gen_addi_tl(EA, EA, 8); > + tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_LEQ); > + gen_helper_bswap32x2(xtl, xtl); Also.. if I'm understanding the ISA correctly, this instruction loads subsequent higher-address bytes into subsequent (i.e. less signficant, since IBM uses BE bit/byte numbering) bytes in the vector. Doesn't that mean you want a BE load in all cases, not just the LE guest case? > + } > + tcg_temp_free(EA); > +} > + > +static void gen_lxvh8x(DisasContext *ctx) > +{ > + TCGv EA; > + TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode)); > + TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode)); > + > + if (unlikely(!ctx->vsx_enabled)) { > + gen_exception(ctx, POWERPC_EXCP_VSXU); > + return; > + } > + gen_set_access_type(ctx, ACCESS_INT); > + EA = tcg_temp_new(); > + gen_addr_reg_index(ctx, EA); > + > + if (ctx->le_mode) { > + tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEQ); > + gen_helper_bswap16x4(xth, xth); > + tcg_gen_addi_tl(EA, EA, 8); > + tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ); > + gen_helper_bswap16x4(xtl, xtl); > + } else { > + tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_LEQ); > + gen_helper_bswap32x2(xth, xth); > + tcg_gen_addi_tl(EA, EA, 8); > + tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_LEQ); > + gen_helper_bswap32x2(xtl, xtl); Again, I think you want a BE load in both cases, and the bswap32x2 makes no sense to me. > + } > + tcg_temp_free(EA); > +} > + > #define VSX_STORE_SCALAR(name, operation) \ > static void gen_##name(DisasContext *ctx) \ > { \ > diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c > index 414b73b..598b349 100644 > --- a/target-ppc/translate/vsx-ops.inc.c > +++ b/target-ppc/translate/vsx-ops.inc.c > @@ -7,6 +7,8 @@ GEN_HANDLER_E(lxsspx, 0x1F, 0x0C, 0x10, 0, PPC_NONE, PPC2_VSX207), > GEN_HANDLER_E(lxvd2x, 0x1F, 0x0C, 0x1A, 0, PPC_NONE, PPC2_VSX), > GEN_HANDLER_E(lxvdsx, 0x1F, 0x0C, 0x0A, 0, PPC_NONE, PPC2_VSX), > GEN_HANDLER_E(lxvw4x, 0x1F, 0x0C, 0x18, 0, PPC_NONE, PPC2_VSX), > +GEN_HANDLER_E(lxvh8x, 0x1F, 0x0C, 0x19, 0, PPC_NONE, PPC2_ISA300), > +GEN_HANDLER_E(lxvb16x, 0x1F, 0x0C, 0x1B, 0, PPC_NONE, PPC2_ISA300), > > GEN_HANDLER_E(stxsdx, 0x1F, 0xC, 0x16, 0, PPC_NONE, PPC2_VSX), > GEN_HANDLER_E(stxsibx, 0x1F, 0xD, 0x1C, 0, PPC_NONE, PPC2_ISA300),
David Gibson <david@gibson.dropbear.id.au> writes: > [ Unknown signature status ] > On Mon, Sep 12, 2016 at 12:11:44PM +0530, Nikunj A Dadhania wrote: >> lxvb16x: Load VSX Vector Byte*16 >> lxvh8x: Load VSX Vector Halfword*8 >> >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> >> --- >> target-ppc/helper.h | 1 + >> target-ppc/mem_helper.c | 6 ++++ >> target-ppc/translate/vsx-impl.inc.c | 57 +++++++++++++++++++++++++++++++++++++ >> target-ppc/translate/vsx-ops.inc.c | 2 ++ >> 4 files changed, 66 insertions(+) >> >> diff --git a/target-ppc/helper.h b/target-ppc/helper.h >> index 1bbeac4..6de0db7 100644 >> --- a/target-ppc/helper.h >> +++ b/target-ppc/helper.h >> @@ -298,6 +298,7 @@ DEF_HELPER_3(lvebx, void, env, avr, tl) >> DEF_HELPER_3(lvehx, void, env, avr, tl) >> DEF_HELPER_3(lvewx, void, env, avr, tl) >> DEF_HELPER_1(bswap32x2, i64, i64) >> +DEF_HELPER_1(bswap16x4, i64, i64) >> DEF_HELPER_3(stvebx, void, env, avr, tl) >> DEF_HELPER_3(stvehx, void, env, avr, tl) >> DEF_HELPER_3(stvewx, void, env, avr, tl) >> diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c >> index a56051a..608803f 100644 >> --- a/target-ppc/mem_helper.c >> +++ b/target-ppc/mem_helper.c >> @@ -290,6 +290,12 @@ uint64_t helper_bswap32x2(uint64_t x) >> return deposit64((x >> 32), 32, 32, (x)); >> } >> >> +uint64_t helper_bswap16x4(uint64_t x) >> +{ >> + uint64_t m = 0x00ff00ff00ff00ffull; >> + return ((x & m) << 8) | ((x >> 8) & m); >> +} > > This doesn't seem to match the bswap32x2 function above. bswap32x2 > just swaps the two 32-bit words in the 64-bit word. This one swaps > the bytes in each individual 16 bit work in the 64-bit word. > > I suspect the bswap32x2 is wrong, which would explain why the previous > patch didn't seem to make sense. The confusion is because of the name(bswap32x2), I will rename it as deposit32x2. And let bswap16x4 as is, as that is a required operation in certain cases to get the right order expected by the instruction. > >> + >> #undef HI_IDX >> #undef LO_IDX >> >> diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c >> index e3374df..caa6660 100644 >> --- a/target-ppc/translate/vsx-impl.inc.c >> +++ b/target-ppc/translate/vsx-impl.inc.c >> @@ -108,6 +108,63 @@ static void gen_lxvw4x(DisasContext *ctx) >> tcg_temp_free(EA); >> } >> >> +static void gen_lxvb16x(DisasContext *ctx) >> +{ >> + TCGv EA; >> + TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode)); >> + TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode)); >> + >> + if (unlikely(!ctx->vsx_enabled)) { >> + gen_exception(ctx, POWERPC_EXCP_VSXU); >> + return; >> + } >> + gen_set_access_type(ctx, ACCESS_INT); >> + EA = tcg_temp_new(); >> + gen_addr_reg_index(ctx, EA); >> + if (ctx->le_mode) { >> + tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEQ); >> + tcg_gen_addi_tl(EA, EA, 8); >> + tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ); >> + } else { >> + tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_LEQ); >> + gen_helper_bswap32x2(xth, xth); > > I really don't understand how a bswap32x2 helps here, either as it's > defined now, or as I suspect it should be defined by analogy with > bswap16x4. > >> + tcg_gen_addi_tl(EA, EA, 8); >> + tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_LEQ); >> + gen_helper_bswap32x2(xtl, xtl); > > Also.. if I'm understanding the ISA correctly, this instruction loads > subsequent higher-address bytes into subsequent (i.e. less signficant, > since IBM uses BE bit/byte numbering) bytes in the vector. Doesn't > that mean you want a BE load in all cases, not just the LE guest case? > >> + } >> + tcg_temp_free(EA); >> +} >> + >> +static void gen_lxvh8x(DisasContext *ctx) >> +{ >> + TCGv EA; >> + TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode)); >> + TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode)); >> + >> + if (unlikely(!ctx->vsx_enabled)) { >> + gen_exception(ctx, POWERPC_EXCP_VSXU); >> + return; >> + } >> + gen_set_access_type(ctx, ACCESS_INT); >> + EA = tcg_temp_new(); >> + gen_addr_reg_index(ctx, EA); >> + >> + if (ctx->le_mode) { >> + tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEQ); >> + gen_helper_bswap16x4(xth, xth); >> + tcg_gen_addi_tl(EA, EA, 8); >> + tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ); >> + gen_helper_bswap16x4(xtl, xtl); >> + } else { >> + tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_LEQ); >> + gen_helper_bswap32x2(xth, xth); >> + tcg_gen_addi_tl(EA, EA, 8); >> + tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_LEQ); >> + gen_helper_bswap32x2(xtl, xtl); > > Again, I think you want a BE load in both cases, and the bswap32x2 > makes no sense to me. BTW, MO_BEQ(big-endian load) and MO_LEQ(little-endian) also does magic during loads. Now when I have 8bytes loaded, i do the manipulation within to get the right order. For an input of: uint16_t rb16[8] = {0x0001, 0x1011, 0x2021, 0x3031, 0x4041, 0x5051, 0x6061, 0x7071}; For LE I should get this result: 7071 6061 5051 4041 3031 2021 1011 0001 For BE it should be: 0001 1011 2021 3031 4041 5051 6061 7071 Thats what the tcg operations achieves. I guess renaming bswap32x2 will clear the confusion. Regards Nikunj
diff --git a/target-ppc/helper.h b/target-ppc/helper.h index 1bbeac4..6de0db7 100644 --- a/target-ppc/helper.h +++ b/target-ppc/helper.h @@ -298,6 +298,7 @@ DEF_HELPER_3(lvebx, void, env, avr, tl) DEF_HELPER_3(lvehx, void, env, avr, tl) DEF_HELPER_3(lvewx, void, env, avr, tl) DEF_HELPER_1(bswap32x2, i64, i64) +DEF_HELPER_1(bswap16x4, i64, i64) DEF_HELPER_3(stvebx, void, env, avr, tl) DEF_HELPER_3(stvehx, void, env, avr, tl) DEF_HELPER_3(stvewx, void, env, avr, tl) diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c index a56051a..608803f 100644 --- a/target-ppc/mem_helper.c +++ b/target-ppc/mem_helper.c @@ -290,6 +290,12 @@ uint64_t helper_bswap32x2(uint64_t x) return deposit64((x >> 32), 32, 32, (x)); } +uint64_t helper_bswap16x4(uint64_t x) +{ + uint64_t m = 0x00ff00ff00ff00ffull; + return ((x & m) << 8) | ((x >> 8) & m); +} + #undef HI_IDX #undef LO_IDX diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c index e3374df..caa6660 100644 --- a/target-ppc/translate/vsx-impl.inc.c +++ b/target-ppc/translate/vsx-impl.inc.c @@ -108,6 +108,63 @@ static void gen_lxvw4x(DisasContext *ctx) tcg_temp_free(EA); } +static void gen_lxvb16x(DisasContext *ctx) +{ + TCGv EA; + TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode)); + TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode)); + + if (unlikely(!ctx->vsx_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VSXU); + return; + } + gen_set_access_type(ctx, ACCESS_INT); + EA = tcg_temp_new(); + gen_addr_reg_index(ctx, EA); + if (ctx->le_mode) { + tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEQ); + tcg_gen_addi_tl(EA, EA, 8); + tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ); + } else { + tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_LEQ); + gen_helper_bswap32x2(xth, xth); + tcg_gen_addi_tl(EA, EA, 8); + tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_LEQ); + gen_helper_bswap32x2(xtl, xtl); + } + tcg_temp_free(EA); +} + +static void gen_lxvh8x(DisasContext *ctx) +{ + TCGv EA; + TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode)); + TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode)); + + if (unlikely(!ctx->vsx_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VSXU); + return; + } + gen_set_access_type(ctx, ACCESS_INT); + EA = tcg_temp_new(); + gen_addr_reg_index(ctx, EA); + + if (ctx->le_mode) { + tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEQ); + gen_helper_bswap16x4(xth, xth); + tcg_gen_addi_tl(EA, EA, 8); + tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ); + gen_helper_bswap16x4(xtl, xtl); + } else { + tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_LEQ); + gen_helper_bswap32x2(xth, xth); + tcg_gen_addi_tl(EA, EA, 8); + tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_LEQ); + gen_helper_bswap32x2(xtl, xtl); + } + tcg_temp_free(EA); +} + #define VSX_STORE_SCALAR(name, operation) \ static void gen_##name(DisasContext *ctx) \ { \ diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c index 414b73b..598b349 100644 --- a/target-ppc/translate/vsx-ops.inc.c +++ b/target-ppc/translate/vsx-ops.inc.c @@ -7,6 +7,8 @@ GEN_HANDLER_E(lxsspx, 0x1F, 0x0C, 0x10, 0, PPC_NONE, PPC2_VSX207), GEN_HANDLER_E(lxvd2x, 0x1F, 0x0C, 0x1A, 0, PPC_NONE, PPC2_VSX), GEN_HANDLER_E(lxvdsx, 0x1F, 0x0C, 0x0A, 0, PPC_NONE, PPC2_VSX), GEN_HANDLER_E(lxvw4x, 0x1F, 0x0C, 0x18, 0, PPC_NONE, PPC2_VSX), +GEN_HANDLER_E(lxvh8x, 0x1F, 0x0C, 0x19, 0, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(lxvb16x, 0x1F, 0x0C, 0x1B, 0, PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(stxsdx, 0x1F, 0xC, 0x16, 0, PPC_NONE, PPC2_VSX), GEN_HANDLER_E(stxsibx, 0x1F, 0xD, 0x1C, 0, PPC_NONE, PPC2_ISA300),
lxvb16x: Load VSX Vector Byte*16 lxvh8x: Load VSX Vector Halfword*8 Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> --- target-ppc/helper.h | 1 + target-ppc/mem_helper.c | 6 ++++ target-ppc/translate/vsx-impl.inc.c | 57 +++++++++++++++++++++++++++++++++++++ target-ppc/translate/vsx-ops.inc.c | 2 ++ 4 files changed, 66 insertions(+)