David Gibson <da...@gibson.dropbear.id.au> writes: > [ Unknown signature status ] > On Mon, Sep 12, 2016 at 12:11:43PM +0530, Nikunj A Dadhania wrote: >> Load 8byte at a time and manipulate. >> >> Signed-off-by: Nikunj A Dadhania <nik...@linux.vnet.ibm.com> >> --- >> target-ppc/helper.h | 1 + >> target-ppc/mem_helper.c | 5 +++++ >> target-ppc/translate/vsx-impl.inc.c | 34 ++++++++++++++++++++-------------- >> 3 files changed, 26 insertions(+), 14 deletions(-) >> >> diff --git a/target-ppc/helper.h b/target-ppc/helper.h >> index 966f2ce..1bbeac4 100644 >> --- a/target-ppc/helper.h >> +++ b/target-ppc/helper.h >> @@ -297,6 +297,7 @@ DEF_HELPER_2(mtvscr, void, env, avr) >> DEF_HELPER_3(lvebx, void, env, avr, tl) >> DEF_HELPER_3(lvehx, void, env, avr, tl) >> DEF_HELPER_3(lvewx, void, env, avr, tl) >> +DEF_HELPER_1(bswap32x2, i64, i64) >> DEF_HELPER_3(stvebx, void, env, avr, tl) >> DEF_HELPER_3(stvehx, void, env, avr, tl) >> DEF_HELPER_3(stvewx, void, env, avr, tl) >> diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c >> index 6548715..a56051a 100644 >> --- a/target-ppc/mem_helper.c >> +++ b/target-ppc/mem_helper.c >> @@ -285,6 +285,11 @@ STVE(stvewx, cpu_stl_data_ra, bswap32, u32) >> #undef I >> #undef LVE >> >> +uint64_t helper_bswap32x2(uint64_t x) >> +{ >> + return deposit64((x >> 32), 32, 32, (x)); >> +} >> + >> #undef HI_IDX >> #undef LO_IDX >> >> diff --git a/target-ppc/translate/vsx-impl.inc.c >> b/target-ppc/translate/vsx-impl.inc.c >> index eee6052..e3374df 100644 >> --- a/target-ppc/translate/vsx-impl.inc.c >> +++ b/target-ppc/translate/vsx-impl.inc.c >> @@ -75,7 +75,7 @@ static void gen_lxvdsx(DisasContext *ctx) >> static void gen_lxvw4x(DisasContext *ctx) >> { >> TCGv EA; >> - TCGv_i64 tmp; >> + TCGv_i64 t0, t1; >> TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode)); >> TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode)); >> if (unlikely(!ctx->vsx_enabled)) { >> @@ -84,22 +84,28 @@ static void gen_lxvw4x(DisasContext *ctx) >> } >> gen_set_access_type(ctx, ACCESS_INT); >> EA = tcg_temp_new(); >> - tmp = tcg_temp_new_i64(); >> >> gen_addr_reg_index(ctx, EA); >> - gen_qemu_ld32u_i64(ctx, tmp, EA); >> - tcg_gen_addi_tl(EA, EA, 4); >> - gen_qemu_ld32u_i64(ctx, xth, EA); >> - tcg_gen_deposit_i64(xth, xth, tmp, 32, 32); >> - >> - tcg_gen_addi_tl(EA, EA, 4); >> - gen_qemu_ld32u_i64(ctx, tmp, EA); >> - tcg_gen_addi_tl(EA, EA, 4); >> - gen_qemu_ld32u_i64(ctx, xtl, EA); >> - tcg_gen_deposit_i64(xtl, xtl, tmp, 32, 32); >> - >> + if (ctx->le_mode) { >> + t0 = tcg_temp_new_i64(); >> + t1 = tcg_temp_new_i64(); >> + tcg_gen_qemu_ld_i64(t0, EA, ctx->mem_idx, MO_LEQ); >> + tcg_gen_shri_i64(t1, t0, 32); >> + tcg_gen_deposit_i64(xth, t1, t0, 32, 32); >> + tcg_gen_addi_tl(EA, EA, 8); >> + tcg_gen_qemu_ld_i64(t0, EA, ctx->mem_idx, MO_LEQ); >> + tcg_gen_shri_i64(t1, t0, 32); >> + tcg_gen_deposit_i64(xtl, t1, t0, 32, 32); >> + tcg_temp_free_i64(t0); >> + tcg_temp_free_i64(t1); >> + } else { >> + tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_LEQ); >> + gen_helper_bswap32x2(xth, xth); >> + tcg_gen_addi_tl(EA, EA, 8); >> + tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_LEQ); >> + gen_helper_bswap32x2(xtl, xtl); >> + } > > So.. for starters using a helper for just one endianness is kind of > ugly. But for going on with, it looks like the two paths are doing > the same thing, just one is doing it with TCG ops and the other via a > helper.
Right, while iterating and optimizing logic for BE, I got it same as LE ;-) We can just use BE. Following is good enough for both BE/LE. + tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_LEQ); + gen_helper_bswap32x2(xth, xth); + tcg_gen_addi_tl(EA, EA, 8); + tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_LEQ); + gen_helper_bswap32x2(xtl, xtl); Regards, Nikunj