This makes the code generated for comparisons much smaller and faster. The speedup is approximately 10% for user-mode emulation on an x86 host, and 3-4% on a PPC host.
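For reference, a minimal standalone sketch (not part of the patch) of the new representation: the CR is kept as 32 one-bit values indexed in PowerPC's big-endian bit order, so bit 0 of a field is the most significant bit of that nibble. The get_cr()/set_cr() helpers below simply mirror ppc_get_cr()/ppc_set_cr() from the diff; their names and the main() harness are illustrative only.

#include <assert.h>
#include <stdint.h>

/* CR field bit positions, big-endian as in the patch. */
#define CRF_LT 0
#define CRF_GT 1
#define CRF_EQ 2
#define CRF_SO 3

/* One entry per CR bit, as in CPUPPCState::cr[32] after the patch. */
static uint32_t cr[32];

/* Pack the 32 one-bit values back into the architectural CR register:
 * cr[0] is CR bit 0, i.e. the most significant bit of the 32-bit value. */
static uint32_t get_cr(void)
{
    uint32_t r = 0;
    for (int i = 0; i < 32; i++) {
        r |= cr[i] << (31 - i);
    }
    return r;
}

/* Unpack the architectural CR register into the per-bit array. */
static void set_cr(uint32_t val)
{
    for (int i = 0; i < 32; i++) {
        cr[i] = (val >> (31 - i)) & 1;
    }
}

int main(void)
{
    /* Set cr0[EQ]: big-endian bit 2 of the register, i.e. mask 0x20000000. */
    set_cr(0);
    cr[0 * 4 + CRF_EQ] = 1;
    assert(get_cr() == 0x20000000);
    return 0;
}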
Note that CRF_* constants are flipped to match PowerPC's big bit-endianness. Previously, the CR register was effectively stored in mixed endianness, so now there is less indirection going on. Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> --- v1->v2: fixed all issues reported by Tom, notably: 1) temporary leak in gen_op_mfcr; 2) missing set of cr[so] for gen_op_cmp32; 3) i32 vs. tl typing issues; 4) creqv/nand/nor/orc extra 1 bits. linux-user/main.c | 4 +- target-ppc/cpu.h | 41 +++--- target-ppc/fpu_helper.c | 44 ++----- target-ppc/helper.h | 6 - target-ppc/int_helper.c | 2 +- target-ppc/machine.c | 9 ++ target-ppc/translate.c | 344 ++++++++++++++++++++++++++---------------------- 7 files changed, 236 insertions(+), 214 deletions(-) diff --git a/linux-user/main.c b/linux-user/main.c index 152c031..b403f24 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -1929,7 +1929,7 @@ void cpu_loop(CPUPPCState *env) * PPC ABI uses overflow flag in cr0 to signal an error * in syscalls. */ - env->crf[0] &= ~0x1; + env->cr[CRF_SO] = 0; ret = do_syscall(env, env->gpr[0], env->gpr[3], env->gpr[4], env->gpr[5], env->gpr[6], env->gpr[7], env->gpr[8], 0, 0); @@ -1939,7 +1939,7 @@ void cpu_loop(CPUPPCState *env) break; } if (ret > (target_ulong)(-515)) { - env->crf[0] |= 0x1; + env->cr[CRF_SO] = 1; ret = -ret; } env->gpr[3] = ret; diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index 91eac17..41b8299 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -940,7 +940,7 @@ struct CPUPPCState { /* CTR */ target_ulong ctr; /* condition register */ - uint32_t crf[8]; + uint32_t cr[32]; #if defined(TARGET_PPC64) /* CFAR */ target_ulong cfar; @@ -1059,6 +1059,9 @@ struct CPUPPCState { uint64_t dtl_addr, dtl_size; #endif /* TARGET_PPC64 */ + /* condition register, for migration compatibility */ + uint32_t crf[8]; + int error_code; uint32_t pending_interrupts; #if !defined(CONFIG_USER_ONLY) @@ -1202,8 +1205,8 @@ static inline uint32_t ppc_get_cr(const CPUPPCState *env) uint32_t cr = 0; int i; - for (i = 0; i < ARRAY_SIZE(env->crf); i++) { - cr |= env->crf[i] << (32 - ((i + 1) * 4)); + for (i = 0; i < ARRAY_SIZE(env->cr); i++) { + cr |= env->cr[i] << (31 - i); } return cr; } @@ -1212,19 +1215,27 @@ static inline void ppc_set_cr(CPUPPCState *env, uint32_t cr) { int i; - for (i = 0; i < ARRAY_SIZE(env->crf); i++) { - env->crf[i] = (cr >> (32 - ((i + 1) * 4))) & 0xF; + for (i = 0; i < ARRAY_SIZE(env->cr); i++) { + env->cr[i] = (cr >> (31 - i)) & 1; } } static inline uint32_t ppc_get_crf(const CPUPPCState *env, int i) { - return env->crf[i]; + uint32_t r; + r = env->cr[i * 4]; + r = (r << 1) | (env->cr[i * 4 + 1]); + r = (r << 1) | (env->cr[i * 4 + 2]); + r = (r << 1) | (env->cr[i * 4 + 3]); + return r; } static inline void ppc_set_crf(CPUPPCState *env, int i, uint32_t val) { - env->crf[i] = val; + env->cr[i * 4 + 0] = (val & 0x08) != 0; + env->cr[i * 4 + 1] = (val & 0x04) != 0; + env->cr[i * 4 + 2] = (val & 0x02) != 0; + env->cr[i * 4 + 3] = (val & 0x01) != 0; } static inline uint64_t ppc_dump_gpr(CPUPPCState *env, int gprn) @@ -1271,14 +1282,14 @@ static inline int cpu_mmu_index (CPUPPCState *env) /*****************************************************************************/ /* CRF definitions */ -#define CRF_LT 3 -#define CRF_GT 2 -#define CRF_EQ 1 -#define CRF_SO 0 -#define CRF_CH (1 << CRF_LT) -#define CRF_CL (1 << CRF_GT) -#define CRF_CH_OR_CL (1 << CRF_EQ) -#define CRF_CH_AND_CL (1 << CRF_SO) +#define CRF_LT 0 +#define CRF_GT 1 +#define CRF_EQ 2 +#define CRF_SO 3 +#define CRF_CH CRF_LT +#define CRF_CL 
CRF_GT +#define CRF_CH_OR_CL CRF_EQ +#define CRF_CH_AND_CL CRF_SO /* XER definitions */ #define XER_SO 31 diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c index 8cf321b..7894dc5 100644 --- a/target-ppc/fpu_helper.c +++ b/target-ppc/fpu_helper.c @@ -1098,8 +1098,8 @@ void helper_fcmpu(CPUPPCState *env, uint64_t arg1, uint64_t arg2, } env->fpscr &= ~(0x0F << FPSCR_FPRF); - env->fpscr |= (0x01 << FPSCR_FPRF) << fpcc; - ppc_set_crf(env, crfD, 1 << fpcc); + env->fpscr |= (0x08 << FPSCR_FPRF) >> fpcc; + ppc_set_crf(env, crfD, 0x08 >> fpcc); if (unlikely(fpcc == CRF_SO && (float64_is_signaling_nan(farg1.d) || @@ -1130,8 +1130,8 @@ void helper_fcmpo(CPUPPCState *env, uint64_t arg1, uint64_t arg2, } env->fpscr &= ~(0x0F << FPSCR_FPRF); - env->fpscr |= (0x01 << FPSCR_FPRF) << fpcc; - ppc_set_crf(env, crfD, 1 << fpcc); + env->fpscr |= (0x08 << FPSCR_FPRF) >> fpcc; + ppc_set_crf(env, crfD, 0x08 >> fpcc); if (unlikely(fpcc == CRF_SO)) { if (float64_is_signaling_nan(farg1.d) || @@ -1403,7 +1403,7 @@ static inline uint32_t efscmplt(CPUPPCState *env, uint32_t op1, uint32_t op2) u1.l = op1; u2.l = op2; - return float32_lt(u1.f, u2.f, &env->vec_status) ? 4 : 0; + return float32_lt(u1.f, u2.f, &env->vec_status); } static inline uint32_t efscmpgt(CPUPPCState *env, uint32_t op1, uint32_t op2) @@ -1412,7 +1412,7 @@ static inline uint32_t efscmpgt(CPUPPCState *env, uint32_t op1, uint32_t op2) u1.l = op1; u2.l = op2; - return float32_le(u1.f, u2.f, &env->vec_status) ? 0 : 4; + return !float32_le(u1.f, u2.f, &env->vec_status); } static inline uint32_t efscmpeq(CPUPPCState *env, uint32_t op1, uint32_t op2) @@ -1421,7 +1421,7 @@ static inline uint32_t efscmpeq(CPUPPCState *env, uint32_t op1, uint32_t op2) u1.l = op1; u2.l = op2; - return float32_eq(u1.f, u2.f, &env->vec_status) ? 4 : 0; + return float32_eq(u1.f, u2.f, &env->vec_status); } static inline uint32_t efststlt(CPUPPCState *env, uint32_t op1, uint32_t op2) @@ -1460,30 +1460,6 @@ HELPER_SINGLE_SPE_CMP(fscmpgt); /* efscmpeq */ HELPER_SINGLE_SPE_CMP(fscmpeq); -static inline uint32_t evcmp_merge(int t0, int t1) -{ - return (t0 << 3) | (t1 << 2) | ((t0 | t1) << 1) | (t0 & t1); -} - -#define HELPER_VECTOR_SPE_CMP(name) \ - uint32_t helper_ev##name(CPUPPCState *env, uint64_t op1, uint64_t op2) \ - { \ - return evcmp_merge(e##name(env, op1 >> 32, op2 >> 32), \ - e##name(env, op1, op2)); \ - } -/* evfststlt */ -HELPER_VECTOR_SPE_CMP(fststlt); -/* evfststgt */ -HELPER_VECTOR_SPE_CMP(fststgt); -/* evfststeq */ -HELPER_VECTOR_SPE_CMP(fststeq); -/* evfscmplt */ -HELPER_VECTOR_SPE_CMP(fscmplt); -/* evfscmpgt */ -HELPER_VECTOR_SPE_CMP(fscmpgt); -/* evfscmpeq */ -HELPER_VECTOR_SPE_CMP(fscmpeq); - /* Double-precision floating-point conversion */ uint64_t helper_efdcfsi(CPUPPCState *env, uint32_t val) { @@ -1725,7 +1701,7 @@ uint32_t helper_efdtstlt(CPUPPCState *env, uint64_t op1, uint64_t op2) u1.ll = op1; u2.ll = op2; - return float64_lt(u1.d, u2.d, &env->vec_status) ? 4 : 0; + return float64_lt(u1.d, u2.d, &env->vec_status); } uint32_t helper_efdtstgt(CPUPPCState *env, uint64_t op1, uint64_t op2) @@ -1734,7 +1710,7 @@ uint32_t helper_efdtstgt(CPUPPCState *env, uint64_t op1, uint64_t op2) u1.ll = op1; u2.ll = op2; - return float64_le(u1.d, u2.d, &env->vec_status) ? 
0 : 4; + return !float64_le(u1.d, u2.d, &env->vec_status); } uint32_t helper_efdtsteq(CPUPPCState *env, uint64_t op1, uint64_t op2) @@ -1743,7 +1719,7 @@ uint32_t helper_efdtsteq(CPUPPCState *env, uint64_t op1, uint64_t op2) u1.ll = op1; u2.ll = op2; - return float64_eq_quiet(u1.d, u2.d, &env->vec_status) ? 4 : 0; + return float64_eq_quiet(u1.d, u2.d, &env->vec_status); } uint32_t helper_efdcmplt(CPUPPCState *env, uint64_t op1, uint64_t op2) diff --git a/target-ppc/helper.h b/target-ppc/helper.h index ee748a1..dff7c1c 100644 --- a/target-ppc/helper.h +++ b/target-ppc/helper.h @@ -492,12 +492,6 @@ DEF_HELPER_3(efststeq, i32, env, i32, i32) DEF_HELPER_3(efscmplt, i32, env, i32, i32) DEF_HELPER_3(efscmpgt, i32, env, i32, i32) DEF_HELPER_3(efscmpeq, i32, env, i32, i32) -DEF_HELPER_3(evfststlt, i32, env, i64, i64) -DEF_HELPER_3(evfststgt, i32, env, i64, i64) -DEF_HELPER_3(evfststeq, i32, env, i64, i64) -DEF_HELPER_3(evfscmplt, i32, env, i64, i64) -DEF_HELPER_3(evfscmpgt, i32, env, i64, i64) -DEF_HELPER_3(evfscmpeq, i32, env, i64, i64) DEF_HELPER_2(efdcfsi, i64, env, i32) DEF_HELPER_2(efdcfsid, i64, env, i64) DEF_HELPER_2(efdcfui, i64, env, i32) diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index b76a895..96f2e7d 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -2580,7 +2580,7 @@ target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high, done: env->xer = (env->xer & ~0x7F) | i; if (update_Rc) { - env->crf[0] |= xer_so; + env->cr[CRF_SO] = xer_so; } return i; } diff --git a/target-ppc/machine.c b/target-ppc/machine.c index c801b82..9fa309a 100644 --- a/target-ppc/machine.c +++ b/target-ppc/machine.c @@ -132,6 +132,10 @@ static void cpu_pre_save(void *opaque) CPUPPCState *env = &cpu->env; int i; + for (i = 0; i < 8; i++) { + env->crf[i] = ppc_get_crf(env, i); + } + env->spr[SPR_LR] = env->lr; env->spr[SPR_CTR] = env->ctr; env->spr[SPR_XER] = env->xer; @@ -165,6 +169,11 @@ static int cpu_post_load(void *opaque, int version_id) * software has to take care of running QEMU in a compatible mode. 
*/ env->spr[SPR_PVR] = env->spr_cb[SPR_PVR].default_value; + + for (i = 0; i < 8; i++) { + ppc_set_crf(env, i, env->crf[i]); + } + env->lr = env->spr[SPR_LR]; env->ctr = env->spr[SPR_CTR]; env->xer = env->spr[SPR_XER]; diff --git a/target-ppc/translate.c b/target-ppc/translate.c index 0933c00..d8c9240 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -53,13 +53,13 @@ static char cpu_reg_names[10*3 + 22*4 /* GPR */ + 10*4 + 22*5 /* FPR */ + 2*(10*6 + 22*7) /* AVRh, AVRl */ + 10*5 + 22*6 /* VSR */ - + 8*5 /* CRF */]; + + 32*8 /* CR */]; static TCGv cpu_gpr[32]; static TCGv cpu_gprh[32]; static TCGv_i64 cpu_fpr[32]; static TCGv_i64 cpu_avrh[32], cpu_avrl[32]; static TCGv_i64 cpu_vsr[32]; -static TCGv_i32 cpu_crf[8]; +static TCGv_i32 cpu_cr[32]; static TCGv cpu_nip; static TCGv cpu_msr; static TCGv cpu_ctr; @@ -89,12 +89,13 @@ void ppc_translate_init(void) p = cpu_reg_names; cpu_reg_names_size = sizeof(cpu_reg_names); - for (i = 0; i < 8; i++) { - snprintf(p, cpu_reg_names_size, "crf%d", i); - cpu_crf[i] = tcg_global_mem_new_i32(TCG_AREG0, - offsetof(CPUPPCState, crf[i]), p); - p += 5; - cpu_reg_names_size -= 5; + for (i = 0; i < 32; i++) { + static const char names[] = "lt\0gt\0eq\0so"; + snprintf(p, cpu_reg_names_size, "cr%d[%s]", i >> 2, names + (i & 3) * 3); + cpu_cr[i] = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUPPCState, cr[i]), p); + p += 8; + cpu_reg_names_size -= 8; } for (i = 0; i < 32; i++) { @@ -252,17 +253,31 @@ static inline void gen_reset_fpstatus(void) static inline void gen_op_mfcr(TCGv_i32 dest, int first_cr, int shift) { - tcg_gen_shli_i32(dest, cpu_crf[first_cr >> 2], shift); + TCGv_i32 t0 = tcg_temp_new_i32(); + + tcg_gen_shli_i32(dest, cpu_cr[first_cr + 3], shift); + tcg_gen_shli_i32(t0, cpu_cr[first_cr + 2], shift + 1); + tcg_gen_or_i32(dest, dest, t0); + tcg_gen_shli_i32(t0, cpu_cr[first_cr + 1], shift + 2); + tcg_gen_or_i32(dest, dest, t0); + tcg_gen_shli_i32(t0, cpu_cr[first_cr], shift + 3); + tcg_temp_free_i32(t0); } static inline void gen_op_mtcr(int first_cr, TCGv_i32 src, int shift) { if (shift) { - tcg_gen_shri_i32(cpu_crf[first_cr >> 2], src, shift); - tcg_gen_andi_i32(cpu_crf[first_cr >> 2], cpu_crf[first_cr >> 2], 0x0F); + tcg_gen_shri_i32(cpu_cr[first_cr + 3], src, shift); + tcg_gen_andi_i32(cpu_cr[first_cr + 3], cpu_cr[first_cr + 3], 1); } else { - tcg_gen_andi_i32(cpu_crf[first_cr >> 2], src, 0x0F); + tcg_gen_andi_i32(cpu_cr[first_cr + 3], src, 1); } + tcg_gen_shri_i32(cpu_cr[first_cr + 2], src, shift + 1); + tcg_gen_andi_i32(cpu_cr[first_cr + 2], cpu_cr[first_cr + 2], 1); + tcg_gen_shri_i32(cpu_cr[first_cr + 1], src, shift + 2); + tcg_gen_andi_i32(cpu_cr[first_cr + 1], cpu_cr[first_cr + 1], 1); + tcg_gen_shri_i32(cpu_cr[first_cr], src, shift + 3); + tcg_gen_andi_i32(cpu_cr[first_cr], cpu_cr[first_cr], 1); } static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf, int set_rc) @@ -663,27 +678,19 @@ static opc_handler_t invalid_handler = { static inline void gen_op_cmp(TCGv arg0, TCGv arg1, int s, int crf) { TCGv t0 = tcg_temp_new(); - TCGv_i32 t1 = tcg_temp_new_i32(); - tcg_gen_trunc_tl_i32(cpu_crf[crf], cpu_so); + tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_SO], cpu_so); tcg_gen_setcond_tl((s ? TCG_COND_LT: TCG_COND_LTU), t0, arg0, arg1); - tcg_gen_trunc_tl_i32(t1, t0); - tcg_gen_shli_i32(t1, t1, CRF_LT); - tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1); + tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_LT], t0); tcg_gen_setcond_tl((s ? 
TCG_COND_GT: TCG_COND_GTU), t0, arg0, arg1); - tcg_gen_trunc_tl_i32(t1, t0); - tcg_gen_shli_i32(t1, t1, CRF_GT); - tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1); + tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_GT], t0); tcg_gen_setcond_tl(TCG_COND_EQ, t0, arg0, arg1); - tcg_gen_trunc_tl_i32(t1, t0); - tcg_gen_shli_i32(t1, t1, CRF_EQ); - tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1); + tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_EQ], t0); tcg_temp_free(t0); - tcg_temp_free_i32(t1); } static inline void gen_op_cmpi(TCGv arg0, target_ulong arg1, int s, int crf) @@ -695,19 +702,26 @@ static inline void gen_op_cmpi(TCGv arg0, target_ulong arg1, int s, int crf) static inline void gen_op_cmp32(TCGv arg0, TCGv arg1, int s, int crf) { - TCGv t0, t1; - t0 = tcg_temp_new(); - t1 = tcg_temp_new(); - if (s) { - tcg_gen_ext32s_tl(t0, arg0); - tcg_gen_ext32s_tl(t1, arg1); - } else { - tcg_gen_ext32u_tl(t0, arg0); - tcg_gen_ext32u_tl(t1, arg1); - } - gen_op_cmp(t0, t1, s, crf); - tcg_temp_free(t1); - tcg_temp_free(t0); + TCGv_i32 t0, t1; + + t0 = tcg_temp_new_i32(); + t1 = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(t0, arg0); + tcg_gen_trunc_tl_i32(t1, arg1); + + tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_SO], cpu_so); + + tcg_gen_setcond_i32((s ? TCG_COND_LT: TCG_COND_LTU), + cpu_cr[crf * 4 + CRF_LT], t0, t1); + + tcg_gen_setcond_i32((s ? TCG_COND_GT: TCG_COND_GTU), + cpu_cr[crf * 4 + CRF_GT], t0, t1); + + tcg_gen_setcond_i32(TCG_COND_EQ, + cpu_cr[crf * 4 + CRF_EQ], t0, t1); + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t0); } static inline void gen_op_cmpi32(TCGv arg0, target_ulong arg1, int s, int crf) @@ -778,15 +792,10 @@ static void gen_cmpli(DisasContext *ctx) static void gen_isel(DisasContext *ctx) { uint32_t bi = rC(ctx->opcode); - uint32_t mask; - TCGv_i32 t0; TCGv t1, true_op, zero; - mask = 0x08 >> (bi & 0x03); - t0 = tcg_temp_new_i32(); - tcg_gen_andi_i32(t0, cpu_crf[bi >> 2], mask); t1 = tcg_temp_new(); - tcg_gen_extu_i32_tl(t1, t0); + tcg_gen_extu_i32_tl(t1, cpu_cr[bi]); zero = tcg_const_tl(0); if (rA(ctx->opcode) == 0) { true_op = zero; @@ -796,7 +805,6 @@ static void gen_isel(DisasContext *ctx) tcg_gen_movcond_tl(TCG_COND_NE, cpu_gpr[rD(ctx->opcode)], t1, zero, true_op, cpu_gpr[rB(ctx->opcode)]); tcg_temp_free(t1); - tcg_temp_free_i32(t0); tcg_temp_free(zero); } @@ -2318,21 +2326,29 @@ GEN_FLOAT_B(rim, 0x08, 0x0F, 1, PPC_FLOAT_EXT); static void gen_ftdiv(DisasContext *ctx) { + TCGv_i32 crf; if (unlikely(!ctx->fpu_enabled)) { gen_exception(ctx, POWERPC_EXCP_FPU); return; } - gen_helper_ftdiv(cpu_crf[crfD(ctx->opcode)], cpu_fpr[rA(ctx->opcode)], + crf = tcg_temp_new_i32(); + gen_helper_ftdiv(crf, cpu_fpr[rA(ctx->opcode)], cpu_fpr[rB(ctx->opcode)]); + gen_op_mtcr(crfD(ctx->opcode) << 2, crf, 0); + tcg_temp_free_i32(crf); } static void gen_ftsqrt(DisasContext *ctx) { + TCGv_i32 crf; if (unlikely(!ctx->fpu_enabled)) { gen_exception(ctx, POWERPC_EXCP_FPU); return; } - gen_helper_ftsqrt(cpu_crf[crfD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)]); + crf = tcg_temp_new_i32(); + gen_helper_ftsqrt(crf, cpu_fpr[rB(ctx->opcode)]); + gen_op_mtcr(crfD(ctx->opcode) << 2, crf, 0); + tcg_temp_free_i32(crf); } @@ -3330,10 +3346,13 @@ static void gen_conditional_store(DisasContext *ctx, TCGv EA, { int l1; - tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so); + tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so); + tcg_gen_movi_i32(cpu_cr[CRF_LT], 0); + tcg_gen_movi_i32(cpu_cr[CRF_EQ], 0); + tcg_gen_movi_i32(cpu_cr[CRF_GT], 0); l1 = gen_new_label(); tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, l1); - tcg_gen_ori_i32(cpu_crf[0], 
cpu_crf[0], 1 << CRF_EQ); + tcg_gen_movi_i32(cpu_cr[CRF_EQ], 1); #if defined(TARGET_PPC64) if (size == 8) { gen_qemu_st64(ctx, cpu_gpr[reg], EA); @@ -3900,17 +3919,11 @@ static inline void gen_bcond(DisasContext *ctx, int type) if ((bo & 0x10) == 0) { /* Test CR */ uint32_t bi = BI(ctx->opcode); - uint32_t mask = 0x08 >> (bi & 0x03); - TCGv_i32 temp = tcg_temp_new_i32(); - if (bo & 0x8) { - tcg_gen_andi_i32(temp, cpu_crf[bi >> 2], mask); - tcg_gen_brcondi_i32(TCG_COND_EQ, temp, 0, l1); + tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[bi], 0, l1); } else { - tcg_gen_andi_i32(temp, cpu_crf[bi >> 2], mask); - tcg_gen_brcondi_i32(TCG_COND_NE, temp, 0, l1); + tcg_gen_brcondi_i32(TCG_COND_NE, cpu_cr[bi], 0, l1); } - tcg_temp_free_i32(temp); } gen_update_cfar(ctx, ctx->nip); if (type == BCOND_IM) { @@ -3959,35 +3972,21 @@ static void gen_bctar(DisasContext *ctx) } /*** Condition register logical ***/ -#define GEN_CRLOGIC(name, tcg_op, opc) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ - uint8_t bitmask; \ - int sh; \ - TCGv_i32 t0, t1; \ - sh = (crbD(ctx->opcode) & 0x03) - (crbA(ctx->opcode) & 0x03); \ - t0 = tcg_temp_new_i32(); \ - if (sh > 0) \ - tcg_gen_shri_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2], sh); \ - else if (sh < 0) \ - tcg_gen_shli_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2], -sh); \ - else \ - tcg_gen_mov_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2]); \ - t1 = tcg_temp_new_i32(); \ - sh = (crbD(ctx->opcode) & 0x03) - (crbB(ctx->opcode) & 0x03); \ - if (sh > 0) \ - tcg_gen_shri_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2], sh); \ - else if (sh < 0) \ - tcg_gen_shli_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2], -sh); \ - else \ - tcg_gen_mov_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2]); \ - tcg_op(t0, t0, t1); \ - bitmask = 0x08 >> (crbD(ctx->opcode) & 0x03); \ - tcg_gen_andi_i32(t0, t0, bitmask); \ - tcg_gen_andi_i32(t1, cpu_crf[crbD(ctx->opcode) >> 2], ~bitmask); \ - tcg_gen_or_i32(cpu_crf[crbD(ctx->opcode) >> 2], t0, t1); \ - tcg_temp_free_i32(t0); \ - tcg_temp_free_i32(t1); \ +#define GEN_CRLOGIC(name, tcg_op, opc) \ +static void glue(gen_, name)(DisasContext *ctx) \ +{ \ + tcg_op(cpu_cr[crbD(ctx->opcode)], cpu_cr[crbA(ctx->opcode)], \ + cpu_cr[crbB(ctx->opcode)]); \ +} + +#define GEN_CRLOGIC_MASK(name, tcg_op, opc) \ +static void glue(gen_, name)(DisasContext *ctx) \ +{ \ + TCGv_i32 one = tcg_const_i32(1); \ + tcg_op(cpu_cr[crbD(ctx->opcode)], cpu_cr[crbA(ctx->opcode)], \ + cpu_cr[crbB(ctx->opcode)]); \ + tcg_gen_and_i32(cpu_cr[crbD(ctx->opcode)], cpu_cr[crbD(ctx->opcode)], one); \ + tcg_temp_free_i32(one); \ } /* crand */ @@ -3995,22 +3994,26 @@ GEN_CRLOGIC(crand, tcg_gen_and_i32, 0x08); /* crandc */ GEN_CRLOGIC(crandc, tcg_gen_andc_i32, 0x04); /* creqv */ -GEN_CRLOGIC(creqv, tcg_gen_eqv_i32, 0x09); +GEN_CRLOGIC_MASK(creqv, tcg_gen_eqv_i32, 0x09); /* crnand */ -GEN_CRLOGIC(crnand, tcg_gen_nand_i32, 0x07); +GEN_CRLOGIC_MASK(crnand, tcg_gen_nand_i32, 0x07); /* crnor */ -GEN_CRLOGIC(crnor, tcg_gen_nor_i32, 0x01); +GEN_CRLOGIC_MASK(crnor, tcg_gen_nor_i32, 0x01); /* cror */ GEN_CRLOGIC(cror, tcg_gen_or_i32, 0x0E); /* crorc */ -GEN_CRLOGIC(crorc, tcg_gen_orc_i32, 0x0D); +GEN_CRLOGIC_MASK(crorc, tcg_gen_orc_i32, 0x0D); /* crxor */ GEN_CRLOGIC(crxor, tcg_gen_xor_i32, 0x06); /* mcrf */ static void gen_mcrf(DisasContext *ctx) { - tcg_gen_mov_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfS(ctx->opcode)]); + int i; + for (i = 0; i < 4; i++) { + tcg_gen_mov_i32(cpu_cr[crfD(ctx->opcode) * 4 + i], + cpu_cr[crfS(ctx->opcode) * 4 + i]); + } } /*** System linkage ***/ @@ -4163,20 +4166,12 @@ static void 
gen_write_xer(TCGv src) /* mcrxr */ static void gen_mcrxr(DisasContext *ctx) { - TCGv_i32 t0 = tcg_temp_new_i32(); - TCGv_i32 t1 = tcg_temp_new_i32(); - TCGv_i32 dst = cpu_crf[crfD(ctx->opcode)]; - - tcg_gen_trunc_tl_i32(t0, cpu_so); - tcg_gen_trunc_tl_i32(t1, cpu_ov); - tcg_gen_trunc_tl_i32(dst, cpu_ca); - tcg_gen_shli_i32(t0, t0, 3); - tcg_gen_shli_i32(t1, t1, 2); - tcg_gen_shli_i32(dst, dst, 1); - tcg_gen_or_i32(dst, dst, t0); - tcg_gen_or_i32(dst, dst, t1); - tcg_temp_free_i32(t0); - tcg_temp_free_i32(t1); + int crf = crfD(ctx->opcode); + + tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_LT], cpu_so); + tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_GT], cpu_ov); + tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_EQ], cpu_ca); + tcg_gen_movi_i32(cpu_cr[crf * 4 + CRF_SO], 0); tcg_gen_movi_tl(cpu_so, 0); tcg_gen_movi_tl(cpu_ov, 0); @@ -6351,11 +6346,13 @@ static void gen_tlbsx_40x(DisasContext *ctx) gen_helper_4xx_tlbsx(cpu_gpr[rD(ctx->opcode)], cpu_env, t0); tcg_temp_free(t0); if (Rc(ctx->opcode)) { - int l1 = gen_new_label(); - tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rD(ctx->opcode)], -1, l1); - tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 0x02); - gen_set_label(l1); + t0 = tcg_temp_new(); + tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so); + tcg_gen_movi_i32(cpu_cr[CRF_LT], 0); + tcg_gen_movi_i32(cpu_cr[CRF_GT], 0); + tcg_gen_setcondi_tl(TCG_COND_EQ, t0, cpu_gpr[rD(ctx->opcode)], -1); + tcg_gen_trunc_tl_i32(cpu_cr[CRF_EQ], t0); + tcg_temp_free(t0); } #endif } @@ -6432,11 +6429,13 @@ static void gen_tlbsx_440(DisasContext *ctx) gen_helper_440_tlbsx(cpu_gpr[rD(ctx->opcode)], cpu_env, t0); tcg_temp_free(t0); if (Rc(ctx->opcode)) { - int l1 = gen_new_label(); - tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rD(ctx->opcode)], -1, l1); - tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 0x02); - gen_set_label(l1); + t0 = tcg_temp_new(); + tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so); + tcg_gen_movi_i32(cpu_cr[CRF_LT], 0); + tcg_gen_movi_i32(cpu_cr[CRF_GT], 0); + tcg_gen_setcondi_tl(TCG_COND_EQ, t0, cpu_gpr[rD(ctx->opcode)], -1); + tcg_gen_trunc_tl_i32(cpu_cr[CRF_EQ], t0); + tcg_temp_free(t0); } #endif } @@ -7402,7 +7401,7 @@ GEN_VXFORM(vpmsumd, 4, 19) static void gen_##op(DisasContext *ctx) \ { \ TCGv_ptr ra, rb, rd; \ - TCGv_i32 ps; \ + TCGv_i32 ps, crf; \ \ if (unlikely(!ctx->altivec_enabled)) { \ gen_exception(ctx, POWERPC_EXCP_VPU); \ @@ -7414,13 +7413,16 @@ static void gen_##op(DisasContext *ctx) \ rd = gen_avr_ptr(rD(ctx->opcode)); \ \ ps = tcg_const_i32((ctx->opcode & 0x200) != 0); \ + crf = tcg_temp_new_i32(); \ \ - gen_helper_##op(cpu_crf[6], rd, ra, rb, ps); \ + gen_helper_##op(crf, rd, ra, rb, ps); \ + gen_op_mtcr(6 << 2, crf, 0); \ \ tcg_temp_free_ptr(ra); \ tcg_temp_free_ptr(rb); \ tcg_temp_free_ptr(rd); \ tcg_temp_free_i32(ps); \ + tcg_temp_free_i32(crf); \ } GEN_BCD(bcdadd) @@ -8248,6 +8250,7 @@ static void gen_##name(DisasContext *ctx) \ static void gen_##name(DisasContext *ctx) \ { \ TCGv_ptr ra, rb; \ + TCGv_i32 tmp; \ if (unlikely(!ctx->fpu_enabled)) { \ gen_exception(ctx, POWERPC_EXCP_FPU); \ return; \ @@ -8255,8 +8258,10 @@ static void gen_##name(DisasContext *ctx) \ gen_update_nip(ctx, ctx->nip - 4); \ ra = gen_fprp_ptr(rA(ctx->opcode)); \ rb = gen_fprp_ptr(rB(ctx->opcode)); \ - gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \ - cpu_env, ra, rb); \ + tmp = tcg_temp_new_i32(); \ + gen_helper_##name(tmp, cpu_env, ra, rb); \ + gen_op_mtcr(crfD(ctx->opcode) << 2, tmp, 0); \ + tcg_temp_free_i32(tmp); \ tcg_temp_free_ptr(ra); \ 
tcg_temp_free_ptr(rb); \ } @@ -8265,7 +8270,7 @@ static void gen_##name(DisasContext *ctx) \ static void gen_##name(DisasContext *ctx) \ { \ TCGv_ptr ra; \ - TCGv_i32 dcm; \ + TCGv_i32 dcm, tmp; \ if (unlikely(!ctx->fpu_enabled)) { \ gen_exception(ctx, POWERPC_EXCP_FPU); \ return; \ @@ -8273,8 +8278,10 @@ static void gen_##name(DisasContext *ctx) \ gen_update_nip(ctx, ctx->nip - 4); \ ra = gen_fprp_ptr(rA(ctx->opcode)); \ dcm = tcg_const_i32(DCM(ctx->opcode)); \ - gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \ - cpu_env, ra, dcm); \ + tmp = tcg_temp_new_i32(); \ + gen_helper_##name(tmp, cpu_env, ra, dcm); \ + gen_op_mtcr(crfD(ctx->opcode) << 2, tmp, 0); \ + tcg_temp_free_i32(tmp); \ tcg_temp_free_ptr(ra); \ tcg_temp_free_i32(dcm); \ } @@ -8699,37 +8706,32 @@ GEN_SPEOP_ARITH_IMM2(evsubifw, tcg_gen_subi_i32); #define GEN_SPEOP_COMP(name, tcg_cond) \ static inline void gen_##name(DisasContext *ctx) \ { \ + TCGv tmp = tcg_temp_new(); \ + \ if (unlikely(!ctx->spe_enabled)) { \ gen_exception(ctx, POWERPC_EXCP_SPEU); \ return; \ } \ - int l1 = gen_new_label(); \ - int l2 = gen_new_label(); \ - int l3 = gen_new_label(); \ - int l4 = gen_new_label(); \ \ tcg_gen_ext32s_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); \ tcg_gen_ext32s_tl(cpu_gpr[rB(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); \ tcg_gen_ext32s_tl(cpu_gprh[rA(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]); \ tcg_gen_ext32s_tl(cpu_gprh[rB(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]); \ \ - tcg_gen_brcond_tl(tcg_cond, cpu_gpr[rA(ctx->opcode)], \ - cpu_gpr[rB(ctx->opcode)], l1); \ - tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 0); \ - tcg_gen_br(l2); \ - gen_set_label(l1); \ - tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], \ - CRF_CL | CRF_CH_OR_CL | CRF_CH_AND_CL); \ - gen_set_label(l2); \ - tcg_gen_brcond_tl(tcg_cond, cpu_gprh[rA(ctx->opcode)], \ - cpu_gprh[rB(ctx->opcode)], l3); \ - tcg_gen_andi_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)], \ - ~(CRF_CH | CRF_CH_AND_CL)); \ - tcg_gen_br(l4); \ - gen_set_label(l3); \ - tcg_gen_ori_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)], \ - CRF_CH | CRF_CH_OR_CL); \ - gen_set_label(l4); \ + tcg_gen_setcond_tl(tcg_cond, tmp, \ + cpu_gpr[rA(ctx->opcode)], \ + cpu_gpr[rB(ctx->opcode)]); \ + tcg_gen_trunc_tl_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL], tmp); \ + tcg_gen_setcond_tl(tcg_cond, tmp, \ + cpu_gprh[rA(ctx->opcode)], \ + cpu_gprh[rB(ctx->opcode)]); \ + tcg_gen_trunc_tl_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], tmp); \ + tcg_gen_or_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_OR_CL], \ + cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], \ + cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]); \ + tcg_gen_and_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_AND_CL], \ + cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], \ + cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]); \ } GEN_SPEOP_COMP(evcmpgtu, TCG_COND_GTU); GEN_SPEOP_COMP(evcmpgts, TCG_COND_GT); @@ -8800,22 +8802,20 @@ static inline void gen_evsel(DisasContext *ctx) int l2 = gen_new_label(); int l3 = gen_new_label(); int l4 = gen_new_label(); - TCGv_i32 t0 = tcg_temp_local_new_i32(); - tcg_gen_andi_i32(t0, cpu_crf[ctx->opcode & 0x07], 1 << 3); - tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1); + + tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[(ctx->opcode & 0x07) * 4], 0, l1); tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]); tcg_gen_br(l2); gen_set_label(l1); tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]); gen_set_label(l2); - tcg_gen_andi_i32(t0, cpu_crf[ctx->opcode & 0x07], 1 << 2); - tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, 
l3); + + tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[(ctx->opcode & 0x07) * 4 + 1], 0, l3); tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); tcg_gen_br(l4); gen_set_label(l3); tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); gen_set_label(l4); - tcg_temp_free_i32(t0); } static void gen_evsel0(DisasContext *ctx) @@ -9397,9 +9397,12 @@ static inline void gen_##name(DisasContext *ctx) \ t0 = tcg_temp_new_i32(); \ t1 = tcg_temp_new_i32(); \ \ + tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_LT], 0); \ + tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_GT], 0); \ + tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_SO], 0); \ tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); \ tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); \ - gen_helper_##name(cpu_crf[crfD(ctx->opcode)], cpu_env, t0, t1); \ + gen_helper_##name(cpu_cr[crfD(ctx->opcode) * 4 + CRF_EQ], cpu_env, t0, t1); \ \ tcg_temp_free_i32(t0); \ tcg_temp_free_i32(t1); \ @@ -9416,10 +9419,39 @@ static inline void gen_##name(DisasContext *ctx) \ t1 = tcg_temp_new_i64(); \ gen_load_gpr64(t0, rA(ctx->opcode)); \ gen_load_gpr64(t1, rB(ctx->opcode)); \ - gen_helper_##name(cpu_crf[crfD(ctx->opcode)], cpu_env, t0, t1); \ + tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_LT], 0); \ + tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_GT], 0); \ + tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_SO], 0); \ + gen_helper_##name(cpu_cr[crfD(ctx->opcode) * 4 + CRF_EQ], cpu_env, \ + t0, t1); \ tcg_temp_free_i64(t0); \ tcg_temp_free_i64(t1); \ } +#define GEN_SPEFPUOP_COMP_V64(name, helper) \ +static inline void gen_##name(DisasContext *ctx) \ +{ \ + TCGv_i32 t0, t1; \ + if (unlikely(!ctx->spe_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_SPEU); \ + return; \ + } \ + t0 = tcg_temp_new_i32(); \ + t1 = tcg_temp_new_i32(); \ + tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); \ + tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); \ + gen_helper_##helper(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL], cpu_env, t0, t1); \ + tcg_gen_trunc_tl_i32(t0, cpu_gprh[rA(ctx->opcode)]); \ + tcg_gen_trunc_tl_i32(t1, cpu_gprh[rB(ctx->opcode)]); \ + gen_helper_##helper(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], cpu_env, t0, t1); \ + tcg_temp_free_i32(t0); \ + tcg_temp_free_i32(t1); \ + tcg_gen_or_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_OR_CL], \ + cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], \ + cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]); \ + tcg_gen_and_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_AND_CL], \ + cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], \ + cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]); \ +} /* Single precision floating-point vectors operations */ /* Arithmetic */ @@ -9474,12 +9506,12 @@ GEN_SPEFPUOP_CONV_64_64(evfsctuiz); GEN_SPEFPUOP_CONV_64_64(evfsctsiz); /* Comparison */ -GEN_SPEFPUOP_COMP_64(evfscmpgt); -GEN_SPEFPUOP_COMP_64(evfscmplt); -GEN_SPEFPUOP_COMP_64(evfscmpeq); -GEN_SPEFPUOP_COMP_64(evfststgt); -GEN_SPEFPUOP_COMP_64(evfststlt); -GEN_SPEFPUOP_COMP_64(evfststeq); +GEN_SPEFPUOP_COMP_V64(evfscmpgt, efscmpgt); +GEN_SPEFPUOP_COMP_V64(evfscmplt, efscmplt); +GEN_SPEFPUOP_COMP_V64(evfscmpeq, efscmpeq); +GEN_SPEFPUOP_COMP_V64(evfststgt, efststgt); +GEN_SPEFPUOP_COMP_V64(evfststlt, efststlt); +GEN_SPEFPUOP_COMP_V64(evfststeq, efststeq); /* Opcodes definitions */ GEN_SPE(evfsadd, evfssub, 0x00, 0x0A, 0x00000000, 0x00000000, PPC_SPE_SINGLE); // -- 1.8.3.1