On 8/28/2014 12:15 PM, Paolo Bonzini wrote: > This makes comparisons much smaller and faster. The speedup is > approximately 10% on user-mode emulation on x86 host, 3-4% on PPC. > > Note that CRF_* constants are flipped to match PowerPC's big > bit-endianness. Previously, the CR register was effectively stored > in mixed endianness, so now there is less indirection going on. > > Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
There are some issues with this patch -- it doesn't compile due to some typing issues. There are also some functional issues. Some details are below. (nit) Also, it doesn't pass checkpatch.pl. > --- > linux-user/main.c | 4 +- > target-ppc/cpu.h | 33 ++++-- > target-ppc/fpu_helper.c | 39 ++---- > target-ppc/helper.h | 6 - > target-ppc/int_helper.c | 2 +- > target-ppc/machine.c | 9 ++ > target-ppc/translate.c | 307 > +++++++++++++++++++++++++----------------- > 7 files changed, 204 insertions(+), 196 deletions(-) > > diff --git a/linux-user/main.c b/linux-user/main.c > index 152c031..b403f24 100644 > --- a/linux-user/main.c > +++ b/linux-user/main.c > @@ -1929,7 +1929,7 @@ void cpu_loop(CPUPPCState *env) > * PPC ABI uses overflow flag in cr0 to signal an error > * in syscalls. > */ > - env->crf[0] &= ~0x1; > + env->cr[CRF_SO] = 0; > ret = do_syscall(env, env->gpr[0], env->gpr[3], env->gpr[4], > env->gpr[5], env->gpr[6], env->gpr[7], > env->gpr[8], 0, 0); > @@ -1939,7 +1939,7 @@ void cpu_loop(CPUPPCState *env) > break; > } > if (ret > (target_ulong)(-515)) { > - env->crf[0] |= 0x1; > + env->cr[CRF_SO] = 1; > ret = -ret; > } > env->gpr[3] = ret; > diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h > index 05c29b2..67510e8 100644 > --- a/target-ppc/cpu.h > +++ b/target-ppc/cpu.h > @@ -939,7 +939,7 @@ struct CPUPPCState { > /* CTR */ > target_ulong ctr; > /* condition register */ > - uint32_t crf[8]; > + uint32_t cr[32]; > #if defined(TARGET_PPC64) > /* CFAR */ > target_ulong cfar; > @@ -1058,6 +1058,9 @@ struct CPUPPCState { > uint64_t dtl_addr, dtl_size; > #endif /* TARGET_PPC64 */ > > + /* condition register, for migration compatibility */ > + uint32_t crf[8]; > + > int error_code; > uint32_t pending_interrupts; > #if !defined(CONFIG_USER_ONLY) > @@ -1200,12 +1203,20 @@ void store_fpscr(CPUPPCState *env, uint64_t arg, > uint32_t mask); > > static inline uint32_t ppc_get_crf(const CPUPPCState *env, int i) > { > - return env->crf[i]; > + uint32_t r; > + r = env->cr[i * 
4]; > + r = (r << 1) | (env->cr[i * 4 + 1]); > + r = (r << 1) | (env->cr[i * 4 + 2]); > + r = (r << 1) | (env->cr[i * 4 + 3]); > + return r; > } > > static inline void ppc_set_crf(CPUPPCState *env, int i, uint32_t val) > { > - env->crf[i] = val; > + env->cr[i * 4 + 0] = (val & 0x08) != 0; > + env->cr[i * 4 + 1] = (val & 0x04) != 0; > + env->cr[i * 4 + 2] = (val & 0x02) != 0; > + env->cr[i * 4 + 3] = (val & 0x01) != 0; > } > > static inline uint64_t ppc_dump_gpr(CPUPPCState *env, int gprn) > @@ -1256,14 +1267,14 @@ static inline int cpu_mmu_index (CPUPPCState *env) > > > /*****************************************************************************/ > /* CRF definitions */ > -#define CRF_LT 3 > -#define CRF_GT 2 > -#define CRF_EQ 1 > -#define CRF_SO 0 > -#define CRF_CH (1 << CRF_LT) > -#define CRF_CL (1 << CRF_GT) > -#define CRF_CH_OR_CL (1 << CRF_EQ) > -#define CRF_CH_AND_CL (1 << CRF_SO) > +#define CRF_LT 0 > +#define CRF_GT 1 > +#define CRF_EQ 2 > +#define CRF_SO 3 > +#define CRF_CH CRF_LT > +#define CRF_CL CRF_GT > +#define CRF_CH_OR_CL CRF_EQ > +#define CRF_CH_AND_CL CRF_SO > > /* XER definitions */ > #define XER_SO 31 > diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c > index 1ccbcf3..9574ebe 100644 > --- a/target-ppc/fpu_helper.c > +++ b/target-ppc/fpu_helper.c > @@ -1098,8 +1098,8 @@ void helper_fcmpu(CPUPPCState *env, uint64_t arg1, > uint64_t arg2, > } > > env->fpscr &= ~(0x0F << FPSCR_FPRF); > - env->fpscr |= (0x01 << FPSCR_FPRF) << ret; > - ppc_set_crf(env, crfD, 1 << ret); > + env->fpscr |= (0x08 << FPSCR_FPRF) >> ret; > + ppc_set_crf(env, crfD, 0x08 >> ret); > > if (unlikely(ret == CRF_SO > && (float64_is_signaling_nan(farg1.d) || > @@ -1130,8 +1130,8 @@ void helper_fcmpo(CPUPPCState *env, uint64_t arg1, > uint64_t arg2, > } > > env->fpscr &= ~(0x0F << FPSCR_FPRF); > - env->fpscr |= (0x01 << FPSCR_FPRF) << ret; > - ppc_set_crf(env, crfD, 1 << ret); > + env->fpscr |= (0x08 << FPSCR_FPRF) >> ret; > + ppc_set_crf(env, crfD, 0x08 >> ret); > > 
if (unlikely(ret == CRF_SO)) { > if (float64_is_signaling_nan(farg1.d) || > @@ -1403,7 +1403,7 @@ static inline uint32_t efscmplt(CPUPPCState *env, > uint32_t op1, uint32_t op2) > > u1.l = op1; > u2.l = op2; > - return float32_lt(u1.f, u2.f, &env->vec_status) ? 4 : 0; > + return float32_lt(u1.f, u2.f, &env->vec_status); > } > > static inline uint32_t efscmpgt(CPUPPCState *env, uint32_t op1, uint32_t op2) > @@ -1412,7 +1412,7 @@ static inline uint32_t efscmpgt(CPUPPCState *env, > uint32_t op1, uint32_t op2) > > u1.l = op1; > u2.l = op2; > - return float32_le(u1.f, u2.f, &env->vec_status) ? 0 : 4; > + return !float32_le(u1.f, u2.f, &env->vec_status); > } > > static inline uint32_t efscmpeq(CPUPPCState *env, uint32_t op1, uint32_t op2) > @@ -1421,7 +1421,7 @@ static inline uint32_t efscmpeq(CPUPPCState *env, > uint32_t op1, uint32_t op2) > > u1.l = op1; > u2.l = op2; > - return float32_eq(u1.f, u2.f, &env->vec_status) ? 4 : 0; > + return float32_eq(u1.f, u2.f, &env->vec_status); > } > > static inline uint32_t efststlt(CPUPPCState *env, uint32_t op1, uint32_t op2) > @@ -1465,25 +1465,6 @@ static inline uint32_t evcmp_merge(int t0, int t1) > return (t0 << 3) | (t1 << 2) | ((t0 | t1) << 1) | (t0 & t1); > } > > -#define HELPER_VECTOR_SPE_CMP(name) \ > - uint32_t helper_ev##name(CPUPPCState *env, uint64_t op1, uint64_t op2) \ > - { \ > - return evcmp_merge(e##name(env, op1 >> 32, op2 >> 32), \ > - e##name(env, op1, op2)); \ > - } > -/* evfststlt */ > -HELPER_VECTOR_SPE_CMP(fststlt); > -/* evfststgt */ > -HELPER_VECTOR_SPE_CMP(fststgt); > -/* evfststeq */ > -HELPER_VECTOR_SPE_CMP(fststeq); > -/* evfscmplt */ > -HELPER_VECTOR_SPE_CMP(fscmplt); > -/* evfscmpgt */ > -HELPER_VECTOR_SPE_CMP(fscmpgt); > -/* evfscmpeq */ > -HELPER_VECTOR_SPE_CMP(fscmpeq); > - > /* Double-precision floating-point conversion */ > uint64_t helper_efdcfsi(CPUPPCState *env, uint32_t val) > { > @@ -1725,7 +1706,7 @@ uint32_t helper_efdtstlt(CPUPPCState *env, uint64_t > op1, uint64_t op2) > > u1.ll = 
op1; > u2.ll = op2; > - return float64_lt(u1.d, u2.d, &env->vec_status) ? 4 : 0; > + return float64_lt(u1.d, u2.d, &env->vec_status); > } > > uint32_t helper_efdtstgt(CPUPPCState *env, uint64_t op1, uint64_t op2) > @@ -1734,7 +1715,7 @@ uint32_t helper_efdtstgt(CPUPPCState *env, uint64_t > op1, uint64_t op2) > > u1.ll = op1; > u2.ll = op2; > - return float64_le(u1.d, u2.d, &env->vec_status) ? 0 : 4; > + return !float64_le(u1.d, u2.d, &env->vec_status); > } > > uint32_t helper_efdtsteq(CPUPPCState *env, uint64_t op1, uint64_t op2) > @@ -1743,7 +1724,7 @@ uint32_t helper_efdtsteq(CPUPPCState *env, uint64_t > op1, uint64_t op2) > > u1.ll = op1; > u2.ll = op2; > - return float64_eq_quiet(u1.d, u2.d, &env->vec_status) ? 4 : 0; > + return float64_eq_quiet(u1.d, u2.d, &env->vec_status); > } > > uint32_t helper_efdcmplt(CPUPPCState *env, uint64_t op1, uint64_t op2) > diff --git a/target-ppc/helper.h b/target-ppc/helper.h > index 5342f13..8d6a92b 100644 > --- a/target-ppc/helper.h > +++ b/target-ppc/helper.h > @@ -493,12 +493,6 @@ DEF_HELPER_3(efststeq, i32, env, i32, i32) > DEF_HELPER_3(efscmplt, i32, env, i32, i32) > DEF_HELPER_3(efscmpgt, i32, env, i32, i32) > DEF_HELPER_3(efscmpeq, i32, env, i32, i32) > -DEF_HELPER_3(evfststlt, i32, env, i64, i64) > -DEF_HELPER_3(evfststgt, i32, env, i64, i64) > -DEF_HELPER_3(evfststeq, i32, env, i64, i64) > -DEF_HELPER_3(evfscmplt, i32, env, i64, i64) > -DEF_HELPER_3(evfscmpgt, i32, env, i64, i64) > -DEF_HELPER_3(evfscmpeq, i32, env, i64, i64) > DEF_HELPER_2(efdcfsi, i64, env, i32) > DEF_HELPER_2(efdcfsid, i64, env, i64) > DEF_HELPER_2(efdcfui, i64, env, i32) > diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c > index 2287064..d3ace6a 100644 > --- a/target-ppc/int_helper.c > +++ b/target-ppc/int_helper.c > @@ -2602,7 +2602,7 @@ target_ulong helper_dlmzb(CPUPPCState *env, > target_ulong high, > done: > env->xer = (env->xer & ~0x7F) | i; > if (update_Rc) { > - env->crf[0] |= xer_so; > + env->cr[CRF_SO] = xer_so; > } > return 
i; > } > diff --git a/target-ppc/machine.c b/target-ppc/machine.c > index c801b82..9fa309a 100644 > --- a/target-ppc/machine.c > +++ b/target-ppc/machine.c > @@ -132,6 +132,10 @@ static void cpu_pre_save(void *opaque) > CPUPPCState *env = &cpu->env; > int i; > > + for (i = 0; i < 8; i++) { > + env->crf[i] = ppc_get_crf(env, i); > + } > + > env->spr[SPR_LR] = env->lr; > env->spr[SPR_CTR] = env->ctr; > env->spr[SPR_XER] = env->xer; > @@ -165,6 +169,11 @@ static int cpu_post_load(void *opaque, int version_id) > * software has to take care of running QEMU in a compatible mode. > */ > env->spr[SPR_PVR] = env->spr_cb[SPR_PVR].default_value; > + > + for (i = 0; i < 8; i++) { > + ppc_set_crf(env, i, env->crf[i]); > + } > + > env->lr = env->spr[SPR_LR]; > env->ctr = env->spr[SPR_CTR]; > env->xer = env->spr[SPR_XER]; > diff --git a/target-ppc/translate.c b/target-ppc/translate.c > index 1ed6a8f..dd19b39 100644 > --- a/target-ppc/translate.c > +++ b/target-ppc/translate.c > @@ -53,13 +53,13 @@ static char cpu_reg_names[10*3 + 22*4 /* GPR */ > + 10*4 + 22*5 /* FPR */ > + 2*(10*6 + 22*7) /* AVRh, AVRl */ > + 10*5 + 22*6 /* VSR */ > - + 8*5 /* CRF */]; > + + 32*8 /* CR */]; > static TCGv cpu_gpr[32]; > static TCGv cpu_gprh[32]; > static TCGv_i64 cpu_fpr[32]; > static TCGv_i64 cpu_avrh[32], cpu_avrl[32]; > static TCGv_i64 cpu_vsr[32]; > -static TCGv_i32 cpu_crf[8]; > +static TCGv_i32 cpu_cr[32]; > static TCGv cpu_nip; > static TCGv cpu_msr; > static TCGv cpu_ctr; > @@ -89,12 +89,13 @@ void ppc_translate_init(void) > p = cpu_reg_names; > cpu_reg_names_size = sizeof(cpu_reg_names); > > - for (i = 0; i < 8; i++) { > - snprintf(p, cpu_reg_names_size, "crf%d", i); > - cpu_crf[i] = tcg_global_mem_new_i32(TCG_AREG0, > - offsetof(CPUPPCState, crf[i]), > p); > - p += 5; > - cpu_reg_names_size -= 5; > + for (i = 0; i < 32; i++) { > + static const char names[] = "lt\0gt\0eq\0so"; > + snprintf(p, cpu_reg_names_size, "cr%d[%s]", i >> 2, names + (i & 3) > * 3); > + cpu_cr[i] = 
tcg_global_mem_new_i32(TCG_AREG0, > + offsetof(CPUPPCState, cr[i]), p); > + p += 8; > + cpu_reg_names_size -= 8; > } > > for (i = 0; i < 32; i++) { > @@ -251,17 +252,30 @@ static inline void gen_reset_fpstatus(void) > > static inline void gen_op_mfcr(TCGv dest, int first_cr, int shift) > { > - tcg_gen_shli_i32(dest, cpu_crf[first_cr >> 2], shift); > + TCGv_i32 t0 = tcg_temp_new_i32(); > + > + tcg_gen_shli_i32(dest, cpu_cr[first_cr + 3], shift); > + tcg_gen_shli_i32(t0, cpu_cr[first_cr + 2], shift + 1); > + tcg_gen_or_i32(dest, dest, t0); > + tcg_gen_shli_i32(t0, cpu_cr[first_cr + 1], shift + 2); > + tcg_gen_or_i32(dest, dest, t0); > + tcg_gen_shli_i32(t0, cpu_cr[first_cr], shift + 3); This leaks t0 -- there is no matching tcg_temp_free_i32(t0). (Note also that this last shifted value is never OR'd into dest, so the first_cr bit appears to be dropped.) > } > > static inline void gen_op_mtcr(int first_cr, TCGv src, int shift) > { > if (shift) { > - tcg_gen_shri_i32(cpu_crf[first_cr >> 2], src, shift); > - tcg_gen_andi_i32(cpu_crf[first_cr >> 2], cpu_crf[first_cr >> 2], > 0x0F); > + tcg_gen_shri_i32(cpu_cr[first_cr + 3], src, shift); > + tcg_gen_andi_i32(cpu_cr[first_cr + 3], cpu_cr[first_cr + 3], 1); > } else { > - tcg_gen_andi_i32(cpu_crf[first_cr >> 2], src, 0x0F); > + tcg_gen_andi_i32(cpu_cr[first_cr + 3], src, 1); > } > + tcg_gen_shri_i32(cpu_cr[first_cr + 2], src, shift + 1); > + tcg_gen_andi_i32(cpu_cr[first_cr + 2], cpu_cr[first_cr + 2], 1); > + tcg_gen_shri_i32(cpu_cr[first_cr + 1], src, shift + 2); > + tcg_gen_andi_i32(cpu_cr[first_cr + 1], cpu_cr[first_cr + 1], 1); > + tcg_gen_shri_i32(cpu_cr[first_cr], src, shift + 3); > + tcg_gen_andi_i32(cpu_cr[first_cr], cpu_cr[first_cr], 1); > } > > static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf, int set_rc) > @@ -675,27 +689,19 @@ static bool is_user_mode(DisasContext *ctx) > static inline void gen_op_cmp(TCGv arg0, TCGv arg1, int s, int crf) > { > TCGv t0 = tcg_temp_new(); > - TCGv_i32 t1 = tcg_temp_new_i32(); > > - tcg_gen_trunc_tl_i32(cpu_crf[crf], cpu_so); > + tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_SO], cpu_so); This looks correct to me but is 
causing problems. The above statement seems to get dropped in the generated asm ... at least on a PPC host: IN: 0x00000000100005b4: cmpw cr3,r30,r29 OUT: [size=160] 0x6041ad30: lwz r14,-4(r27) 0x6041ad34: cmpwi cr7,r14,0 0x6041ad38: bne- cr7,0x6041adbc 0x6041ad3c: ld r14,240(r27) <<< r30 0x6041ad40: ld r15,232(r27) <<< r31 0x6041ad44: cmpw cr7,r14,r15 <<< this is the TCG_COND_LTx code 0x6041ad48: li r16,1 0x6041ad4c: li r0,0 0x6041ad50: isel r16,r16,r0,28 0x6041ad54: stw r16,576(r27) <<< store cpu_cr[LT] 0x6041ad58: cmpw cr7,r14,r15 0x6041ad5c: li r16,1 0x6041ad60: li r0,0 0x6041ad64: isel r16,r16,r0,29 0x6041ad68: stw r16,580(r27) <<< store cpu_cr[GT] 0x6041ad6c: cmplw cr7,r14,r15 0x6041ad70: li r14,1 0x6041ad74: li r0,0 0x6041ad78: isel r14,r14,r0,30 0x6041ad7c: stw r14,584(r27) <<< store cpu_cr[EQ] 0x6041ad80: .long 0x0 0x6041ad84: .long 0x0 Richard: any ideas or hints on how to proceed? > > tcg_gen_setcond_tl((s ? TCG_COND_LT: TCG_COND_LTU), t0, arg0, arg1); > - tcg_gen_trunc_tl_i32(t1, t0); > - tcg_gen_shli_i32(t1, t1, CRF_LT); > - tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1); > + tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_LT], t0); > > tcg_gen_setcond_tl((s ? 
TCG_COND_GT: TCG_COND_GTU), t0, arg0, arg1); > - tcg_gen_trunc_tl_i32(t1, t0); > - tcg_gen_shli_i32(t1, t1, CRF_GT); > - tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1); > + tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_GT], t0); > > tcg_gen_setcond_tl(TCG_COND_EQ, t0, arg0, arg1); > - tcg_gen_trunc_tl_i32(t1, t0); > - tcg_gen_shli_i32(t1, t1, CRF_EQ); > - tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1); > + tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_EQ], t0); > > tcg_temp_free(t0); > - tcg_temp_free_i32(t1); > } > > static inline void gen_op_cmpi(TCGv arg0, target_ulong arg1, int s, int crf) > @@ -707,17 +713,22 @@ static inline void gen_op_cmpi(TCGv arg0, target_ulong > arg1, int s, int crf) > > static inline void gen_op_cmp32(TCGv arg0, TCGv arg1, int s, int crf) > { > - TCGv t0, t1; > + TCGv_i32 t0, t1; > + > t0 = tcg_temp_new(); > t1 = tcg_temp_new(); Needs to be tcg_temp_new_i32() .... > - if (s) { > - tcg_gen_ext32s_tl(t0, arg0); > - tcg_gen_ext32s_tl(t1, arg1); > - } else { > - tcg_gen_ext32u_tl(t0, arg0); > - tcg_gen_ext32u_tl(t1, arg1); > - } > - gen_op_cmp(t0, t1, s, crf); > + tcg_gen_trunc_tl_i32(t0, arg0); > + tcg_gen_trunc_tl_i32(t1, arg1); > + > + tcg_gen_setcond_i32((s ? TCG_COND_LT: TCG_COND_LTU), > + cpu_cr[crf * 4 + CRF_LT], t0, t1); > + > + tcg_gen_setcond_i32((s ? TCG_COND_GT: TCG_COND_GTU), > + cpu_cr[crf * 4 + CRF_GT], t0, t1); > + > + tcg_gen_setcond_i32(TCG_COND_EQ, > + cpu_cr[crf * 4 + CRF_EQ], t0, t1); > + > tcg_temp_free(t1); > tcg_temp_free(t0); ... 
and tcg_temp_free_i32() > } > @@ -790,15 +801,10 @@ static void gen_cmpli(DisasContext *ctx) > static void gen_isel(DisasContext *ctx) > { > uint32_t bi = rC(ctx->opcode); > - uint32_t mask; > - TCGv_i32 t0; > TCGv t1, true_op, zero; > > - mask = 0x08 >> (bi & 0x03); > - t0 = tcg_temp_new_i32(); > - tcg_gen_andi_i32(t0, cpu_crf[bi >> 2], mask); > t1 = tcg_temp_new(); > - tcg_gen_extu_i32_tl(t1, t0); > + tcg_gen_extu_i32_tl(t1, cpu_cr[bi]); > zero = tcg_const_tl(0); > if (rA(ctx->opcode) == 0) > true_op = zero; > @@ -2288,21 +2294,29 @@ GEN_FLOAT_B(rim, 0x08, 0x0F, 1, PPC_FLOAT_EXT); > > static void gen_ftdiv(DisasContext *ctx) > { > + TCGv_i32 crf; > if (unlikely(!ctx->fpu_enabled)) { > gen_exception(ctx, POWERPC_EXCP_FPU); > return; > } > - gen_helper_ftdiv(cpu_crf[crfD(ctx->opcode)], cpu_fpr[rA(ctx->opcode)], > + crf = tcg_temp_new_i32(); > + gen_helper_ftdiv(crf, cpu_fpr[rA(ctx->opcode)], > cpu_fpr[rB(ctx->opcode)]); > + gen_op_mtcr(crfD(ctx->opcode) << 2, crf, 0); > + tcg_temp_free_i32(crf); > } > > static void gen_ftsqrt(DisasContext *ctx) > { > + TCGv_i32 crf; > if (unlikely(!ctx->fpu_enabled)) { > gen_exception(ctx, POWERPC_EXCP_FPU); > return; > } > - gen_helper_ftsqrt(cpu_crf[crfD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)]); > + crf = tcg_temp_new_i32(); > + gen_helper_ftsqrt(crf, cpu_fpr[rB(ctx->opcode)]); > + gen_op_mtcr(crfD(ctx->opcode) << 2, crf, 0); > + tcg_temp_free_i32(crf); > } > > > @@ -3300,10 +3314,13 @@ static void gen_conditional_store(DisasContext *ctx, > TCGv EA, > { > int l1; > > - tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so); > + tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so); > + tcg_gen_movi_i32(cpu_cr[CRF_LT], 0); > + tcg_gen_movi_i32(cpu_cr[CRF_EQ], 0); > + tcg_gen_movi_i32(cpu_cr[CRF_GT], 0); > l1 = gen_new_label(); > tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, l1); > - tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 1 << CRF_EQ); > + tcg_gen_movi_i32(cpu_cr[CRF_EQ], 1); > #if defined(TARGET_PPC64) > if (size == 8) { > gen_qemu_st64(ctx, 
cpu_gpr[reg], EA); > @@ -3870,17 +3887,11 @@ static inline void gen_bcond(DisasContext *ctx, int > type) > if ((bo & 0x10) == 0) { > /* Test CR */ > uint32_t bi = BI(ctx->opcode); > - uint32_t mask = 0x08 >> (bi & 0x03); > - TCGv_i32 temp = tcg_temp_new_i32(); > - > if (bo & 0x8) { > - tcg_gen_andi_i32(temp, cpu_crf[bi >> 2], mask); > - tcg_gen_brcondi_i32(TCG_COND_EQ, temp, 0, l1); > + tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[bi], 0, l1); > } else { > - tcg_gen_andi_i32(temp, cpu_crf[bi >> 2], mask); > - tcg_gen_brcondi_i32(TCG_COND_NE, temp, 0, l1); > + tcg_gen_brcondi_i32(TCG_COND_NE, cpu_cr[bi], 0, l1); > } > - tcg_temp_free_i32(temp); > } > gen_update_cfar(ctx, ctx->nip); > if (type == BCOND_IM) { > @@ -3929,35 +3940,11 @@ static void gen_bctar(DisasContext *ctx) > } > > /*** Condition register logical > ***/ > -#define GEN_CRLOGIC(name, tcg_op, opc) > \ > -static void glue(gen_, name)(DisasContext *ctx) > \ > -{ > \ > - uint8_t bitmask; > \ > - int sh; > \ > - TCGv_i32 t0, t1; > \ > - sh = (crbD(ctx->opcode) & 0x03) - (crbA(ctx->opcode) & 0x03); > \ > - t0 = tcg_temp_new_i32(); > \ > - if (sh > 0) > \ > - tcg_gen_shri_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2], sh); > \ > - else if (sh < 0) > \ > - tcg_gen_shli_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2], -sh); > \ > - else > \ > - tcg_gen_mov_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2]); > \ > - t1 = tcg_temp_new_i32(); > \ > - sh = (crbD(ctx->opcode) & 0x03) - (crbB(ctx->opcode) & 0x03); > \ > - if (sh > 0) > \ > - tcg_gen_shri_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2], sh); > \ > - else if (sh < 0) > \ > - tcg_gen_shli_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2], -sh); > \ > - else > \ > - tcg_gen_mov_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2]); > \ > - tcg_op(t0, t0, t1); > \ > - bitmask = 0x08 >> (crbD(ctx->opcode) & 0x03); > \ > - tcg_gen_andi_i32(t0, t0, bitmask); > \ > - tcg_gen_andi_i32(t1, cpu_crf[crbD(ctx->opcode) >> 2], ~bitmask); > \ > - tcg_gen_or_i32(cpu_crf[crbD(ctx->opcode) >> 2], t0, t1); > \ > - 
tcg_temp_free_i32(t0); > \ > - tcg_temp_free_i32(t1); > \ > +#define GEN_CRLOGIC(name, tcg_op, opc) > \ > +static void glue(gen_, name)(DisasContext *ctx) > \ > +{ > \ > + tcg_op(cpu_cr[crbD(ctx->opcode)], cpu_cr[crbA(ctx->opcode)], > \ > + cpu_cr[crbB(ctx->opcode)]); > \ > } > This is a very nice cleanup ... but it oversteers just a little. For some CR logical instructions, the generated code can produce non-zero bits in the i32 cr variable in places other than the LSB. For example, consider crnand, which produces the following on a PPC host: IN: 0x0000000010000578: crnand 4*cr7+so,4*cr7+lt,4*cr7+eq OUT: [size=112] 0x6041a630: lwz r14,-4(r27) 0x6041a634: cmpwi cr7,r14,0 0x6041a638: bne- cr7,0x6041a68c 0x6041a63c: lwz r14,640(r27) 0x6041a640: lwz r15,648(r27) 0x6041a644: nand r14,r14,r15 0x6041a648: andi. r14,r14,1 0x6041a64c: stw r14,652(r27) 0x6041a650: .long 0x0 0x6041a654: .long 0x0 0x6041a658: .long 0x0 0x6041a65c: .long 0x0 The host nand operation will always produce an i32 value that has 1s in bits 0-30, since they are presumably zero. A brute-force fix would be to add a tcg_gen_andi_i32(D,D,1) to your macro. But I think this is required only for a subset of the instructions (crnand, crnor, creqv, crorc). 
> /* crand */ > @@ -3980,7 +3967,11 @@ GEN_CRLOGIC(crxor, tcg_gen_xor_i32, 0x06); > /* mcrf */ > static void gen_mcrf(DisasContext *ctx) > { > - tcg_gen_mov_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfS(ctx->opcode)]); > + int i; > + for (i = 0; i < 4; i++) { > + tcg_gen_mov_i32(cpu_cr[crfD(ctx->opcode) * 4 + i], > + cpu_cr[crfS(ctx->opcode) * 4 + i]); > + } > } > > /*** System linkage > ***/ > @@ -4133,20 +4124,12 @@ static void gen_write_xer(TCGv src) > /* mcrxr */ > static void gen_mcrxr(DisasContext *ctx) > { > - TCGv_i32 t0 = tcg_temp_new_i32(); > - TCGv_i32 t1 = tcg_temp_new_i32(); > - TCGv_i32 dst = cpu_crf[crfD(ctx->opcode)]; > - > - tcg_gen_trunc_tl_i32(t0, cpu_so); > - tcg_gen_trunc_tl_i32(t1, cpu_ov); > - tcg_gen_trunc_tl_i32(dst, cpu_ca); > - tcg_gen_shli_i32(t0, t0, 3); > - tcg_gen_shli_i32(t1, t1, 2); > - tcg_gen_shli_i32(dst, dst, 1); > - tcg_gen_or_i32(dst, dst, t0); > - tcg_gen_or_i32(dst, dst, t1); > - tcg_temp_free_i32(t0); > - tcg_temp_free_i32(t1); > + int crf = crfD(ctx->opcode); > + > + tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_LT], cpu_so); > + tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_GT], cpu_ov); > + tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_EQ], cpu_ca); > + tcg_gen_movi_i32(cpu_cr[crf * 4 + CRF_SO], 0); > > tcg_gen_movi_tl(cpu_so, 0); > tcg_gen_movi_tl(cpu_ov, 0); > @@ -6320,11 +6303,13 @@ static void gen_tlbsx_40x(DisasContext *ctx) > gen_helper_4xx_tlbsx(cpu_gpr[rD(ctx->opcode)], cpu_env, t0); > tcg_temp_free(t0); > if (Rc(ctx->opcode)) { > - int l1 = gen_new_label(); > - tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so); > - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rD(ctx->opcode)], -1, l1); > - tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 0x02); > - gen_set_label(l1); > + t0 = tcg_temp_new(); > + tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so); > + tcg_gen_movi_i32(cpu_cr[CRF_LT], 0); > + tcg_gen_movi_i32(cpu_cr[CRF_GT], 0); > + tcg_gen_setcondi_tl(TCG_COND_EQ, t0, cpu_gpr[rD(ctx->opcode)], -1); > + tcg_gen_trunc_tl_i32(cpu_cr[CRF_EQ], t0); > + 
tcg_temp_free(t0); > } > #endif > } > @@ -6401,11 +6386,13 @@ static void gen_tlbsx_440(DisasContext *ctx) > gen_helper_440_tlbsx(cpu_gpr[rD(ctx->opcode)], cpu_env, t0); > tcg_temp_free(t0); > if (Rc(ctx->opcode)) { > - int l1 = gen_new_label(); > - tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so); > - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rD(ctx->opcode)], -1, l1); > - tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 0x02); > - gen_set_label(l1); > + t0 = tcg_temp_new(); > + tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so); > + tcg_gen_movi_i32(cpu_cr[CRF_LT], 0); > + tcg_gen_movi_i32(cpu_cr[CRF_GT], 0); > + tcg_gen_setcondi_tl(TCG_COND_EQ, t0, cpu_gpr[rD(ctx->opcode)], -1); > + tcg_gen_trunc_tl_i32(cpu_cr[CRF_EQ], t0); > + tcg_temp_free(t0); > } > #endif > } > @@ -7371,7 +7358,7 @@ GEN_VXFORM(vpmsumd, 4, 19) > static void gen_##op(DisasContext *ctx) \ > { \ > TCGv_ptr ra, rb, rd; \ > - TCGv_i32 ps; \ > + TCGv_i32 ps, crf; \ > \ > if (unlikely(!ctx->altivec_enabled)) { \ > gen_exception(ctx, POWERPC_EXCP_VPU); \ > @@ -7383,13 +7370,16 @@ static void gen_##op(DisasContext *ctx) \ > rd = gen_avr_ptr(rD(ctx->opcode)); \ > \ > ps = tcg_const_i32((ctx->opcode & 0x200) != 0); \ > + crf = tcg_temp_new_i32(); \ > \ > - gen_helper_##op(cpu_crf[6], rd, ra, rb, ps); \ > + gen_helper_##op(crf, rd, ra, rb, ps); \ > + gen_op_mtcr(6 << 2, crf, 0); \ > \ > tcg_temp_free_ptr(ra); \ > tcg_temp_free_ptr(rb); \ > tcg_temp_free_ptr(rd); \ > tcg_temp_free_i32(ps); \ > + tcg_temp_free_ptr(crf); \ tcg_temp_free_i32() ? 
> } > > GEN_BCD(bcdadd) > @@ -8217,6 +8207,7 @@ static void gen_##name(DisasContext *ctx) \ > static void gen_##name(DisasContext *ctx) \ > { \ > TCGv_ptr ra, rb; \ > + TCGv_i32 tmp; \ > if (unlikely(!ctx->fpu_enabled)) { \ > gen_exception(ctx, POWERPC_EXCP_FPU); \ > return; \ > @@ -8224,8 +8215,10 @@ static void gen_##name(DisasContext *ctx) \ > gen_update_nip(ctx, ctx->nip - 4); \ > ra = gen_fprp_ptr(rA(ctx->opcode)); \ > rb = gen_fprp_ptr(rB(ctx->opcode)); \ > - gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \ > - cpu_env, ra, rb); \ > + tmp = tcg_temp_new_i32(); \ > + gen_helper_##name(tmp, cpu_env, ra, rb); \ > + gen_op_mtcr(crfD(ctx->opcode) << 2, tmp, 0); \ > + tcg_temp_free_i32(tmp); \ > tcg_temp_free_ptr(ra); \ > tcg_temp_free_ptr(rb); \ > } > @@ -8234,7 +8227,7 @@ static void gen_##name(DisasContext *ctx) \ > static void gen_##name(DisasContext *ctx) \ > { \ > TCGv_ptr ra; \ > - TCGv_i32 dcm; \ > + TCGv_i32 dcm, tmp; \ > if (unlikely(!ctx->fpu_enabled)) { \ > gen_exception(ctx, POWERPC_EXCP_FPU); \ > return; \ > @@ -8242,8 +8235,10 @@ static void gen_##name(DisasContext *ctx) \ > gen_update_nip(ctx, ctx->nip - 4); \ > ra = gen_fprp_ptr(rA(ctx->opcode)); \ > dcm = tcg_const_i32(DCM(ctx->opcode)); \ > - gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \ > - cpu_env, ra, dcm); \ > + tmp = tcg_temp_new_i32(); \ > + gen_helper_##name(tmp, cpu_env, ra, dcm); \ > + gen_op_mtcr(crfD(ctx->opcode) << 2, tmp, 0); \ > + tcg_temp_free_i32(tmp); \ > tcg_temp_free_ptr(ra); \ > tcg_temp_free_i32(dcm); \ > } > @@ -8668,37 +8663,32 @@ GEN_SPEOP_ARITH_IMM2(evsubifw, tcg_gen_subi_i32); > #define GEN_SPEOP_COMP(name, tcg_cond) > \ > static inline void gen_##name(DisasContext *ctx) > \ > { > \ > + TCGv tmp = tcg_temp_new(); > \ > + > \ > if (unlikely(!ctx->spe_enabled)) { > \ > gen_exception(ctx, POWERPC_EXCP_SPEU); > \ > return; > \ > } > \ > - int l1 = gen_new_label(); > \ > - int l2 = gen_new_label(); > \ > - int l3 = gen_new_label(); > \ > - int l4 = gen_new_label(); > \ > > \ 
> tcg_gen_ext32s_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); > \ > tcg_gen_ext32s_tl(cpu_gpr[rB(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); > \ > tcg_gen_ext32s_tl(cpu_gprh[rA(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]); > \ > tcg_gen_ext32s_tl(cpu_gprh[rB(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]); > \ > > \ > - tcg_gen_brcond_tl(tcg_cond, cpu_gpr[rA(ctx->opcode)], > \ > - cpu_gpr[rB(ctx->opcode)], l1); > \ > - tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 0); > \ > - tcg_gen_br(l2); > \ > - gen_set_label(l1); > \ > - tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], > \ > - CRF_CL | CRF_CH_OR_CL | CRF_CH_AND_CL); > \ > - gen_set_label(l2); > \ > - tcg_gen_brcond_tl(tcg_cond, cpu_gprh[rA(ctx->opcode)], > \ > - cpu_gprh[rB(ctx->opcode)], l3); > \ > - tcg_gen_andi_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)], > \ > - ~(CRF_CH | CRF_CH_AND_CL)); > \ > - tcg_gen_br(l4); > \ > - gen_set_label(l3); > \ > - tcg_gen_ori_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)], > \ > - CRF_CH | CRF_CH_OR_CL); > \ > - gen_set_label(l4); > \ > + tcg_gen_setcond_tl(tcg_cond, tmp, > \ > + cpu_gpr[rA(ctx->opcode)], > \ > + cpu_gpr[rB(ctx->opcode)]); > \ > + tcg_gen_trunc_tl_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL], tmp); > \ > + tcg_gen_setcond_tl(tcg_cond, tmp, > \ > + cpu_gprh[rA(ctx->opcode)], > \ > + cpu_gprh[rB(ctx->opcode)]); > \ > + tcg_gen_trunc_tl_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], tmp); > \ > + tcg_gen_or_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_OR_CL], > \ > + cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], > \ > + cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]); > \ > + tcg_gen_and_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_AND_CL], > \ > + cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], > \ > + cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]); > \ > } > GEN_SPEOP_COMP(evcmpgtu, TCG_COND_GTU); > GEN_SPEOP_COMP(evcmpgts, TCG_COND_GT); > @@ -8769,22 +8759,20 @@ static inline void gen_evsel(DisasContext *ctx) > int l2 = gen_new_label(); > int l3 = gen_new_label(); > int l4 
= gen_new_label(); > - TCGv_i32 t0 = tcg_temp_local_new_i32(); > - tcg_gen_andi_i32(t0, cpu_crf[ctx->opcode & 0x07], 1 << 3); > - tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1); > + > + tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[(ctx->opcode & 0x07) * 4], 0, > l1); > tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]); > tcg_gen_br(l2); > gen_set_label(l1); > tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]); > gen_set_label(l2); > - tcg_gen_andi_i32(t0, cpu_crf[ctx->opcode & 0x07], 1 << 2); > - tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l3); > + > + tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[(ctx->opcode & 0x07) * 4 + 1], > 0, l3); > tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); > tcg_gen_br(l4); > gen_set_label(l3); > tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); > gen_set_label(l4); > - tcg_temp_free_i32(t0); > } > > static void gen_evsel0(DisasContext *ctx) > @@ -9366,9 +9354,12 @@ static inline void gen_##name(DisasContext *ctx) > \ > t0 = tcg_temp_new_i32(); > \ > t1 = tcg_temp_new_i32(); > \ > > \ > + tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_LT], 0); > \ > + tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_GT], 0); > \ > + tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_SO], 0); > \ > tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); > \ > tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); > \ > - gen_helper_##name(cpu_crf[crfD(ctx->opcode)], cpu_env, t0, t1); > \ > + gen_helper_##name(cpu_cr[crfD(ctx->opcode) * 4 + CRF_EQ], cpu_env, t0, > t1); \ > > \ > tcg_temp_free_i32(t0); > \ > tcg_temp_free_i32(t1); > \ > @@ -9385,10 +9376,32 @@ static inline void gen_##name(DisasContext *ctx) > \ > t1 = tcg_temp_new_i64(); > \ > gen_load_gpr64(t0, rA(ctx->opcode)); > \ > gen_load_gpr64(t1, rB(ctx->opcode)); > \ > - gen_helper_##name(cpu_crf[crfD(ctx->opcode)], cpu_env, t0, t1); > \ > + tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_LT], 0); > \ > + 
tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_GT], 0); > \ > + tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_SO], 0); > \ > + gen_helper_##name(cpu_cr[crfD(ctx->opcode) * 4 + CRF_EQ], cpu_env, > \ > + t0, t1); > \ > tcg_temp_free_i64(t0); > \ > tcg_temp_free_i64(t1); > \ > } > +#define GEN_SPEFPUOP_COMP_V64(name, helper) > \ > +static inline void gen_##name(DisasContext *ctx) > \ > +{ > \ > + if (unlikely(!ctx->spe_enabled)) { > \ > + gen_exception(ctx, POWERPC_EXCP_SPEU); > \ > + return; > \ > + } > \ > + gen_helper_##helper(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL], cpu_env, > \ > + cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); > \ > + gen_helper_##helper(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], cpu_env, > \ > + cpu_gprh[rA(ctx->opcode)], > cpu_gprh[rB(ctx->opcode)]);\ This doesn't compile for 64 bit targets because the helpers declare i32 types for the GPR arguments. > + tcg_gen_or_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_OR_CL], > \ > + cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], > \ > + cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]); > \ > + tcg_gen_and_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_AND_CL], > \ > + cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], > \ > + cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]); > \ > +} > > /* Single precision floating-point vectors operations */ > /* Arithmetic */ > @@ -9443,12 +9456,12 @@ GEN_SPEFPUOP_CONV_64_64(evfsctuiz); > GEN_SPEFPUOP_CONV_64_64(evfsctsiz); > > /* Comparison */ > -GEN_SPEFPUOP_COMP_64(evfscmpgt); > -GEN_SPEFPUOP_COMP_64(evfscmplt); > -GEN_SPEFPUOP_COMP_64(evfscmpeq); > -GEN_SPEFPUOP_COMP_64(evfststgt); > -GEN_SPEFPUOP_COMP_64(evfststlt); > -GEN_SPEFPUOP_COMP_64(evfststeq); > +GEN_SPEFPUOP_COMP_V64(evfscmpgt, efscmpgt); > +GEN_SPEFPUOP_COMP_V64(evfscmplt, efscmplt); > +GEN_SPEFPUOP_COMP_V64(evfscmpeq, efscmpeq); > +GEN_SPEFPUOP_COMP_V64(evfststgt, efststgt); > +GEN_SPEFPUOP_COMP_V64(evfststlt, efststlt); > +GEN_SPEFPUOP_COMP_V64(evfststeq, efststeq); > > /* Opcodes definitions */ > GEN_SPE(evfsadd, evfssub, 0x00, 
0x0A, 0x00000000, 0x00000000, > PPC_SPE_SINGLE); // >