Re: [Qemu-devel] [PATCH 15/17] ppc: store CR registers in 32 1-bit registers

Tom Musta Thu, 04 Sep 2014 11:28:56 -0700

On 8/28/2014 12:15 PM, Paolo Bonzini wrote:
> This makes comparisons much smaller and faster.  The speedup is
> approximately 10% on user-mode emulation on x86 host, 3-4% on PPC.
> 
> Note that CRF_* constants are flipped to match PowerPC's big
> bit-endianness.  Previously, the CR register was effectively stored
> in mixed endianness, so now there is less indirection going on.
> 
> Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>


There are some issues with this patch -- it doesn't compile due to some typing 
issues.  There are also some functional issues.  Some details are below

(nit) Also it doesnt pass checkpatch.pl.

> ---
>  linux-user/main.c       |   4 +-
>  target-ppc/cpu.h        |  33 ++++--
>  target-ppc/fpu_helper.c |  39 ++----
>  target-ppc/helper.h     |   6 -
>  target-ppc/int_helper.c |   2 +-
>  target-ppc/machine.c    |   9 ++
>  target-ppc/translate.c  | 307 
> +++++++++++++++++++++++++-----------------------
>  7 files changed, 204 insertions(+), 196 deletions(-)
> 
> diff --git a/linux-user/main.c b/linux-user/main.c
> index 152c031..b403f24 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -1929,7 +1929,7 @@ void cpu_loop(CPUPPCState *env)
>               * PPC ABI uses overflow flag in cr0 to signal an error
>               * in syscalls.
>               */
> -            env->crf[0] &= ~0x1;
> +            env->cr[CRF_SO] = 0;
>              ret = do_syscall(env, env->gpr[0], env->gpr[3], env->gpr[4],
>                               env->gpr[5], env->gpr[6], env->gpr[7],
>                               env->gpr[8], 0, 0);
> @@ -1939,7 +1939,7 @@ void cpu_loop(CPUPPCState *env)
>                  break;
>              }
>              if (ret > (target_ulong)(-515)) {
> -                env->crf[0] |= 0x1;
> +                env->cr[CRF_SO] = 1;
>                  ret = -ret;
>              }
>              env->gpr[3] = ret;
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index 05c29b2..67510e8 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -939,7 +939,7 @@ struct CPUPPCState {
>      /* CTR */
>      target_ulong ctr;
>      /* condition register */
> -    uint32_t crf[8];
> +    uint32_t cr[32];
>  #if defined(TARGET_PPC64)
>      /* CFAR */
>      target_ulong cfar;
> @@ -1058,6 +1058,9 @@ struct CPUPPCState {
>      uint64_t dtl_addr, dtl_size;
>  #endif /* TARGET_PPC64 */
>  
> +    /* condition register, for migration compatibility */
> +    uint32_t crf[8];
> +
>      int error_code;
>      uint32_t pending_interrupts;
>  #if !defined(CONFIG_USER_ONLY)
> @@ -1200,12 +1203,20 @@ void store_fpscr(CPUPPCState *env, uint64_t arg, 
> uint32_t mask);
>  
>  static inline uint32_t ppc_get_crf(const CPUPPCState *env, int i)
>  {
> -    return env->crf[i];
> +    uint32_t r;
> +    r = env->cr[i * 4];
> +    r = (r << 1) | (env->cr[i * 4 + 1]);
> +    r = (r << 1) | (env->cr[i * 4 + 2]);
> +    r = (r << 1) | (env->cr[i * 4 + 3]);
> +    return r;
>  }
>  
>  static inline void ppc_set_crf(CPUPPCState *env, int i, uint32_t val)
>  {
> -    env->crf[i] = val;
> +    env->cr[i * 4 + 0] = (val & 0x08) != 0;
> +    env->cr[i * 4 + 1] = (val & 0x04) != 0;
> +    env->cr[i * 4 + 2] = (val & 0x02) != 0;
> +    env->cr[i * 4 + 3] = (val & 0x01) != 0;
>  }
>  
>  static inline uint64_t ppc_dump_gpr(CPUPPCState *env, int gprn)
> @@ -1256,14 +1267,14 @@ static inline int cpu_mmu_index (CPUPPCState *env)
>  
>  
> /*****************************************************************************/
>  /* CRF definitions */
> -#define CRF_LT        3
> -#define CRF_GT        2
> -#define CRF_EQ        1
> -#define CRF_SO        0
> -#define CRF_CH        (1 << CRF_LT)
> -#define CRF_CL        (1 << CRF_GT)
> -#define CRF_CH_OR_CL  (1 << CRF_EQ)
> -#define CRF_CH_AND_CL (1 << CRF_SO)
> +#define CRF_LT        0
> +#define CRF_GT        1
> +#define CRF_EQ        2
> +#define CRF_SO        3
> +#define CRF_CH        CRF_LT
> +#define CRF_CL        CRF_GT
> +#define CRF_CH_OR_CL  CRF_EQ
> +#define CRF_CH_AND_CL CRF_SO
>  
>  /* XER definitions */
>  #define XER_SO  31
> diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
> index 1ccbcf3..9574ebe 100644
> --- a/target-ppc/fpu_helper.c
> +++ b/target-ppc/fpu_helper.c
> @@ -1098,8 +1098,8 @@ void helper_fcmpu(CPUPPCState *env, uint64_t arg1, 
> uint64_t arg2,
>      }
>  
>      env->fpscr &= ~(0x0F << FPSCR_FPRF);
> -    env->fpscr |= (0x01 << FPSCR_FPRF) << ret;
> -    ppc_set_crf(env, crfD, 1 << ret);
> +    env->fpscr |= (0x08 << FPSCR_FPRF) >> ret;
> +    ppc_set_crf(env, crfD, 0x08 >> ret);
>  
>      if (unlikely(ret == CRF_SO
>                   && (float64_is_signaling_nan(farg1.d) ||
> @@ -1130,8 +1130,8 @@ void helper_fcmpo(CPUPPCState *env, uint64_t arg1, 
> uint64_t arg2,
>      }
>  
>      env->fpscr &= ~(0x0F << FPSCR_FPRF);
> -    env->fpscr |= (0x01 << FPSCR_FPRF) << ret;
> -    ppc_set_crf(env, crfD, 1 << ret);
> +    env->fpscr |= (0x08 << FPSCR_FPRF) >> ret;
> +    ppc_set_crf(env, crfD, 0x08 >> ret);
>  
>      if (unlikely(ret == CRF_SO)) {
>          if (float64_is_signaling_nan(farg1.d) ||
> @@ -1403,7 +1403,7 @@ static inline uint32_t efscmplt(CPUPPCState *env, 
> uint32_t op1, uint32_t op2)
>  
>      u1.l = op1;
>      u2.l = op2;
> -    return float32_lt(u1.f, u2.f, &env->vec_status) ? 4 : 0;
> +    return float32_lt(u1.f, u2.f, &env->vec_status);
>  }
>  
>  static inline uint32_t efscmpgt(CPUPPCState *env, uint32_t op1, uint32_t op2)
> @@ -1412,7 +1412,7 @@ static inline uint32_t efscmpgt(CPUPPCState *env, 
> uint32_t op1, uint32_t op2)
>  
>      u1.l = op1;
>      u2.l = op2;
> -    return float32_le(u1.f, u2.f, &env->vec_status) ? 0 : 4;
> +    return !float32_le(u1.f, u2.f, &env->vec_status);
>  }
>  
>  static inline uint32_t efscmpeq(CPUPPCState *env, uint32_t op1, uint32_t op2)
> @@ -1421,7 +1421,7 @@ static inline uint32_t efscmpeq(CPUPPCState *env, 
> uint32_t op1, uint32_t op2)
>  
>      u1.l = op1;
>      u2.l = op2;
> -    return float32_eq(u1.f, u2.f, &env->vec_status) ? 4 : 0;
> +    return float32_eq(u1.f, u2.f, &env->vec_status);
>  }
>  
>  static inline uint32_t efststlt(CPUPPCState *env, uint32_t op1, uint32_t op2)
> @@ -1465,25 +1465,6 @@ static inline uint32_t evcmp_merge(int t0, int t1)
>      return (t0 << 3) | (t1 << 2) | ((t0 | t1) << 1) | (t0 & t1);
>  }
>  
> -#define HELPER_VECTOR_SPE_CMP(name)                                     \
> -    uint32_t helper_ev##name(CPUPPCState *env, uint64_t op1, uint64_t op2) \
> -    {                                                                   \
> -        return evcmp_merge(e##name(env, op1 >> 32, op2 >> 32),          \
> -                           e##name(env, op1, op2));                     \
> -    }
> -/* evfststlt */
> -HELPER_VECTOR_SPE_CMP(fststlt);
> -/* evfststgt */
> -HELPER_VECTOR_SPE_CMP(fststgt);
> -/* evfststeq */
> -HELPER_VECTOR_SPE_CMP(fststeq);
> -/* evfscmplt */
> -HELPER_VECTOR_SPE_CMP(fscmplt);
> -/* evfscmpgt */
> -HELPER_VECTOR_SPE_CMP(fscmpgt);
> -/* evfscmpeq */
> -HELPER_VECTOR_SPE_CMP(fscmpeq);
> -
>  /* Double-precision floating-point conversion */
>  uint64_t helper_efdcfsi(CPUPPCState *env, uint32_t val)
>  {
> @@ -1725,7 +1706,7 @@ uint32_t helper_efdtstlt(CPUPPCState *env, uint64_t 
> op1, uint64_t op2)
>  
>      u1.ll = op1;
>      u2.ll = op2;
> -    return float64_lt(u1.d, u2.d, &env->vec_status) ? 4 : 0;
> +    return float64_lt(u1.d, u2.d, &env->vec_status);
>  }
>  
>  uint32_t helper_efdtstgt(CPUPPCState *env, uint64_t op1, uint64_t op2)
> @@ -1734,7 +1715,7 @@ uint32_t helper_efdtstgt(CPUPPCState *env, uint64_t 
> op1, uint64_t op2)
>  
>      u1.ll = op1;
>      u2.ll = op2;
> -    return float64_le(u1.d, u2.d, &env->vec_status) ? 0 : 4;
> +    return !float64_le(u1.d, u2.d, &env->vec_status);
>  }
>  
>  uint32_t helper_efdtsteq(CPUPPCState *env, uint64_t op1, uint64_t op2)
> @@ -1743,7 +1724,7 @@ uint32_t helper_efdtsteq(CPUPPCState *env, uint64_t 
> op1, uint64_t op2)
>  
>      u1.ll = op1;
>      u2.ll = op2;
> -    return float64_eq_quiet(u1.d, u2.d, &env->vec_status) ? 4 : 0;
> +    return float64_eq_quiet(u1.d, u2.d, &env->vec_status);
>  }
>  
>  uint32_t helper_efdcmplt(CPUPPCState *env, uint64_t op1, uint64_t op2)
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index 5342f13..8d6a92b 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -493,12 +493,6 @@ DEF_HELPER_3(efststeq, i32, env, i32, i32)
>  DEF_HELPER_3(efscmplt, i32, env, i32, i32)
>  DEF_HELPER_3(efscmpgt, i32, env, i32, i32)
>  DEF_HELPER_3(efscmpeq, i32, env, i32, i32)
> -DEF_HELPER_3(evfststlt, i32, env, i64, i64)
> -DEF_HELPER_3(evfststgt, i32, env, i64, i64)
> -DEF_HELPER_3(evfststeq, i32, env, i64, i64)
> -DEF_HELPER_3(evfscmplt, i32, env, i64, i64)
> -DEF_HELPER_3(evfscmpgt, i32, env, i64, i64)
> -DEF_HELPER_3(evfscmpeq, i32, env, i64, i64)
>  DEF_HELPER_2(efdcfsi, i64, env, i32)
>  DEF_HELPER_2(efdcfsid, i64, env, i64)
>  DEF_HELPER_2(efdcfui, i64, env, i32)
> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
> index 2287064..d3ace6a 100644
> --- a/target-ppc/int_helper.c
> +++ b/target-ppc/int_helper.c
> @@ -2602,7 +2602,7 @@ target_ulong helper_dlmzb(CPUPPCState *env, 
> target_ulong high,
>   done:
>      env->xer = (env->xer & ~0x7F) | i;
>      if (update_Rc) {
> -        env->crf[0] |= xer_so;
> +        env->cr[CRF_SO] = xer_so;
>      }
>      return i;
>  }
> diff --git a/target-ppc/machine.c b/target-ppc/machine.c
> index c801b82..9fa309a 100644
> --- a/target-ppc/machine.c
> +++ b/target-ppc/machine.c
> @@ -132,6 +132,10 @@ static void cpu_pre_save(void *opaque)
>      CPUPPCState *env = &cpu->env;
>      int i;
>  
> +    for (i = 0; i < 8; i++) {
> +        env->crf[i] = ppc_get_crf(env, i);
> +    }
> +
>      env->spr[SPR_LR] = env->lr;
>      env->spr[SPR_CTR] = env->ctr;
>      env->spr[SPR_XER] = env->xer;
> @@ -165,6 +169,11 @@ static int cpu_post_load(void *opaque, int version_id)
>       * software has to take care of running QEMU in a compatible mode.
>       */
>      env->spr[SPR_PVR] = env->spr_cb[SPR_PVR].default_value;
> +
> +    for (i = 0; i < 8; i++) {
> +        ppc_set_crf(env, i, env->crf[i]);
> +    }
> +
>      env->lr = env->spr[SPR_LR];
>      env->ctr = env->spr[SPR_CTR];
>      env->xer = env->spr[SPR_XER];
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 1ed6a8f..dd19b39 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -53,13 +53,13 @@ static char cpu_reg_names[10*3 + 22*4 /* GPR */
>      + 10*4 + 22*5 /* FPR */
>      + 2*(10*6 + 22*7) /* AVRh, AVRl */
>      + 10*5 + 22*6 /* VSR */
> -    + 8*5 /* CRF */];
> +    + 32*8 /* CR */];
>  static TCGv cpu_gpr[32];
>  static TCGv cpu_gprh[32];
>  static TCGv_i64 cpu_fpr[32];
>  static TCGv_i64 cpu_avrh[32], cpu_avrl[32];
>  static TCGv_i64 cpu_vsr[32];
> -static TCGv_i32 cpu_crf[8];
> +static TCGv_i32 cpu_cr[32];
>  static TCGv cpu_nip;
>  static TCGv cpu_msr;
>  static TCGv cpu_ctr;
> @@ -89,12 +89,13 @@ void ppc_translate_init(void)
>      p = cpu_reg_names;
>      cpu_reg_names_size = sizeof(cpu_reg_names);
>  
> -    for (i = 0; i < 8; i++) {
> -        snprintf(p, cpu_reg_names_size, "crf%d", i);
> -        cpu_crf[i] = tcg_global_mem_new_i32(TCG_AREG0,
> -                                            offsetof(CPUPPCState, crf[i]), 
> p);
> -        p += 5;
> -        cpu_reg_names_size -= 5;
> +    for (i = 0; i < 32; i++) {
> +        static const char names[] = "lt\0gt\0eq\0so";
> +        snprintf(p, cpu_reg_names_size, "cr%d[%s]", i >> 2, names + (i & 3) 
> * 3);
> +        cpu_cr[i] = tcg_global_mem_new_i32(TCG_AREG0,
> +                                           offsetof(CPUPPCState, cr[i]), p);
> +        p += 8;
> +        cpu_reg_names_size -= 8;
>      }
>  
>      for (i = 0; i < 32; i++) {
> @@ -251,17 +252,30 @@ static inline void gen_reset_fpstatus(void)
>  
>  static inline void gen_op_mfcr(TCGv dest, int first_cr, int shift)
>  {
> -    tcg_gen_shli_i32(dest, cpu_crf[first_cr >> 2], shift);
> +    TCGv_i32 t0 = tcg_temp_new_i32();
> +
> +    tcg_gen_shli_i32(dest, cpu_cr[first_cr + 3], shift);
> +    tcg_gen_shli_i32(t0, cpu_cr[first_cr + 2], shift + 1);
> +    tcg_gen_or_i32(dest, dest, t0);
> +    tcg_gen_shli_i32(t0, cpu_cr[first_cr + 1], shift + 2);
> +    tcg_gen_or_i32(dest, dest, t0);
> +    tcg_gen_shli_i32(t0, cpu_cr[first_cr], shift + 3);

This leaks t0.

>  }
>  
>  static inline void gen_op_mtcr(int first_cr, TCGv src, int shift)
>  {
>      if (shift) {
> -        tcg_gen_shri_i32(cpu_crf[first_cr >> 2], src, shift);
> -        tcg_gen_andi_i32(cpu_crf[first_cr >> 2], cpu_crf[first_cr >> 2], 
> 0x0F);
> +        tcg_gen_shri_i32(cpu_cr[first_cr + 3], src, shift);
> +        tcg_gen_andi_i32(cpu_cr[first_cr + 3], cpu_cr[first_cr + 3], 1);
>      } else {
> -        tcg_gen_andi_i32(cpu_crf[first_cr >> 2], src, 0x0F);
> +        tcg_gen_andi_i32(cpu_cr[first_cr + 3], src, 1);
>      }
> +    tcg_gen_shri_i32(cpu_cr[first_cr + 2], src, shift + 1);
> +    tcg_gen_andi_i32(cpu_cr[first_cr + 2], cpu_cr[first_cr + 2], 1);
> +    tcg_gen_shri_i32(cpu_cr[first_cr + 1], src, shift + 2);
> +    tcg_gen_andi_i32(cpu_cr[first_cr + 1], cpu_cr[first_cr + 1], 1);
> +    tcg_gen_shri_i32(cpu_cr[first_cr], src, shift + 3);
> +    tcg_gen_andi_i32(cpu_cr[first_cr], cpu_cr[first_cr], 1);
>  }
>  
>  static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf, int set_rc)
> @@ -675,27 +689,19 @@ static bool is_user_mode(DisasContext *ctx)
>  static inline void gen_op_cmp(TCGv arg0, TCGv arg1, int s, int crf)
>  {
>      TCGv t0 = tcg_temp_new();
> -    TCGv_i32 t1 = tcg_temp_new_i32();
>  
> -    tcg_gen_trunc_tl_i32(cpu_crf[crf], cpu_so);
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_SO], cpu_so);

This looks correct to me but is causing problems.  The above statement seems to 
get dropped in the generated asm ... at least on a PPC host:

IN:
0x00000000100005b4:  cmpw    cr3,r30,r29

OUT: [size=160]
0x6041ad30:  lwz     r14,-4(r27)
0x6041ad34:  cmpwi   cr7,r14,0
0x6041ad38:  bne-    cr7,0x6041adbc
0x6041ad3c:  ld      r14,240(r27)   <<< r30
0x6041ad40:  ld      r15,232(r27)   <<< r31
0x6041ad44:  cmpw    cr7,r14,r15    <<< this is the TCG_COND_LTx code
0x6041ad48:  li      r16,1
0x6041ad4c:  li      r0,0
0x6041ad50:  isel    r16,r16,r0,28
0x6041ad54:  stw     r16,576(r27)   <<< store cpu_cr[LT]
0x6041ad58:  cmpw    cr7,r14,r15
0x6041ad5c:  li      r16,1
0x6041ad60:  li      r0,0
0x6041ad64:  isel    r16,r16,r0,29
0x6041ad68:  stw     r16,580(r27)   <<< store cpu_cr[GT]
0x6041ad6c:  cmplw   cr7,r14,r15
0x6041ad70:  li      r14,1
0x6041ad74:  li      r0,0
0x6041ad78:  isel    r14,r14,r0,30
0x6041ad7c:  stw     r14,584(r27)   <<< store cpu_cr[EQ]
0x6041ad80:  .long 0x0
0x6041ad84:  .long 0x0

Richard:  any ideas or hints on how to proceed?
>  
>      tcg_gen_setcond_tl((s ? TCG_COND_LT: TCG_COND_LTU), t0, arg0, arg1);
> -    tcg_gen_trunc_tl_i32(t1, t0);
> -    tcg_gen_shli_i32(t1, t1, CRF_LT);
> -    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_LT], t0);
>  
>      tcg_gen_setcond_tl((s ? TCG_COND_GT: TCG_COND_GTU), t0, arg0, arg1);
> -    tcg_gen_trunc_tl_i32(t1, t0);
> -    tcg_gen_shli_i32(t1, t1, CRF_GT);
> -    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_GT], t0);
>  
>      tcg_gen_setcond_tl(TCG_COND_EQ, t0, arg0, arg1);
> -    tcg_gen_trunc_tl_i32(t1, t0);
> -    tcg_gen_shli_i32(t1, t1, CRF_EQ);
> -    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_EQ], t0);
>  
>      tcg_temp_free(t0);
> -    tcg_temp_free_i32(t1);
>  }
>  
>  static inline void gen_op_cmpi(TCGv arg0, target_ulong arg1, int s, int crf)
> @@ -707,17 +713,22 @@ static inline void gen_op_cmpi(TCGv arg0, target_ulong 
> arg1, int s, int crf)
>  
>  static inline void gen_op_cmp32(TCGv arg0, TCGv arg1, int s, int crf)
>  {
> -    TCGv t0, t1;
> +    TCGv_i32 t0, t1;
> +
>      t0 = tcg_temp_new();
>      t1 = tcg_temp_new();

Needs to be tcg_temp_new_i32() ....

> -    if (s) {
> -        tcg_gen_ext32s_tl(t0, arg0);
> -        tcg_gen_ext32s_tl(t1, arg1);
> -    } else {
> -        tcg_gen_ext32u_tl(t0, arg0);
> -        tcg_gen_ext32u_tl(t1, arg1);
> -    }
> -    gen_op_cmp(t0, t1, s, crf);
> +    tcg_gen_trunc_tl_i32(t0, arg0);
> +    tcg_gen_trunc_tl_i32(t1, arg1);
> +
> +    tcg_gen_setcond_i32((s ? TCG_COND_LT: TCG_COND_LTU), 
> +                        cpu_cr[crf * 4 + CRF_LT], t0, t1);
> +
> +    tcg_gen_setcond_i32((s ? TCG_COND_GT: TCG_COND_GTU), 
> +                        cpu_cr[crf * 4 + CRF_GT], t0, t1);
> +
> +    tcg_gen_setcond_i32(TCG_COND_EQ, 
> +                        cpu_cr[crf * 4 + CRF_EQ], t0, t1);
> +
>      tcg_temp_free(t1);
>      tcg_temp_free(t0);

... and tcg_temp_free_i32()

>  }
> @@ -790,15 +801,10 @@ static void gen_cmpli(DisasContext *ctx)
>  static void gen_isel(DisasContext *ctx)
>  {
>      uint32_t bi = rC(ctx->opcode);
> -    uint32_t mask;
> -    TCGv_i32 t0;
>      TCGv t1, true_op, zero;
>  
> -    mask = 0x08 >> (bi & 0x03);
> -    t0 = tcg_temp_new_i32();
> -    tcg_gen_andi_i32(t0, cpu_crf[bi >> 2], mask);
>      t1 = tcg_temp_new();
> -    tcg_gen_extu_i32_tl(t1, t0);
> +    tcg_gen_extu_i32_tl(t1, cpu_cr[bi]);
>      zero = tcg_const_tl(0);
>      if (rA(ctx->opcode) == 0)
>          true_op = zero;




> @@ -2288,21 +2294,29 @@ GEN_FLOAT_B(rim, 0x08, 0x0F, 1, PPC_FLOAT_EXT);
>  
>  static void gen_ftdiv(DisasContext *ctx)
>  {
> +    TCGv_i32 crf;
>      if (unlikely(!ctx->fpu_enabled)) {
>          gen_exception(ctx, POWERPC_EXCP_FPU);
>          return;
>      }
> -    gen_helper_ftdiv(cpu_crf[crfD(ctx->opcode)], cpu_fpr[rA(ctx->opcode)],
> +    crf = tcg_temp_new_i32();
> +    gen_helper_ftdiv(crf, cpu_fpr[rA(ctx->opcode)],
>                       cpu_fpr[rB(ctx->opcode)]);
> +    gen_op_mtcr(crfD(ctx->opcode) << 2, crf, 0);
> +    tcg_temp_free_i32(crf);
>  }
>  
>  static void gen_ftsqrt(DisasContext *ctx)
>  {
> +    TCGv_i32 crf;
>      if (unlikely(!ctx->fpu_enabled)) {
>          gen_exception(ctx, POWERPC_EXCP_FPU);
>          return;
>      }
> -    gen_helper_ftsqrt(cpu_crf[crfD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)]);
> +    crf = tcg_temp_new_i32();
> +    gen_helper_ftsqrt(crf, cpu_fpr[rB(ctx->opcode)]);
> +    gen_op_mtcr(crfD(ctx->opcode) << 2, crf, 0);
> +    tcg_temp_free_i32(crf);
>  }
>  
>  
> @@ -3300,10 +3314,13 @@ static void gen_conditional_store(DisasContext *ctx, 
> TCGv EA,
>  {
>      int l1;
>  
> -    tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
> +    tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so);
> +    tcg_gen_movi_i32(cpu_cr[CRF_LT], 0);
> +    tcg_gen_movi_i32(cpu_cr[CRF_EQ], 0);
> +    tcg_gen_movi_i32(cpu_cr[CRF_GT], 0);
>      l1 = gen_new_label();
>      tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, l1);
> -    tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 1 << CRF_EQ);
> +    tcg_gen_movi_i32(cpu_cr[CRF_EQ], 1);
>  #if defined(TARGET_PPC64)
>      if (size == 8) {
>          gen_qemu_st64(ctx, cpu_gpr[reg], EA);
> @@ -3870,17 +3887,11 @@ static inline void gen_bcond(DisasContext *ctx, int 
> type)
>      if ((bo & 0x10) == 0) {
>          /* Test CR */
>          uint32_t bi = BI(ctx->opcode);
> -        uint32_t mask = 0x08 >> (bi & 0x03);
> -        TCGv_i32 temp = tcg_temp_new_i32();
> -
>          if (bo & 0x8) {
> -            tcg_gen_andi_i32(temp, cpu_crf[bi >> 2], mask);
> -            tcg_gen_brcondi_i32(TCG_COND_EQ, temp, 0, l1);
> +            tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[bi], 0, l1);
>          } else {
> -            tcg_gen_andi_i32(temp, cpu_crf[bi >> 2], mask);
> -            tcg_gen_brcondi_i32(TCG_COND_NE, temp, 0, l1);
> +            tcg_gen_brcondi_i32(TCG_COND_NE, cpu_cr[bi], 0, l1);
>          }
> -        tcg_temp_free_i32(temp);
>      }
>      gen_update_cfar(ctx, ctx->nip);
>      if (type == BCOND_IM) {
> @@ -3929,35 +3940,11 @@ static void gen_bctar(DisasContext *ctx)
>  }
>  
>  /***                      Condition register logical                       
> ***/
> -#define GEN_CRLOGIC(name, tcg_op, opc)                                       
>  \
> -static void glue(gen_, name)(DisasContext *ctx)                              
>          \
> -{                                                                            
>  \
> -    uint8_t bitmask;                                                         
>  \
> -    int sh;                                                                  
>  \
> -    TCGv_i32 t0, t1;                                                         
>  \
> -    sh = (crbD(ctx->opcode) & 0x03) - (crbA(ctx->opcode) & 0x03);            
>  \
> -    t0 = tcg_temp_new_i32();                                                 
>  \
> -    if (sh > 0)                                                              
>  \
> -        tcg_gen_shri_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2], sh);           
>  \
> -    else if (sh < 0)                                                         
>  \
> -        tcg_gen_shli_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2], -sh);          
>  \
> -    else                                                                     
>  \
> -        tcg_gen_mov_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2]);                
>  \
> -    t1 = tcg_temp_new_i32();                                                 
>  \
> -    sh = (crbD(ctx->opcode) & 0x03) - (crbB(ctx->opcode) & 0x03);            
>  \
> -    if (sh > 0)                                                              
>  \
> -        tcg_gen_shri_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2], sh);           
>  \
> -    else if (sh < 0)                                                         
>  \
> -        tcg_gen_shli_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2], -sh);          
>  \
> -    else                                                                     
>  \
> -        tcg_gen_mov_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2]);                
>  \
> -    tcg_op(t0, t0, t1);                                                      
>  \
> -    bitmask = 0x08 >> (crbD(ctx->opcode) & 0x03);                            
>  \
> -    tcg_gen_andi_i32(t0, t0, bitmask);                                       
>  \
> -    tcg_gen_andi_i32(t1, cpu_crf[crbD(ctx->opcode) >> 2], ~bitmask);         
>  \
> -    tcg_gen_or_i32(cpu_crf[crbD(ctx->opcode) >> 2], t0, t1);                 
>  \
> -    tcg_temp_free_i32(t0);                                                   
>  \
> -    tcg_temp_free_i32(t1);                                                   
>  \
> +#define GEN_CRLOGIC(name, tcg_op, opc)                                       
>   \
> +static void glue(gen_, name)(DisasContext *ctx)                              
>   \
> +{                                                                            
>   \
> +    tcg_op(cpu_cr[crbD(ctx->opcode)], cpu_cr[crbA(ctx->opcode)],             
>   \
> +           cpu_cr[crbB(ctx->opcode)]);                                       
>   \
>  }
>  

This is a very nice cleanup ... but it oversteers just a little.  For some CR 
logical instructions, the generated code can produce non-zero bits in the i32 
cr variable in places other than the LSB.
For example, consider crnand, which produces the following on a PPC host:

IN:
0x0000000010000578:  crnand  4*cr7+so,4*cr7+lt,4*cr7+eq

OUT: [size=112]
0x6041a630:  lwz     r14,-4(r27)
0x6041a634:  cmpwi   cr7,r14,0
0x6041a638:  bne-    cr7,0x6041a68c
0x6041a63c:  lwz     r14,640(r27)
0x6041a640:  lwz     r15,648(r27)
0x6041a644:  nand    r14,r14,r15
0x6041a648:  andi.   r14,r14,1
0x6041a64c:  stw     r14,652(r27)
0x6041a650:  .long 0x0
0x6041a654:  .long 0x0
0x6041a658:  .long 0x0
0x6041a65c:  .long 0x0

The host nand operation will always produce an i32 value that has 1s in bits 
0-30, since they are presumably zero.  A brute-force fix would be to add a 
tcg_gen_andi_i32(D,D,1) to your macro.  But I think this is required only for a 
subset of the
instructions (crnand, crnor, creqv, crorc).

>  /* crand */
> @@ -3980,7 +3967,11 @@ GEN_CRLOGIC(crxor, tcg_gen_xor_i32, 0x06);
>  /* mcrf */
>  static void gen_mcrf(DisasContext *ctx)
>  {
> -    tcg_gen_mov_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfS(ctx->opcode)]);
> +    int i;
> +    for (i = 0; i < 4; i++) {
> +        tcg_gen_mov_i32(cpu_cr[crfD(ctx->opcode) * 4 + i],
> +                        cpu_cr[crfS(ctx->opcode) * 4 + i]);
> +    }
>  }
>  
>  /***                           System linkage                              
> ***/
> @@ -4133,20 +4124,12 @@ static void gen_write_xer(TCGv src)
>  /* mcrxr */
>  static void gen_mcrxr(DisasContext *ctx)
>  {
> -    TCGv_i32 t0 = tcg_temp_new_i32();
> -    TCGv_i32 t1 = tcg_temp_new_i32();
> -    TCGv_i32 dst = cpu_crf[crfD(ctx->opcode)];
> -
> -    tcg_gen_trunc_tl_i32(t0, cpu_so);
> -    tcg_gen_trunc_tl_i32(t1, cpu_ov);
> -    tcg_gen_trunc_tl_i32(dst, cpu_ca);
> -    tcg_gen_shli_i32(t0, t0, 3);
> -    tcg_gen_shli_i32(t1, t1, 2);
> -    tcg_gen_shli_i32(dst, dst, 1);
> -    tcg_gen_or_i32(dst, dst, t0);
> -    tcg_gen_or_i32(dst, dst, t1);
> -    tcg_temp_free_i32(t0);
> -    tcg_temp_free_i32(t1);
> +    int crf = crfD(ctx->opcode);
> +
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_LT], cpu_so);
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_GT], cpu_ov);
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_EQ], cpu_ca);
> +    tcg_gen_movi_i32(cpu_cr[crf * 4 + CRF_SO], 0);
>  
>      tcg_gen_movi_tl(cpu_so, 0);
>      tcg_gen_movi_tl(cpu_ov, 0);
> @@ -6320,11 +6303,13 @@ static void gen_tlbsx_40x(DisasContext *ctx)
>      gen_helper_4xx_tlbsx(cpu_gpr[rD(ctx->opcode)], cpu_env, t0);
>      tcg_temp_free(t0);
>      if (Rc(ctx->opcode)) {
> -        int l1 = gen_new_label();
> -        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
> -        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rD(ctx->opcode)], -1, l1);
> -        tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 0x02);
> -        gen_set_label(l1);
> +        t0 = tcg_temp_new();
> +        tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so);
> +        tcg_gen_movi_i32(cpu_cr[CRF_LT], 0);
> +        tcg_gen_movi_i32(cpu_cr[CRF_GT], 0);
> +        tcg_gen_setcondi_tl(TCG_COND_EQ, t0, cpu_gpr[rD(ctx->opcode)], -1);
> +        tcg_gen_trunc_tl_i32(cpu_cr[CRF_EQ], t0);
> +        tcg_temp_free(t0);
>      }
>  #endif
>  }
> @@ -6401,11 +6386,13 @@ static void gen_tlbsx_440(DisasContext *ctx)
>      gen_helper_440_tlbsx(cpu_gpr[rD(ctx->opcode)], cpu_env, t0);
>      tcg_temp_free(t0);
>      if (Rc(ctx->opcode)) {
> -        int l1 = gen_new_label();
> -        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
> -        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rD(ctx->opcode)], -1, l1);
> -        tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 0x02);
> -        gen_set_label(l1);
> +        t0 = tcg_temp_new();
> +        tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so);
> +        tcg_gen_movi_i32(cpu_cr[CRF_LT], 0);
> +        tcg_gen_movi_i32(cpu_cr[CRF_GT], 0);
> +        tcg_gen_setcondi_tl(TCG_COND_EQ, t0, cpu_gpr[rD(ctx->opcode)], -1);
> +        tcg_gen_trunc_tl_i32(cpu_cr[CRF_EQ], t0);
> +        tcg_temp_free(t0);
>      }
>  #endif
>  }
> @@ -7371,7 +7358,7 @@ GEN_VXFORM(vpmsumd, 4, 19)
>  static void gen_##op(DisasContext *ctx)             \
>  {                                                   \
>      TCGv_ptr ra, rb, rd;                            \
> -    TCGv_i32 ps;                                    \
> +    TCGv_i32 ps, crf;                               \
>                                                      \
>      if (unlikely(!ctx->altivec_enabled)) {          \
>          gen_exception(ctx, POWERPC_EXCP_VPU);       \
> @@ -7383,13 +7370,16 @@ static void gen_##op(DisasContext *ctx)             \
>      rd = gen_avr_ptr(rD(ctx->opcode));              \
>                                                      \
>      ps = tcg_const_i32((ctx->opcode & 0x200) != 0); \
> +    crf = tcg_temp_new_i32();                       \
>                                                      \
> -    gen_helper_##op(cpu_crf[6], rd, ra, rb, ps);    \
> +    gen_helper_##op(crf, rd, ra, rb, ps);           \
> +    gen_op_mtcr(6 << 2, crf, 0);                    \
>                                                      \
>      tcg_temp_free_ptr(ra);                          \
>      tcg_temp_free_ptr(rb);                          \
>      tcg_temp_free_ptr(rd);                          \
>      tcg_temp_free_i32(ps);                          \
> +    tcg_temp_free_ptr(crf);                         \

tcg_temp_free_i32() ?

>  }
>  
>  GEN_BCD(bcdadd)
> @@ -8217,6 +8207,7 @@ static void gen_##name(DisasContext *ctx)        \
>  static void gen_##name(DisasContext *ctx)         \
>  {                                                 \
>      TCGv_ptr ra, rb;                              \
> +    TCGv_i32 tmp;                                 \
>      if (unlikely(!ctx->fpu_enabled)) {            \
>          gen_exception(ctx, POWERPC_EXCP_FPU);     \
>          return;                                   \
> @@ -8224,8 +8215,10 @@ static void gen_##name(DisasContext *ctx)         \
>      gen_update_nip(ctx, ctx->nip - 4);            \
>      ra = gen_fprp_ptr(rA(ctx->opcode));           \
>      rb = gen_fprp_ptr(rB(ctx->opcode));           \
> -    gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \
> -                      cpu_env, ra, rb);           \
> +    tmp = tcg_temp_new_i32();                     \
> +    gen_helper_##name(tmp, cpu_env, ra, rb);      \
> +    gen_op_mtcr(crfD(ctx->opcode) << 2, tmp, 0);  \
> +    tcg_temp_free_i32(tmp);                       \
>      tcg_temp_free_ptr(ra);                        \
>      tcg_temp_free_ptr(rb);                        \
>  }
> @@ -8234,7 +8227,7 @@ static void gen_##name(DisasContext *ctx)         \
>  static void gen_##name(DisasContext *ctx)         \
>  {                                                 \
>      TCGv_ptr ra;                                  \
> -    TCGv_i32 dcm;                                 \
> +    TCGv_i32 dcm, tmp;                            \
>      if (unlikely(!ctx->fpu_enabled)) {            \
>          gen_exception(ctx, POWERPC_EXCP_FPU);     \
>          return;                                   \
> @@ -8242,8 +8235,10 @@ static void gen_##name(DisasContext *ctx)         \
>      gen_update_nip(ctx, ctx->nip - 4);            \
>      ra = gen_fprp_ptr(rA(ctx->opcode));           \
>      dcm = tcg_const_i32(DCM(ctx->opcode));        \
> -    gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \
> -                      cpu_env, ra, dcm);          \
> +    tmp = tcg_temp_new_i32();                     \
> +    gen_helper_##name(tmp, cpu_env, ra, dcm);     \
> +    gen_op_mtcr(crfD(ctx->opcode) << 2, tmp, 0);  \
> +    tcg_temp_free_i32(tmp);                       \
>      tcg_temp_free_ptr(ra);                        \
>      tcg_temp_free_i32(dcm);                       \
>  }
> @@ -8668,37 +8663,32 @@ GEN_SPEOP_ARITH_IMM2(evsubifw, tcg_gen_subi_i32);
>  #define GEN_SPEOP_COMP(name, tcg_cond)                                       
>  \
>  static inline void gen_##name(DisasContext *ctx)                             
>  \
>  {                                                                            
>  \
> +    TCGv tmp = tcg_temp_new();                                               
>  \
> +                                                                             
>  \
>      if (unlikely(!ctx->spe_enabled)) {                                       
>  \
>          gen_exception(ctx, POWERPC_EXCP_SPEU);                               
>  \
>          return;                                                              
>  \
>      }                                                                        
>  \
> -    int l1 = gen_new_label();                                                
>  \
> -    int l2 = gen_new_label();                                                
>  \
> -    int l3 = gen_new_label();                                                
>  \
> -    int l4 = gen_new_label();                                                
>  \
>                                                                               
>  \
>      tcg_gen_ext32s_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);   
>  \
>      tcg_gen_ext32s_tl(cpu_gpr[rB(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);   
>  \
>      tcg_gen_ext32s_tl(cpu_gprh[rA(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]); 
>  \
>      tcg_gen_ext32s_tl(cpu_gprh[rB(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]); 
>  \
>                                                                               
>  \
> -    tcg_gen_brcond_tl(tcg_cond, cpu_gpr[rA(ctx->opcode)],                    
>  \
> -                       cpu_gpr[rB(ctx->opcode)], l1);                        
>  \
> -    tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 0);                         
>  \
> -    tcg_gen_br(l2);                                                          
>  \
> -    gen_set_label(l1);                                                       
>  \
> -    tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)],                             
>  \
> -                     CRF_CL | CRF_CH_OR_CL | CRF_CH_AND_CL);                 
>  \
> -    gen_set_label(l2);                                                       
>  \
> -    tcg_gen_brcond_tl(tcg_cond, cpu_gprh[rA(ctx->opcode)],                   
>  \
> -                       cpu_gprh[rB(ctx->opcode)], l3);                       
>  \
> -    tcg_gen_andi_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)], 
>  \
> -                     ~(CRF_CH | CRF_CH_AND_CL));                             
>  \
> -    tcg_gen_br(l4);                                                          
>  \
> -    gen_set_label(l3);                                                       
>  \
> -    tcg_gen_ori_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)],  
>  \
> -                    CRF_CH | CRF_CH_OR_CL);                                  
>  \
> -    gen_set_label(l4);                                                       
>  \
> +    tcg_gen_setcond_tl(tcg_cond, tmp,                                        
>  \
> +                       cpu_gpr[rA(ctx->opcode)],                             
>  \
> +                       cpu_gpr[rB(ctx->opcode)]);                            
>  \
> +    tcg_gen_trunc_tl_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL], tmp);       
>  \
> +    tcg_gen_setcond_tl(tcg_cond, tmp,                                        
>  \
> +                       cpu_gprh[rA(ctx->opcode)],                            
>  \
> +                       cpu_gprh[rB(ctx->opcode)]);                           
>  \
> +    tcg_gen_trunc_tl_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], tmp);       
>  \
> +    tcg_gen_or_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_OR_CL],             
>  \
> +                   cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH],                   
>  \
> +                   cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]);                  
>  \
> +    tcg_gen_and_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_AND_CL],           
>  \
> +                    cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH],                  
>  \
> +                    cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]);                 
>  \
>  }
>  GEN_SPEOP_COMP(evcmpgtu, TCG_COND_GTU);
>  GEN_SPEOP_COMP(evcmpgts, TCG_COND_GT);
> @@ -8769,22 +8759,20 @@ static inline void gen_evsel(DisasContext *ctx)
>      int l2 = gen_new_label();
>      int l3 = gen_new_label();
>      int l4 = gen_new_label();
> -    TCGv_i32 t0 = tcg_temp_local_new_i32();
> -    tcg_gen_andi_i32(t0, cpu_crf[ctx->opcode & 0x07], 1 << 3);
> -    tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
> +
> +    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[(ctx->opcode & 0x07) * 4], 0, 
> l1);
>      tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]);
>      tcg_gen_br(l2);
>      gen_set_label(l1);
>      tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]);
>      gen_set_label(l2);
> -    tcg_gen_andi_i32(t0, cpu_crf[ctx->opcode & 0x07], 1 << 2);
> -    tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l3);
> +
> +    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[(ctx->opcode & 0x07) * 4 + 1], 
> 0, l3);
>      tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
>      tcg_gen_br(l4);
>      gen_set_label(l3);
>      tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
>      gen_set_label(l4);
> -    tcg_temp_free_i32(t0);
>  }
>  
>  static void gen_evsel0(DisasContext *ctx)
> @@ -9366,9 +9354,12 @@ static inline void gen_##name(DisasContext *ctx)       
>                        \
>      t0 = tcg_temp_new_i32();                                                 
>  \
>      t1 = tcg_temp_new_i32();                                                 
>  \
>                                                                               
>  \
> +    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_LT], 0);             
>  \
> +    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_GT], 0);             
>  \
> +    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_SO], 0);             
>  \
>      tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);                      
>  \
>      tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);                      
>  \
> -    gen_helper_##name(cpu_crf[crfD(ctx->opcode)], cpu_env, t0, t1);          
>  \
> +    gen_helper_##name(cpu_cr[crfD(ctx->opcode) * 4 + CRF_EQ], cpu_env, t0, 
> t1); \
>                                                                               
>  \
>      tcg_temp_free_i32(t0);                                                   
>  \
>      tcg_temp_free_i32(t1);                                                   
>  \
> @@ -9385,10 +9376,32 @@ static inline void gen_##name(DisasContext *ctx)      
>                         \
>      t1 = tcg_temp_new_i64();                                                 
>  \
>      gen_load_gpr64(t0, rA(ctx->opcode));                                     
>  \
>      gen_load_gpr64(t1, rB(ctx->opcode));                                     
>  \
> -    gen_helper_##name(cpu_crf[crfD(ctx->opcode)], cpu_env, t0, t1);          
>  \
> +    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_LT], 0);             
>  \
> +    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_GT], 0);             
>  \
> +    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_SO], 0);             
>  \
> +    gen_helper_##name(cpu_cr[crfD(ctx->opcode) * 4 + CRF_EQ], cpu_env,       
>  \
> +                      t0, t1);                                               
>  \
>      tcg_temp_free_i64(t0);                                                   
>  \
>      tcg_temp_free_i64(t1);                                                   
>  \
>  }
> +#define GEN_SPEFPUOP_COMP_V64(name, helper)                                  
>  \
> +static inline void gen_##name(DisasContext *ctx)                             
>  \
> +{                                                                            
>  \
> +    if (unlikely(!ctx->spe_enabled)) {                                       
>  \
> +        gen_exception(ctx, POWERPC_EXCP_SPEU);                               
>  \
> +        return;                                                              
>  \
> +    }                                                                        
>  \
> +    gen_helper_##helper(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL], cpu_env,     
>  \
> +                        cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); 
>  \
> +    gen_helper_##helper(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], cpu_env,     
>  \
> +                        cpu_gprh[rA(ctx->opcode)], 
> cpu_gprh[rB(ctx->opcode)]);\

This doesn't compile for 64 bit targets because the helpers declare i32 types 
for the GPR arguments.

> +    tcg_gen_or_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_OR_CL],             
>  \
> +                   cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH],                   
>  \
> +                   cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]);                  
>  \
> +    tcg_gen_and_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_AND_CL],           
>  \
> +                    cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH],                  
>  \
> +                    cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]);                 
>  \
> +}
>  
>  /* Single precision floating-point vectors operations */
>  /* Arithmetic */
> @@ -9443,12 +9456,12 @@ GEN_SPEFPUOP_CONV_64_64(evfsctuiz);
>  GEN_SPEFPUOP_CONV_64_64(evfsctsiz);
>  
>  /* Comparison */
> -GEN_SPEFPUOP_COMP_64(evfscmpgt);
> -GEN_SPEFPUOP_COMP_64(evfscmplt);
> -GEN_SPEFPUOP_COMP_64(evfscmpeq);
> -GEN_SPEFPUOP_COMP_64(evfststgt);
> -GEN_SPEFPUOP_COMP_64(evfststlt);
> -GEN_SPEFPUOP_COMP_64(evfststeq);
> +GEN_SPEFPUOP_COMP_V64(evfscmpgt, efscmpgt);
> +GEN_SPEFPUOP_COMP_V64(evfscmplt, efscmplt);
> +GEN_SPEFPUOP_COMP_V64(evfscmpeq, efscmpeq);
> +GEN_SPEFPUOP_COMP_V64(evfststgt, efststgt);
> +GEN_SPEFPUOP_COMP_V64(evfststlt, efststlt);
> +GEN_SPEFPUOP_COMP_V64(evfststeq, efststeq);
>  
>  /* Opcodes definitions */
>  GEN_SPE(evfsadd,   evfssub,   0x00, 0x0A, 0x00000000, 0x00000000, 
> PPC_SPE_SINGLE); //
>

Re: [Qemu-devel] [PATCH 15/17] ppc: store CR registers in 32 1-bit registers

Reply via email to