With normal FP, this doesn't have much effect on the generated code, because most of the FP operations are not CONST/PURE, and so we spill registers at about the same frequency as the explicit loads/stores.
But with Loongson multimedia instructions, which are all integral and whose helpers are in fact CONST+PURE, this greatly improves the code. On a 64-bit host, rather than over-use the deposit operation, we create TCG registers for both the 64-bit FPU register as a whole and the two 32-bit halves. We only ever reference the whole register or the two half registers in any one TB, so there's no problem with aliasing. On a 32-bit host, we only create the 64-bit FPU registers, and then directly reference the internal 32-bit TCG register halves as needed. Signed-off-by: Richard Henderson <r...@twiddle.net> --- target-mips/translate.c | 123 ++++++++++++++++++++++++++++++++++++----------- 1 files changed, 95 insertions(+), 28 deletions(-) diff --git a/target-mips/translate.c b/target-mips/translate.c index d2aeff0..b6a7aeb 100644 --- a/target-mips/translate.c +++ b/target-mips/translate.c @@ -486,6 +486,14 @@ static TCGv cpu_dspctrl, btarget, bcond; static TCGv_i32 hflags; static TCGv_i32 fpu_fcr0, fpu_fcr31; +/* FPU registers. These alias, but we'll only use one or the other in any + one TB based on MIPS_HFLAG_F64. */ +#if TCG_TARGET_REG_BITS == 64 +static TCGv_i32 fpu_f32[32]; +static TCGv_i32 fpu_fh32[32]; +#endif +static TCGv_i64 fpu_f64[32]; + static uint32_t gen_opc_hflags[OPC_BUF_SIZE]; #include "gen-icount.h" @@ -555,6 +563,14 @@ static const char *fregnames[] = "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", }; +#if TCG_TARGET_REG_BITS == 64 +static const char *fhregnames[] = + { "fh0", "fh1", "fh2", "fh3", "fh4", "fh5", "fh6", "fh7", + "fh8", "fh9", "fh10", "fh11", "fh12", "fh13", "fh14", "fh15", + "fh16", "fh17", "fh18", "fh19", "fh20", "fh21", "fh22", "fh23", + "fh24", "fh25", "fh26", "fh27", "fh28", "fh29", "fh30", "fh31", }; +#endif + #ifdef MIPS_DEBUG_DISAS #define MIPS_DEBUG(fmt, ...) 
\ qemu_log_mask(CPU_LOG_TB_IN_ASM, \ @@ -647,55 +663,91 @@ static inline void gen_store_srsgpr (int from, int to) } /* Floating point register moves. */ -static inline void gen_load_fpr32 (DisasContext *ctx, TCGv_i32 t, int reg) +static void gen_load_fpr32 (DisasContext *ctx, TCGv_i32 t, int reg) { - tcg_gen_ld_i32(t, cpu_env, offsetof(CPUState, active_fpu.fpr[reg].w[FP_ENDIAN_IDX])); +#if TCG_TARGET_REG_BITS == 32 + tcg_gen_mov_i32(t, TCGV_LOW(fpu_f64[reg])); +#else + if (ctx->hflags & MIPS_HFLAG_F64) { + tcg_gen_trunc_i64_i32(t, fpu_f64[reg]); + } else { + tcg_gen_mov_i32(t, fpu_f32[reg]); + } +#endif } -static inline void gen_store_fpr32 (DisasContext *ctx, TCGv_i32 t, int reg) +static void gen_store_fpr32 (DisasContext *ctx, TCGv_i32 t, int reg) { - tcg_gen_st_i32(t, cpu_env, offsetof(CPUState, active_fpu.fpr[reg].w[FP_ENDIAN_IDX])); +#if TCG_TARGET_REG_BITS == 32 + tcg_gen_mov_i32(TCGV_LOW(fpu_f64[reg]), t); +#else + if (ctx->hflags & MIPS_HFLAG_F64) { + TCGv_i64 t64 = MAKE_TCGV_I64(GET_TCGV_I32(t)); + tcg_gen_deposit_i64(fpu_f64[reg], fpu_f64[reg], t64, 0, 32); + } else { + tcg_gen_mov_i32(fpu_f32[reg], t); + } +#endif } -static inline void gen_load_fpr32h (DisasContext *ctx, TCGv_i32 t, int reg) +static void gen_load_fpr32h (DisasContext *ctx, TCGv_i32 t, int reg) { - tcg_gen_ld_i32(t, cpu_env, offsetof(CPUState, active_fpu.fpr[reg].w[!FP_ENDIAN_IDX])); +#if TCG_TARGET_REG_BITS == 32 + tcg_gen_mov_i32(t, TCGV_HIGH(fpu_f64[reg])); +#else + if (ctx->hflags & MIPS_HFLAG_F64) { + TCGv_i64 t64 = tcg_temp_new_i64(); + tcg_gen_shri_i64(t64, fpu_f64[reg], 32); + tcg_gen_trunc_i64_i32(t, t64); + tcg_temp_free_i64(t64); + } else { + tcg_gen_mov_i32(t, fpu_fh32[reg]); + } +#endif } -static inline void gen_store_fpr32h (DisasContext *ctx, TCGv_i32 t, int reg) +static void gen_store_fpr32h (DisasContext *ctx, TCGv_i32 t, int reg) { - tcg_gen_st_i32(t, cpu_env, offsetof(CPUState, active_fpu.fpr[reg].w[!FP_ENDIAN_IDX])); +#if TCG_TARGET_REG_BITS == 32 + 
tcg_gen_mov_i32(TCGV_HIGH(fpu_f64[reg]), t); +#else + if (ctx->hflags & MIPS_HFLAG_F64) { + TCGv_i64 t64 = MAKE_TCGV_I64(GET_TCGV_I32(t)); + tcg_gen_deposit_i64(fpu_f64[reg], fpu_f64[reg], t64, 32, 32); + } else { + tcg_gen_mov_i32(fpu_fh32[reg], t); + } +#endif } -static inline void gen_load_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg) +static void gen_load_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg) { if (ctx->hflags & MIPS_HFLAG_F64) { - tcg_gen_ld_i64(t, cpu_env, offsetof(CPUState, active_fpu.fpr[reg].d)); + tcg_gen_mov_i64(t, fpu_f64[reg]); } else { - TCGv_i32 t0 = tcg_temp_new_i32(); - TCGv_i32 t1 = tcg_temp_new_i32(); - gen_load_fpr32(ctx, t0, reg & ~1); - gen_load_fpr32(ctx, t1, reg | 1); - tcg_gen_concat_i32_i64(t, t0, t1); - tcg_temp_free_i32(t0); - tcg_temp_free_i32(t1); +#if TCG_TARGET_REG_BITS == 32 + tcg_gen_concat32_i64(t, fpu_f64[reg & ~1], fpu_f64[reg | 1]); +#else + tcg_gen_concat_i32_i64(t, fpu_f32[reg & ~1], fpu_f32[reg | 1]); +#endif } } -static inline void gen_store_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg) +static void gen_store_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg) { if (ctx->hflags & MIPS_HFLAG_F64) { - tcg_gen_st_i64(t, cpu_env, offsetof(CPUState, active_fpu.fpr[reg].d)); + tcg_gen_mov_i64(fpu_f64[reg], t); } else { - TCGv_i64 t0 = tcg_temp_new_i64(); - TCGv_i32 t1 = tcg_temp_new_i32(); - tcg_gen_trunc_i64_i32(t1, t); - gen_store_fpr32(ctx, t1, reg & ~1); - tcg_gen_shri_i64(t0, t, 32); - tcg_gen_trunc_i64_i32(t1, t0); - gen_store_fpr32(ctx, t1, reg | 1); - tcg_temp_free_i32(t1); - tcg_temp_free_i64(t0); +#if TCG_TARGET_REG_BITS == 32 + tcg_gen_mov_i32(TCGV_LOW(fpu_f64[reg & ~1]), TCGV_LOW(t)); + tcg_gen_mov_i32(TCGV_LOW(fpu_f64[reg | 1]), TCGV_HIGH(t)); +#else + TCGv_i64 t64 = tcg_temp_new_i64(); + tcg_gen_shri_i64(t64, t, 32); + tcg_gen_trunc_i64_i32(fpu_f32[reg | 1], t64); + tcg_temp_free_i64(t64); + tcg_gen_trunc_i64_i32(fpu_f32[reg & ~1], t); +#endif } } @@ -12681,6 +12733,21 @@ static void mips_tcg_init(void) 
offsetof(CPUState, active_fpu.fcr31), "fcr31"); +#if TCG_TARGET_REG_BITS == 64 + for (i = 0; i < 32; i++) { + int off = offsetof(CPUState, active_fpu.fpr[i].w[FP_ENDIAN_IDX]); + fpu_f32[i] = tcg_global_mem_new_i32(TCG_AREG0, off, fregnames[i]); + } + for (i = 0; i < 32; i++) { + int off = offsetof(CPUState, active_fpu.fpr[i].w[!FP_ENDIAN_IDX]); + fpu_fh32[i] = tcg_global_mem_new_i32(TCG_AREG0, off, fhregnames[i]); + } +#endif + for (i = 0; i < 32; i++) { + int off = offsetof(CPUState, active_fpu.fpr[i].w[FP_ENDIAN_IDX]); + fpu_f64[i] = tcg_global_mem_new_i64(TCG_AREG0, off, fregnames[i]); + } + /* register helpers */ #define GEN_HELPER 2 #include "helper.h" -- 1.7.7.4