Tested with i386-softmmu only. With these patches, TCI (the TCG interpreter) can boot and run Windows XP SP2; it runs roughly 6 times slower than the native-code TCG JIT.
Subject: [PATCH 1/5] tci: fix op_sar_iXX and op_ext16s_iXX
--- tcg/tci.c | 6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tcg/tci.c b/tcg/tci.c index e467b3a..81c415c 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -206,7 +206,7 @@ static uint16_t tci_read_r16(uint8_t **tb_ptr) } /* Read indexed register (16 bit signed) from bytecode. */ -static uint16_t tci_read_r16s(uint8_t **tb_ptr) +static int16_t tci_read_r16s(uint8_t **tb_ptr) { uint16_t value = tci_read_reg16s(**tb_ptr); *tb_ptr += 1; @@ -549,7 +549,7 @@ unsigned long tcg_qemu_tb_exec(uint8_t *tb_ptr) t0 = *tb_ptr++; t1 = tci_read_ri32(&tb_ptr); t2 = tci_read_ri32(&tb_ptr); - tci_write_reg32(t0, (t1 >> t2) | (t1 & (1UL << 31))); + tci_write_reg32(t0, ((int32_t)t1 >> t2)); break; #ifdef TCG_TARGET_HAS_rot_i32 case INDEX_op_rotl_i32: @@ -794,7 +794,7 @@ unsigned long tcg_qemu_tb_exec(uint8_t *tb_ptr) t0 = *tb_ptr++; t1 = tci_read_ri64(&tb_ptr); t2 = tci_read_ri64(&tb_ptr); - tci_write_reg64(t0, (t1 >> t2) | (t1 & (1ULL << 63))); + tci_write_reg64(t0, ((int64_t)t1 >> t2)); break; #ifdef TCG_TARGET_HAS_rot_i64 case INDEX_op_rotl_i64: -- 1.6.3.msysgit.0
Subject: [PATCH 2/5] tci: add bswapXX_i32,div_i32 and rot_i32 --- tcg/bytecode/tcg-target.c | 24 +++++++++++++++++++++++- tcg/tci.c | 40 +++++++++++++++++++++++++++++++++++----- 2 files changed, 58 insertions(+), 6 deletions(-) diff --git a/tcg/bytecode/tcg-target.c b/tcg/bytecode/tcg-target.c index 2bd12b8..aae570f 100644 --- a/tcg/bytecode/tcg-target.c +++ b/tcg/bytecode/tcg-target.c @@ -722,6 +722,10 @@ static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args, case INDEX_op_shl_i32: case INDEX_op_shr_i32: case INDEX_op_sar_i32: +#ifdef TCG_TARGET_HAS_rot_i32 + case INDEX_op_rotl_i32: + case INDEX_op_rotr_i32: +#endif tcg_out_op_t(s, opc); tcg_out_r(s, args[0]); tcg_out_ri32(s, const_args[1], args[1]); @@ -816,7 +820,10 @@ static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args, case INDEX_op_divu_i32: case INDEX_op_rem_i32: case INDEX_op_remu_i32: - TODO(); + tcg_out_op_t(s, opc); + tcg_out_r(s, args[0]); + tcg_out_ri32(s, const_args[1], args[1]); + tcg_out_ri32(s, const_args[2], args[2]); break; #else case INDEX_op_div2_i32: @@ -1002,6 +1009,21 @@ static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args, break; #endif #endif /* TCG_TARGET_REG_BITS == 64 */ +#if defined(TCG_TARGET_HAS_bswap32_i32) + case INDEX_op_bswap32_i32: + tcg_out_op_t(s, opc); + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + break; +#endif +#if defined(TCG_TARGET_HAS_bswap16_i32) + case INDEX_op_bswap16_i32: + tcg_dump_ops(s, stderr); + tcg_out_op_t(s, opc); + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + break; +#endif case INDEX_op_end: TODO(); break; diff --git a/tcg/tci.c b/tcg/tci.c index 81c415c..8bb78e3 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -503,11 +503,29 @@ unsigned long tcg_qemu_tb_exec(uint8_t *tb_ptr) break; #ifdef TCG_TARGET_HAS_div_i32 case INDEX_op_div_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(&tb_ptr); + t2 = tci_read_ri32(&tb_ptr); + tci_write_reg32(t0, (int32_t)t1 / (int32_t)t2); + break; case INDEX_op_divu_i32: + t0 = *tb_ptr++; + 
t1 = tci_read_ri32(&tb_ptr); + t2 = tci_read_ri32(&tb_ptr); + tci_write_reg32(t0, t1 / t2); + break; case INDEX_op_rem_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(&tb_ptr); + t2 = tci_read_ri32(&tb_ptr); + tci_write_reg32(t0, (int32_t)t1 % (int32_t)t2); + break; case INDEX_op_remu_i32: - TODO(); - break; + t0 = *tb_ptr++; + t1 = tci_read_ri32(&tb_ptr); + t2 = tci_read_ri32(&tb_ptr); + tci_write_reg32(t0, t1 % t2); + break; #else case INDEX_op_div2_i32: case INDEX_op_divu2_i32: @@ -553,8 +571,16 @@ unsigned long tcg_qemu_tb_exec(uint8_t *tb_ptr) break; #ifdef TCG_TARGET_HAS_rot_i32 case INDEX_op_rotl_i32: + t0 = *tb_ptr++; + t1 = tci_read_ri32(&tb_ptr); + t2 = tci_read_ri32(&tb_ptr); + tci_write_reg32(t0, (t1<<t2)|(t1>>(32-t2))); + break; case INDEX_op_rotr_i32: - TODO(); + t0 = *tb_ptr++; + t1 = tci_read_ri32(&tb_ptr); + t2 = tci_read_ri32(&tb_ptr); + tci_write_reg32(t0, (t1>>t2)|(t1<<(32-t2))); break; #endif case INDEX_op_brcond_i32: @@ -640,12 +666,16 @@ unsigned long tcg_qemu_tb_exec(uint8_t *tb_ptr) #endif #ifdef TCG_TARGET_HAS_bswap16_i32 case INDEX_op_bswap16_i32: - TODO(); + t0 = *tb_ptr++; + t1 = tci_read_r16(&tb_ptr); + tci_write_reg32(t0, bswap16(t1)); break; #endif #ifdef TCG_TARGET_HAS_bswap32_i32 case INDEX_op_bswap32_i32: - TODO(); + t0 = *tb_ptr++; + t1 = tci_read_r32(&tb_ptr); + tci_write_reg32(t0, bswap32(t1)); break; #endif #ifdef TCG_TARGET_HAS_not_i32 -- 1.6.3.msysgit.0
Subject: [PATCH 3/5] tci: support GETPC() for SOFTMMU --- dyngen-exec.h | 5 ++++- tcg/tci.c | 7 +++++++ 2 files changed, 11 insertions(+), 1 deletions(-) diff --git a/dyngen-exec.h b/dyngen-exec.h index d5620ca..ba213c4 100644 --- a/dyngen-exec.h +++ b/dyngen-exec.h @@ -119,7 +119,10 @@ extern int printf(const char *, ...); /* The return address may point to the start of the next instruction. Subtracting one gets us the call instruction itself. */ -#if defined(__s390__) +#if defined(CONFIG_TCG_INTERPRETER) +extern uint8_t * tci_tb_ptr; +# define GETPC() ((void *)tci_tb_ptr) +#elif defined(__s390__) # define GETPC() ((void*)(((unsigned long)__builtin_return_address(0) & 0x7fffffffUL) - 1)) #elif defined(__arm__) /* Thumb return addresses have the low bit set, so we need to subtract two. diff --git a/tcg/tci.c b/tcg/tci.c index 8bb78e3..0ba605b 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -50,6 +50,10 @@ struct CPUX86State *env; #error Target support missing, please fix! #endif +#ifdef CONFIG_SOFTMMU +uint8_t * tci_tb_ptr; +#endif + static tcg_target_ulong tci_reg[TCG_TARGET_NB_REGS]; static tcg_target_ulong tci_read_reg(uint32_t index) @@ -380,6 +384,9 @@ unsigned long tcg_qemu_tb_exec(uint8_t *tb_ptr) tci_reg[TCG_AREG0] = (tcg_target_ulong)env; for (;;) { +#ifdef CONFIG_SOFTMMU + tci_tb_ptr=tb_ptr; +#endif uint8_t opc = *(uint8_t *)tb_ptr++; tcg_target_ulong t0; tcg_target_ulong t1; -- 1.6.3.msysgit.0
Subject: [PATCH 4/5] tci: new op_call implementation for tci --- tcg/bytecode/tcg-target.c | 191 ++++++++++++++++++++++++++++++++++++++++++++- tcg/tcg-opc.h | 25 ++++++ tcg/tcg.c | 139 ++++++++++++++++++++++++++++++++ tcg/tci.c | 156 ++++++++++++++++++++++++++++++++++-- 4 files changed, 500 insertions(+), 11 deletions(-) diff --git a/tcg/bytecode/tcg-target.c b/tcg/bytecode/tcg-target.c index aae570f..744b9e6 100644 --- a/tcg/bytecode/tcg-target.c +++ b/tcg/bytecode/tcg-target.c @@ -248,6 +248,28 @@ static const TCGTargetOpDef tcg_target_op_defs[] = { { INDEX_op_bswap32_i32, { "r", "r" } }, #endif + { INDEX_op_call0_r0, { "i"} }, + { INDEX_op_call1_r0, { "i","ri"} }, + { INDEX_op_call2_r0, { "i","ri","ri"} }, + { INDEX_op_call3_r0, { "i","ri","ri","ri"} }, + { INDEX_op_call4_r0, { "i","ri","ri","ri","ri"} }, + + { INDEX_op_call0_r1, { "i","r"} }, + { INDEX_op_call1_r1, { "i","ri","r"} }, + { INDEX_op_call2_r1, { "i","ri","ri","r"} }, + { INDEX_op_call3_r1, { "i","ri","ri","ri","r"} }, + { INDEX_op_call4_r1, { "i","ri","ri","ri","ri","r"} }, + +#if TCG_TARGET_REG_BITS == 32 + + { INDEX_op_call0_r2, { "i","r","r"} }, + { INDEX_op_call1_r2, { "i","ri","r","r"} }, + { INDEX_op_call2_r2, { "i","ri","ri","r","r"} }, + { INDEX_op_call3_r2, { "i","ri","ri","ri","r","r"} }, + { INDEX_op_call4_r2, { "i","ri","ri","ri","ri","r","r"} }, + +#endif + { -1 }, }; @@ -655,6 +677,172 @@ static void tcg_out_movi(TCGContext *s, TCGType type, } } +static void tcg_out_op_call(TCGContext *s, const TCGArg *args, + const int *const_args) +{ + int nb_iargs=args[0]&0x0F; + int nb_oargs=args[0]>>4; + + assert(const_args[1]!=0); + + switch(nb_iargs) + { + case 0: + switch(nb_oargs) + { + case 0: + tcg_out_op_t(s, INDEX_op_call0_r0); + tcg_out_i(s, args[1]);/*func*/ + break; + case 1: + tcg_out_op_t(s, INDEX_op_call0_r1); + tcg_out_i(s, args[1]);/*func*/ + tcg_out_r(s, args[2]);/*r1*/ + break; +#if TCG_TARGET_REG_BITS == 32 + case 2: + tcg_out_op_t(s, INDEX_op_call0_r2); + tcg_out_i(s, 
args[1]);/*func*/ + tcg_out_r(s, args[2]);/*r1*/ + tcg_out_r(s, args[3]);/*r2*/ + break; +#endif + default: + TODO(); + } + break; + case 1: + switch(nb_oargs) + { + case 0: + tcg_out_op_t(s, INDEX_op_call1_r0); + tcg_out_i(s, args[1]);/*func*/ + tcg_out_ri(s, const_args[2], args[2]);/*arg1*/ + break; + case 1: + tcg_out_op_t(s, INDEX_op_call1_r1); + tcg_out_i(s, args[1]);/*func*/ + tcg_out_ri(s, const_args[2], args[2]);/*arg1*/ + tcg_out_r(s, args[3]);/*r1*/ + break; +#if TCG_TARGET_REG_BITS == 32 + case 2: + tcg_out_op_t(s, INDEX_op_call1_r2); + tcg_out_i(s, args[1]);/*func*/ + tcg_out_ri(s, const_args[2], args[2]);/*arg1*/ + tcg_out_r(s, args[3]);/*r1*/ + tcg_out_r(s, args[4]);/*r2*/ + break; +#endif + default: + TODO(); + } + break; + case 2: + switch(nb_oargs) + { + case 0: + tcg_out_op_t(s, INDEX_op_call2_r0); + tcg_out_i(s, args[1]);/*func*/ + tcg_out_ri(s, const_args[3], args[3]);/*arg1*/ + tcg_out_ri(s, const_args[2], args[2]);/*arg2*/ + break; + case 1: + tcg_out_op_t(s, INDEX_op_call2_r1); + tcg_out_i(s, args[1]);/*func*/ + tcg_out_ri(s, const_args[3], args[3]);/*arg1*/ + tcg_out_ri(s, const_args[2], args[2]);/*arg2*/ + tcg_out_r(s, args[4]);/*r1*/ + break; +#if TCG_TARGET_REG_BITS == 32 + case 2: + tcg_out_op_t(s, INDEX_op_call2_r2); + tcg_out_i(s, args[1]);/*func*/ + tcg_out_ri(s, const_args[3], args[3]);/*arg1*/ + tcg_out_ri(s, const_args[2], args[2]);/*arg2*/ + tcg_out_r(s, args[4]);/*r1*/ + tcg_out_r(s, args[5]);/*r2*/ + break; +#endif + default: + TODO(); + } + break; + case 3: + switch(nb_oargs) + { + case 0: + tcg_out_op_t(s, INDEX_op_call3_r0); + tcg_out_i(s, args[1]);/*func*/ + tcg_out_ri(s, const_args[4], args[4]);/*arg1*/ + tcg_out_ri(s, const_args[3], args[3]);/*arg2*/ + tcg_out_ri(s, const_args[2], args[2]);/*arg3*/ + break; + case 1: + tcg_out_op_t(s, INDEX_op_call3_r1); + tcg_out_i(s, args[1]);/*func*/ + tcg_out_ri(s, const_args[4], args[4]);/*arg1*/ + tcg_out_ri(s, const_args[3], args[3]);/*arg2*/ + tcg_out_ri(s, const_args[2], 
args[2]);/*arg3*/ + tcg_out_r(s, args[5]);/*r1*/ + break; +#if TCG_TARGET_REG_BITS == 32 + case 2: + tcg_out_op_t(s, INDEX_op_call3_r2); + tcg_out_i(s, args[1]);/*func*/ + tcg_out_ri(s, const_args[4], args[4]);/*arg1*/ + tcg_out_ri(s, const_args[3], args[3]);/*arg2*/ + tcg_out_ri(s, const_args[2], args[2]);/*arg3*/ + tcg_out_r(s, args[5]);/*r1*/ + tcg_out_r(s, args[6]);/*r2*/ + break; +#endif + default: + TODO(); + } + break; + case 4: + switch(nb_oargs) + { + case 0: + tcg_out_op_t(s, INDEX_op_call4_r0); + tcg_out_i(s, args[1]);/*func*/ + tcg_out_ri(s, const_args[5], args[5]);/*arg1*/ + tcg_out_ri(s, const_args[4], args[4]);/*arg2*/ + tcg_out_ri(s, const_args[3], args[3]);/*arg3*/ + tcg_out_ri(s, const_args[2], args[2]);/*arg4*/ + break; + case 1: + tcg_out_op_t(s, INDEX_op_call4_r1); + tcg_out_i(s, args[1]);/*func*/ + tcg_out_ri(s, const_args[5], args[5]);/*arg1*/ + tcg_out_ri(s, const_args[4], args[4]);/*arg2*/ + tcg_out_ri(s, const_args[3], args[3]);/*arg3*/ + tcg_out_ri(s, const_args[2], args[2]);/*arg4*/ + tcg_out_r(s, args[6]);/*r1*/ + break; +#if TCG_TARGET_REG_BITS == 32 + case 2: + tcg_out_op_t(s, INDEX_op_call4_r2); + tcg_out_i(s, args[1]);/*func*/ + tcg_out_ri(s, const_args[5], args[5]);/*arg1*/ + tcg_out_ri(s, const_args[4], args[4]);/*arg2*/ + tcg_out_ri(s, const_args[3], args[3]);/*arg3*/ + tcg_out_ri(s, const_args[2], args[2]);/*arg4*/ + tcg_out_r(s, args[6]);/*r1*/ + tcg_out_r(s, args[7]);/*r2*/ + break; +#endif + default: + TODO(); + } + break; + default: + TODO(); + } +} + + static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args, const int *const_args) { @@ -683,8 +871,7 @@ static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args, tci_out_label(s, args[0]); break; case INDEX_op_call: - tcg_out_op_t(s, opc); - tcg_out_ri(s, const_args[0], args[0]); + tcg_out_op_call(s,args,const_args); break; case INDEX_op_jmp: TODO(); diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index b7f3fd7..070ba39 100644 --- a/tcg/tcg-opc.h +++ 
b/tcg/tcg-opc.h @@ -269,4 +269,29 @@ DEF2(qemu_st64, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) #endif /* TCG_TARGET_REG_BITS != 32 */ +#ifdef CONFIG_TCG_INTERPRETER + +DEF2(call0_r0, 0, 0, 0, 0) +DEF2(call1_r0, 0, 1, 0, 0) +DEF2(call2_r0, 0, 2, 0, 0) +DEF2(call3_r0, 0, 3, 0, 0) +DEF2(call4_r0, 0, 4, 0, 0) +DEF2(call0_r1, 1, 0, 0, 0) +DEF2(call1_r1, 1, 1, 0, 0) +DEF2(call2_r1, 1, 2, 0, 0) +DEF2(call3_r1, 1, 3, 0, 0) +DEF2(call4_r1, 1, 4, 0, 0) + +#if TCG_TARGET_REG_BITS == 32 + +DEF2(call0_r2, 2, 0, 0, 0) +DEF2(call1_r2, 2, 1, 0, 0) +DEF2(call2_r2, 2, 2, 0, 0) +DEF2(call3_r2, 2, 3, 0, 0) +DEF2(call4_r2, 2, 4, 0, 0) + +#endif + +#endif + #undef DEF2 diff --git a/tcg/tcg.c b/tcg/tcg.c index 2a82f37..20aac38 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1690,6 +1690,142 @@ static void tcg_reg_alloc_op(TCGContext *s, #define STACK_DIR(x) (x) #endif +#ifdef CONFIG_TCG_INTERPRETER + +static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, + int opc, const TCGArg *args, + unsigned int dead_iargs) +{ + int nb_iargs, nb_oargs, flags, i, reg, nb_params; + TCGArg arg,func_arg; + TCGTemp *ts; + tcg_target_long func_addr; + TCGRegSet allocated_regs; + const TCGArgConstraint *arg_ct; + TCGArg new_args[TCG_MAX_OP_ARGS]; + int const_args[TCG_MAX_OP_ARGS]; + + arg = *args++; + + nb_oargs = arg >> 16; + nb_iargs = arg & 0xffff; + nb_params = nb_iargs - 1; + + flags = args[nb_oargs + nb_iargs]; + + const_args[0]=1; + new_args[0]=(nb_oargs<<4)|nb_params; + + /* satisfy input constraints */ + tcg_regset_set(allocated_regs, s->reserved_regs); + + for(i = nb_params; i >= 0; i--) { + arg = args[nb_oargs + i]; + ts = &s->temps[arg]; + if (ts->val_type == TEMP_VAL_MEM) { + reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], allocated_regs); + tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset); + ts->val_type = TEMP_VAL_REG; + ts->reg = reg; + ts->mem_coherent = 1; + s->reg_to_temp[reg] = arg; + } else if (ts->val_type == TEMP_VAL_CONST) { + /* constant is OK 
for instruction */ + const_args[nb_params+1-i] = 1; + new_args[nb_params+1-i] = ts->val; + goto iarg_end; + } + assert(ts->val_type == TEMP_VAL_REG); + reg = ts->reg; + if (tcg_regset_test_reg(tcg_target_available_regs[ts->type], reg)) { + /* nothing to do : the constraint is satisfied */ + } else { + allocate_in_reg: + /* allocate a new register matching the constraint + and move the temporary register into it */ + reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], allocated_regs); + tcg_out_mov(s, reg, ts->reg); + } + new_args[nb_params+1-i] = reg; + const_args[nb_params+1-i] = 0; + tcg_regset_set_reg(allocated_regs, reg); + iarg_end: ; + } + + /* mark dead temporaries and free the associated registers */ + for(i = 0; i < nb_iargs; i++) { + arg = args[nb_oargs + i]; + if (IS_DEAD_IARG(i)) { + ts = &s->temps[arg]; + if (!ts->fixed_reg) { + if (ts->val_type == TEMP_VAL_REG) + s->reg_to_temp[ts->reg] = -1; + ts->val_type = TEMP_VAL_DEAD; + } + } + } + + /* clobber call registers */ + for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { + if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) { + tcg_reg_free(s, reg); + } + } + + /* store globals and free associated registers (we assume the insn + can modify any global. 
*/ + if (!(flags & TCG_CALL_CONST)) { + save_globals(s, allocated_regs); + } + + /* satisfy the output constraints */ + tcg_regset_set(allocated_regs, s->reserved_regs); + for(i = 0; i < nb_oargs; i++) { + arg = args[i]; + ts = &s->temps[arg]; + + /* if fixed register, we try to use it */ + reg = ts->reg; + if (ts->fixed_reg && + tcg_regset_test_reg(tcg_target_available_regs[ts->type], reg)) { + goto oarg_end; + } + reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], allocated_regs); + + tcg_regset_set_reg(allocated_regs, reg); + /* if a fixed register is used, then a move will be done afterwards */ + if (!ts->fixed_reg) { + if (ts->val_type == TEMP_VAL_REG) + s->reg_to_temp[ts->reg] = -1; + ts->val_type = TEMP_VAL_REG; + ts->reg = reg; + /* temp value is modified, so the value kept in memory is + potentially not the same */ + ts->mem_coherent = 0; + s->reg_to_temp[reg] = arg; + } +oarg_end: + new_args[i+nb_params+2] = reg; + } + + + /* emit instruction */ + tcg_out_op(s, opc, new_args, const_args); + + /* move the outputs in the correct register if needed */ + for(i = 0; i < nb_oargs; i++) { + ts = &s->temps[args[i]]; + reg = new_args[i+nb_params+2]; + if (ts->fixed_reg && ts->reg != reg) { + tcg_out_mov(s, ts->reg, reg); + } + } + + return nb_iargs + nb_oargs + def->nb_cargs + 1; +} + +#else + static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, int opc, const TCGArg *args, unsigned int dead_iargs) @@ -1868,6 +2004,9 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, return nb_iargs + nb_oargs + def->nb_cargs + 1; } + +#endif + #ifdef CONFIG_PROFILER static int64_t tcg_table_op_count[NB_OPS]; diff --git a/tcg/tci.c b/tcg/tci.c index 0ba605b..3e4165b 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -41,8 +41,23 @@ #define TRACE() ((void)0) #endif -typedef tcg_target_ulong (*helper_function)(tcg_target_ulong, tcg_target_ulong, +typedef tcg_target_ulong (*helper_function0)(void); +typedef tcg_target_ulong 
(*helper_function1)(tcg_target_ulong); +typedef tcg_target_ulong (*helper_function2)(tcg_target_ulong, tcg_target_ulong); +typedef tcg_target_ulong (*helper_function3)(tcg_target_ulong, tcg_target_ulong, + tcg_target_ulong); +typedef tcg_target_ulong (*helper_function4)(tcg_target_ulong, tcg_target_ulong, tcg_target_ulong, tcg_target_ulong); +#if TCG_TARGET_REG_BITS == 32 + +typedef uint64_t (*helper_function0_r64)(void); +typedef uint64_t (*helper_function1_r64)(tcg_target_ulong); +typedef uint64_t (*helper_function2_r64)(tcg_target_ulong, tcg_target_ulong); +typedef uint64_t (*helper_function3_r64)(tcg_target_ulong, tcg_target_ulong, + tcg_target_ulong); +typedef uint64_t (*helper_function4_r64)(tcg_target_ulong, tcg_target_ulong, + tcg_target_ulong, tcg_target_ulong); +#endif #if defined(TARGET_I386) struct CPUX86State *env; @@ -427,15 +442,138 @@ unsigned long tcg_qemu_tb_exec(uint8_t *tb_ptr) case INDEX_op_set_label: TODO(); break; - case INDEX_op_call: - t0 = tci_read_ri(&tb_ptr); - t0 = ((helper_function)t0)(tci_read_reg(TCG_REG_R0), - tci_read_reg(TCG_REG_R1), - tci_read_reg(TCG_REG_R2), - tci_read_reg(TCG_REG_R3)); - // TODO: fix for 32 bit host / 64 bit target. 
- tci_write_reg(TCG_REG_R0, t0); + case INDEX_op_call0_r0: + t0 = tci_read_i(&tb_ptr); + ((helper_function0)t0)(); + break; + case INDEX_op_call1_r0: + t0 = tci_read_i(&tb_ptr); + t1 = tci_read_ri(&tb_ptr); + ((helper_function1)t0)(t1); + break; + case INDEX_op_call2_r0: + t0 = tci_read_i(&tb_ptr); + t1 = tci_read_ri(&tb_ptr); + t2 = tci_read_ri(&tb_ptr); + ((helper_function2)t0)(t1,t2); + break; + case INDEX_op_call3_r0: + t0 = tci_read_i(&tb_ptr); + t1 = tci_read_ri(&tb_ptr); + t2 = tci_read_ri(&tb_ptr); + t3 = tci_read_ri(&tb_ptr); + ((helper_function3)t0)(t1,t2,t3); + break; + case INDEX_op_call4_r0: + t0 = tci_read_i(&tb_ptr); + t1 = tci_read_ri(&tb_ptr); + t2 = tci_read_ri(&tb_ptr); + t3 = tci_read_ri(&tb_ptr); + t4 = tci_read_ri(&tb_ptr); + ((helper_function4)t0)(t1,t2,t3,t4); + break; + case INDEX_op_call0_r1: + t0 = tci_read_i(&tb_ptr); + t0 = ((helper_function0)t0)(); + t5 = *tb_ptr++; + tci_write_reg(t5,t0); + + break; + case INDEX_op_call1_r1: + t0 = tci_read_i(&tb_ptr); + t1 = tci_read_ri(&tb_ptr); + t0 = ((helper_function1)t0)(t1); + t5 = *tb_ptr++; + tci_write_reg(t5,t0); + + break; + case INDEX_op_call2_r1: + t0 = tci_read_i(&tb_ptr); + t1 = tci_read_ri(&tb_ptr); + t2 = tci_read_ri(&tb_ptr); + t0 = ((helper_function2)t0)(t1,t2); + t5 = *tb_ptr++; + tci_write_reg(t5,t0); + + break; + case INDEX_op_call3_r1: + t0 = tci_read_i(&tb_ptr); + t1 = tci_read_ri(&tb_ptr); + t2 = tci_read_ri(&tb_ptr); + t3 = tci_read_ri(&tb_ptr); + t0 = ((helper_function3)t0)(t1,t2,t3); + t5 = *tb_ptr++; + tci_write_reg(t5,t0); + + break; + case INDEX_op_call4_r1: + t0 = tci_read_i(&tb_ptr); + t1 = tci_read_ri(&tb_ptr); + t2 = tci_read_ri(&tb_ptr); + t3 = tci_read_ri(&tb_ptr); + t4 = tci_read_ri(&tb_ptr); + t0 = ((helper_function4)t0)(t1,t2,t3,t4); + t5 = *tb_ptr++; + tci_write_reg(t5,t0); + break; +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_call0_r2: + t0 = tci_read_i(&tb_ptr); + u64 = ((helper_function0_r64)t0)(); + t5 = *tb_ptr++; + tci_write_reg(t5,u64); + t5 = 
*tb_ptr++; + tci_write_reg(t5, u64>>32); + + break; + case INDEX_op_call1_r2: + t0 = tci_read_i(&tb_ptr); + t1 = tci_read_ri(&tb_ptr); + u64 = ((helper_function1_r64)t0)(t1); + t5 = *tb_ptr++; + tci_write_reg(t5,u64); + t5 = *tb_ptr++; + tci_write_reg(t5, u64>>32); + + break; + case INDEX_op_call2_r2: + t0 = tci_read_i(&tb_ptr); + t1 = tci_read_ri(&tb_ptr); + t2 = tci_read_ri(&tb_ptr); + u64 = ((helper_function2_r64)t0)(t1,t2); + t5 = *tb_ptr++; + tci_write_reg(t5,u64); + t5 = *tb_ptr++; + tci_write_reg(t5, u64>>32); + + break; + case INDEX_op_call3_r2: + t0 = tci_read_i(&tb_ptr); + t1 = tci_read_ri(&tb_ptr); + t2 = tci_read_ri(&tb_ptr); + t3 = tci_read_ri(&tb_ptr); + u64 = ((helper_function3_r64)t0)(t1,t2,t3); + t5 = *tb_ptr++; + tci_write_reg(t5,u64); + t5 = *tb_ptr++; + tci_write_reg(t5, u64>>32); + + break; + case INDEX_op_call4_r2: + t0 = tci_read_i(&tb_ptr); + t1 = tci_read_ri(&tb_ptr); + t2 = tci_read_ri(&tb_ptr); + t3 = tci_read_ri(&tb_ptr); + t4 = tci_read_ri(&tb_ptr); + u64 = ((helper_function4_r64)t0)(t1,t2,t3,t4); + t5 = *tb_ptr++; + tci_write_reg(t5,u64); + t5 = *tb_ptr++; + tci_write_reg(t5, u64>>32); + + break; +#endif case INDEX_op_jmp: case INDEX_op_br: t0 = *(uint64_t *)tb_ptr; -- 1.6.3.msysgit.0
Subject: [PATCH 5/5] tci: speed optimization --- tcg/tci.c | 66 ++++++++++++++++++++++++++++++------------------------------ 1 files changed, 33 insertions(+), 33 deletions(-) diff --git a/tcg/tci.c b/tcg/tci.c index 3e4165b..8628e69 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -71,88 +71,88 @@ uint8_t * tci_tb_ptr; static tcg_target_ulong tci_reg[TCG_TARGET_NB_REGS]; -static tcg_target_ulong tci_read_reg(uint32_t index) +static inline tcg_target_ulong tci_read_reg(uint32_t index) { assert(index < ARRAY_SIZE(tci_reg)); return tci_reg[index]; } -static uint8_t tci_read_reg8(uint32_t index) +static inline uint8_t tci_read_reg8(uint32_t index) { return (uint8_t)tci_read_reg(index); } -static int8_t tci_read_reg8s(uint32_t index) +static inline int8_t tci_read_reg8s(uint32_t index) { return (int8_t)tci_read_reg(index); } -static uint16_t tci_read_reg16(uint32_t index) +static inline uint16_t tci_read_reg16(uint32_t index) { return (uint16_t)tci_read_reg(index); } -static int16_t tci_read_reg16s(uint32_t index) +static inline int16_t tci_read_reg16s(uint32_t index) { return (int16_t)tci_read_reg(index); } -static uint32_t tci_read_reg32(uint32_t index) +static inline uint32_t tci_read_reg32(uint32_t index) { return (uint32_t)tci_read_reg(index); } #if TCG_TARGET_REG_BITS == 64 -static int32_t tci_read_reg32s(uint32_t index) +static inline int32_t tci_read_reg32s(uint32_t index) { return (int32_t)tci_read_reg(index); } -static uint64_t tci_read_reg64(uint32_t index) +static inline uint64_t tci_read_reg64(uint32_t index) { return tci_read_reg(index); } #endif -static void tci_write_reg(uint32_t index, tcg_target_ulong value) +static inline void tci_write_reg(uint32_t index, tcg_target_ulong value) { assert(index < ARRAY_SIZE(tci_reg)); assert(index != TCG_AREG0); tci_reg[index] = value; } -static void tci_write_reg8(uint32_t index, uint8_t value) +static inline void tci_write_reg8(uint32_t index, uint8_t value) { tci_write_reg(index, value); } -static void 
tci_write_reg8s(uint32_t index, int8_t value) +static inline void tci_write_reg8s(uint32_t index, int8_t value) { tci_write_reg(index, value); } -static void tci_write_reg16s(uint32_t index, int16_t value) +static inline void tci_write_reg16s(uint32_t index, int16_t value) { tci_write_reg(index, value); } -static void tci_write_reg16(uint32_t index, uint16_t value) +static inline void tci_write_reg16(uint32_t index, uint16_t value) { tci_write_reg(index, value); } -static void tci_write_reg32(uint32_t index, uint32_t value) +static inline void tci_write_reg32(uint32_t index, uint32_t value) { tci_write_reg(index, value); } -static void tci_write_reg32s(uint32_t index, int32_t value) +static inline void tci_write_reg32s(uint32_t index, int32_t value) { tci_write_reg(index, value); } #if TCG_TARGET_REG_BITS == 64 -static void tci_write_reg64(uint32_t index, uint64_t value) +static inline void tci_write_reg64(uint32_t index, uint64_t value) { tci_write_reg(index, value); } @@ -160,14 +160,14 @@ static void tci_write_reg64(uint32_t index, uint64_t value) #if TCG_TARGET_REG_BITS == 32 /* Create a 64 bit value from two 32 bit values. */ -static uint64_t tci_uint64(uint32_t high, uint32_t low) +static inline uint64_t tci_uint64(uint32_t high, uint32_t low) { return ((uint64_t)high << 32) + low; } #endif /* Read constant (native size) from bytecode. */ -static tcg_target_ulong tci_read_i(uint8_t **tb_ptr) +static inline tcg_target_ulong tci_read_i(uint8_t **tb_ptr) { tcg_target_ulong value = *(tcg_target_ulong *)(*tb_ptr); *tb_ptr += sizeof(tcg_target_ulong); @@ -175,7 +175,7 @@ static tcg_target_ulong tci_read_i(uint8_t **tb_ptr) } /* Read constant (32 bit) from bytecode. */ -static uint32_t tci_read_i32(uint8_t **tb_ptr) +static inline uint32_t tci_read_i32(uint8_t **tb_ptr) { uint32_t value = *(uint32_t *)(*tb_ptr); *tb_ptr += 4; @@ -184,7 +184,7 @@ static uint32_t tci_read_i32(uint8_t **tb_ptr) #if TCG_TARGET_REG_BITS == 64 /* Read constant (64 bit) from bytecode. 
*/ -static uint64_t tci_read_i64(uint8_t **tb_ptr) +static inline uint64_t tci_read_i64(uint8_t **tb_ptr) { uint64_t value = *(uint64_t *)(*tb_ptr); *tb_ptr += 8; @@ -193,7 +193,7 @@ static uint64_t tci_read_i64(uint8_t **tb_ptr) #endif /* Read indexed register (native size) from bytecode. */ -static tcg_target_ulong tci_read_r(uint8_t **tb_ptr) +static inline tcg_target_ulong tci_read_r(uint8_t **tb_ptr) { tcg_target_ulong value = tci_read_reg(**tb_ptr); *tb_ptr += 1; @@ -201,7 +201,7 @@ static tcg_target_ulong tci_read_r(uint8_t **tb_ptr) } /* Read indexed register (8 bit) from bytecode. */ -static uint8_t tci_read_r8(uint8_t **tb_ptr) +static inline uint8_t tci_read_r8(uint8_t **tb_ptr) { uint8_t value = tci_read_reg8(**tb_ptr); *tb_ptr += 1; @@ -209,7 +209,7 @@ static uint8_t tci_read_r8(uint8_t **tb_ptr) } /* Read indexed register (8 bit signed) from bytecode. */ -static int8_t tci_read_r8s(uint8_t **tb_ptr) +static inline int8_t tci_read_r8s(uint8_t **tb_ptr) { int8_t value = tci_read_reg8s(**tb_ptr); *tb_ptr += 1; @@ -217,7 +217,7 @@ static int8_t tci_read_r8s(uint8_t **tb_ptr) } /* Read indexed register (16 bit) from bytecode. */ -static uint16_t tci_read_r16(uint8_t **tb_ptr) +static inline uint16_t tci_read_r16(uint8_t **tb_ptr) { uint16_t value = tci_read_reg16(**tb_ptr); *tb_ptr += 1; @@ -225,7 +225,7 @@ static uint16_t tci_read_r16(uint8_t **tb_ptr) } /* Read indexed register (16 bit signed) from bytecode. */ -static int16_t tci_read_r16s(uint8_t **tb_ptr) +static inline int16_t tci_read_r16s(uint8_t **tb_ptr) { uint16_t value = tci_read_reg16s(**tb_ptr); *tb_ptr += 1; @@ -233,7 +233,7 @@ static int16_t tci_read_r16s(uint8_t **tb_ptr) } /* Read indexed register (32 bit) from bytecode. 
*/ -static uint32_t tci_read_r32(uint8_t **tb_ptr) +static inline uint32_t tci_read_r32(uint8_t **tb_ptr) { uint32_t value = tci_read_reg32(**tb_ptr); *tb_ptr += 1; @@ -242,7 +242,7 @@ static uint32_t tci_read_r32(uint8_t **tb_ptr) #if TCG_TARGET_REG_BITS == 64 /* Read indexed register (32 bit signed) from bytecode. */ -static int32_t tci_read_r32s(uint8_t **tb_ptr) +static inline int32_t tci_read_r32s(uint8_t **tb_ptr) { int32_t value = tci_read_reg32s(**tb_ptr); *tb_ptr += 1; @@ -250,7 +250,7 @@ static int32_t tci_read_r32s(uint8_t **tb_ptr) } /* Read indexed register (64 bit) from bytecode. */ -static uint64_t tci_read_r64(uint8_t **tb_ptr) +static inline uint64_t tci_read_r64(uint8_t **tb_ptr) { uint64_t value = tci_read_reg64(**tb_ptr); *tb_ptr += 1; @@ -259,7 +259,7 @@ static uint64_t tci_read_r64(uint8_t **tb_ptr) #endif /* Read indexed register or constant (native size) from bytecode. */ -static tcg_target_ulong tci_read_ri(uint8_t **tb_ptr) +static inline tcg_target_ulong tci_read_ri(uint8_t **tb_ptr) { bool const_arg; tcg_target_ulong value; @@ -274,7 +274,7 @@ static tcg_target_ulong tci_read_ri(uint8_t **tb_ptr) } /* Read indexed register or constant (32 bit) from bytecode. */ -static uint32_t tci_read_ri32(uint8_t **tb_ptr) +static inline uint32_t tci_read_ri32(uint8_t **tb_ptr) { bool const_arg; uint32_t value; @@ -290,7 +290,7 @@ static uint32_t tci_read_ri32(uint8_t **tb_ptr) #if TCG_TARGET_REG_BITS == 64 /* Read indexed register or constant (64 bit) from bytecode. 
*/ -static uint64_t tci_read_ri64(uint8_t **tb_ptr) +static inline uint64_t tci_read_ri64(uint8_t **tb_ptr) { bool const_arg; uint64_t value; @@ -305,7 +305,7 @@ static uint64_t tci_read_ri64(uint8_t **tb_ptr) } #endif -static bool tci_compare32(uint32_t u0, uint32_t u1, TCGCond condition) +static inline bool tci_compare32(uint32_t u0, uint32_t u1, TCGCond condition) { bool result = false; int32_t i0 = u0; @@ -347,7 +347,7 @@ static bool tci_compare32(uint32_t u0, uint32_t u1, TCGCond condition) return result; } -static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition) +static inline bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition) { bool result = false; int64_t i0 = u0; -- 1.6.3.msysgit.0