Here's a small patch that allows an optimisation for code fetch, at least for RISC CPU targets, as suggested by Fabrice Bellard. The main idea is that a translated block must never span a page boundary. As the tb_find_slow routine already gets the physical address of the page of code to be translated, the code translator can then fetch the code using raw host memory accesses instead of going through the softmmu routines. This patch could also be adapted to CISC CPU targets, with care for the last instruction of a page (which may straddle the page boundary). For now, I have implemented it for alpha, arm, mips, PowerPC and SH4. I don't actually know whether the optimisation will bring a noticeable speed gain or whether it will be absolutely marginal.
Please comment. -- J. Mayer <[EMAIL PROTECTED]> Never organized
Index: cpu-exec.c =================================================================== RCS file: /sources/qemu/qemu/cpu-exec.c,v retrieving revision 1.119 diff -u -d -d -p -r1.119 cpu-exec.c --- cpu-exec.c 8 Oct 2007 13:16:13 -0000 1.119 +++ cpu-exec.c 12 Oct 2007 07:14:43 -0000 @@ -133,6 +133,7 @@ static TranslationBlock *tb_find_slow(ta tb->tc_ptr = tc_ptr; tb->cs_base = cs_base; tb->flags = flags; + tb->page_addr[0] = phys_page1; cpu_gen_code(env, tb, CODE_GEN_MAX_SIZE, &code_gen_size); code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1)); Index: target-alpha/translate.c =================================================================== RCS file: /sources/qemu/qemu/target-alpha/translate.c,v retrieving revision 1.5 diff -u -d -d -p -r1.5 translate.c --- target-alpha/translate.c 16 Sep 2007 21:08:01 -0000 1.5 +++ target-alpha/translate.c 12 Oct 2007 07:14:47 -0000 @@ -1966,12 +1966,15 @@ int gen_intermediate_code_internal (CPUS #endif DisasContext ctx, *ctxp = &ctx; target_ulong pc_start; + unsigned long phys_pc; uint32_t insn; uint16_t *gen_opc_end; int j, lj = -1; int ret; pc_start = tb->pc; + phys_pc = (unsigned long)phys_ram_base + tb->page_addr[0] + + (pc_start & ~TARGET_PAGE_MASK); gen_opc_ptr = gen_opc_buf; gen_opc_end = gen_opc_buf + OPC_MAX_SIZE; gen_opparam_ptr = gen_opparam_buf; @@ -2010,7 +2013,7 @@ int gen_intermediate_code_internal (CPUS ctx.pc, ctx.mem_idx); } #endif - insn = ldl_code(ctx.pc); + insn = ldl_raw(phys_pc); #if defined ALPHA_DEBUG_DISAS insn_count++; if (logfile != NULL) { @@ -2018,6 +2021,7 @@ int gen_intermediate_code_internal (CPUS } #endif ctx.pc += 4; + phys_pc += 4; ret = translate_one(ctxp, insn); if (ret != 0) break; Index: target-arm/translate.c =================================================================== RCS file: /sources/qemu/qemu/target-arm/translate.c,v retrieving revision 1.57 diff -u -d -d -p -r1.57 translate.c --- target-arm/translate.c 17 Sep 2007 
08:09:51 -0000 1.57 +++ target-arm/translate.c 12 Oct 2007 07:14:47 -0000 @@ -38,6 +38,7 @@ /* internal defines */ typedef struct DisasContext { target_ulong pc; + unsigned long phys_pc; int is_jmp; /* Nonzero if this instruction has been conditionally skipped. */ int condjmp; @@ -2206,8 +2207,9 @@ static void disas_arm_insn(CPUState * en { unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh; - insn = ldl_code(s->pc); + insn = ldl_raw(s->phys_pc); s->pc += 4; + s->phys_pc += 4; cond = insn >> 28; if (cond == 0xf){ @@ -2971,8 +2973,9 @@ static void disas_thumb_insn(DisasContex int32_t offset; int i; - insn = lduw_code(s->pc); + insn = lduw_raw(s->phys_pc); s->pc += 2; + s->phys_pc += 2; switch (insn >> 12) { case 0: case 1: @@ -3494,7 +3497,7 @@ static void disas_thumb_insn(DisasContex break; } offset = ((int32_t)insn << 21) >> 10; - insn = lduw_code(s->pc); + insn = lduw_raw(s->phys_pc); offset |= insn & 0x7ff; val = (uint32_t)s->pc + 2; @@ -3544,6 +3547,8 @@ static inline int gen_intermediate_code_ dc->is_jmp = DISAS_NEXT; dc->pc = pc_start; + dc->phys_pc = (unsigned long)phys_ram_base + tb->page_addr[0] + + (pc_start & ~TARGET_PAGE_MASK); dc->singlestep_enabled = env->singlestep_enabled; dc->condjmp = 0; dc->thumb = env->thumb; Index: target-mips/translate.c =================================================================== RCS file: /sources/qemu/qemu/target-mips/translate.c,v retrieving revision 1.106 diff -u -d -d -p -r1.106 translate.c --- target-mips/translate.c 9 Oct 2007 03:39:58 -0000 1.106 +++ target-mips/translate.c 12 Oct 2007 07:14:48 -0000 @@ -6483,6 +6483,7 @@ gen_intermediate_code_internal (CPUState { DisasContext ctx; target_ulong pc_start; + unsigned long phys_pc; uint16_t *gen_opc_end; int j, lj = -1; @@ -6490,6 +6491,8 @@ gen_intermediate_code_internal (CPUState fprintf (logfile, "search pc %d\n", search_pc); pc_start = tb->pc; + phys_pc = (unsigned long)phys_ram_base + tb->page_addr[0] + + (pc_start & ~TARGET_PAGE_MASK); 
gen_opc_ptr = gen_opc_buf; gen_opc_end = gen_opc_buf + OPC_MAX_SIZE; gen_opparam_ptr = gen_opparam_buf; @@ -6544,9 +6547,10 @@ gen_intermediate_code_internal (CPUState gen_opc_hflags[lj] = ctx.hflags & MIPS_HFLAG_BMASK; gen_opc_instr_start[lj] = 1; } - ctx.opcode = ldl_code(ctx.pc); + ctx.opcode = ldl_raw(phys_pc); decode_opc(env, &ctx); ctx.pc += 4; + phys_pc += 4; if (env->singlestep_enabled) break; Index: target-ppc/translate.c =================================================================== RCS file: /sources/qemu/qemu/target-ppc/translate.c,v retrieving revision 1.92 diff -u -d -d -p -r1.92 translate.c --- target-ppc/translate.c 7 Oct 2007 23:10:08 -0000 1.92 +++ target-ppc/translate.c 12 Oct 2007 07:14:49 -0000 @@ -6679,12 +7569,15 @@ static always_inline int gen_intermediat DisasContext ctx, *ctxp = &ctx; opc_handler_t **table, *handler; target_ulong pc_start; + unsigned long phys_pc; uint16_t *gen_opc_end; int supervisor; int single_step, branch_step; int j, lj = -1; pc_start = tb->pc; + phys_pc = (unsigned long)phys_ram_base + tb->page_addr[0] + + (pc_start & ~TARGET_PAGE_MASK); gen_opc_ptr = gen_opc_buf; gen_opc_end = gen_opc_buf + OPC_MAX_SIZE; gen_opparam_ptr = gen_opparam_buf; @@ -6763,7 +7649,7 @@ static always_inline int gen_intermediat ctx.nip, 1 - msr_pr, msr_ir); } #endif - ctx.opcode = ldl_code(ctx.nip); + ctx.opcode = ldl_raw(phys_pc); if (msr_le) { ctx.opcode = ((ctx.opcode & 0xFF000000) >> 24) | ((ctx.opcode & 0x00FF0000) >> 8) | @@ -6778,6 +7664,7 @@ static always_inline int gen_intermediat } #endif ctx.nip += 4; + phys_pc += 4; table = env->opcodes; handler = table[opc1(ctx.opcode)]; if (is_indirect_opcode(handler)) { Index: target-sh4/translate.c =================================================================== RCS file: /sources/qemu/qemu/target-sh4/translate.c,v retrieving revision 1.18 diff -u -d -d -p -r1.18 translate.c --- target-sh4/translate.c 29 Sep 2007 19:52:22 -0000 1.18 +++ target-sh4/translate.c 12 Oct 2007 07:14:50 -0000 
@@ -1150,11 +1150,14 @@ gen_intermediate_code_internal(CPUState { DisasContext ctx; target_ulong pc_start; + unsigned long phys_pc; static uint16_t *gen_opc_end; uint32_t old_flags; int i, ii; pc_start = tb->pc; + phys_pc = (unsigned long)phys_ram_base + tb->page_addr[0] + + (pc_start & ~TARGET_PAGE_MASK); gen_opc_ptr = gen_opc_buf; gen_opc_end = gen_opc_buf + OPC_MAX_SIZE; gen_opparam_ptr = gen_opparam_buf; @@ -1210,9 +1213,10 @@ gen_intermediate_code_internal(CPUState fprintf(stderr, "Loading opcode at address 0x%08x\n", ctx.pc); fflush(stderr); #endif - ctx.opcode = lduw_code(ctx.pc); + ctx.opcode = lduw_raw(phys_pc); decode_opc(&ctx); ctx.pc += 2; + phys_pc += 2; if ((ctx.pc & (TARGET_PAGE_SIZE - 1)) == 0) break; if (env->singlestep_enabled)