Here's a small patch that allows an optimisation for code fetch, at least
for RISC CPU targets, as suggested by Fabrice Bellard.
The main idea is that a translated block must never span a page
boundary. As the tb_find_slow routine already gets the physical address
of the page of code to be translated, the code translator can then
fetch the code using raw host memory accesses instead of going
through the softmmu routines.
This patch could also be adapted to CISC CPU targets, with care for the
last instruction of a page, which may straddle the page boundary. For
now, I have implemented it for alpha, arm, mips, PowerPC and SH4.
I don't actually know whether the optimisation will bring a significant
speed gain or whether it will be absolutely marginal.

Please comment.

-- 
J. Mayer <[EMAIL PROTECTED]>
Never organized
Index: cpu-exec.c
===================================================================
RCS file: /sources/qemu/qemu/cpu-exec.c,v
retrieving revision 1.119
diff -u -d -d -p -r1.119 cpu-exec.c
--- cpu-exec.c	8 Oct 2007 13:16:13 -0000	1.119
+++ cpu-exec.c	12 Oct 2007 07:14:43 -0000
@@ -133,6 +133,7 @@ static TranslationBlock *tb_find_slow(ta
     tb->tc_ptr = tc_ptr;
     tb->cs_base = cs_base;
     tb->flags = flags;
+    tb->page_addr[0] = phys_page1;
     cpu_gen_code(env, tb, CODE_GEN_MAX_SIZE, &code_gen_size);
     code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
 
Index: target-alpha/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/translate.c,v
retrieving revision 1.5
diff -u -d -d -p -r1.5 translate.c
--- target-alpha/translate.c	16 Sep 2007 21:08:01 -0000	1.5
+++ target-alpha/translate.c	12 Oct 2007 07:14:47 -0000
@@ -1966,12 +1966,15 @@ int gen_intermediate_code_internal (CPUS
 #endif
     DisasContext ctx, *ctxp = &ctx;
     target_ulong pc_start;
+    unsigned long phys_pc;
     uint32_t insn;
     uint16_t *gen_opc_end;
     int j, lj = -1;
     int ret;
 
     pc_start = tb->pc;
+    phys_pc = (unsigned long)phys_ram_base + tb->page_addr[0] +
+        (pc_start & ~TARGET_PAGE_MASK);
     gen_opc_ptr = gen_opc_buf;
     gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
     gen_opparam_ptr = gen_opparam_buf;
@@ -2010,7 +2013,7 @@ int gen_intermediate_code_internal (CPUS
                     ctx.pc, ctx.mem_idx);
         }
 #endif
-        insn = ldl_code(ctx.pc);
+        insn = ldl_raw(phys_pc);
 #if defined ALPHA_DEBUG_DISAS
         insn_count++;
         if (logfile != NULL) {
@@ -2018,6 +2021,7 @@ int gen_intermediate_code_internal (CPUS
         }
 #endif
         ctx.pc += 4;
+        phys_pc += 4;
         ret = translate_one(ctxp, insn);
         if (ret != 0)
             break;
Index: target-arm/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-arm/translate.c,v
retrieving revision 1.57
diff -u -d -d -p -r1.57 translate.c
--- target-arm/translate.c	17 Sep 2007 08:09:51 -0000	1.57
+++ target-arm/translate.c	12 Oct 2007 07:14:47 -0000
@@ -38,6 +38,7 @@
 /* internal defines */
 typedef struct DisasContext {
     target_ulong pc;
+    unsigned long phys_pc;
     int is_jmp;
     /* Nonzero if this instruction has been conditionally skipped.  */
     int condjmp;
@@ -2206,8 +2207,9 @@ static void disas_arm_insn(CPUState * en
 {
     unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh;
 
-    insn = ldl_code(s->pc);
+    insn = ldl_raw(s->phys_pc);
     s->pc += 4;
+    s->phys_pc += 4;
 
     cond = insn >> 28;
     if (cond == 0xf){
@@ -2971,8 +2973,9 @@ static void disas_thumb_insn(DisasContex
     int32_t offset;
     int i;
 
-    insn = lduw_code(s->pc);
+    insn = lduw_raw(s->phys_pc);
     s->pc += 2;
+    s->phys_pc += 2;
 
     switch (insn >> 12) {
     case 0: case 1:
@@ -3494,7 +3497,7 @@ static void disas_thumb_insn(DisasContex
             break;
         }
         offset = ((int32_t)insn << 21) >> 10;
-        insn = lduw_code(s->pc);
+        insn = lduw_raw(s->phys_pc);
         offset |= insn & 0x7ff;
 
         val = (uint32_t)s->pc + 2;
@@ -3544,6 +3547,8 @@ static inline int gen_intermediate_code_
 
     dc->is_jmp = DISAS_NEXT;
     dc->pc = pc_start;
+    dc->phys_pc = (unsigned long)phys_ram_base + tb->page_addr[0] +
+        (pc_start & ~TARGET_PAGE_MASK);
     dc->singlestep_enabled = env->singlestep_enabled;
     dc->condjmp = 0;
     dc->thumb = env->thumb;
Index: target-mips/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-mips/translate.c,v
retrieving revision 1.106
diff -u -d -d -p -r1.106 translate.c
--- target-mips/translate.c	9 Oct 2007 03:39:58 -0000	1.106
+++ target-mips/translate.c	12 Oct 2007 07:14:48 -0000
@@ -6483,6 +6483,7 @@ gen_intermediate_code_internal (CPUState
 {
     DisasContext ctx;
     target_ulong pc_start;
+    unsigned long phys_pc;
     uint16_t *gen_opc_end;
     int j, lj = -1;
 
@@ -6490,6 +6491,8 @@ gen_intermediate_code_internal (CPUState
         fprintf (logfile, "search pc %d\n", search_pc);
 
     pc_start = tb->pc;
+    phys_pc = (unsigned long)phys_ram_base + tb->page_addr[0] +
+        (pc_start & ~TARGET_PAGE_MASK);
     gen_opc_ptr = gen_opc_buf;
     gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
     gen_opparam_ptr = gen_opparam_buf;
@@ -6544,9 +6547,10 @@ gen_intermediate_code_internal (CPUState
             gen_opc_hflags[lj] = ctx.hflags & MIPS_HFLAG_BMASK;
             gen_opc_instr_start[lj] = 1;
         }
-        ctx.opcode = ldl_code(ctx.pc);
+        ctx.opcode = ldl_raw(phys_pc);
         decode_opc(env, &ctx);
         ctx.pc += 4;
+        phys_pc += 4;
 
         if (env->singlestep_enabled)
             break;
Index: target-ppc/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/translate.c,v
retrieving revision 1.92
diff -u -d -d -p -r1.92 translate.c
--- target-ppc/translate.c	7 Oct 2007 23:10:08 -0000	1.92
+++ target-ppc/translate.c	12 Oct 2007 07:14:49 -0000
@@ -6679,12 +7569,15 @@ static always_inline int gen_intermediat
     DisasContext ctx, *ctxp = &ctx;
     opc_handler_t **table, *handler;
     target_ulong pc_start;
+    unsigned long phys_pc;
     uint16_t *gen_opc_end;
     int supervisor;
     int single_step, branch_step;
     int j, lj = -1;
 
     pc_start = tb->pc;
+    phys_pc = (unsigned long)phys_ram_base + tb->page_addr[0] +
+        (pc_start & ~TARGET_PAGE_MASK);
     gen_opc_ptr = gen_opc_buf;
     gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
     gen_opparam_ptr = gen_opparam_buf;
@@ -6763,7 +7649,7 @@ static always_inline int gen_intermediat
                     ctx.nip, 1 - msr_pr, msr_ir);
         }
 #endif
-        ctx.opcode = ldl_code(ctx.nip);
+        ctx.opcode = ldl_raw(phys_pc);
         if (msr_le) {
             ctx.opcode = ((ctx.opcode & 0xFF000000) >> 24) |
                 ((ctx.opcode & 0x00FF0000) >> 8) |
@@ -6778,6 +7664,7 @@ static always_inline int gen_intermediat
         }
 #endif
         ctx.nip += 4;
+        phys_pc += 4;
         table = env->opcodes;
         handler = table[opc1(ctx.opcode)];
         if (is_indirect_opcode(handler)) {
Index: target-sh4/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-sh4/translate.c,v
retrieving revision 1.18
diff -u -d -d -p -r1.18 translate.c
--- target-sh4/translate.c	29 Sep 2007 19:52:22 -0000	1.18
+++ target-sh4/translate.c	12 Oct 2007 07:14:50 -0000
@@ -1150,11 +1150,14 @@ gen_intermediate_code_internal(CPUState 
 {
     DisasContext ctx;
     target_ulong pc_start;
+    unsigned long phys_pc;
     static uint16_t *gen_opc_end;
     uint32_t old_flags;
     int i, ii;
 
     pc_start = tb->pc;
+    phys_pc = (unsigned long)phys_ram_base + tb->page_addr[0] +
+        (pc_start & ~TARGET_PAGE_MASK);
     gen_opc_ptr = gen_opc_buf;
     gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
     gen_opparam_ptr = gen_opparam_buf;
@@ -1210,9 +1213,10 @@ gen_intermediate_code_internal(CPUState 
 	fprintf(stderr, "Loading opcode at address 0x%08x\n", ctx.pc);
 	fflush(stderr);
 #endif
-	ctx.opcode = lduw_code(ctx.pc);
+	ctx.opcode = lduw_raw(phys_pc);
 	decode_opc(&ctx);
 	ctx.pc += 2;
+        phys_pc += 2;
 	if ((ctx.pc & (TARGET_PAGE_SIZE - 1)) == 0)
 	    break;
 	if (env->singlestep_enabled)

Reply via email to