On 27 February 2013 22:09, Anthony Green <gr...@moxielogic.com> wrote: > +static const VMStateDescription vmstate_moxie_cpu = { > + .name = "cpu", > + .unmigratable = 1, > +};
Since this is a new CPU it should just go ahead and implement migration support. > +/* The dynamic value of the DELAY_SLOT_TRUE flag determines whether the jump > + * after the delay slot should be taken or not. It is calculated from SR_T. > + * > + * It is unclear if it is permitted to modify the SR_T flag in a delay slot. > + * The use of DELAY_SLOT_TRUE flag makes us accept such SR_T modification. > + */ Is the Moxie really an SH-4 clone which has cloned this particular SH-4 issue, or have you just copied this comment incorrectly from the target-sh4 code? Do all of the copied constants really also apply? > +/* XXXXX The structure could be made more compact */ Either: * you should just move the 32 bit fields to the front * we don't actually care about the tiny wasted space (how many copies of this struct exist at once anyway) and should drop the XXXXX comment * it's matching an in-memory hardware struct layout, in which case it's not our bug to fix [and probably needs some kind of packed attribute] > +static inline void cpu_pc_from_tb(CPUMoxieState *env, TranslationBlock *tb) > +{ > + env->pc = tb->pc; > + env->flags = tb->flags; > +} > + > +static inline void cpu_get_tb_cpu_state(CPUMoxieState *env, target_ulong *pc, > + target_ulong *cs_base, int *flags) > +{ > + *pc = env->pc; > + *cs_base = 0; > + *flags = env->flags; This looks bogus. The TB flags should generally not be an exact copy of your CPU's flags register -- it needs to track bits of state which the generated code makes assumptions about, which often includes some flags but also some other CPU state. > +int cpu_moxie_handle_mmu_fault(CPUMoxieState *env, target_ulong address, > + int rw, int mmu_idx, int is_softmmu) > +{ > + struct moxie_mmu_result_t res; > + int prot, miss; > + target_ulong phy; > + int r = 1; Indentation here is wrong -- please conform to the coding style (which specifies 4-space indent). > +void do_interrupt(CPUMoxieState *env) > +{ > + fflush(NULL); Huh? Accidentally included debug code? > + > + switch (env->exception_index) { > + case EXCP_BREAK: > + break; > + default: > + break; > + } > +} > +int cpu_moxie_handle_mmu_fault(CPUMoxieState *env, target_ulong address, > + int rw, int mmu_idx); > + > +/* Try to fill the TLB and return an exception if error. If retaddr is > + NULL, it means that the function was called in C code (i.e. not > + from generated code or from helper.c) */ > +/* XXX: fix it to restore all registers */ This XXX comment suggests confusion -- cpu_restore_state() (assisted by your code in cpu.h or translate.c) should indeed restore all the CPU registers. If it doesn't the XXX comment should be somewhere else, not here. > +void tlb_fill(CPUMoxieState *env, target_ulong addr, int is_write, int > mmu_idx, > + uintptr_t retaddr) > +{ > + int ret; > + > + ret = cpu_moxie_handle_mmu_fault(env, addr, is_write, mmu_idx); > + if (unlikely(ret)) { > + if (retaddr) { > + cpu_restore_state(env, retaddr); > + } > + } Duff indentation again. > + cpu_loop_exit(env); > +} > + > +void helper_debug(CPUMoxieState *env) > +{ > + env->exception_index = EXCP_DEBUG; > + cpu_loop_exit(env); > +} > + > +#if 0 > +void helper_raise_exception(uint32_t index) > +{ > + env->exception_index = index; > + cpu_loop_exit(); > +} > +#endif Don't include #if-0'd out code, please. > + > +#if 0 > +void do_unassigned_access(target_phys_addr_t addr, int is_write, int is_exec, > + int is_asi) > +{ > +} > +#endif > +#define REG(x) (cpu_gregs[x]) Is this really a particularly useful abstraction? > +static inline void gen_goto_tb(CPUMoxieState *env, DisasContext *ctx, > + int n, target_ulong dest) > +{ > + TranslationBlock *tb; > + tb = ctx->tb; > + > + if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) && > + !ctx->singlestep_enabled) { > + tcg_gen_goto_tb(n); > + tcg_gen_movi_i32(cpu_pc, dest); > + tcg_gen_exit_tb((long) tb + n); > + } else { > + tcg_gen_movi_i32(cpu_pc, dest); > + if (ctx->singlestep_enabled) { > + gen_helper_debug(cpu_env); > + } > + tcg_gen_exit_tb(0); > + } > +} > + > +static int decode_opc(MoxieCPU *cpu, DisasContext *ctx) It's better style to write your decoder so that it takes only the DisasContext*, and is not passed either a MoxieCPU* or a CPUMoxieState*. The rationale for this is that mostly your generated code shouldn't depend on the values in the CPUState (it is not guaranteed to be "the CPU state at the start of the TB", which is perhaps not obvious; for more on that see http://lists.gnu.org/archive/html/qemu-devel/2011-06/msg02404.html) So if the functions which generate code aren't passed a CPUState they can't accidentally rely on the values in it. Instead you copy the values from the CPUState (or from the tb_flags) into the DisasContext at the top level gen_intermediate_code_internal() function, and then it's very clear what the list of CPUState fields your decoder relies on is. Many of our existing targets don't do things this way, but it's a nicer way to write a new port. I won't insist on this change, but you should consider it. In this case it's a bit complicated by the requirement to call cpu_ldl_code() to read immediate values from the instruction stream; I guess you could stick a pointer to the env into the DisasContext, though. In any case you should definitely go through and satisfy yourself about which of the various valid ways to handle CPU state each of your uses of a CPUState field in the decoder falls into. > +{ > + CPUMoxieState *env = &cpu->env; > + > + /* Local cache for the instruction opcode. */ > + int opcode; > + /* Set the default instruction length. */ > + int length = 2; > + > + if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) { > + tcg_gen_debug_insn_start(ctx->pc); > + } > + > + /* Examine the 16-bit opcode. */ > + opcode = ctx->opcode; > + > + /* Decode instruction. */ > + if (opcode & (1 << 15)) { > + if (opcode & (1 << 14)) { > + /* This is a Form 3 instruction. */ > + int inst = (opcode >> 10 & 0xf); > + > + switch (inst) { > + case 0x00: /* beq */ > + { > + int l1 = gen_new_label(); This is a weird way to indent switch statements. You can save some horizontal space by moving the braces left so they line up with the 'c' in 'case'. > + tcg_gen_brcondi_i32(TCG_COND_EQ, cc, CC_EQ, l1); > + gen_goto_tb(env, ctx, 1, ctx->pc+2); > + gen_set_label(l1); > + gen_goto_tb(env, ctx, 0, INST2OFFSET(opcode)+ctx->pc+2); > + ctx->bstate = BS_BRANCH; > + } > + break; > + case 0x01: /* bne */ > + { > + int l1 = gen_new_label(); > + tcg_gen_brcondi_i32(TCG_COND_NE, cc, CC_EQ, l1); > + gen_goto_tb(env, ctx, 1, ctx->pc+2); > + gen_set_label(l1); > + gen_goto_tb(env, ctx, 0, INST2OFFSET(opcode)+ctx->pc+2); > + ctx->bstate = BS_BRANCH; > + } > + break; > + case 0x02: /* blt */ > + { > + int l1 = gen_new_label(); > + TCGv t1 = tcg_temp_new_i32(); > + tcg_gen_andi_i32(t1, cc, CC_LT); > + tcg_gen_brcondi_i32(TCG_COND_EQ, t1, CC_LT, l1); > + tcg_temp_free_i32(t1); > + gen_goto_tb(env, ctx, 1, ctx->pc+2); > + gen_set_label(l1); > + gen_goto_tb(env, ctx, 0, INST2OFFSET(opcode)+ctx->pc+2); A lot of your calls to gen_goto_tb() are passing offset + ctx->pc+2. Consider abstracting this out as a gen_goto_rel() that does the addition of ctx->pc+2 for you. > + ctx->bstate = BS_BRANCH; > + } > + break; > + } else { > + /* This is a Form 1 instruction. */ > + int inst = opcode >> 8; > + switch (inst) { > + case 0x00: /* nop */ > + break; > + case 0x01: /* ldi.l (immediate) */ > + { > + int reg = (opcode >> 4) & 0xf; > + int val = cpu_ldl_code(env, ctx->pc+2); Most (all?) of your calls to cpu_ldl_code() use an argument of ctx->pc+2. Consider abstracting this out to load_insn_imm32(DisasContext *ctx). > + tcg_gen_movi_i32(REG(reg), val); > + length = 6; Tracking the insn length separately like this is a recipe for bugs -- it would be better to arrange to automatically update the length when the decoder calls the "read an imm32 from the instruction stream". > + case 0x26: /* and */ > + { > + int a = (opcode >> 4) & 0xf; > + int b = opcode & 0xf; > + > + tcg_gen_and_i32(REG(a), REG(a), REG(b)); > + } > + break; > + case 0x27: /* lshr */ > + { > + int a = (opcode >> 4) & 0xf; > + int b = opcode & 0xf; > + > + tcg_gen_shr_i32(REG(a), REG(a), REG(b)); > + } > + break; > + case 0x28: /* ashl */ > + { > + int a = (opcode >> 4) & 0xf; > + int b = opcode & 0xf; > + > + tcg_gen_shl_i32(REG(a), REG(a), REG(b)); > + } > + break; > + case 0x29: /* sub.l */ > + { > + int a = (opcode >> 4) & 0xf; > + int b = opcode & 0xf; > + > + tcg_gen_sub_i32(REG(a), REG(a), REG(b)); > + } > + break; > + case 0x2a: /* neg */ > + { > + int a = (opcode >> 4) & 0xf; > + int b = opcode & 0xf; > + > + tcg_gen_neg_i32(REG(a), REG(b)); > + } > + break; > + case 0x2b: /* or */ > + { > + int a = (opcode >> 4) & 0xf; > + int b = opcode & 0xf; > + > + tcg_gen_or_i32(REG(a), REG(a), REG(b)); > + } > + break; > + case 0x2c: /* not */ > + { > + int a = (opcode >> 4) & 0xf; > + int b = opcode & 0xf; > + > + tcg_gen_not_i32(REG(a), REG(b)); > + } > + break; > + case 0x2d: /* ashr */ > + { > + int a = (opcode >> 4) & 0xf; > + int b = opcode & 0xf; > + > + tcg_gen_sar_i32(REG(a), REG(a), REG(b)); > + } > + break; > + case 0x2e: /* xor */ > + { > + int a = (opcode >> 4) & 0xf; > + int b = opcode & 0xf; > + > + tcg_gen_xor_i32(REG(a), REG(a), REG(b)); > + } > + break; > + case 0x2f: /* mul.l */ > + { > + int a = (opcode >> 4) & 0xf; > + int b = opcode & 0xf; > + > + tcg_gen_mul_i32(REG(a), REG(a), REG(b)); > + } > + break; This handling of the basic data processing insns is rather repetitive -- consider whether it can be table driven or otherwise refactored? > + case 0x31: /* div.l */ > + { > + int a = (opcode >> 4) & 0xf; > + int b = opcode & 0xf; > + tcg_gen_div_i32(REG(a), REG(a), REG(b)); Didn't Richard mention the problem of exceptions on division in review of an earlier version of this patch? > +/* generate intermediate code for basic block 'tb'. */ > +struct DisasContext ctx; > +static void > +gen_intermediate_code_internal(MoxieCPU *cpu, TranslationBlock *tb, > + bool search_pc) > +{ > + DisasContext ctx; > + target_ulong pc_start; > + uint16_t *gen_opc_end; > + CPUBreakpoint *bp; > + int j, lj = -1; > + CPUMoxieState *env = &cpu->env; > + > + if (search_pc) { > + qemu_log("search pc %d\n", search_pc); > + } > + > + pc_start = tb->pc; > + gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE; > + ctx.pc = pc_start; > + ctx.saved_pc = -1; > + ctx.tb = tb; > + ctx.memidx = 0; > + ctx.singlestep_enabled = 0; > + ctx.bstate = BS_NONE; > + /* Restore delay slot state from the tb context. */ So, this comment about delay slots seems to be copied from the MIPS target; earlier you had some constants and comments about delay slots copied from the SH4 target. Does Moxie actually have delay slots? > + ctx.hflags = (uint32_t)tb->flags; /* FIXME: maybe use 64 bits here? */ > + restore_cpu_state(cpu, &ctx); > + > + do { > + if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) { > + QTAILQ_FOREACH(bp, &env->breakpoints, entry) { > + if (ctx.pc == bp->pc) { > + save_cpu_state(&ctx, 1); > + tcg_gen_movi_i32(cpu_pc, ctx.pc); > + gen_helper_debug(cpu_env); > + ctx.bstate = BS_EXCP; > + goto done_generating; > + } > + } > + } > + > + if (search_pc) { > + j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf; > + if (lj < j) { > + lj++; > + while (lj < j) { > + tcg_ctx.gen_opc_instr_start[lj++] = 0; > + } > + } > + tcg_ctx.gen_opc_pc[lj] = ctx.pc; > + tcg_ctx.gen_opc_instr_start[lj] = 1; > + } > + ctx.opcode = cpu_ldsw_code(env, ctx.pc); Are you sure you want to sign extend the opcode ?? (ldsw is a signed load, ctx.opcode is 32 bit. cpu_lduw_code() would be the unsigned load.) > + ctx.pc += decode_opc(cpu, &ctx); thanks -- PMM