On 3/20/19 7:15 AM, Yoshinori Sato wrote:
> +/* [ri, rb] */
> +static inline void rx_gen_regindex(DisasContext *ctx, TCGv mem,
Please drop all of the inline markers.  Let the compiler choose
which are profitable to inline.

> +/* load source operand */
> +static inline TCGv rx_load_source(DisasContext *ctx, TCGv mem,
> +                                  int ld, int mi, int rs)
> +{
> +    TCGv addr;
> +    if (ld < 3) {
> +        switch (mi) {
> +        case 0: /* dsp[rs].b */
> +        case 1: /* dsp[rs].w */
> +        case 2: /* dsp[rs].l */
> +            addr = rx_index_addr(ctx, mem, ld, mi, rs);
> +            rx_gen_ld(mi, mem, addr);
> +            break;
> +        case 3: /* dsp[rs].uw */
> +        case 4: /* dsp[rs].ub */
> +            addr = rx_index_addr(ctx, mem, ld, 4 - mi, rs);
> +            rx_gen_ldu(4 - mi, mem, addr);

Note that the mi_to_mop helper applies here.

    TCGMemOp mop = mi_to_mop(mi);
    addr = rx_index_addr(ctx, mem, ld, mop & MO_SIZE, rs);
    tcg_gen_qemu_ld_i32(mem, addr, 0, mop);

> +    tcg_gen_not_i32(dc->temp, dc->temp);
> +    tcg_gen_and_i32(dc->temp, dc->temp, cpu_psw_z);

    tcg_gen_andc_i32(dc->temp, cpu_psw_z, dc->temp);

> +typedef void (*ldstfn)(unsigned int sz, TCGv val, TCGv mem);
> +static inline void MOV_prrp(ldstfn ldst, int ad, int sz, int rs, int rd)
> +{
> +    TCGv temp;
> +    if (rs == rd) {
> +        temp = tcg_temp_new();
> +        tcg_gen_mov_i32(temp, cpu_regs[rs]);
> +    } else {
> +        temp = cpu_regs[rs];
> +    }
> +    if (ad == 1) {
> +        tcg_gen_subi_i32(cpu_regs[rd], cpu_regs[rd], 1 << sz);

Does this really decrement first?  This doesn't match the behaviour
described for PUSH wrt SP...

> +    }
> +    ldst(sz, temp, cpu_regs[rd]);
> +    if (ad == 0 && rs != rd) {
> +        tcg_gen_addi_i32(cpu_regs[rd], cpu_regs[rd], 1 << sz);
> +    }
> +    if (rs == rd) {
> +        tcg_temp_free(temp);
> +    }
> +}

...

> +/* pop rd */
> +static bool trans_POP(DisasContext *ctx, arg_POP *a)
> +{
> +    rx_gen_ld(MO_32, cpu_regs[a->rd], cpu_regs[0]);
> +    if (a->rd != 0) {
> +        tcg_gen_addi_i32(cpu_regs[0], cpu_regs[0], 4);
> +    }
> +    return true;

Perhaps as MOV_prrp(ctx, 0, 2, 0, a->rd); ?

> +static inline TCGMemOp mi_to_mop(unsigned mi)
> +{
> +    static const TCGMemOp mop[5] = { MO_SB, MO_SW, MO_UL, MO_UW, MO_UB };
> +    tcg_debug_assert(mi < 5);
> +    return mop[mi];
> +}
> +
> +/* xchg dsp[rs].<mi>,rd */
> +static bool trans_XCHG_mr(DisasContext *ctx, arg_XCHG_mr *a)
> +{
> +    TCGv mem, addr;
> +    mem = tcg_temp_new();
> +    switch (a->mi) {
> +    case 0: /* dsp[rs].b */
> +    case 1: /* dsp[rs].w */
> +    case 2: /* dsp[rs].l */
> +        addr = rx_index_addr(ctx, mem, a->ld, a->mi, a->rs);
> +        break;
> +    case 3: /* dsp[rs].uw */
> +    case 4: /* dsp[rs].ub */
> +        addr = rx_index_addr(ctx, mem, a->ld, 4 - a->mi, a->rs);

    addr = rx_index_addr(ctx, mem, a->ld, mop & MO_SIZE, a->rs);

> +typedef void (*logicfn)(TCGv ret, TCGv arg1, TCGv arg2);
> +static inline void gen_logic_op(logicfn opr, TCGv ret, TCGv arg1, TCGv arg2)
> +{
> +    opr(cpu_psw_z, arg1, arg2);
> +    tcg_gen_mov_i32(cpu_psw_s, cpu_psw_z);
> +    if (ret) {
> +        tcg_gen_mov_i32(ret, cpu_psw_z);
> +    }
> +}

If you here define

static void rx_gen_and_i32(TCGv ret, TCGv arg1, TCGv arg2)
{
    gen_logic_op(tcg_gen_and_i32, ret, arg1, arg2);
}

static void rx_gen_tst_i32(TCGv ret, TCGv arg1, TCGv arg2)
{
    gen_logic_op(tcg_gen_and_i32, NULL, arg1, arg2);
}

etc for OR and XOR, then suddenly we have a consistent interface for all
of the arithmetic and logic operations -- add, sub, etc included.
Which means that we can then do

static bool rx_gen_irr(DisasContext *ctx, arg_rri *a, logicfn opr)
{
    TCGv_i32 imm = tcg_const_i32(a->imm);
    opr(cpu_regs[a->rd], cpu_regs[a->rs2], imm);
    tcg_temp_free_i32(imm);
    return true;
}

static bool rx_gen_rrr(DisasContext *ctx, arg_rrr *a, logicfn opr)
{
    opr(cpu_regs[a->rd], cpu_regs[a->rs2], cpu_regs[a->rs]);
    return true;
}

static bool rx_gen_mr(DisasContext *ctx, arg_rm *a, logicfn opr)
{
    TCGv mem = tcg_temp_new();
    TCGv val = rx_load_source(ctx, mem, a->ld, a->mi, a->rs);
    opr(cpu_regs[a->rd], cpu_regs[a->rd], val);
    tcg_temp_free(mem);
    return true;
}

static bool trans_AND_mr(DisasContext *ctx, arg_AND_mr *a)
{
    return rx_gen_mr(ctx, a, rx_gen_and_i32);
}

static bool trans_ADD_mr(DisasContext *ctx, arg_ADD_mr *a)
{
    return rx_gen_mr(ctx, a, rx_gen_add_i32);
}

etc.

>  static bool trans_SBB_mr(DisasContext *ctx, arg_SBB_mr *a)
>  {
>      TCGv val, mem;
>      mem = tcg_temp_new();
>      val = rx_load_source(ctx, mem, a->ld, MO_32, a->rs);

The "Note only mi==2 allowed" means that you need

    if (a->mi != 2) {
        return false;
    }

here.  There are a couple more of these.

> +/* ret = arg1 + arg2 + psw_c */
> +static void rx_gen_adc_i32(TCGv ret, TCGv arg1, TCGv arg2)
> +{
> +    TCGv cf, z;
> +    cf = tcg_temp_new();
> +    z = tcg_const_i32(0);
> +    tcg_gen_mov_i32(cf, cpu_psw_c);
> +    tcg_gen_add2_i32(cpu_psw_s, cpu_psw_c, arg1, z, arg2, z);
> +    tcg_gen_add2_i32(cpu_psw_s, cpu_psw_c, cpu_psw_s, cpu_psw_c, cf, z);

Note that you don't need CF if you consume psw_c right away:

    tcg_gen_add2_i32(cpu_psw_s, cpu_psw_c, arg1, z, cpu_psw_c, z);
    tcg_gen_add2_i32(cpu_psw_s, cpu_psw_c, cpu_psw_s, cpu_psw_c, arg2, z);

You did forget to free CF, btw.

> +/* adc dsp[rs], rd */
> +static bool trans_ADC_mr(DisasContext *ctx, arg_ADC_mr *a)
> +{

a->mi != 2.

> +/* emul #imm, rd */
> +static bool trans_EMUL_ir(DisasContext *ctx, arg_EMUL_ir *a)
> +{
> +    TCGv imm = tcg_const_i32(a->imm);
> +    tcg_gen_muls2_i32(cpu_regs[a->rd], cpu_regs[a->rd + 1],
> +                      cpu_regs[a->rd], imm);
> +    tcg_temp_free(imm);
> +    return true;
> +}
> +
> +/* emul rs, rd */
> +/* emul dsp[rs], rd */
> +static bool trans_EMUL_mr(DisasContext *ctx, arg_EMUL_mr *a)
> +{
> +    TCGv val, mem;
> +    mem = tcg_temp_new();
> +    val = rx_load_source(ctx, mem, a->ld, a->mi, a->rs);
> +    tcg_gen_muls2_i32(cpu_regs[a->rd], cpu_regs[a->rd + 1],
> +                      cpu_regs[a->rd], val);

Both of these need to check for rd == 15 and return false.

> +static bool trans_EMULU_ir(DisasContext *ctx, arg_EMULU_ir *a)
> +{
> +    TCGv imm = tcg_const_i32(a->imm);
> +    if (a->rd > 14) {
> +        g_assert_not_reached();
> +    }

You cannot make this assertion, since this is under control of the guest.
You need to return false instead, so that illegal instruction is signaled.

(And move the tcg_const_i32 below, so that you don't leak that temp.)

> +static bool trans_EMULU_mr(DisasContext *ctx, arg_EMULU_mr *a)
> +{
> +    TCGv val, mem;
> +    if (a->rd > 14) {
> +        g_assert_not_reached();
> +    }

Likewise.

> +/* divu rs, rd */
> +/* divu dsp[rs], rd */
> +static bool trans_DIVU_mr(DisasContext *ctx, arg_DIVU_mr *a)
> +{
> +    TCGv val, mem;
> +    mem = tcg_temp_new();
> +    val = rx_load_source(ctx, mem, a->ld, a->mi, a->rs);
> +    gen_helper_divu(cpu_regs[a->rd], cpu_env, cpu_regs[a->rd], val);
> +    tcg_temp_free(mem);
> +    return true;
> +}

You can use a shim function like

static void rx_gen_divu(TCGv ret, TCGv arg1, TCGv arg2)
{
    gen_helper_divu(ret, cpu_env, arg1, arg2);
}

to allow you to use the other rx_gen_* helpers above.
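With such a shim, the memory form could presumably collapse to something
like this (untested sketch, assuming DIVU_mr shares the arg_rm argument
set used by rx_gen_mr above):

static bool trans_DIVU_mr(DisasContext *ctx, arg_DIVU_mr *a)
{
    /* divu rs, rd / divu dsp[rs], rd via the generic mem->reg pattern */
    return rx_gen_mr(ctx, a, rx_gen_divu);
}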
> +
> +
> +/* shll #imm:5, rd */
> +/* shll #imm:5, rs2, rd */
> +static bool trans_SHLL_irr(DisasContext *ctx, arg_SHLL_irr *a)
> +{
> +    TCGv tmp;
> +    tmp = tcg_temp_new();
> +    if (a->imm) {
> +        tcg_gen_sari_i32(cpu_psw_c, cpu_regs[a->rs2], 32 - a->imm);
> +        tcg_gen_shli_i32(cpu_regs[a->rd], cpu_regs[a->rs2], a->imm);
> +        tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_psw_o, cpu_psw_c, 0);
> +        tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_psw_c, 0xffffffff);
> +        tcg_gen_or_i32(cpu_psw_o, cpu_psw_o, tmp);
> +        tcg_gen_setcondi_i32(TCG_COND_NE, cpu_psw_c, cpu_psw_c, 0);
> +    } else {
> +        tcg_gen_movi_i32(cpu_psw_c, 0);
> +        tcg_gen_movi_i32(cpu_psw_o, 0);

Missing tcg_gen_mov_i32(cpu_regs[a->rd], cpu_regs[a->rs2]); here.

> +    }
> +    tcg_gen_mov_i32(cpu_psw_z, cpu_regs[a->rd]);
> +    tcg_gen_mov_i32(cpu_psw_s, cpu_regs[a->rd]);
> +    return true;
> +}
> +
> +/* shll rs, rd */
> +static bool trans_SHLL_rr(DisasContext *ctx, arg_SHLL_rr *a)
> +{
> +    TCGLabel *l1, *l2;
> +    TCGv count, tmp;
> +
> +    l1 = gen_new_label();
> +    l2 = gen_new_label();
> +    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_regs[a->rs], 0, l1);
> +    count = tcg_const_i32(32);
> +    tmp = tcg_temp_new();
> +    tcg_gen_sub_i32(count, count, cpu_regs[a->rs]);
> +    tcg_gen_sar_i32(cpu_psw_c, cpu_regs[a->rd], count);
> +    tcg_gen_shl_i32(cpu_regs[a->rd], cpu_regs[a->rd], cpu_regs[a->rs]);
> +    tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_psw_o, cpu_psw_c, 0);
> +    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_psw_c, 0xffffffff);
> +    tcg_gen_or_i32(cpu_psw_o, cpu_psw_o, tmp);
> +    tcg_gen_setcondi_i32(TCG_COND_NE, cpu_psw_c, cpu_psw_c, 0);
> +    tcg_gen_br(l2);
> +    gen_set_label(l1);
> +    tcg_gen_movi_i32(cpu_psw_c, 0);
> +    tcg_gen_movi_i32(cpu_psw_o, 0);

Likewise.

> +    gen_set_label(l2);
> +    tcg_gen_mov_i32(cpu_psw_z, cpu_regs[a->rd]);
> +    tcg_gen_mov_i32(cpu_psw_s, cpu_regs[a->rd]);
> +    tcg_temp_free(count);
> +    tcg_temp_free(tmp);
> +    return true;
> +}
> +
> +static inline void shiftr_imm(uint32_t rd, uint32_t rs, uint32_t imm,
> +                              unsigned int alith)
> +{
> +    static void (* const gen_sXri[])(TCGv ret, TCGv arg1, int arg2) = {
> +        tcg_gen_shri_i32, tcg_gen_sari_i32,
> +    };
> +    tcg_debug_assert(alith < 2);
> +    if (imm) {
> +        gen_sXri[alith](cpu_regs[rd], cpu_regs[rs], imm - 1);
> +        tcg_gen_andi_i32(cpu_psw_c, cpu_regs[rd], 0x00000001);
> +        gen_sXri[alith](cpu_regs[rd], cpu_regs[rd], 1);
> +    } else {
> +        tcg_gen_movi_i32(cpu_psw_c, 0);

Likewise.

> +    }
> +    tcg_gen_movi_i32(cpu_psw_o, 0);
> +    tcg_gen_mov_i32(cpu_psw_z, cpu_regs[rd]);
> +    tcg_gen_mov_i32(cpu_psw_s, cpu_regs[rd]);
> +}
> +
> +static inline void shiftr_reg(uint32_t rd, uint32_t rs, unsigned int alith)
> +{
> +    TCGLabel *skipz, *done;
> +    TCGv count;
> +    static void (* const gen_sXri[])(TCGv ret, TCGv arg1, int arg2) = {
> +        tcg_gen_shri_i32, tcg_gen_sari_i32,
> +    };
> +    static void (* const gen_sXr[])(TCGv ret, TCGv arg1, TCGv arg2) = {
> +        tcg_gen_shr_i32, tcg_gen_sar_i32,
> +    };
> +    tcg_debug_assert(alith < 2);
> +    skipz = gen_new_label();
> +    done = gen_new_label();
> +    count = tcg_temp_new();
> +    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_regs[rs], 0, skipz);
> +    tcg_gen_subi_i32(count, cpu_regs[rs], 1);
> +    gen_sXr[alith](cpu_regs[rd], cpu_regs[rd], count);
> +    tcg_gen_andi_i32(cpu_psw_c, cpu_regs[rd], 0x00000001);
> +    gen_sXri[alith](cpu_regs[rd], cpu_regs[rd], 1);
> +    tcg_gen_br(done);
> +    gen_set_label(skipz);
> +    tcg_gen_movi_i32(cpu_psw_c, 0);

Likewise.
> +/* revw rs, rd */
> +static bool trans_REVW(DisasContext *ctx, arg_REVW *a)
> +{
> +    TCGv hi, lo;
> +
> +    hi = tcg_temp_new();
> +    lo = tcg_temp_new();
> +    tcg_gen_shri_i32(hi, cpu_regs[a->rs], 16);
> +    tcg_gen_bswap16_i32(hi, hi);
> +    tcg_gen_shli_i32(hi, hi, 16);
> +    tcg_gen_bswap16_i32(lo, cpu_regs[a->rs]);

Previous comment re bswap16 requiring zero-extension still applies.

> +/* conditional branch helper */
> +static void rx_bcnd_main(DisasContext *ctx, int cd, int dst)
> +{
> +    TCGv z, t, f;
> +    DisasCompare dc;
> +    switch (cd) {
> +    case 0 ... 13:
> +        dc.temp = tcg_temp_new();
> +        z = tcg_const_i32(0);
> +        t = tcg_const_i32(ctx->pc + dst);
> +        f = tcg_const_i32(ctx->base.pc_next);
> +        psw_cond(&dc, cd);
> +        tcg_gen_movcond_i32(dc.cond, cpu_pc, dc.value, z, t, f);

    lab_true = gen_new_label();
    tcg_gen_brcondi_i32(dc.cond, dc.value, 0, lab_true);
    gen_goto_tb(ctx, 0, ctx->base.pc_next);
    gen_set_label(lab_true);
    gen_goto_tb(ctx, 1, ctx->pc + dst);

> +        tcg_temp_free(t);
> +        tcg_temp_free(f);
> +        tcg_temp_free(dc.temp);
> +        tcg_temp_free(z);
> +        break;
> +    case 14:
> +        /* always true case */
> +        tcg_gen_movi_i32(cpu_pc, ctx->pc + dst);

    gen_goto_tb(ctx, 0, ctx->pc + dst);

> +        break;
> +    case 15:
> +        /* always false case */
> +        tcg_gen_movi_i32(cpu_pc, ctx->base.pc_next);

No need to do anything here; just return.

> +#define MULMAC(op) \
> +    do { \
> +        TCGv regs = tcg_const_i32(a->rs << 4 | a->rs2); \

I really don't like passing register numbers to helpers as immediates.

> +/* mulhi rs,rs2 */
> +static bool trans_MULHI(DisasContext *ctx, arg_MULHI *a)
> +{
> +    MULMAC(mulhi);

    tcg_gen_ext_i32_i64(tmp1, cpu_regs[a->rs]);
    tcg_gen_ext_i32_i64(tmp2, cpu_regs[a->rs2]);
    tcg_gen_sari_i64(tmp1, tmp1, 16);
    tcg_gen_andi_i64(tmp2, tmp2, ~0xffff);
    tcg_gen_mul_i64(cpu_acc, tmp1, tmp2);

> +static bool trans_MULLO(DisasContext *ctx, arg_MULLO *a)
> +{
> +    MULMAC(mullo);

    tcg_gen_extu_i32_i64(tmp1, cpu_regs[a->rs]);
    tcg_gen_extu_i32_i64(tmp2, cpu_regs[a->rs2]);
    tcg_gen_ext16s_i64(tmp1, tmp1);
    tcg_gen_ext16s_i64(tmp2, tmp2);
    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
    tcg_gen_shli_i64(cpu_acc, tmp1, 16);

> +static bool trans_MACHI(DisasContext *ctx, arg_MACHI *a)
> +{
> +    MULMAC(machi);

    tcg_gen_ext_i32_i64(tmp1, cpu_regs[a->rs]);
    tcg_gen_ext_i32_i64(tmp2, cpu_regs[a->rs2]);
    tcg_gen_sari_i64(tmp1, tmp1, 16);
    tcg_gen_andi_i64(tmp2, tmp2, ~0xffff);
    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
    tcg_gen_add_i64(cpu_acc, cpu_acc, tmp1);

> +static bool trans_MACLO(DisasContext *ctx, arg_MACLO *a)
> +{
> +    MULMAC(maclo);

    tcg_gen_extu_i32_i64(tmp1, cpu_regs[a->rs]);
    tcg_gen_extu_i32_i64(tmp2, cpu_regs[a->rs2]);
    tcg_gen_ext16s_i64(tmp1, tmp1);
    tcg_gen_ext16s_i64(tmp2, tmp2);
    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
    tcg_gen_shli_i64(tmp1, tmp1, 16);
    tcg_gen_add_i64(cpu_acc, cpu_acc, tmp1);

> +/* sat rd */
> +static bool trans_SAT(DisasContext *ctx, arg_SAT *a)
> +{
> +    TCGv rd = tcg_const_i32(a->rd);
> +    gen_helper_sat(cpu_env, rd);

    tcg_gen_sari_i32(tmp, cpu_psw_s, 31);
    tcg_gen_xori_i32(tmp, tmp, 0x80000000);
    tcg_gen_movcond_i32(TCG_COND_LT, cpu_regs[a->rd],
                        cpu_psw_o, zero, tmp, cpu_regs[a->rd]);

> +static inline void rx_bclrm(TCGv mem, TCGv mask)
> +{
> +    TCGv val;
> +    val = tcg_temp_new();
> +    rx_gen_ld(MO_8, val, mem);
> +    tcg_gen_not_i32(mask, mask);
> +    tcg_gen_and_i32(val, val, mask);

    tcg_gen_andc_i32(val, val, mask);

> +static inline void rx_bclrr(TCGv reg, TCGv mask)
> +{
> +    tcg_gen_not_i32(mask, mask);
> +    tcg_gen_and_i32(reg, reg, mask);

Likewise.
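That is, presumably:

    tcg_gen_andc_i32(reg, reg, mask);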
> +    static bool cat3(trans_, name, _rr)(DisasContext *ctx, \
> +                                        cat3(arg_, name, _rr) * a) \
> +    { \
> +        TCGv mask; \
> +        mask = tcg_const_i32(1); \
> +        tcg_gen_shl_i32(mask, mask, cpu_regs[a->rs]); \

This shift needs to be masked; only the low bits of cpu_regs[a->rs] are used.

> +        cat3(rx_, op, r)(cpu_regs[a->rd], mask); \
> +        tcg_temp_free(mask); \
> +        return true; \
> +    } \
> +    static bool cat3(trans_, name, _rm)(DisasContext *ctx, \
> +                                        cat3(arg_, name, _rm) * a) \
> +    { \
> +        TCGv mask, mem, addr; \
> +        mask = tcg_const_i32(1); \
> +        tcg_gen_shl_i32(mask, mask, cpu_regs[a->rd]); \

Likewise.

> +        mem = tcg_temp_new(); \
> +        addr = rx_index_addr(ctx, mem, a->ld, MO_8, a->rs); \
> +        cat3(rx_, op, m)(addr, mask); \
> +        tcg_temp_free(mem); \
> +        tcg_temp_free(mask); \
> +        return true; \
> +    }
> +
> +BITOP(BSET, bset)
> +BITOP(BCLR, bclr)
> +BITOP(BTST, btst)
> +BITOP(BNOT, bnot)
> +
> +static inline void bmcnd_op(TCGv val, TCGCond cond, int pos)
> +{
> +    TCGv bit;
> +    DisasCompare dc;
> +    dc.temp = tcg_temp_new();
> +    bit = tcg_temp_new();
> +    psw_cond(&dc, cond);
> +    tcg_gen_andi_i32(val, val, ~(1 << pos));
> +    tcg_gen_setcondi_i32(dc.cond, bit, dc.value, 0);
> +    tcg_gen_shli_i32(bit, bit, pos);
> +    tcg_gen_or_i32(val, val, bit);

    tcg_gen_setcondi_i32(dc.cond, bit, dc.value, 0);
    tcg_gen_deposit_i32(val, val, bit, pos, 1);

> +static void rx_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
> +{

This is where you should initialize ctx->env.

> +void restore_state_to_opc(CPURXState *env, TranslationBlock *tb,
> +                          target_ulong *data)
> +{
> +    env->pc = data[0];
> +    env->psw = data[1];
> +    rx_cpu_unpack_psw(env, 1);

You should not be doing this unpack and restore, as all psw bits are
properly computed now.

> +# Note that sz=3 overlaps SMOVF
> +# RPMA.b
> +RMPA            0111 1111 1000 1100            sz=0
> +# RPMA.w
> +RMPA            0111 1111 1000 1101            sz=1
> +# RPMA.l
> +RMPA            0111 1111 1000 1110            sz=2

# SMOVF
# RMPA.<bwl>
{
  SMOVF         0111 1111 1000 1111
  RMPA          0111 1111 1000 11 sz:2
}

> +# Note that sz=3 overlaps SMOVB
> +# SSTR.b
...
> +# Note that sz=3 overlaps SCMPU
> +# SUNTIL.b
...
> +# Note that sz=3 overlaps SMOVU
> +# SWHILE.b

Likewise.


r~