Hi Jia. On Fri, Jun 8, 2012 at 4:00 AM, Jia Liu <pro...@gmail.com> wrote:
[...] >>> + case 0x0009: >>> + switch (op1) { >>> + case 0x03: /*l.div*/ >>> + LOG_DIS("l.div r%d, r%d, r%d\n", rd, ra, rb); >>> + { >>> + TCGv_i32 sr_ove; >>> + int lab = gen_new_label(); >>> + sr_ove = tcg_temp_new(); >>> + tcg_gen_ori_tl(cpu_sr, cpu_sr, SR_OV); >>> + tcg_gen_ori_tl(cpu_sr, cpu_sr, SR_CY); >>> + tcg_gen_andi_tl(sr_ove, cpu_sr, SR_OVE); >>> + if (rb == 0) { >>> + tcg_gen_brcondi_tl(TCG_COND_NE, sr_ove, SR_OVE, lab); >>> + gen_exception(dc, EXCP_RANGE); >>> + gen_set_label(lab); >>> + } else { >>> + if (ra == 0xffffffff && rb == 0x80000000) { >> >> Cannot do that: ra and rb are register numbers, not the values >> contained in these registers. >> Hence you need to generate code that will check these combinations of >> register values. >> > > case 0x03: /*l.div*/ > LOG_DIS("l.div r%d, r%d, r%d\n", rd, ra, rb); > { > int lab0 = gen_new_label(); > int lab1 = gen_new_label(); > int lab2 = gen_new_label(); > TCGv_i32 sr_ove = tcg_temp_new_i32(); > tcg_gen_andi_tl(sr_ove, cpu_sr, SR_OVE); > if (rb == 0) { > tcg_gen_ori_tl(cpu_sr, cpu_sr, SR_OV); > tcg_gen_ori_tl(cpu_sr, cpu_sr, SR_CY); > tcg_gen_brcondi_tl(TCG_COND_NE, sr_ove, SR_OVE, lab0); > gen_exception(dc, EXCP_RANGE); > gen_set_label(lab0); > } else { > tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_R[rb], > 0x00000000, lab1); > tcg_gen_brcondi_tl(TCG_COND_NE, cpu_R[ra], > 0xffffffff, lab2); > tcg_gen_brcondi_tl(TCG_COND_NE, cpu_R[rb], > 0x80000000, lab2); > gen_set_label(lab1); > tcg_gen_ori_tl(cpu_sr, cpu_sr, SR_OV); > tcg_gen_ori_tl(cpu_sr, cpu_sr, SR_CY); > tcg_gen_brcondi_tl(TCG_COND_NE, sr_ove, SR_OVE, lab2); > gen_exception(dc, EXCP_RANGE); > gen_set_label(lab2); > tcg_gen_div_tl(cpu_R[rd], cpu_R[ra], cpu_R[rb]); You still divide by zero/overflow here in case SR_OVE is not set. What value goes to the rd register in case of division by 0? Maybe you should skip the division altogether? > } > tcg_temp_free_i32(sr_ove); > } > break; > > > is this right? Much better now (: >>> + tcg_gen_brcondi_tl(TCG_COND_NE, sr_ove, SR_OVE, >>> lab); >>> + gen_exception(dc, EXCP_RANGE); >>> + gen_set_label(lab); >>> + } else { >>> + tcg_gen_div_tl(cpu_R[rd], cpu_R[ra], cpu_R[rb]); >>> + } >>> + } >>> + tcg_temp_free(sr_ove); >>> + } >>> + break; >>> + >>> + default: >>> + gen_illegal_exception(dc); >>> + break; >>> + } >>> + break; >>> + >>> + case 0x000a: >>> + switch (op1) { >>> + case 0x03: /*l.divu*/ >>> + LOG_DIS("l.divu r%d, r%d, r%d\n", rd, ra, rb); >>> + if (rb == 0) { >>> + TCGv_i32 sr_ove; >>> + int lab = gen_new_label(); >>> + sr_ove = tcg_temp_new(); >>> + tcg_gen_ori_tl(cpu_sr, cpu_sr, SR_OV); >>> + tcg_gen_ori_tl(cpu_sr, cpu_sr, SR_CY); >>> + tcg_gen_andi_tl(sr_ove, cpu_sr, SR_OVE); >>> + tcg_gen_brcondi_tl(TCG_COND_NE, sr_ove, SR_OVE, lab); >>> + gen_exception(dc, EXCP_RANGE); >>> + gen_set_label(lab); >>> + tcg_temp_free(sr_ove); >>> + } else if (rb != 0) { >> >> 'if (rb != 0)' and the following 'else' block are redundant here. >> >> I feel that I repeatedly fail to explain what's wrong with these div/divu >> implementations; could you please add testcases for l.div and l.divu >> that divide by the register other than r0 that contains 0 value? >> > > and > > case 0x03: /*l.divu*/ > LOG_DIS("l.divu r%d, r%d, r%d\n", rd, ra, rb); > { > int lab0 = gen_new_label(); > int lab1 = gen_new_label(); > TCGv_i32 sr_ove = tcg_temp_new(); > tcg_gen_andi_tl(sr_ove, cpu_sr, SR_OVE); > if (rb == 0) { > tcg_gen_ori_tl(cpu_sr, cpu_sr, SR_OV); > tcg_gen_ori_tl(cpu_sr, cpu_sr, SR_CY); > tcg_gen_brcondi_tl(TCG_COND_NE, sr_ove, SR_OVE, lab0); > gen_exception(dc, EXCP_RANGE); > gen_set_label(lab0); > } else { > tcg_gen_brcondi_tl(TCG_COND_NE, cpu_R[rb], > 0x00000000, lab1); > tcg_gen_ori_tl(cpu_sr, cpu_sr, SR_OV); > tcg_gen_ori_tl(cpu_sr, cpu_sr, SR_CY); > tcg_gen_brcondi_tl(TCG_COND_NE, sr_ove, SR_OVE, lab1); > gen_exception(dc, EXCP_RANGE); > gen_set_label(lab1); > tcg_gen_divu_tl(cpu_R[rd], cpu_R[ra], cpu_R[rb]); Ditto. > } > tcg_temp_free_i32(sr_ove); > } > break; > > is this OK? > >>> + tcg_gen_divu_tl(cpu_R[rd], cpu_R[ra], cpu_R[rb]); >>> + } else { >>> + break; >>> + } >>> + break; >>> + >>> + default: >>> + gen_illegal_exception(dc); >>> + break; >>> + } >>> + break; >>> + >>> + case 0x000b: >>> + switch (op1) { >>> + case 0x03: /*l.mulu*/ >>> + LOG_DIS("l.mulu r%d, r%d, r%d\n", rd, ra, rb); >>> + if (rb != 0 && ra != 0) { >>> + TCGv result = tcg_temp_new(); >>> + TCGv high = tcg_temp_new(); >>> + TCGv tra = tcg_temp_new(); >>> + TCGv trb = tcg_temp_new(); >>> + TCGv_i32 sr_ove = tcg_temp_new(); >>> + int lab = gen_new_label(); >>> + int lab2 = gen_new_label(); >>> + tcg_gen_extu_i32_i64(tra, cpu_R[ra]); >>> + tcg_gen_extu_i32_i64(trb, cpu_R[rb]); >>> + tcg_gen_mul_tl(result, cpu_R[ra], cpu_R[rb]); >> >> You've calculated tra and trb but haven't uses them here. >> >>> + tcg_gen_shri_tl(high, result, (sizeof(target_ulong) * 8)); >> >> You probably meant result and high to be _i64. >> >>> + tcg_gen_brcondi_tl(TCG_COND_EQ, high, 0x0, lab); >>> + tcg_gen_ori_tl(cpu_sr, cpu_sr, SR_CY); >>> + tcg_gen_ori_tl(cpu_sr, cpu_sr, SR_OV); >>> + tcg_gen_andi_tl(sr_ove, cpu_sr, SR_OVE); >>> + tcg_gen_brcondi_tl(TCG_COND_NE, sr_ove, SR_OVE, lab2); >>> + gen_exception(dc, EXCP_RANGE); >>> + gen_set_label(lab); >>> + gen_set_label(lab2); >> >> No need to set two labels at one point. >> > > case 0x03: /*l.mulu*/ > LOG_DIS("l.mulu r%d, r%d, r%d\n", rd, ra, rb); > if (rb != 0 && ra != 0) { > TCGv_i64 result = tcg_temp_new_i64(); > TCGv_i64 tra = tcg_temp_new_i64(); > TCGv_i64 trb = tcg_temp_new_i64(); > TCGv high = tcg_temp_new(); > TCGv_i32 sr_ove = tcg_temp_new(); > int lab = gen_new_label(); > tcg_gen_extu_i32_i64(tra, cpu_R[ra]); > tcg_gen_extu_i32_i64(trb, cpu_R[rb]); > tcg_gen_mul_i64(result, tra, trb); > tcg_temp_free(tra); > tcg_temp_free(trb); > tcg_gen_shri_i64(high, result, (sizeof(target_ulong) * 8)); TARGET_LONG_BITS? > tcg_gen_brcondi_tl(TCG_COND_EQ, high, 0x00000000, lab); > tcg_gen_ori_tl(cpu_sr, cpu_sr, SR_CY); > tcg_gen_ori_tl(cpu_sr, cpu_sr, SR_OV); Not an issue, but you could just tcg_gen_ori_tl(cpu_sr, cpu_sr, SR_CY | SR_OV); > tcg_gen_andi_tl(sr_ove, cpu_sr, SR_OVE); > tcg_gen_brcondi_tl(TCG_COND_NE, sr_ove, SR_OVE, lab); > gen_exception(dc, EXCP_RANGE); > gen_set_label(lab); > tcg_temp_free(high); > tcg_gen_trunc_i64_tl(cpu_R[rd], result); > tcg_temp_free(result); > tcg_temp_free(sr_ove); > } else { > tcg_gen_movi_tl(cpu_R[rd], 0); > } > break; > > is it right this time? Looks good to me. >>> + tcg_gen_trunc_i64_tl(cpu_R[rd], result); >>> + tcg_temp_free(result); >>> + tcg_temp_free(high); >>> + tcg_temp_free(sr_ove); >>> + tcg_temp_free(tra); >>> + tcg_temp_free(trb); >>> + } else { >>> + tcg_gen_movi_tl(cpu_R[rd], 0); >>> + } >>> + break; >>> + >>> + default: >>> + gen_illegal_exception(dc); >>> + break; >>> + } >>> + break; >>> + >> >> [...] >> >>> + case 0x13: /*l.maci*/ >>> + LOG_DIS("l.maci %d, r%d, %d\n", I5, ra, I11); >>> + { >>> + TCGv t1 = tcg_temp_new(); >>> + TCGv t2 = tcg_temp_new(); >>> + TCGv ttmp = tcg_temp_new(); /* store machi maclo*/ >>> + ttmp = tcg_const_tl(tmp); >> >> Leaked previous ttmp temporary. >> >>> + tcg_gen_mul_tl(t0, cpu_R[ra], ttmp); >> >> What t0? >> >>> + tcg_gen_ext_i32_i64(t1, t0); >>> + tcg_gen_concat_i32_i64(t2, maclo, machi); >>> + tcg_gen_add_i64(t2, t2, t1); >>> + tcg_gen_trunc_i64_i32(maclo, t2); >>> + tcg_gen_shri_i64(t2, t2, 32); >>> + tcg_gen_trunc_i64_i32(machi, t2); >>> + tcg_temp_free(t0); >>> + tcg_temp_free(t1); >>> + tcg_temp_free(t2); >> >> Leaked ttmp temporary. > > > case 0x13: /*l.maci*/ > LOG_DIS("l.maci %d, r%d, %d\n", I5, ra, I11); > { > TCGv t1 = tcg_temp_new(); > TCGv t2 = tcg_temp_new(); > TCGv ttmp = tcg_const_tl(tmp); /* store machi maclo*/ > tcg_gen_mul_tl(ttmp, cpu_R[ra], ttmp); > tcg_gen_ext_i32_i64(t1, ttmp); > tcg_gen_concat_i32_i64(t2, maclo, machi); > tcg_gen_add_i64(t2, t2, t1); > tcg_gen_trunc_i64_i32(maclo, t2); > tcg_gen_shri_i64(t2, t2, 32); > tcg_gen_trunc_i64_i32(machi, t2); > tcg_temp_free(ttmp); > tcg_temp_free(t1); > tcg_temp_free(t2); > } > break; > > >> >>> + } >>> + break; >> >> [...] >> >>> + case 0x20: /*l.ld*/ >>> + LOG_DIS("l.ld r%d, r%d, %d\n", rd, ra, I16); >>> + tcg_gen_addi_tl(t0, cpu_R[ra], sign_extend(I16, 16)); >>> + tcg_gen_qemu_ld64(cpu_R[rd], t0, dc->mem_idx); >> >> Cannot ld64 into _tl register. >> > > case 0x20: /*l.ld*/ > LOG_DIS("l.ld r%d, r%d, %d\n", rd, ra, I16); > { > check_ob64s(dc); This throws an exception in the 32-bit registers case, right? > TCGv_i64 t0 = tcg_temp_new_i64(); > tcg_gen_addi_i64(t0, cpu_R[ra], sign_extend(I16, 16)); > tcg_gen_qemu_ld64(cpu_R[rd], t0, dc->mem_idx); > tcg_temp_free_i64(t0); > } > break; > [...] >>> +static void dec_mac(DisasContext *dc, CPUOpenRISCState *env, uint32_t insn) >>> +{ >>> + uint32_t op0; >>> + uint32_t ra, rb; >>> + op0 = field(insn, 0, 4); >>> + ra = field(insn, 16, 5); >>> + rb = field(insn, 11, 5); >>> + TCGv_i64 t0 = tcg_temp_new(); >>> + TCGv_i64 t1 = tcg_temp_new(); >>> + TCGv_i64 t2 = tcg_temp_new(); >>> + switch (op0) { >>> + case 0x0001: /*l.mac*/ >>> + LOG_DIS("l.mac r%d, r%d\n", ra, rb); >>> + { >>> + tcg_gen_mul_tl(t0, cpu_R[ra], cpu_R[rb]); >>> + tcg_gen_ext_i32_i64(t1, t0); >>> + tcg_gen_concat_i32_i64(t2, maclo, machi); >>> + tcg_gen_add_i64(t2, t2, t1); >>> + tcg_gen_trunc_i64_i32(maclo, t2); >>> + tcg_gen_shri_i64(t2, t2, 32); >>> + tcg_gen_trunc_i64_i32(machi, t2); >>> + tcg_temp_free(t0); >>> + tcg_temp_free(t1); >>> + tcg_temp_free(t2); >> >> Instead of freeing temporaries repeatedly in some cases (and >> leaking them in others) you could free them once after the switch. >> > > case 0x0001: /*l.mac*/ > LOG_DIS("l.mac r%d, r%d\n", ra, rb); > { > TCGv_i64 t0 = tcg_temp_new(); > TCGv_i64 t1 = tcg_temp_new(); > TCGv_i64 t2 = tcg_temp_new(); > tcg_gen_mul_tl(t0, cpu_R[ra], cpu_R[rb]); t0 should be _tl according to that. > tcg_gen_ext_i32_i64(t1, t0); > tcg_gen_concat_i32_i64(t2, maclo, machi); > tcg_gen_add_i64(t2, t2, t1); > tcg_gen_trunc_i64_i32(maclo, t2); > tcg_gen_shri_i64(t2, t2, 32); > tcg_gen_trunc_i64_i32(machi, t2); > tcg_temp_free(t0); > tcg_temp_free(t1); > tcg_temp_free(t2); > } > break; > > I think define use and free them separately make code more clear :-) Ok, at your preference. Looks like I completely missed temporaries vs local temporaries issue spotted by Richard, so expect more review rounds. Jia, is there kernel/rootfs image available for openrisc? -- Thanks. -- Max