On 02/03/2016 09:06, Hervé Poussineau wrote: >> > > I just reconfirmed that > d6a2914984c89fa0a3125b9842e0cbf68de79a3d~1 + > 88c73d16ad1b6c22a2ab082064d0d521f756296a works, > while > d6a2914984c89fa0a3125b9842e0cbf68de79a3d + > 88c73d16ad1b6c22a2ab082064d0d521f756296a bugchecks. > > a5af12871fd4601c44f08d9e49131e9ca13ef102 that you tested is after > d6a2914984c89fa0a3125b9842e0cbf68de79a3d which broke Windows 9x, that's > why you're seeing it broken. > > It also has been a long time that Windows 9x doesn't work with > -enable-kvm. Maybe even always...
This was tricky because the faulting instruction is some kind of generated thunk. In the good QEMU it is like: 0x000000008030a4da: mov $0x16f,%ax 0x000000008030a4dd: mov %ax,%es 0x000000008030a4df: movzwl %cx,%ecx 0x000000008030a4e3: addr32 mov %es:-0x400aa28c(%ecx),%edx 0x000000008030a4ec: ljmpl $0x167,$0xbff71903 In the bad one, the address is %es:0(%ecx) and everything goes south from there. Therefore I copied the old code, changed to generate the effective address in a new temp cpu_A1, then I added a helper that gets the two effective addresses and asserts if they mismatch. Sure enough it fired on an addr16 mov %gs:(%bx),%eax The issue is that the /* ADDSEG will only be false in 16-bit mode for LEA. */ comment is false when in 32-bit mode but with an addr16 prefix. I still have to forward port it, and test on a newer commit, but the attached patch can boot Windows 98. Paolo diff --git a/target-i386/translate.c b/target-i386/translate.c index 282f4a1..0a2d091 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -61,6 +61,7 @@ /* global register indexes */ static TCGv_ptr cpu_env; static TCGv cpu_A0; +static TCGv cpu_A1; static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT; static TCGv_i32 cpu_cc_op; static TCGv cpu_regs[CPU_NB_REGS]; @@ -419,6 +420,18 @@ static inline void gen_op_add_reg_T0(TCGMemOp size, int reg) gen_op_mov_reg_v(size, reg, cpu_tmp0); } +static inline void gen_op_addl_A1_seg(DisasContext *s, int reg) +{ + tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, segs[reg].base)); + if (CODE64(s)) { + tcg_gen_ext32u_tl(cpu_A1, cpu_A1); + tcg_gen_add_tl(cpu_A1, cpu_A1, cpu_tmp0); + } else { + tcg_gen_add_tl(cpu_A1, cpu_A1, cpu_tmp0); + tcg_gen_ext32u_tl(cpu_A1, cpu_A1); + } +} + static inline void gen_op_addl_A0_seg(DisasContext *s, int reg) { tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, segs[reg].base)); @@ -485,13 +498,13 @@ static void gen_lea_v_seg(DisasContext *s, TCGv a0, int def_seg, int ovr_seg) break; case MO_16: /* 16 bit address */ - if (ovr_seg < 0) { - ovr_seg = def_seg; - } tcg_gen_ext16u_tl(cpu_A0, a0); - /* ADDSEG will only be false in 16-bit mode for LEA. */ - if (!s->addseg) { - return; + if (ovr_seg < 0) { + if (s->addseg) { + ovr_seg = def_seg; + } else { + return; + } } a0 = cpu_A0; break; @@ -1838,12 +1851,198 @@ static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c) } } +static void gen_lea_modrm_old(CPUX86State *env, DisasContext *s, int modrm) +{ + target_long disp; + int havesib; + int base; + int index; + int scale; + int mod, rm, code, override, must_add_seg; + TCGv sum; + + override = s->override; + must_add_seg = s->addseg; + if (override >= 0) + must_add_seg = 1; + mod = (modrm >> 6) & 3; + rm = modrm & 7; + + switch (s->aflag) { + case MO_64: + case MO_32: + havesib = 0; + base = rm; + index = -1; + scale = 0; + + if (base == 4) { + havesib = 1; + code = cpu_ldub_code(env, s->pc++); + scale = (code >> 6) & 3; + index = ((code >> 3) & 7) | REX_X(s); + if (index == 4) { + index = -1; /* no index */ + } + base = (code & 7); + } + base |= REX_B(s); + + switch (mod) { + case 0: + if ((base & 7) == 5) { + base = -1; + disp = (int32_t)cpu_ldl_code(env, s->pc); + s->pc += 4; + if (CODE64(s) && !havesib) { + disp += s->pc + s->rip_offset; + } + } else { + disp = 0; + } + break; + case 1: + disp = (int8_t)cpu_ldub_code(env, s->pc++); + break; + default: + case 2: + disp = (int32_t)cpu_ldl_code(env, s->pc); + s->pc += 4; + break; + } + + /* For correct popl handling with esp. */ + if (base == R_ESP && s->popl_esp_hack) { + disp += s->popl_esp_hack; + } + + /* Compute the address, with a minimum number of TCG ops. */ + TCGV_UNUSED(sum); + if (index >= 0) { + if (scale == 0) { + sum = cpu_regs[index]; + } else { + tcg_gen_shli_tl(cpu_A1, cpu_regs[index], scale); + sum = cpu_A1; + } + if (base >= 0) { + tcg_gen_add_tl(cpu_A1, sum, cpu_regs[base]); + sum = cpu_A1; + } + } else if (base >= 0) { + sum = cpu_regs[base]; + } + if (TCGV_IS_UNUSED(sum)) { + tcg_gen_movi_tl(cpu_A1, disp); + } else { + tcg_gen_addi_tl(cpu_A1, sum, disp); + } + + if (must_add_seg) { + if (override < 0) { + if (base == R_EBP || base == R_ESP) { + override = R_SS; + } else { + override = R_DS; + } + } + + tcg_gen_ld_tl(cpu_tmp0, cpu_env, + offsetof(CPUX86State, segs[override].base)); + if (CODE64(s)) { + if (s->aflag == MO_32) { + tcg_gen_ext32u_tl(cpu_A1, cpu_A1); + } + tcg_gen_add_tl(cpu_A1, cpu_A1, cpu_tmp0); + return; + } + + tcg_gen_add_tl(cpu_A1, cpu_A1, cpu_tmp0); + } + + if (s->aflag == MO_32) { + tcg_gen_ext32u_tl(cpu_A1, cpu_A1); + } + break; + + case MO_16: + switch (mod) { + case 0: + if (rm == 6) { + disp = cpu_lduw_code(env, s->pc); + s->pc += 2; + tcg_gen_movi_tl(cpu_A1, disp); + rm = 0; /* avoid SS override */ + goto no_rm; + } else { + disp = 0; + } + break; + case 1: + disp = (int8_t)cpu_ldub_code(env, s->pc++); + break; + default: + case 2: + disp = (int16_t)cpu_lduw_code(env, s->pc); + s->pc += 2; + break; + } + + sum = cpu_A1; + switch (rm) { + case 0: + tcg_gen_add_tl(cpu_A1, cpu_regs[R_EBX], cpu_regs[R_ESI]); + break; + case 1: + tcg_gen_add_tl(cpu_A1, cpu_regs[R_EBX], cpu_regs[R_EDI]); + break; + case 2: + tcg_gen_add_tl(cpu_A1, cpu_regs[R_EBP], cpu_regs[R_ESI]); + break; + case 3: + tcg_gen_add_tl(cpu_A1, cpu_regs[R_EBP], cpu_regs[R_EDI]); + break; + case 4: + sum = cpu_regs[R_ESI]; + break; + case 5: + sum = cpu_regs[R_EDI]; + break; + case 6: + sum = cpu_regs[R_EBP]; + break; + default: + case 7: + sum = cpu_regs[R_EBX]; + break; + } + tcg_gen_addi_tl(cpu_A1, sum, disp); + tcg_gen_ext16u_tl(cpu_A1, cpu_A1); + no_rm: + if (must_add_seg) { + if (override < 0) { + if (rm == 2 || rm == 3 || rm == 6) { + override = R_SS; + } else { + override = R_DS; + } + } + gen_op_addl_A1_seg(s, override); + } + break; + + default: + tcg_abort(); + } +} + static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm) { target_long disp; int havesib, base, index, scale; int mod, rm, code, def_seg, ovr_seg; TCGv sum; + target_long pc = s->pc, save_pc; def_seg = R_DS; ovr_seg = s->override; @@ -1985,6 +2184,11 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm) } gen_lea_v_seg(s, sum, def_seg, ovr_seg); + save_pc = s->pc; + s->pc = pc; + gen_lea_modrm_old(env, s, modrm); + assert(s->pc == save_pc); + gen_helper_compare(cpu_A0, cpu_A1); } static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm) @@ -7858,6 +8062,7 @@ void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb) cpu_T[0] = tcg_temp_new(); cpu_T[1] = tcg_temp_new(); cpu_A0 = tcg_temp_new(); + cpu_A1 = tcg_temp_new(); cpu_tmp0 = tcg_temp_new(); cpu_tmp1_i64 = tcg_temp_new_i64();