We weren't computing flags for lzcnt at all. Signed-off-by: Richard Henderson <r...@twiddle.net> --- target-i386/helper.h | 5 ++--- target-i386/int_helper.c | 11 +++-------- target-i386/translate.c | 50 ++++++++++++++++++++++++++++++++---------------- 3 files changed, 38 insertions(+), 28 deletions(-)
diff --git a/target-i386/helper.h b/target-i386/helper.h index ddf6936..2b3e2ae 100644 --- a/target-i386/helper.h +++ b/target-i386/helper.h @@ -194,9 +194,8 @@ DEF_HELPER_3(frstor, void, env, tl, int) DEF_HELPER_3(fxsave, void, env, tl, int) DEF_HELPER_3(fxrstor, void, env, tl, int) -DEF_HELPER_FLAGS_1(bsf, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_1(bsr, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_2(lzcnt, TCG_CALL_NO_RWG_SE, tl, tl, int) +DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl) +DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(pdep, TCG_CALL_NO_RWG_SE, tl, tl, tl) DEF_HELPER_FLAGS_2(pext, TCG_CALL_NO_RWG_SE, tl, tl, tl) diff --git a/target-i386/int_helper.c b/target-i386/int_helper.c index 7bec4eb..3b56075 100644 --- a/target-i386/int_helper.c +++ b/target-i386/int_helper.c @@ -456,19 +456,14 @@ void helper_idivq_EAX(CPUX86State *env, target_ulong t0) #endif /* bit operations */ -target_ulong helper_bsf(target_ulong t0) +target_ulong helper_ctz(target_ulong t0) { return ctztl(t0); } -target_ulong helper_lzcnt(target_ulong t0, int wordsize) +target_ulong helper_clz(target_ulong t0) { - return clztl(t0) - (TARGET_LONG_BITS - wordsize); -} - -target_ulong helper_bsr(target_ulong t0) -{ - return clztl(t0) ^ (TARGET_LONG_BITS - 1); + return clztl(t0); } target_ulong helper_pdep(target_ulong src, target_ulong mask) diff --git a/target-i386/translate.c b/target-i386/translate.c index a9bc21d..c049e42 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -7151,33 +7151,49 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_movi_tl(cpu_cc_dst, 0); } break; - case 0x1bc: /* bsf */ - case 0x1bd: /* bsr */ + case 0x1bc: /* bsf / tzcnt */ + case 0x1bd: /* bsr / lzcnt */ ot = dflag + OT_WORD; modrm = cpu_ldub_code(env, s->pc++); reg = ((modrm >> 3) & 7) | rex_r; gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); gen_extu(ot, cpu_T[0]); - if ((b & 1) && (prefixes & PREFIX_REPZ) && - 
(s->cpuid_ext3_features & CPUID_EXT3_ABM)) { - switch (ot) { - case OT_WORD: - gen_helper_lzcnt(cpu_T[0], cpu_T[0], tcg_const_i32(16)); - break; - case OT_LONG: - gen_helper_lzcnt(cpu_T[0], cpu_T[0], tcg_const_i32(32)); - break; - case OT_QUAD: - gen_helper_lzcnt(cpu_T[0], cpu_T[0], tcg_const_i32(64)); - break; - } + + /* Note that lzcnt and tzcnt are in different extensions. */ + if ((prefixes & PREFIX_REPZ) + && (b & 1 + ? s->cpuid_ext3_features & CPUID_EXT3_ABM + : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) { + int size = 8 << ot; + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); + if (b & 1) { + /* For lzcnt, reduce the target_ulong result by the + number of zeros that we expect to find at the top. */ + gen_helper_clz(cpu_T[0], cpu_T[0]); + tcg_gen_subi_tl(cpu_T[0], cpu_T[0], TARGET_LONG_BITS - size); + } else { + /* For tzcnt, a zero input must return the operand size: + force all bits outside the operand size to 1. */ + target_ulong mask = (target_ulong)-2 << (size - 1); + tcg_gen_ori_tl(cpu_T[0], cpu_T[0], mask); + gen_helper_ctz(cpu_T[0], cpu_T[0]); + } + /* For lzcnt/tzcnt, C and Z bits are defined and are + related to the result. */ + gen_op_update1_cc(); + set_cc_op(s, CC_OP_BMILGB + ot); } else { + /* For bsr/bsf, only the Z bit is defined and it's related + to the input and not the result. */ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); set_cc_op(s, CC_OP_LOGICB + ot); if (b & 1) { - gen_helper_bsr(cpu_T[0], cpu_T[0]); + /* For bsr, return the bit index of the first 1 bit, + not the count of leading zeros. */ + gen_helper_clz(cpu_T[0], cpu_T[0]); + tcg_gen_xori_tl(cpu_T[0], cpu_T[0], TARGET_LONG_BITS - 1); } else { - gen_helper_bsf(cpu_T[0], cpu_T[0]); + gen_helper_ctz(cpu_T[0], cpu_T[0]); } } gen_op_mov_reg_T0(ot, reg); -- 1.7.11.7