From: Andi Kleen <a...@linux.intel.com> In my tests, the optimized out-of-line glibc strcmp is always faster than the inline rep ; cmpsb sequence, even for small strings. The Intel optimization manual also recommends against using that sequence. So remove the cmpstrnsi expander and the cmpintqi expander it uses.
Tested on Sandy Bridge, Westmere Intel CPUs. gcc/: 2014-09-27 Andi Kleen <a...@linux.intel.com> * config/i386/i386.md (cmpstrnsi, cmpintqi): Remove expanders. --- gcc/config/i386/i386.md | 85 ------------------------------------------------- 1 file changed, 85 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 98df8e1..1d2f1a5 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -16097,91 +16097,6 @@ (const_string "*"))) (set_attr "mode" "QI")]) -(define_expand "cmpstrnsi" - [(set (match_operand:SI 0 "register_operand") - (compare:SI (match_operand:BLK 1 "general_operand") - (match_operand:BLK 2 "general_operand"))) - (use (match_operand 3 "general_operand")) - (use (match_operand 4 "immediate_operand"))] - "" -{ - rtx addr1, addr2, out, outlow, count, countreg, align; - - if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS) - FAIL; - - /* Can't use this if the user has appropriated ecx, esi or edi. */ - if (fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG]) - FAIL; - - out = operands[0]; - if (!REG_P (out)) - out = gen_reg_rtx (SImode); - - addr1 = copy_addr_to_reg (XEXP (operands[1], 0)); - addr2 = copy_addr_to_reg (XEXP (operands[2], 0)); - if (addr1 != XEXP (operands[1], 0)) - operands[1] = replace_equiv_address_nv (operands[1], addr1); - if (addr2 != XEXP (operands[2], 0)) - operands[2] = replace_equiv_address_nv (operands[2], addr2); - - count = operands[3]; - countreg = ix86_zero_extend_to_Pmode (count); - - /* %%% Iff we are testing strict equality, we can use known alignment - to good advantage. This may be possible with combine, particularly - once cc0 is dead. */ - align = operands[4]; - - if (CONST_INT_P (count)) - { - if (INTVAL (count) == 0) - { - emit_move_insn (operands[0], const0_rtx); - DONE; - } - emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, countreg, align, - operands[1], operands[2])); - } - else - { - rtx (*gen_cmp) (rtx, rtx); - - gen_cmp = (TARGET_64BIT - ? 
gen_cmpdi_1 : gen_cmpsi_1); - - emit_insn (gen_cmp (countreg, countreg)); - emit_insn (gen_cmpstrnqi_1 (addr1, addr2, countreg, align, - operands[1], operands[2])); - } - - outlow = gen_lowpart (QImode, out); - emit_insn (gen_cmpintqi (outlow)); - emit_move_insn (out, gen_rtx_SIGN_EXTEND (SImode, outlow)); - - if (operands[0] != out) - emit_move_insn (operands[0], out); - - DONE; -}) - -;; Produce a tri-state integer (-1, 0, 1) from condition codes. - -(define_expand "cmpintqi" - [(set (match_dup 1) - (gtu:QI (reg:CC FLAGS_REG) (const_int 0))) - (set (match_dup 2) - (ltu:QI (reg:CC FLAGS_REG) (const_int 0))) - (parallel [(set (match_operand:QI 0 "register_operand") - (minus:QI (match_dup 1) - (match_dup 2))) - (clobber (reg:CC FLAGS_REG))])] - "" -{ - operands[1] = gen_reg_rtx (QImode); - operands[2] = gen_reg_rtx (QImode); -}) - ;; memcmp recognizers. The `cmpsb' opcode does nothing if the count is ;; zero. Emit extra code to make sure that a zero-length compare is EQ. -- 2.1.1