We used to expand memcmp to "repz cmpsb" via cmpstrnsi. It was changed by
commit 9b0f6f5e511ca512e4faeabc81d2fd3abad9b02f Author: Nick Clifton <ni...@redhat.com> Date: Fri Aug 12 16:26:11 2011 +0000 builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi pattern. * builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi pattern. * doc/md.texi (cmpstrn): Note that the comparison stops if both fetched bytes are zero. (cmpstr): Likewise. (cmpmem): Note that the comparison does not stop if both of the fetched bytes are zero. Duplicate the cmpstrn pattern for cmpmem and expand cmpmem to "repz cmpsb" for -minline-all-stringops. The only difference is that the length argument of cmpmem is guaranteed to be less than or equal to the lengths of both memory areas. gcc/ PR target/95151 * config/i386/i386-expand.c (ix86_expand_cmpstrn_or_cmpmem): New function. * config/i386/i386-protos.h (ix86_expand_cmpstrn_or_cmpmem): New prototype. * config/i386/i386.md (cmpmemsi): New pattern. gcc/testsuite/ PR target/95151 * gcc.target/i386/pr95151-1.c: New test. * gcc.target/i386/pr95151-2.c: Likewise. * gcc.target/i386/pr95151-3.c: Likewise. * gcc.target/i386/pr95151-4.c: Likewise. 
--- gcc/config/i386/i386-expand.c | 84 +++++++++++++++++++++++ gcc/config/i386/i386-protos.h | 1 + gcc/config/i386/i386.md | 84 ++++++----------------- gcc/testsuite/gcc.target/i386/pr95151-1.c | 17 +++++ gcc/testsuite/gcc.target/i386/pr95151-2.c | 10 +++ gcc/testsuite/gcc.target/i386/pr95151-3.c | 18 +++++ gcc/testsuite/gcc.target/i386/pr95151-4.c | 11 +++ 7 files changed, 162 insertions(+), 63 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr95151-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr95151-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr95151-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pr95151-4.c diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 270585decb2..9e27564037b 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -7656,6 +7656,90 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp, return true; } +/* Expand cmpstrn or memcmp. */ + +bool +ix86_expand_cmpstrn_or_cmpmem (rtx result, rtx src1, rtx src2, + rtx length, rtx align, bool is_cmpstrn) +{ + if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS) + return false; + + /* Can't use this if the user has appropriated ecx, esi or edi. */ + if (fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG]) + return false; + + if (is_cmpstrn) + { + /* For strncmp, length is the maximum length, which can be larger + than actual string lengths. We can expand the cmpstrn pattern + to "repz cmpsb" only if one of the strings is a constant so + that expand_builtin_strncmp() can write the length argument to + be the minimum of the const string length and the actual length + argument. Otherwise, "repz cmpsb" may pass the 0 byte. 
*/ + tree t1 = MEM_EXPR (src1); + tree t2 = MEM_EXPR (src2); + if (!((t1 && TREE_CODE (t1) == MEM_REF + && TREE_CODE (TREE_OPERAND (t1, 0)) == ADDR_EXPR + && (TREE_CODE (TREE_OPERAND (TREE_OPERAND (t1, 0), 0)) + == STRING_CST)) + || (t2 && TREE_CODE (t2) == MEM_REF + && TREE_CODE (TREE_OPERAND (t2, 0)) == ADDR_EXPR + && (TREE_CODE (TREE_OPERAND (TREE_OPERAND (t2, 0), 0)) + == STRING_CST)))) + return false; + } + else + { + /* Expand memcmp to "repz cmpsb" only for -minline-all-stringops + since "repz cmpsb" can be much slower than memcmp function + implemented with vector instructions, see + + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052 + */ + if (!TARGET_INLINE_ALL_STRINGOPS) + return false; + } + + rtx addr1 = copy_addr_to_reg (XEXP (src1, 0)); + rtx addr2 = copy_addr_to_reg (XEXP (src2, 0)); + if (addr1 != XEXP (src1, 0)) + src1 = replace_equiv_address_nv (src1, addr1); + if (addr2 != XEXP (src2, 0)) + src2 = replace_equiv_address_nv (src2, addr2); + + /* NB: Make a copy of the data length to avoid changing the original + data length by cmpstrnqi patterns. */ + length = ix86_zero_extend_to_Pmode (length); + rtx lengthreg = gen_reg_rtx (Pmode); + emit_move_insn (lengthreg, length); + + /* If we are testing strict equality, we can use known alignment to + good advantage. This may be possible with combine, particularly + once cc0 is dead. 
*/ + if (CONST_INT_P (length)) + { + if (length == const0_rtx) + { + emit_move_insn (result, const0_rtx); + return true; + } + emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, lengthreg, align, + src1, src2)); + } + else + { + emit_insn (gen_cmp_1 (Pmode, lengthreg, lengthreg)); + emit_insn (gen_cmpstrnqi_1 (addr1, addr2, lengthreg, align, + src1, src2)); + } + + rtx out = gen_lowpart (QImode, result); + emit_insn (gen_cmpintqi (out)); + emit_move_insn (result, gen_rtx_SIGN_EXTEND (SImode, out)); + + return true; +} /* Expand the appropriate insns for doing strlen if not just doing repnz; scasb diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 39fcaa0ad5f..238aa650b61 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -71,6 +71,7 @@ extern int avx_vperm2f128_parallel (rtx par, machine_mode mode); extern bool ix86_expand_strlen (rtx, rtx, rtx, rtx); extern bool ix86_expand_set_or_cpymem (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, bool); +extern bool ix86_expand_cmpstrn_or_cmpmem (rtx, rtx, rtx, rtx, rtx, bool); extern bool constant_address_p (rtx); extern bool legitimate_pic_operand_p (rtx); diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index c2de1bdaf42..dec8dd17168 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -17777,6 +17777,22 @@ (define_insn "*rep_stosqi" (const_string "*"))) (set_attr "mode" "QI")]) +(define_expand "cmpmemsi" + [(set (match_operand:SI 0 "register_operand" "") + (compare:SI (match_operand:BLK 1 "memory_operand" "") + (match_operand:BLK 2 "memory_operand" "") ) ) + (use (match_operand 3 "general_operand")) + (use (match_operand 4 "immediate_operand"))] + "" +{ + if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1], + operands[2], operands[3], + operands[4], false)) + DONE; + else + FAIL; +}) + (define_expand "cmpstrnsi" [(set (match_operand:SI 0 "register_operand") (compare:SI (match_operand:BLK 1 "general_operand") @@ -17785,70 
+17801,12 @@ (define_expand "cmpstrnsi" (use (match_operand 4 "immediate_operand"))] "" { - rtx addr1, addr2, countreg, align, out; - - if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS) - FAIL; - - /* Can't use this if the user has appropriated ecx, esi or edi. */ - if (fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG]) - FAIL; - - /* One of the strings must be a constant. If so, expand_builtin_strncmp() - will have rewritten the length arg to be the minimum of the const string - length and the actual length arg. If both strings are the same and - shorter than the length arg, repz cmpsb will not stop at the 0 byte and - will incorrectly base the results on chars past the 0 byte. */ - tree t1 = MEM_EXPR (operands[1]); - tree t2 = MEM_EXPR (operands[2]); - if (!((t1 && TREE_CODE (t1) == MEM_REF - && TREE_CODE (TREE_OPERAND (t1, 0)) == ADDR_EXPR - && TREE_CODE (TREE_OPERAND (TREE_OPERAND (t1, 0), 0)) == STRING_CST) - || (t2 && TREE_CODE (t2) == MEM_REF - && TREE_CODE (TREE_OPERAND (t2, 0)) == ADDR_EXPR - && TREE_CODE (TREE_OPERAND (TREE_OPERAND (t2, 0), 0)) == STRING_CST))) - FAIL; - - addr1 = copy_addr_to_reg (XEXP (operands[1], 0)); - addr2 = copy_addr_to_reg (XEXP (operands[2], 0)); - if (addr1 != XEXP (operands[1], 0)) - operands[1] = replace_equiv_address_nv (operands[1], addr1); - if (addr2 != XEXP (operands[2], 0)) - operands[2] = replace_equiv_address_nv (operands[2], addr2); - - /* NB: Make a copy of the data length to avoid changing the original - data length by cmpstrnqi patterns. */ - rtx count = ix86_zero_extend_to_Pmode (operands[3]); - countreg = gen_reg_rtx (Pmode); - emit_move_insn (countreg, count); - - /* %%% Iff we are testing strict equality, we can use known alignment - to good advantage. This may be possible with combine, particularly - once cc0 is dead. 
*/ - align = operands[4]; - - if (CONST_INT_P (operands[3])) - { - if (operands[3] == const0_rtx) - { - emit_move_insn (operands[0], const0_rtx); - DONE; - } - emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, countreg, align, - operands[1], operands[2])); - } + if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1], + operands[2], operands[3], + operands[4], true)) + DONE; else - { - emit_insn (gen_cmp_1 (Pmode, countreg, countreg)); - emit_insn (gen_cmpstrnqi_1 (addr1, addr2, countreg, align, - operands[1], operands[2])); - } - - out = gen_lowpart (QImode, operands[0]); - emit_insn (gen_cmpintqi (out)); - emit_move_insn (operands[0], gen_rtx_SIGN_EXTEND (SImode, out)); - - DONE; + FAIL; }) ;; Produce a tri-state integer (-1, 0, 1) from condition codes. diff --git a/gcc/testsuite/gcc.target/i386/pr95151-1.c b/gcc/testsuite/gcc.target/i386/pr95151-1.c new file mode 100644 index 00000000000..54a7510042a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95151-1.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -minline-all-stringops" } */ + +struct foo +{ + char array[257]; +}; + +extern struct foo x; + +int +func (struct foo i) +{ + return __builtin_memcmp (&x, &i, sizeof (x)) ? 1 : 2; +} + +/* { dg-final { scan-assembler-not "call\[\\t \]*_?memcmp" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr95151-2.c b/gcc/testsuite/gcc.target/i386/pr95151-2.c new file mode 100644 index 00000000000..8f9d8ee1bf4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95151-2.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -minline-all-stringops" } */ + +int +func (void *d, void *s, unsigned int l) +{ + return __builtin_memcmp (d, s, l) ? 
1 : 2; +} + +/* { dg-final { scan-assembler-not "call\[\\t \]*_?memcmp" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr95151-3.c b/gcc/testsuite/gcc.target/i386/pr95151-3.c new file mode 100644 index 00000000000..14cbdec4c93 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95151-3.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -mno-inline-all-stringops" } */ + +struct foo +{ + char array[257]; +}; + +extern struct foo x; + +int +func (struct foo i) +{ + return __builtin_memcmp (&x, &i, sizeof (x)) ? 1 : 2; +} + +/* { dg-final { scan-assembler "call\[\\t \]*_?memcmp" } } */ +/* { dg-final { scan-assembler-not "cmpsb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr95151-4.c b/gcc/testsuite/gcc.target/i386/pr95151-4.c new file mode 100644 index 00000000000..c93b2b60c5b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95151-4.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -mno-inline-all-stringops" } */ + +int +func (void *d, void *s, unsigned int l) +{ + return __builtin_memcmp (d, s, l) ? 1 : 2; +} + +/* { dg-final { scan-assembler "call\[\\t \]*_?memcmp" } } */ +/* { dg-final { scan-assembler-not "cmpsb" } } */ -- 2.26.2