This patch addresses PR middle-end/105135, a missed-optimization regression affecting mainline. I agree with Jakub's comment that the middle-end optimizations are sound, reducing basic blocks and conditional expressions at the tree level, but requiring backends to recognize conditional move instructions/idioms if/when beneficial. This patch introduces two new define_insn_and_split patterns in i386.md to recognize two additional cmove idioms.
The first recognizes (PR105135's): int foo(int x, int y, int z) { return ((x < y) << 5) + z; } and transforms (the 6 insns, 13 bytes): xorl %eax, %eax ;; 2 bytes cmpl %esi, %edi ;; 2 bytes setl %al ;; 3 bytes sall $5, %eax ;; 3 bytes addl %edx, %eax ;; 2 bytes ret ;; 1 byte into (the 4 insns, 9 bytes): cmpl %esi, %edi ;; 2 bytes leal 32(%rdx), %eax ;; 3 bytes cmovge %edx, %eax ;; 3 bytes ret ;; 1 byte The second catches the very closely related (from PR 98865): int bar(int x, int y, int z) { return -(x < y) & z; } and transforms the (6 insns, 12 bytes): xorl %eax, %eax ;; 2 bytes cmpl %esi, %edi ;; 2 bytes setl %al ;; 3 bytes negl %eax ;; 2 bytes andl %edx, %eax ;; 2 bytes ret ;; 1 byte into (4 insns, 8 bytes): xorl %eax, %eax ;; 2 bytes cmpl %esi, %edi ;; 2 bytes cmovl %edx, %eax ;; 3 bytes ret ;; 1 byte They both have in common that they recognize a setcc followed by two instructions, and replace them with one instruction and a cmov, which is typically a performance win, but always a size win. Fine-tuning these decisions based on microarchitecture is much easier in the backend than in the middle-end. This patch has been tested on x86_64-pc-linux-gnu with make bootstrap and make -k check, both with and without --target_board=unix{-m32}, with no new failures. Ok for mainline? 2022-04-19 Roger Sayle <ro...@nextmovesoftware.com> gcc/ChangeLog PR target/105135 * config/i386/i386.md (*xor_cmov<mode>): Transform setcc, negate then and into mov $0, followed by a cmov. (*lea_cmov<mode>): Transform setcc, ashift const then plus into lea followed by cmov. gcc/testsuite/ChangeLog PR target/105135 * gcc.target/i386/cmov10.c: New test case. * gcc.target/i386/cmov11.c: New test case. * gcc.target/i386/pr105135.c: New test case. Thanks in advance, Roger --
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index c74edd1..5887688 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -20751,6 +20751,52 @@ operands[9] = replace_rtx (operands[6], operands[0], operands[1], true); }) +;; Transform setcc;negate;and into mov_zero;cmov +(define_insn_and_split "*xor_cmov<mode>" + [(set (match_operand:SWI248 0 "register_operand") + (and:SWI248 + (neg:SWI248 (match_operator:SWI248 1 "ix86_comparison_operator" + [(match_operand 2 "flags_reg_operand") + (const_int 0)])) + (match_operand:SWI248 3 "register_operand"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_CMOVE && can_create_pseudo_p ()" + "#" + "&& 1" + [(set (match_dup 4) (const_int 0)) + (set (match_dup 0) + (if_then_else:SWI248 (match_op_dup 1 [(match_dup 2) (const_int 0)]) + (match_dup 3) (match_dup 4)))] +{ + operands[4] = gen_reg_rtx (<MODE>mode); +}) + +;; Transform setcc;ashift_const;plus into lea_const;cmov +(define_insn_and_split "*lea_cmov<mode>" + [(set (match_operand:SWI 0 "register_operand") + (plus:SWI (ashift:SWI (match_operator:SWI 1 "ix86_comparison_operator" + [(match_operand 2 "flags_reg_operand") + (const_int 0)]) + (match_operand:SWI 3 "const_int_operand")) + (match_operand:SWI 4 "register_operand"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_CMOVE && can_create_pseudo_p ()" + "#" + "&& 1" + [(set (match_dup 5) (plus:<LEAMODE> (match_dup 4) (match_dup 6))) + (set (match_dup 0) + (if_then_else:<LEAMODE> (match_op_dup 1 [(match_dup 2) (const_int 0)]) + (match_dup 5) (match_dup 4)))] +{ + operands[5] = gen_reg_rtx (<LEAMODE>mode); + operands[6] = GEN_INT (1 << INTVAL (operands[3])); + if (<MODE>mode != <LEAMODE>mode) + { + operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]); + operands[4] = gen_lowpart (<LEAMODE>mode, operands[4]); + } +}) + (define_insn "movhf_mask" [(set (match_operand:HF 0 "nonimmediate_operand" "=v,m,v") (unspec:HF diff --git a/gcc/testsuite/gcc.target/i386/cmov10.c b/gcc/testsuite/gcc.target/i386/cmov10.c 
new file mode 100644 index 0000000..c04fdd8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cmov10.c @@ -0,0 +1,9 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2" } */ + +int foo(int x, int y, int z) +{ + return ((x < y) << 5) + z; +} + +/* { dg-final { scan-assembler "cmovge" } } */ diff --git a/gcc/testsuite/gcc.target/i386/cmov11.c b/gcc/testsuite/gcc.target/i386/cmov11.c new file mode 100644 index 0000000..65f2bfc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cmov11.c @@ -0,0 +1,9 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2" } */ + +int foo(int x, int y, int z) +{ + return -(x < y) & z; +} + +/* { dg-final { scan-assembler "cmovl" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr105135.c b/gcc/testsuite/gcc.target/i386/pr105135.c new file mode 100644 index 0000000..3ed3c9e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr105135.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2" } */ + +char to_lower_1(const char c) { return c + ((c >= 'A' && c <= 'Z') * 32); } + +char to_lower_2(const char c) { return c + (((c >= 'A') & (c <= 'Z')) * 32); } + +char to_lower_3(const char c) { + if (c >= 'A' && c <= 'Z') { + return c + 32; + } + return c; +} + +/* { dg-final { scan-assembler-not "setbe" } } */ +/* { dg-final { scan-assembler-not "sall" } } */