This patch addresses PR middle-end/105135, a missed-optimization regression
affecting mainline.  I agree with Jakub's comment that the middle-end
optimizations are sound, reducing basic blocks and conditional expressions
at the tree-level, but requiring backend's to recognize conditional move
instructions/idioms if/when beneficial.  This patch introduces two new
define_insn_and_split in i386.md to recognize two additional cmove idioms.

The first recognizes (PR105135's):

int foo(int x, int y, int z)
{
  return ((x < y) << 5) + z;
}

and transforms (the 6 insns, 13 bytes):

        xorl    %eax, %eax      ;; 2 bytes
        cmpl    %esi, %edi      ;; 2 bytes
        setl    %al             ;; 3 bytes
        sall    $5, %eax        ;; 3 bytes
        addl    %edx, %eax      ;; 2 bytes
        ret                     ;; 1 byte

into (the 4 insns, 9 bytes):

        cmpl    %esi, %edi      ;; 2 bytes
        leal    32(%rdx), %eax  ;; 3 bytes
        cmovge  %edx, %eax      ;; 3 bytes
        ret                     ;; 1 byte


The second catches the very closely related (from PR 98865):

int bar(int x, int y, int z)
{
  return -(x < y) & z;
}

and transforms the (6 insns, 12 bytes):
        xorl    %eax, %eax      ;; 2 bytes
        cmpl    %esi, %edi      ;; 2 bytes
        setl    %al             ;; 3 bytes
        negl    %eax            ;; 2 bytes
        andl    %edx, %eax      ;; 2 bytes
        ret                     ;; 1 byte

into (4 insns, 8 bytes):
        xorl    %eax, %eax      ;; 2 bytes
        cmpl    %esi, %edi      ;; 2 bytes
        cmovl   %edx, %eax      ;; 3 bytes
        ret                     ;; 1 byte

They both have in common that they recognize a setcc followed by two
instructions, and replace them with one instruction and a cmov, which
is typically a performance win, but always a size win.  Fine tuning
these decisions based on microarchitecture is much easier in the
backend, than the middle-end.

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32},
with no new failures.  Ok for mainline?


2022-04-19  Roger Sayle  <ro...@nextmovesoftware.com>

gcc/ChangeLog
        PR target/105135
        * config/i386/i386.md (*xor_cmov<mode>): Transform setcc, negate
        then and into mov $0, followed by a cmov.
        (*lea_cmov<mode>): Transform setcc, ashift const then plus into
        lea followed by cmov.

gcc/testsuite/ChangeLog
        PR target/105135
        * gcc.target/i386/cmov10.c: New test case.
        * gcc.target/i386/cmov11.c: New test case.
        * gcc.target/i386/pr105135.c: New test case.


Thanks in advance,
Roger
--

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index c74edd1..5887688 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -20751,6 +20751,52 @@
   operands[9] = replace_rtx (operands[6], operands[0], operands[1], true);
 })
 
+;; Transform setcc;negate;and into mov_zero;cmov
+(define_insn_and_split "*xor_cmov<mode>"
+  [(set (match_operand:SWI248 0 "register_operand")
+       (and:SWI248
+         (neg:SWI248 (match_operator:SWI248 1 "ix86_comparison_operator"
+                       [(match_operand 2 "flags_reg_operand")
+                        (const_int 0)]))
+         (match_operand:SWI248 3 "register_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_CMOVE && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 4) (const_int 0))
+   (set (match_dup 0)
+       (if_then_else:SWI248 (match_op_dup 1 [(match_dup 2) (const_int 0)])
+                            (match_dup 3) (match_dup 4)))]
+{
+  operands[4] = gen_reg_rtx (<MODE>mode);
+})
+
+;; Transform setcc;ashift_const;plus into lea_const;cmov
+(define_insn_and_split "*lea_cmov<mode>"
+  [(set (match_operand:SWI 0 "register_operand")
+       (plus:SWI (ashift:SWI (match_operator:SWI 1 "ix86_comparison_operator"
+                               [(match_operand 2 "flags_reg_operand")
+                                (const_int 0)])
+                             (match_operand:SWI 3 "const_int_operand"))
+                 (match_operand:SWI 4 "register_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_CMOVE && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 5) (plus:<LEAMODE> (match_dup 4) (match_dup 6)))
+   (set (match_dup 0)
+       (if_then_else:<LEAMODE> (match_op_dup 1 [(match_dup 2) (const_int 0)])
+                               (match_dup 5) (match_dup 4)))]
+{
+  operands[5] = gen_reg_rtx (<LEAMODE>mode);
+  operands[6] = GEN_INT (1 << INTVAL (operands[3]));
+  if (<MODE>mode != <LEAMODE>mode)
+    {
+      operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
+      operands[4] = gen_lowpart (<LEAMODE>mode, operands[4]);
+    }
+})
+
 (define_insn "movhf_mask"
   [(set (match_operand:HF 0 "nonimmediate_operand" "=v,m,v")
        (unspec:HF
diff --git a/gcc/testsuite/gcc.target/i386/cmov10.c 
b/gcc/testsuite/gcc.target/i386/cmov10.c
new file mode 100644
index 0000000..c04fdd8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/cmov10.c
@@ -0,0 +1,9 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+int foo(int x, int y, int z)
+{
+  return ((x < y) << 5) + z;
+}
+
+/* { dg-final { scan-assembler "cmovge" } } */
diff --git a/gcc/testsuite/gcc.target/i386/cmov11.c 
b/gcc/testsuite/gcc.target/i386/cmov11.c
new file mode 100644
index 0000000..65f2bfc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/cmov11.c
@@ -0,0 +1,9 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+int foo(int x, int y, int z)
+{
+  return -(x < y) & z;
+}
+
+/* { dg-final { scan-assembler "cmovl" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr105135.c 
b/gcc/testsuite/gcc.target/i386/pr105135.c
new file mode 100644
index 0000000..3ed3c9e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr105135.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+char to_lower_1(const char c) { return c + ((c >= 'A' && c <= 'Z') * 32); }
+
+char to_lower_2(const char c) { return c + (((c >= 'A') & (c <= 'Z')) * 32); }
+
+char to_lower_3(const char c) {
+    if (c >= 'A' && c <= 'Z') {
+        return c + 32;
+    }
+    return c;
+}
+
+/* { dg-final { scan-assembler-not "setbe" } } */
+/* { dg-final { scan-assembler-not "sall" } } */

Reply via email to