This patch addresses a (minor) missed-optimization regression revealed
by Richard Biener's example/variant in comment #1 of PR target/92578.

int foo(int moves, int movecnt, int komove) {
    int newcnt = movecnt;
    if (moves == komove)
        newcnt -= 2;
    return newcnt;
}

Comparing code generation on godbolt.org shows an interesting evolution
over time, as changes in register allocation affect the cmove sequence.

GCC 4.1.2 (4 instructions, suboptimal mov after cmov).
        leal    -2(%rsi), %eax
        cmpl    %edx, %edi
        cmove   %eax, %esi
        movl    %esi, %eax

GCC 4.4-4.7 (3 instructions, optimal)
        leal    -2(%rsi), %eax
        cmpl    %edx, %edi
        cmovne  %esi, %eax

GCC 5-7 (4 instructions, suboptimal mov before cmov)
        leal    -2(%rsi), %ecx
        movl    %esi, %eax
        cmpl    %edx, %edi
        cmove   %ecx, %eax

GCC 8 (4 instructions, suboptimal mov before cmov, reordered)
        movl    %esi, %eax
        leal    -2(%rsi), %ecx
        cmpl    %edx, %edi
        cmove   %ecx, %eax

GCC 9-trunk (5 instructions, two suboptimal movs before cmov)
        movl    %edx, %ecx
        movl    %esi, %eax
        leal    -2(%rsi), %edx
        cmpl    %ecx, %edi
        cmove   %edx, %eax

The challenge is that x86's two operand conditional moves, that require
the destination to be one of the (register) sources, are tricky for reload,
whose heuristics unify pseudos early (greedily?).  In this case, we have
the equivalent of "pseudo1 = cond ? pseudo2 : expression", and we'd like
to see "pseudo1 = expression; pseudo1 = cond ? pseudo1 : pseudo2", but
alas reload (currently and quite reasonably) prefers to place pseudo1 and
pseudo2 in the same hard register if possible.  Hence the solution is to
fixup/tweak the register allocation during peephole2, as previously with
https://gcc.gnu.org/pipermail/gcc-patches/2021-July/575998.html

Instead of a single peephole2 to catch just the current idiom (last above),
I've added the four peephole2s that would catch each of the (historical)
suboptimal variants above and transform them into the ideal 3 insn form.
Instrumenting the compiler shows, for example, that the (earliest) movl
after cmov triggers over 50 times during stage2 of a GCC bootstrap.


This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}, with
no new failures.  Ok for mainline?  Or if this regression isn't serious
enough for stage4 (or these patterns considered too risky), for stage1
when it reopens?  I suspect the poor interaction between cmove usage
and register allocation is one source of confusion when comparing code
generation with vs. without cmove (the other major source of confusion
being that well-predicted branches are free, but that prediction-quality
is poorly predictable).


2022-04-25  Roger Sayle  <ro...@nextmovesoftware.com>

gcc/ChangeLog
        PR target/92578
        * config/i386/i386.md (peephole2): Eliminate register-to-register
        moves by inverting the condition of a conditional move.

gcc/testsuite/ChangeLog
        PR target/92578
        * gcc.target/i386/pr92758.c: New test case.


Thanks in advance,
Roger
--

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index c74edd1..db6034a 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -20751,6 +20751,158 @@
   operands[9] = replace_rtx (operands[6], operands[0], operands[1], true);
 })
 
+;; Eliminate a reg-reg mov by inverting the condition of a cmov (#3).
+;; cmov r0,r1; mov r1,r0 -> cmov r1,r0
+(define_peephole2
+ [(set (match_operand:SWI248 0 "general_reg_operand")
+       (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
+                            [(reg FLAGS_REG) (const_int 0)])
+       (match_operand:SWI248 2 "general_reg_operand")
+       (match_operand:SWI248 3 "general_reg_operand")))
+  (set (match_operand:SWI248 4 "general_reg_operand")
+       (match_dup 0))]
+  "TARGET_CMOVE
+   && ((REGNO (operands[0]) == REGNO (operands[2])
+       && REGNO (operands[3]) == REGNO (operands[4]))
+       || (REGNO (operands[0]) == REGNO (operands[3])
+          && REGNO (operands[2]) == REGNO (operands[4])))
+   && peep2_reg_dead_p (2, operands[0])"
+  [(set (match_dup 4) (if_then_else:SWI248 (match_dup 1)
+                                          (match_dup 2)
+                                          (match_dup 3)))])
+
+;; Eliminate a reg-reg mov by inverting the condition of a cmov (#4).
+;; mov r1,r0; mov x,r2; cmp; cmov r2,r0 -> mov x,r0; cmp; cmov r1,r0
+(define_peephole2
+ [(set (match_operand:SWI248 0 "general_reg_operand")
+       (match_operand:SWI248 1 "general_reg_operand"))
+  (set (match_operand:SWI248 2 "general_reg_operand")
+       (match_operand:SWI248 3))
+  (set (match_operand 4 "flags_reg_operand") (match_operand 5))
+  (set (match_dup 0)
+       (if_then_else:SWI248 (match_operator 6 "ix86_comparison_operator"
+                            [(match_dup 4) (const_int 0)])
+       (match_operand:SWI248 7 "general_reg_operand")
+       (match_operand:SWI248 8 "general_reg_operand")))]
+  "TARGET_CMOVE
+   && REGNO (operands[0]) != REGNO (operands[2])
+   && REGNO (operands[1]) != REGNO (operands[2])
+   && REGNO (operands[7]) != REGNO (operands[8])
+   /* insns 1 and 2 feed the cmove insn 4. */
+   && (REGNO (operands[2]) == REGNO (operands[7])
+       || REGNO (operands[2]) == REGNO (operands[8]))
+   && !reg_overlap_mentioned_p (operands[0], operands[5])
+   && !reg_overlap_mentioned_p (operands[2], operands[5])
+   && peep2_reg_dead_p (4, operands[2])"
+  [(set (match_dup 0) (match_dup 3))
+   (set (match_dup 4) (match_dup 5))
+   (set (match_dup 0) (if_then_else:SWI248 (match_dup 6)
+                                          (match_dup 7)
+                                          (match_dup 8)))]
+{
+  operands[3] = replace_rtx (operands[3], operands[0], operands[1], true);
+  if (REGNO (operands[2]) == REGNO (operands[7]))
+    {
+      operands[7] = operands[0];
+      operands[8] = operands[1];
+    }
+  else
+    {
+      operands[7] = operands[1];
+      operands[8] = operands[0];
+    }
+})
+
+;; Eliminate a reg-reg mov by inverting the condition of a cmov (#5).
+;; mov x,r0; mov r1,r2; cmp; cmov r0,r2 -> mov x,r2; cmp; cmov r1,r2
+(define_peephole2
+ [(set (match_operand:SWI248 0 "general_reg_operand")
+       (match_operand:SWI248 1))
+  (set (match_operand:SWI248 2 "general_reg_operand")
+       (match_operand:SWI248 3 "general_reg_operand"))
+  (set (match_operand 4 "flags_reg_operand") (match_operand 5))
+  (set (match_dup 2)
+       (if_then_else:SWI248 (match_operator 6 "ix86_comparison_operator"
+                            [(match_dup 4) (const_int 0)])
+       (match_operand:SWI248 7 "general_reg_operand")
+       (match_operand:SWI248 8 "general_reg_operand")))]
+  "TARGET_CMOVE
+   && REGNO (operands[0]) != REGNO (operands[2])
+   && REGNO (operands[0]) != REGNO (operands[3])
+   && REGNO (operands[2]) != REGNO (operands[3])
+   && REGNO (operands[7]) != REGNO (operands[8])
+   /* insns 1 and 2 feed the cmove insn 4. */
+   && (REGNO (operands[0]) == REGNO (operands[7])
+       || REGNO (operands[0]) == REGNO (operands[8]))
+   && !reg_overlap_mentioned_p (operands[0], operands[5])
+   && !reg_overlap_mentioned_p (operands[2], operands[5])
+   && peep2_reg_dead_p (4, operands[0])"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 4) (match_dup 5))
+   (set (match_dup 2) (if_then_else:SWI248 (match_dup 6)
+                                          (match_dup 7)
+                                          (match_dup 8)))]
+{
+  if (REGNO (operands[0]) == REGNO (operands[7]))
+    {
+      operands[7] = operands[2];
+      operands[8] = operands[3];
+    }
+  else
+    {
+      operands[7] = operands[3];
+      operands[8] = operands[2];
+    }
+})
+
+;; Eliminate two reg-reg movs by inverting the condition of a cmov (#6).
+;; mov r1,r0; mov x,r2; cmp; cmov r2,r0 -> mov x,r0; cmp; cmov r1,r0
+(define_peephole2
+ [(set (match_operand:SWI248 0 "general_reg_operand")
+       (match_operand:SWI248 1 "general_reg_operand"))
+  (set (match_operand 9 "general_reg_operand")
+       (match_operand 10 "general_reg_operand"))
+  (set (match_operand:SWI248 2 "general_reg_operand")
+       (match_operand:SWI248 3))
+  (set (match_operand 4 "flags_reg_operand") (match_operand 5))
+  (set (match_dup 0)
+       (if_then_else:SWI248 (match_operator 6 "ix86_comparison_operator"
+                            [(match_dup 4) (const_int 0)])
+       (match_operand:SWI248 7 "general_reg_operand")
+       (match_operand:SWI248 8 "general_reg_operand")))]
+  "TARGET_CMOVE
+   && REGNO (operands[0]) != REGNO (operands[2])
+   && REGNO (operands[0]) != REGNO (operands[9])
+   && REGNO (operands[0]) != REGNO (operands[10])
+   && REGNO (operands[1]) != REGNO (operands[2])
+   && REGNO (operands[1]) != REGNO (operands[9])
+   && REGNO (operands[7]) != REGNO (operands[8])
+   /* insns 1 and 3 feed the cmove insn 5. */
+   && (REGNO (operands[2]) == REGNO (operands[7])
+       || REGNO (operands[2]) == REGNO (operands[8]))
+   && !reg_overlap_mentioned_p (operands[0], operands[5])
+   && !reg_overlap_mentioned_p (operands[2], operands[5])
+   && peep2_reg_dead_p (5, operands[2])"
+  [(set (match_dup 9) (match_dup 10))
+   (set (match_dup 0) (match_dup 3))
+   (set (match_dup 4) (match_dup 5))
+   (set (match_dup 0) (if_then_else:SWI248 (match_dup 6)
+                                          (match_dup 7)
+                                          (match_dup 8)))]
+{
+  operands[3] = replace_rtx (operands[3], operands[0], operands[1], true);
+  if (REGNO (operands[2]) == REGNO (operands[7]))
+    {
+      operands[7] = operands[0];
+      operands[8] = operands[1];
+    }
+  else
+    {
+      operands[7] = operands[1];
+      operands[8] = operands[0];
+    }
+})
+
 (define_insn "movhf_mask"
   [(set (match_operand:HF 0 "nonimmediate_operand" "=v,m,v")
        (unspec:HF
diff --git a/gcc/testsuite/gcc.target/i386/pr92578.c 
b/gcc/testsuite/gcc.target/i386/pr92578.c
new file mode 100644
index 0000000..eb3af26
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92578.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+int foo(int moves, int movecnt, int komove) {
+    int newcnt = movecnt;
+    if (moves == komove)
+        newcnt -= 2;
+    return newcnt;
+}
+
+/* { dg-final { scan-assembler-not "\[ \t]movl\[ \t]" } } */
+/* { dg-final { scan-assembler "\[ \t]cmovne\[ \t]" } } */
+

Reply via email to