This patch addresses a (minor) missed-optimization regression revealed by Richard Biener's example/variant in comment #1 of PR target/92578.
int foo(int moves, int movecnt, int komove) { int newcnt = movecnt; if (moves == komove) newcnt -= 2; return newcnt; } Comparing code generation on godbolt.org shows an interesting evolution over time, as changes in register allocation affect the cmove sequence. GCC 4.1.2 (4 instructions, suboptimal mov after cmov). leal -2(%rsi), %eax cmpl %edx, %edi cmove %eax, %esi movl %esi, %eax GCC 4.4-4.7 (3 instructions, optimal) leal -2(%rsi), %eax cmpl %edx, %edi cmovne %esi, %eax GCC 5-7 (4 instructions, suboptimal mov before cmov) leal -2(%rsi), %ecx movl %esi, %eax cmpl %edx, %edi cmove %ecx, %eax GCC 8 (4 instructions, suboptimal mov before cmov, reordered) movl %esi, %eax leal -2(%rsi), %ecx cmpl %edx, %edi cmove %ecx, %eax GCC 9-trunk (5 instructions, two suboptimal movs before cmov) movl %edx, %ecx movl %esi, %eax leal -2(%rsi), %edx cmpl %ecx, %edi cmove %edx, %eax The challenge is that x86's two operand conditional moves, that require the destination to be one of the (register) sources, are tricky for reload, whose heuristics unify pseudos early (greedily?). In this case, we have the equivalent of "pseudo1 = cond ? pseudo2 : expression", and we'd like to see "pseudo1 = expression; pseudo1 = cond ? pseudo1 : pseudo2", but alas reload (currently and quite reasonably) prefers to place pseudo1 and pseudo2 in the same hard register if possible. Hence the solution is to fixup/tweak the register allocation during peephole2, as previously with https://gcc.gnu.org/pipermail/gcc-patches/2021-July/575998.html Instead of a single peephole2 to catch just the current idiom (last above), I've added the four peephole2s that would catch each of the (historical) suboptimal variants above and transform them into the ideal 3 insn form. Instrumenting the compiler shows, for example, that the (earliest) movl after cmov triggers over 50 times during stage2 of a GCC bootstrap. This patch has been tested on x86_64-pc-linux-gnu with make bootstrap and make -k check, both with and without --target_board=unix{-m32}, with no new failures. Ok for mainline? Or if this regression isn't serious enough for stage4 (or these patterns considered too risky), for stage1 when it reopens? I suspect the poor interaction between cmove usage and register allocation is one source of confusion when comparing code generation with vs. without cmove (the other major source of confusion being that well-predicted branches are free, but that prediction-quality is poorly predictable). 2022-04-25 Roger Sayle <ro...@nextmovesoftware.com> gcc/ChangeLog PR target/92578 * config/i386/i386.md (peephole2): Eliminate register-to-register moves by inverting the condition of a conditional move. gcc/testsuite/ChangeLog PR target/92578 * gcc.target/i386/pr92758.c: New test case. Thanks in advance, Roger --
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index c74edd1..db6034a 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -20751,6 +20751,158 @@ operands[9] = replace_rtx (operands[6], operands[0], operands[1], true); }) +;; Eliminate a reg-reg mov by inverting the condition of a cmov (#3). +;; cmov r0,r1; mov r1,r0 -> cmov r1,r0 +(define_peephole2 + [(set (match_operand:SWI248 0 "general_reg_operand") + (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:SWI248 2 "general_reg_operand") + (match_operand:SWI248 3 "general_reg_operand"))) + (set (match_operand:SWI248 4 "general_reg_operand") + (match_dup 0))] + "TARGET_CMOVE + && ((REGNO (operands[0]) == REGNO (operands[2]) + && REGNO (operands[3]) == REGNO (operands[4])) + || (REGNO (operands[0]) == REGNO (operands[3]) + && REGNO (operands[2]) == REGNO (operands[4]))) + && peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 4) (if_then_else:SWI248 (match_dup 1) + (match_dup 2) + (match_dup 3)))]) + +;; Eliminate a reg-reg mov by inverting the condition of a cmov (#4). +;; mov r1,r0; mov x,r2; cmp; cmov r2,r0 -> mov x,r0; cmp; cmov r1,r0 +(define_peephole2 + [(set (match_operand:SWI248 0 "general_reg_operand") + (match_operand:SWI248 1 "general_reg_operand")) + (set (match_operand:SWI248 2 "general_reg_operand") + (match_operand:SWI248 3)) + (set (match_operand 4 "flags_reg_operand") (match_operand 5)) + (set (match_dup 0) + (if_then_else:SWI248 (match_operator 6 "ix86_comparison_operator" + [(match_dup 4) (const_int 0)]) + (match_operand:SWI248 7 "general_reg_operand") + (match_operand:SWI248 8 "general_reg_operand")))] + "TARGET_CMOVE + && REGNO (operands[0]) != REGNO (operands[2]) + && REGNO (operands[1]) != REGNO (operands[2]) + && REGNO (operands[7]) != REGNO (operands[8]) + /* insns 1 and 2 feed the cmove insn 4. */ + && (REGNO (operands[2]) == REGNO (operands[7]) + || REGNO (operands[2]) == REGNO (operands[8])) + && !reg_overlap_mentioned_p (operands[0], operands[5]) + && !reg_overlap_mentioned_p (operands[2], operands[5]) + && peep2_reg_dead_p (4, operands[2])" + [(set (match_dup 0) (match_dup 3)) + (set (match_dup 4) (match_dup 5)) + (set (match_dup 0) (if_then_else:SWI248 (match_dup 6) + (match_dup 7) + (match_dup 8)))] +{ + operands[3] = replace_rtx (operands[3], operands[0], operands[1], true); + if (REGNO (operands[2]) == REGNO (operands[7])) + { + operands[7] = operands[0]; + operands[8] = operands[1]; + } + else + { + operands[7] = operands[1]; + operands[8] = operands[0]; + } +}) + +;; Eliminate a reg-reg mov by inverting the condition of a cmov (#5). +;; mov x,r0; mov r1,r2; cmp; cmov r0,r2 -> mov x,r2; cmp; cmov r1,r2 +(define_peephole2 + [(set (match_operand:SWI248 0 "general_reg_operand") + (match_operand:SWI248 1)) + (set (match_operand:SWI248 2 "general_reg_operand") + (match_operand:SWI248 3 "general_reg_operand")) + (set (match_operand 4 "flags_reg_operand") (match_operand 5)) + (set (match_dup 2) + (if_then_else:SWI248 (match_operator 6 "ix86_comparison_operator" + [(match_dup 4) (const_int 0)]) + (match_operand:SWI248 7 "general_reg_operand") + (match_operand:SWI248 8 "general_reg_operand")))] + "TARGET_CMOVE + && REGNO (operands[0]) != REGNO (operands[2]) + && REGNO (operands[0]) != REGNO (operands[3]) + && REGNO (operands[2]) != REGNO (operands[3]) + && REGNO (operands[7]) != REGNO (operands[8]) + /* insns 1 and 2 feed the cmove insn 4. */ + && (REGNO (operands[0]) == REGNO (operands[7]) + || REGNO (operands[0]) == REGNO (operands[8])) + && !reg_overlap_mentioned_p (operands[0], operands[5]) + && !reg_overlap_mentioned_p (operands[2], operands[5]) + && peep2_reg_dead_p (4, operands[0])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 4) (match_dup 5)) + (set (match_dup 2) (if_then_else:SWI248 (match_dup 6) + (match_dup 7) + (match_dup 8)))] +{ + if (REGNO (operands[0]) == REGNO (operands[7])) + { + operands[7] = operands[2]; + operands[8] = operands[3]; + } + else + { + operands[7] = operands[3]; + operands[8] = operands[2]; + } +}) + +;; Eliminate two reg-reg movs by inverting the condition of a cmov (#6). +;; mov r1,r0; mov x,r2; cmp; cmov r2,r0 -> mov x,r0; cmp; cmov r1,r0 +(define_peephole2 + [(set (match_operand:SWI248 0 "general_reg_operand") + (match_operand:SWI248 1 "general_reg_operand")) + (set (match_operand 9 "general_reg_operand") + (match_operand 10 "general_reg_operand")) + (set (match_operand:SWI248 2 "general_reg_operand") + (match_operand:SWI248 3)) + (set (match_operand 4 "flags_reg_operand") (match_operand 5)) + (set (match_dup 0) + (if_then_else:SWI248 (match_operator 6 "ix86_comparison_operator" + [(match_dup 4) (const_int 0)]) + (match_operand:SWI248 7 "general_reg_operand") + (match_operand:SWI248 8 "general_reg_operand")))] + "TARGET_CMOVE + && REGNO (operands[0]) != REGNO (operands[2]) + && REGNO (operands[0]) != REGNO (operands[9]) + && REGNO (operands[0]) != REGNO (operands[10]) + && REGNO (operands[1]) != REGNO (operands[2]) + && REGNO (operands[1]) != REGNO (operands[9]) + && REGNO (operands[7]) != REGNO (operands[8]) + /* insns 1 and 3 feed the cmove insn 5. */ + && (REGNO (operands[2]) == REGNO (operands[7]) + || REGNO (operands[2]) == REGNO (operands[8])) + && !reg_overlap_mentioned_p (operands[0], operands[5]) + && !reg_overlap_mentioned_p (operands[2], operands[5]) + && peep2_reg_dead_p (5, operands[2])" + [(set (match_dup 9) (match_dup 10)) + (set (match_dup 0) (match_dup 3)) + (set (match_dup 4) (match_dup 5)) + (set (match_dup 0) (if_then_else:SWI248 (match_dup 6) + (match_dup 7) + (match_dup 8)))] +{ + operands[3] = replace_rtx (operands[3], operands[0], operands[1], true); + if (REGNO (operands[2]) == REGNO (operands[7])) + { + operands[7] = operands[0]; + operands[8] = operands[1]; + } + else + { + operands[7] = operands[1]; + operands[8] = operands[0]; + } +}) + (define_insn "movhf_mask" [(set (match_operand:HF 0 "nonimmediate_operand" "=v,m,v") (unspec:HF diff --git a/gcc/testsuite/gcc.target/i386/pr92578.c b/gcc/testsuite/gcc.target/i386/pr92578.c new file mode 100644 index 0000000..eb3af26 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr92578.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2" } */ + +int foo(int moves, int movecnt, int komove) { + int newcnt = movecnt; + if (moves == komove) + newcnt -= 2; + return newcnt; +} + +/* { dg-final { scan-assembler-not "\[ \t]movl\[ \t]" } } */ +/* { dg-final { scan-assembler "\[ \t]cmovne\[ \t]" } } */ +