This patch resolves PR target/110104, a missed optimization on x86 around adc with memory operands. In i386.md, there's a peephole2 after the pattern for *add<mode>3_cc_overflow_1 that converts the sequence reg = add(reg,mem); mem = reg [where the reg is dead afterwards] into the equivalent mem = add(mem,reg). The equivalent peephole2 for adc is missing (after addcarry<mode>), and is added by this patch.
For the example code provided in the bugzilla PR: Before: movq %rsi, %rax mulq %rdx addq %rax, (%rdi) movq %rdx, %rax adcq 8(%rdi), %rax adcq $0, 16(%rdi) movq %rax, 8(%rdi) ret After: movq %rsi, %rax mulq %rdx addq %rax, (%rdi) adcq %rdx, 8(%rdi) adcq $0, 16(%rdi) ret Note that the addq in this example has been transformed by the existing peephole2 described above. This patch has been tested on x86_64-pc-linux-gnu with make bootstrap and make -k check, both with and without --target_board=unix{-m32} with no new failures. Ok for mainline? 2023-06-07 Roger Sayle <ro...@nextmovesoftware.com> gcc/ChangeLog PR target/110104 * config/i386/i386.md (define_peephole2): Transform reg=adc(reg,mem) followed by mem=reg into mem=adc(mem,reg) when applicable. gcc/testsuite/ChangeLog PR target/110104 * gcc.target/i386/pr110104.c: New test case. Thanks in advance, Roger --
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index e6ebc46..33ec45f 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -7870,6 +7870,51 @@ (set_attr "pent_pair" "pu") (set_attr "mode" "<MODE>")]) +;; peephole2 for addcarry<mode> matching one for *add<mode>3_cc_overflow_1. +;; reg = adc(reg,mem); mem = reg -> mem = adc(mem,reg). +(define_peephole2 + [(parallel + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extend:<DWI> + (plus:SWI48 + (plus:SWI48 + (match_operator:SWI48 3 "ix86_carry_flag_operator" + [(match_operand 2 "flags_reg_operand") (const_int 0)]) + (match_operand:SWI48 0 "general_reg_operand")) + (match_operand:SWI48 1 "memory_operand"))) + (plus:<DWI> + (zero_extend:<DWI> (match_dup 1)) + (match_operator:<DWI> 4 "ix86_carry_flag_operator" + [(match_dup 2) (const_int 0)])))) + (set (match_dup 0) + (plus:SWI48 (plus:SWI48 (match_op_dup 3 + [(match_dup 2) (const_int 0)]) + (match_dup 0)) + (match_dup 1)))]) + (set (match_dup 1) (match_dup 0))] + "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && peep2_reg_dead_p (2, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1])" + [(parallel + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extend:<DWI> + (plus:SWI48 + (plus:SWI48 + (match_op_dup 3 [(match_dup 2) (const_int 0)]) + (match_dup 1)) + (match_dup 0))) + (plus:<DWI> + (zero_extend:<DWI> (match_dup 0)) + (match_op_dup 4 + [(match_dup 2) (const_int 0)])))) + (set (match_dup 1) + (plus:SWI48 (plus:SWI48 (match_op_dup 3 + [(match_dup 2) (const_int 0)]) + (match_dup 1)) + (match_dup 0)))])]) + (define_expand "addcarry<mode>_0" [(parallel [(set (reg:CCC FLAGS_REG) diff --git a/gcc/testsuite/gcc.target/i386/pr110104.c b/gcc/testsuite/gcc.target/i386/pr110104.c new file mode 100644 index 0000000..bd814f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr110104.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2" } */ + +typedef unsigned long long u64; +typedef unsigned __int128 u128; +void testcase1(u64 *acc, u64 a, u64 b) +{ + u128 res = (u128)a*b; + u64 lo = res, hi = res >> 64; + unsigned char cf = 0; + cf = __builtin_ia32_addcarryx_u64 (cf, lo, acc[0], acc+0); + cf = __builtin_ia32_addcarryx_u64 (cf, hi, acc[1], acc+1); + cf = __builtin_ia32_addcarryx_u64 (cf, 0, acc[2], acc+2); +} + +/* { dg-final { scan-assembler-times "movq" 1 } } */