Hi, The patch fixes the gcc.target/i386/pr49095.c FAIL in PR61225. The test case tends to check a peephole2 optimization, which optimizes the following sequence
2: bx:SI=ax:SI 25: ax:SI=[bx:SI] 7: {ax:SI=ax:SI-0x1;clobber flags:CC;} 8: [bx:SI]=ax:SI 9: flags:CCZ=cmp(ax:SI,0) to 2: bx:SI=ax:SI 41: {flags:CCZ=cmp([bx:SI]-0x1,0);[bx:SI]=[bx:SI]-0x1;} The enhanced shrink-wrapping, which calls copyprop_hardreg_forward changes the INSN 25 to 25: ax:SI=[ax:SI] Then peephole2 can not optimize it since two memory_operands look like different. To fix it, the patch adds another peephole2 rule to read one more insn. From the register copy, it knows the address is the same. Bootstrap and no make check regression on X86-64. OK for trunk? Thanks! -Zhenqiang ChangeLog: 2014-05-21 Zhenqiang Chen <zhenqiang.c...@linaro.org> Part of PR rtl-optimization/61225 * config/i386/i386.md: New peephole2 rule. * rtl.c (rtx_equal_p_if_reg_equal): New function. * rtl.h (rtx_equal_p_if_reg_equal): New prototype. testsuite/ChangeLog: 2014-05-21 Zhenqiang Chen <zhenqiang.c...@linaro.org> * gcc.dg/pr61225.c: New test. diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 44e80ec..40639a5 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -17021,6 +17021,49 @@ operands[5], const0_rtx); }) +;; Attempt to use arith or logical operations with memory outputs with +;; setting of flags. The difference from previous pattern is that it includes +;; one more register copy insn, which is copy-forwarded to the address. +(define_peephole2 + [(set (match_operand:SWI 6 "register_operand") + (match_operand:SWI 0 "register_operand")) + (set (match_dup 0) + (match_operand:SWI 1 "memory_operand")) + (parallel [(set (match_dup 0) + (match_operator:SWI 3 "plusminuslogic_operator" + [(match_dup 0) + (match_operand:SWI 2 "<nonmemory_operand>")])) + (clobber (reg:CC FLAGS_REG))]) + (set (match_operand:SWI 7 "memory_operand") (match_dup 0)) + (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))] + "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && peep2_reg_dead_p (5, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[2]) + && !reg_overlap_mentioned_p (operands[0], operands[6]) + && (GET_MODE (operands[0]) == GET_MODE (operands[6])) + && (MEM_ADDR_SPACE (operands[1]) == MEM_ADDR_SPACE (operands[7])) + && rtx_equal_p_if_reg_equal (operands[0], operands[6], + XEXP (operands[1], 0), XEXP (operands[7], 0)) + && (<MODE>mode != QImode + || immediate_operand (operands[2], QImode) + || q_regs_operand (operands[2], QImode)) + && ix86_match_ccmode (peep2_next_insn (4), + (GET_CODE (operands[3]) == PLUS + || GET_CODE (operands[3]) == MINUS) + ? CCGOCmode : CCNOmode)" + [(set (match_dup 6) (match_dup 0)) + (parallel [(set (match_dup 4) (match_dup 5)) + (set (match_dup 1) (match_op_dup 3 [(match_dup 1) + (match_dup 2)]))])] +{ + operands[4] = SET_DEST (PATTERN (peep2_next_insn (4))); + operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode, + copy_rtx (operands[1]), + copy_rtx (operands[2])); + operands[5] = gen_rtx_COMPARE (GET_MODE (operands[4]), + operands[5], const0_rtx); +}) + (define_peephole2 [(parallel [(set (match_operand:SWI 0 "register_operand") (match_operator:SWI 2 "plusminuslogic_operator" diff --git a/gcc/rtl.c b/gcc/rtl.c index 520f9a8..99418fc 100644 --- a/gcc/rtl.c +++ b/gcc/rtl.c @@ -654,6 +654,82 @@ rtx_equal_p (const_rtx x, const_rtx y) return 1; } +/* Return 1 if X and Y are equal rtx's if RX used in X and RY used + Y are equal. */ + +int +rtx_equal_p_if_reg_equal (const_rtx rx, const_rtx ry, + const_rtx x, const_rtx y) +{ + int i; + enum rtx_code code; + const char *fmt; + + gcc_assert (REG_P (rx) && REG_P (ry) && x && y); + + code = GET_CODE (x); + /* Rtx's of different codes cannot be equal. */ + if (code != GET_CODE (y)) + return 0; + + if (rtx_equal_p (x, y) == 1) + return 1; + + /* Since rx == ry, if x == rx && y == ry, x == y. */ + if (REG_P (x) && REG_P (y) + && GET_MODE (x) == GET_MODE (rx) + && REGNO (x) == REGNO (rx) + && GET_MODE (y) == GET_MODE (ry) + && REGNO (y) == REGNO (ry)) + return 1; + + /* Compare the elements. If any pair of corresponding elements + fail to match, return 0 for the whole thing. */ + + fmt = GET_RTX_FORMAT (code); + for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) + { + switch (fmt[i]) + { + case 'e': + if (rtx_equal_p (XEXP (x, i), XEXP (y, i)) == 0) + { + rtx xi = XEXP (x, i); + rtx yi = XEXP (y, i); + + if (!(REG_P (xi) && REG_P (yi) + && GET_MODE (xi) == GET_MODE (rx) + && REGNO (xi) == REGNO (rx) + && GET_MODE (yi) == GET_MODE (ry) + && REGNO (yi) == REGNO (ry))) + return 0; + } + break; + case 'w': + if (XWINT (x, i) != XWINT (y, i)) + return 0; + break; + + case 'n': + case 'i': + if (XINT (x, i) != XINT (y, i)) + return 0; + break; + + case 'u': + case '0': + case 't': + break; + + default: + if (rtx_equal_p (XEXP (x, i), XEXP (y, i)) == 0) + return 0; + break; + } + } + return 1; +} + /* Iteratively hash rtx X. */ hashval_t diff --git a/gcc/rtl.h b/gcc/rtl.h index 10ae1e9..7f9e9e7 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -1983,6 +1983,8 @@ extern unsigned int rtx_size (const_rtx); extern rtx shallow_copy_rtx_stat (const_rtx MEM_STAT_DECL); #define shallow_copy_rtx(a) shallow_copy_rtx_stat (a MEM_STAT_INFO) extern int rtx_equal_p (const_rtx, const_rtx); +extern int rtx_equal_p_if_reg_equal (const_rtx, const_rtx, + const_rtx, const_rtx); extern hashval_t iterative_hash_rtx (const_rtx, hashval_t); /* In emit-rtl.c */ diff --git a/gcc/testsuite/gcc.target/i386/pr61225.c b/gcc/testsuite/gcc.target/i386/pr61225.c new file mode 100644 index 0000000..1b33e9c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr61225.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-Os" } */ +/* { dg-options "-Os -mregparm=2" { target ia32 } } */ + +void foo (void *); + +int * +f1 (int *x) +{ + if (!--*(x)) + foo (x); + return x; +} + +int * +f2 (int *x) +{ + if (!--*(x + 4)) + foo (x); + return x; +} + +/* { dg-final { scan-assembler-not "test\[lq\]" } } */