This patch improves existing peephole optimizations that merge individual LDRs into LDM, in the case that the order of registers in LDR instructions is not ascending, but the loaded values can be reordered because their uses commute.
There are two changes: * use rtx_equal_p to compare operands (instead of plain ==) * identify more cases of dead registers in the pattern. For example, the following sequence LDR r1, [r2] LDR r0, [r2, #4] ADD r0, r0, r1 can be transformed into LDRD r0, r1, [r2] ADD r0, r0, r1 when r1 is dead after ADD. Such optimization opportunities are missed by the existing peephole conditions, because r0 is not dead after ADD. This patch enables such transformations. No regression on qemu for --target=arm-none-eabi --with-cpu=cortex-a15 and a9. This patch was originally submitted as part of a sequence of patches improving LDRD/STRD/LDM/STM generation: http://gcc.gnu.org/ml/gcc-patches/2011-11/msg00920.html but it is independent and it fixes a failure in one of the regression tests: PASS: gcc.target/arm/pr40457-1.c scan-assembler ldm Is it OK for GCC 4.7 Stage 4? Thank you, Greta gcc/ChangeLog 2012-02-28 Greta Yorsh <greta.yo...@arm.com> * config/arm/arm-ldmstm.ml: Improved conditions of peepholes that generate LDM followed by a commutative operator. * config/arm/ldmstm.md: Regenerated.
diff --git a/gcc/config/arm/arm-ldmstm.ml b/gcc/config/arm/arm-ldmstm.ml index 221edd2..5f5a5e0 100644 --- a/gcc/config/arm/arm-ldmstm.ml +++ b/gcc/config/arm/arm-ldmstm.ml @@ -216,9 +216,10 @@ let write_ldm_commutative_peephole thumb = Printf.printf "%s (match_operand:SI %d \"s_register_operand\" \"\")]))\n" indent (nregs * 2 + 3); Printf.printf "%s (clobber (reg:CC CC_REGNUM))])]\n" indent end; - Printf.printf " \"(((operands[%d] == operands[0] && operands[%d] == operands[1])\n" (nregs * 2 + 2) (nregs * 2 + 3); - Printf.printf " || (operands[%d] == operands[0] && operands[%d] == operands[1]))\n" (nregs * 2 + 3) (nregs * 2 + 2); - Printf.printf " && peep2_reg_dead_p (%d, operands[0]) && peep2_reg_dead_p (%d, operands[1]))\"\n" (nregs + 1) (nregs + 1); + Printf.printf " \"(((rtx_equal_p (operands[%d], operands[0]) && rtx_equal_p (operands[%d], operands[1]))\n" (nregs * 2 + 2) (nregs * 2 + 3); + Printf.printf " || (rtx_equal_p (operands[%d], operands[0]) && rtx_equal_p (operands[%d], operands[1])))\n" (nregs * 2 + 3) (nregs * 2 + 2); + Printf.printf " && (peep2_reg_dead_p (%d, operands[0]) || rtx_equal_p (operands[0], operands[%d]))\n" (nregs + 1) (nregs * 2); + Printf.printf " && (peep2_reg_dead_p (%d, operands[1]) || rtx_equal_p (operands[1], operands[%d])))\"\n" (nregs + 1) (nregs * 2); begin if thumb then Printf.printf " [(set (match_dup %d) (match_op_dup %d [(match_dup %d) (match_dup %d)]))]\n" diff --git a/gcc/config/arm/ldmstm.md b/gcc/config/arm/ldmstm.md index 5db4a32..5db3d57 100644 --- a/gcc/config/arm/ldmstm.md +++ b/gcc/config/arm/ldmstm.md @@ -1160,9 +1160,10 @@ [(match_operand:SI 6 "s_register_operand" "") (match_operand:SI 7 "s_register_operand" "")])) (clobber (reg:CC CC_REGNUM))])] - "(((operands[6] == operands[0] && operands[7] == operands[1]) - || (operands[7] == operands[0] && operands[6] == operands[1])) - && peep2_reg_dead_p (3, operands[0]) && peep2_reg_dead_p (3, operands[1]))" + "(((rtx_equal_p (operands[6], operands[0]) && rtx_equal_p 
(operands[7], operands[1])) + || (rtx_equal_p (operands[7], operands[0]) && rtx_equal_p (operands[6], operands[1]))) + && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4])) + && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4])))" [(parallel [(set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)])) (clobber (reg:CC CC_REGNUM))])] @@ -1180,9 +1181,10 @@ (match_operator:SI 5 "commutative_binary_operator" [(match_operand:SI 6 "s_register_operand" "") (match_operand:SI 7 "s_register_operand" "")]))] - "(((operands[6] == operands[0] && operands[7] == operands[1]) - || (operands[7] == operands[0] && operands[6] == operands[1])) - && peep2_reg_dead_p (3, operands[0]) && peep2_reg_dead_p (3, operands[1]))" + "(((rtx_equal_p (operands[6], operands[0]) && rtx_equal_p (operands[7], operands[1])) + || (rtx_equal_p (operands[7], operands[0]) && rtx_equal_p (operands[6], operands[1]))) + && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4])) + && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4])))" [(set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))] { if (!gen_ldm_seq (operands, 2, true))