Hello! The attached patch adds zero-extended variants of the PLUS and MULT simple LEA peephole2s. The patch also disables the PLUS peephole2s on TARGET_OPT_AGU targets (Atom), since on those targets the relevant LEA instructions have already been split according to LEA vs. ADD priority.
2012-08-10  Uros Bizjak  <ubiz...@gmail.com>

	* config/i386/i386.md (simple LEA peephole2s): Add zero-extend
	variants of PLUS and MULT simple LEA patterns.  Disable PLUS
	patterns for TARGET_OPT_AGU.

Tested on x86_64-pc-linux-gnu {,-m32} and committed to mainline SVN.

Uros.
Review notes (free text ahead of the "Index:" line is skipped by patch(1)):
  * The line structure of this diff was reconstructed from a copy whose
    whitespace had been collapsed.  Indentation is rebuilt with spaces and
    may not match the tabs in the real file, so apply with "patch -l".
  * Two corrections were made to added lines in the last hunk; hunk line
    counts are unchanged:
      - zero-extended MULT peephole: the replacement used a modeless
        "(zero_extend ...)"; it is now "(zero_extend:DI ...)", matching
        every other zero-extend pattern in this patch.
      - zero-extended subreg MULT peephole: the condition tested
        operands[2], which that pattern never matches; the power-of-two
        constant is operand 1, so the test now reads operands[1].

Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 190298)
+++ config/i386/i386.md (working copy)
@@ -17317,12 +17317,15 @@
 
 ;; Attempt to convert simple lea to add/shift.
 ;; These can be created by move expanders.
+;; Disable PLUS peepholes on TARGET_OPT_AGU, since all
+;; relevant lea instructions were already split.
 
 (define_peephole2
   [(set (match_operand:SWI48 0 "register_operand")
         (plus:SWI48 (match_dup 0)
                     (match_operand:SWI48 1 "<nonmemory_operand>")))]
-  "peep2_regno_dead_p (0, FLAGS_REG)"
+  "!TARGET_OPT_AGU
+   && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1)))
               (clobber (reg:CC FLAGS_REG))])])
 
@@ -17330,7 +17333,8 @@
   [(set (match_operand:SWI48 0 "register_operand")
         (plus:SWI48 (match_operand:SWI48 1 "<nonmemory_operand>")
                     (match_dup 0)))]
-  "peep2_regno_dead_p (0, FLAGS_REG)"
+  "!TARGET_OPT_AGU
+   && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1)))
               (clobber (reg:CC FLAGS_REG))])])
 
@@ -17338,9 +17342,9 @@
   [(set (match_operand:SI 0 "register_operand")
         (subreg:SI (plus:DI (match_operand:DI 1 "register_operand")
                             (match_operand:DI 2 "nonmemory_operand")) 0))]
-  "TARGET_64BIT
-   && peep2_regno_dead_p (0, FLAGS_REG)
-   && REGNO (operands[0]) == REGNO (operands[1])"
+  "TARGET_64BIT && !TARGET_OPT_AGU
+   && REGNO (operands[0]) == REGNO (operands[1])
+   && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2)))
               (clobber (reg:CC FLAGS_REG))])]
   "operands[2] = gen_lowpart (SImode, operands[2]);")
@@ -17349,27 +17353,81 @@
   [(set (match_operand:SI 0 "register_operand")
         (subreg:SI (plus:DI (match_operand:DI 1 "nonmemory_operand")
                             (match_operand:DI 2 "register_operand")) 0))]
-  "TARGET_64BIT
-   && peep2_regno_dead_p (0, FLAGS_REG)
-   && REGNO (operands[0]) == REGNO (operands[2])"
+  "TARGET_64BIT && !TARGET_OPT_AGU
+   && REGNO (operands[0]) == REGNO (operands[2])
+   && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1)))
               (clobber (reg:CC FLAGS_REG))])]
   "operands[1] = gen_lowpart (SImode, operands[1]);")
 
 (define_peephole2
+  [(set (match_operand:DI 0 "register_operand")
+        (zero_extend:DI
+          (plus:SI (match_operand:SI 1 "register_operand")
+                   (match_operand:SI 2 "nonmemory_operand"))))]
+  "TARGET_64BIT && !TARGET_OPT_AGU
+   && REGNO (operands[0]) == REGNO (operands[1])
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0)
+                   (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))
+              (clobber (reg:CC FLAGS_REG))])])
+
+(define_peephole2
+  [(set (match_operand:DI 0 "register_operand")
+        (zero_extend:DI
+          (plus:SI (match_operand:SI 1 "nonmemory_operand")
+                   (match_operand:SI 2 "register_operand"))))]
+  "TARGET_64BIT && !TARGET_OPT_AGU
+   && REGNO (operands[0]) == REGNO (operands[2])
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0)
+                   (zero_extend:DI (plus:SI (match_dup 2) (match_dup 1))))
+              (clobber (reg:CC FLAGS_REG))])])
+
+(define_peephole2
+  [(set (match_operand:DI 0 "register_operand")
+        (zero_extend:DI
+          (subreg:SI (plus:DI (match_dup 0)
+                              (match_operand:DI 1 "nonmemory_operand")) 0)))]
+  "TARGET_64BIT && !TARGET_OPT_AGU
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0)
+                   (zero_extend:DI (plus:SI (match_dup 2) (match_dup 1))))
+              (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[1] = gen_lowpart (SImode, operands[1]);
+  operands[2] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_peephole2
+  [(set (match_operand:DI 0 "register_operand")
+        (zero_extend:DI
+          (subreg:SI (plus:DI (match_operand:DI 1 "nonmemory_operand")
+                              (match_dup 0)) 0)))]
+  "TARGET_64BIT && !TARGET_OPT_AGU
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0)
+                   (zero_extend:DI (plus:SI (match_dup 2) (match_dup 1))))
+              (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[1] = gen_lowpart (SImode, operands[1]);
+  operands[2] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_peephole2
   [(set (match_operand:SWI48 0 "register_operand")
         (mult:SWI48 (match_dup 0)
                     (match_operand:SWI48 1 "const_int_operand")))]
   "exact_log2 (INTVAL (operands[1])) >= 0
    && peep2_regno_dead_p (0, FLAGS_REG)"
-  [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 2)))
+  [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
               (clobber (reg:CC FLAGS_REG))])]
-  "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
+  "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
 
 (define_peephole2
   [(set (match_operand:SI 0 "register_operand")
         (subreg:SI (mult:DI (match_operand:DI 1 "register_operand")
-                        (match_operand:DI 2 "const_int_operand")) 0))]
+                            (match_operand:DI 2 "const_int_operand")) 0))]
   "TARGET_64BIT
    && exact_log2 (INTVAL (operands[2])) >= 0
    && REGNO (operands[0]) == REGNO (operands[1])
@@ -17378,6 +17436,36 @@
               (clobber (reg:CC FLAGS_REG))])]
   "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")
 
+(define_peephole2
+  [(set (match_operand:DI 0 "register_operand")
+        (zero_extend:DI
+          (mult:SI (match_operand:SI 1 "register_operand")
+                   (match_operand:SI 2 "const_int_operand"))))]
+  "TARGET_64BIT
+   && exact_log2 (INTVAL (operands[2])) >= 0
+   && REGNO (operands[0]) == REGNO (operands[1])
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0)
+                   (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))
+              (clobber (reg:CC FLAGS_REG))])]
+  "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")
+
+(define_peephole2
+  [(set (match_operand:DI 0 "register_operand")
+        (zero_extend:DI
+          (subreg:SI (mult:DI (match_dup 0)
+                              (match_operand:DI 1 "const_int_operand")) 0)))]
+  "TARGET_64BIT
+   && exact_log2 (INTVAL (operands[1])) >= 0
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0)
+                   (zero_extend:DI (ashift:SI (match_dup 2) (match_dup 1))))
+              (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));
+  operands[2] = gen_lowpart (SImode, operands[0]);
+})
+
 ;; The ESP adjustments can be done by the push and pop instructions.  Resulting
 ;; code is shorter, since push is only 1 byte, while add imm, %esp is 3 bytes.
 ;; On many CPUs it is also faster, since special hardware to avoid esp