Hello! The attached patch enables ix86_avoid_lea_for_addr to process zero-extended addresses. This patch should help Atom performance, especially in x32 mode.
Please note the complication with insn re-recognition in ix86_avoid_lea_for_addr; the patch works around the problem described in this comment: /* ix86_avoid_lea_for_addr re-recognizes insn and changes operands[] array behind our backs. To make things worse, zero-extended operands (zero_extend:DI (addr:SI)) are re-recognized as (addr:DI), since they also satisfy operand constraints of one of many *lea<mode> insn patterns. However, at this point we are looking only if the original insn is performing inherent zero extension, and will emit split insn sequence in SImode for this case. */ 2012-07-27 Uros Bizjak <ubiz...@gmail.com> * config/i386/i386.c (ix86_avoid_lea_for_addr): Handle zero-extended addresses. (ix86_split_lea_for_addr): Unconditionally convert target and all address operands to requested mode. * config/i386/i386.md (*lea<mode>): Determine mode of split insn sequence from the original insn pattern. The patch was bootstrapped and regression tested on x86_64-pc-linux-gnu {,-m32}, and also when configured with "--with-arch=core2 --with-cpu=atom". I will wait a day or two for possible comments before the patch is committed to mainline SVN. Uros.
Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 189904) +++ config/i386/i386.md (working copy) @@ -3474,13 +3474,28 @@ (match_operand:SI 1 "x86_64_zext_general_operand" "rmWz,0,r ,m ,r ,m")))] "TARGET_64BIT" - "@ - mov{l}\t{%1, %k0|%k0, %1} - # - movd\t{%1, %0|%0, %1} - movd\t{%1, %0|%0, %1} - %vmovd\t{%1, %0|%0, %1} - %vmovd\t{%1, %0|%0, %1}" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + if (ix86_use_lea_for_mov (insn, operands)) + return "lea{l}\t{%E1, %k0|%k0, %E1}"; + else + return "mov{l}\t{%1, %k0|%k0, %1}"; + + case TYPE_MULTI: + return "#"; + + case TYPE_MMXMOV: + return "movd\t{%1, %0|%0, %1}"; + + case TYPE_SSEMOV: + return "%vmovd\t{%1, %0|%0, %1}"; + + default: + gcc_unreachable (); + } +} [(set_attr "type" "imovx,multi,mmxmov,mmxmov,ssemov,ssemov") (set_attr "prefix" "orig,*,orig,orig,maybe_vex,maybe_vex") (set_attr "prefix_0f" "0,*,*,*,*,*") @@ -5479,7 +5494,26 @@ "reload_completed && ix86_avoid_lea_for_addr (insn, operands)" [(const_int 0)] { - ix86_split_lea_for_addr (operands, <MODE>mode); + enum machine_mode mode = <MODE>mode; + rtx addr; + + /* ix86_avoid_lea_for_addr re-recognizes insn and changes operands[] + array behind our backs. To make things worse, zero-extended oeprands + (zero_extend:DI (addr:SI)) are re-recognized as (addr:DI), since they + also satisfy operand constraints of one of many *lea<mode> insn patterns. + + However, at this point we are looking only if the original insn + is performing inherent zero extension, and will emit + split insn sequence in SImode for this case. */ + addr = SET_SRC (PATTERN (curr_insn)); + + /* Emit all operations in SImode for zero-extended addresses. Recall + that x86_64 inheretly zero-extends SImode operations to DImode. 
*/ + if (GET_CODE (addr) == ZERO_EXTEND + || GET_CODE (addr) == AND) + mode = SImode; + + ix86_split_lea_for_addr (operands, mode); DONE; } [(set_attr "type" "lea") Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 189904) +++ config/i386/i386.c (working copy) @@ -17036,11 +17036,6 @@ ix86_avoid_lea_for_addr (rtx insn, rtx operands[]) struct ix86_address parts; int ok; - /* FIXME: Handle zero-extended addresses. */ - if (GET_CODE (operands[1]) == ZERO_EXTEND - || GET_CODE (operands[1]) == AND) - return false; - /* Check we need to optimize. */ if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) return false; @@ -17124,7 +17119,7 @@ ix86_emit_binop (enum rtx_code code, enum machine_ It is assumed that it is allowed to clobber flags register at lea position. */ -extern void +void ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode) { unsigned int regno0, regno1, regno2; @@ -17135,7 +17130,7 @@ ix86_split_lea_for_addr (rtx operands[], enum mach ok = ix86_decompose_address (operands[1], &parts); gcc_assert (ok); - target = operands[0]; + target = gen_lowpart (mode, operands[0]); regno0 = true_regnum (target); regno1 = INVALID_REGNUM; @@ -17143,18 +17138,19 @@ ix86_split_lea_for_addr (rtx operands[], enum mach if (parts.base) { - if (GET_MODE (parts.base) != mode) - parts.base = gen_lowpart (mode, parts.base); + parts.base = gen_lowpart (mode, parts.base); regno1 = true_regnum (parts.base); } if (parts.index) { - if (GET_MODE (parts.index) != mode) - parts.index = gen_lowpart (mode, parts.index); + parts.index = gen_lowpart (mode, parts.index); regno2 = true_regnum (parts.index); } + if (parts.disp) + parts.disp = gen_lowpart (mode, parts.disp); + if (parts.scale > 1) { /* Case r1 = r1 + ... */