It looks like Jan's patch at
http://gcc.gnu.org/ml/gcc-patches/2005-07/msg02021.html is causing a
code size regression in i686. I'll consider a 32-bit target, and this
testcase
char **
VTallocbuf(char **allbuf, unsigned long savelines)
{
return &allbuf[savelines];
}
For i686 we produce
movl 8(%esp), %eax
movl 4(%esp), %edx
sall $2, %eax
addl %edx, %eax
instead of
movl 8(%esp), %eax
movl 4(%esp), %edx
leal (%edx,%eax,4), %eax
However even in this case a no-lea code could be feasible:
movl 8(%esp), %eax
sall $2, %eax
addl 4(%esp), %eax
And this is exactly the rtl we have until peephole2, where this peephole
splits the instruction:
;; Don't do logical operations with memory inputs.
(define_peephole2
[(match_scratch:SI 2 "r")
(parallel [(set (match_operand:SI 0 "register_operand" "")
(match_operator:SI 3 "arith_or_logical_operator"
[(match_dup 0)
(match_operand:SI 1 "memory_operand" "")]))
(clobber (reg:CC FLAGS_REG))])]
"! optimize_size && ! TARGET_READ_MODIFY"
[(set (match_dup 2) (match_dup 1))
(parallel [(set (match_dup 0)
(match_op_dup 3 [(match_dup 0) (match_dup 2)]))
(clobber (reg:CC FLAGS_REG))])]
"")
I think that Jan's patch should be conditionalized: if !optimize_size &&
!TARGET_READ_MODIFY, the transformation he removed will be done anyway,
and too late in the game.
Let's see what the hunks do...
Index: expr.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/expr.c,v
retrieving revision 1.806
diff -c -3 -p -r1.806 expr.c
*** expr.c 25 Jul 2005 12:04:45 -0000 1.806
--- expr.c 29 Jul 2005 12:10:40 -0000
*************** expand_expr_real_1 (tree exp, rtx target
*** 6578,6595 ****
target = 0;
}
- /* If will do cse, generate all results into pseudo registers
- since 1) that allows cse to find more things
- and 2) otherwise cse could produce an insn the machine
- cannot support. An exception is a CONSTRUCTOR into a multi-word
- MEM: that's much more likely to be most efficient into the MEM.
- Another is a CALL_EXPR which must return in memory. */
-
- if (! cse_not_expected && mode != BLKmode && target
- && (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
- && ! (code == CONSTRUCTOR && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
- && ! (code == CALL_EXPR && aggregate_value_p (exp, exp)))
- target = 0;
switch (code)
{
I think this ought to be left in. Not because CSE can find more things,
but because in general an instruction selection pass ought to recombine
MEMs at their usage points if it is worthwhile. To this end,
ix86_rtx_costs could be taught about TARGET_READ_MODIFY, with something
like:
case MEM:
if (!optimize && !TARGET_READ_MODIFY
&& GET_RTX_CLASS (outer_code) == RTX_BIN_ARITH)
*total++;
break;
Also consider that we still lack tree-based load PRE (PR23455), so we
still need CSE to "find more things". Load PRE is the major obstacle
towards removing CSE path following.
--- 6578,6583 ----
Index: optabs.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/optabs.c,v
retrieving revision 1.287
diff -c -3 -p -r1.287 optabs.c
*** optabs.c 12 Jul 2005 09:20:02 -0000 1.287
--- optabs.c 29 Jul 2005 12:10:41 -0000
*************** expand_vec_cond_expr (tree vec_cond_expr
*** 5475,5481 ****
if (icode == CODE_FOR_nothing)
return 0;
! if (!target)
target = gen_reg_rtx (mode);
/* Get comparison rtx. First expand both cond expr operands. */
--- 5475,5481 ----
if (icode == CODE_FOR_nothing)
return 0;
! if (!target || !insn_data[icode].operand[0].predicate (target, mode))
target = gen_reg_rtx (mode);
/* Get comparison rtx. First expand both cond expr operands. */
This is partially unrelated, it does not hurt.
Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.843
diff -c -3 -p -r1.843 i386.c
*** config/i386/i386.c 18 Jul 2005 06:39:18 -0000 1.843
--- config/i386/i386.c 29 Jul 2005 12:10:42 -0000
*************** ix86_fixup_binary_operands (enum rtx_cod
*** 8154,8170 ****
&& GET_RTX_CLASS (code) != RTX_COMM_ARITH)
src1 = force_reg (mode, src1);
- /* If optimizing, copy to regs to improve CSE */
- if (optimize && ! no_new_pseudos)
- {
- if (GET_CODE (dst) == MEM)
- dst = gen_reg_rtx (mode);
- if (GET_CODE (src1) == MEM)
- src1 = force_reg (mode, src1);
- if (GET_CODE (src2) == MEM)
- src2 = force_reg (mode, src2);
- }
-
src1 = operands[1] = src1;
src2 = operands[2] = src2;
return dst;
This should be reinstated and conditionalized as
if (optimize && !optimize_size && !no_new_pseudos
&& !TARGET_READ_MODIFY)
and likewise in ix86_expand_unary_operator.
*************** ix86_expand_fp_absneg_operator (enum rtx
*** 8410,8416 ****
matching_memory = false;
if (MEM_P (dst))
{
! if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
matching_memory = true;
else
dst = gen_reg_rtx (mode);
--- 8626,8632 ----
matching_memory = false;
if (MEM_P (dst))
{
! if (rtx_equal_p (dst, src))
matching_memory = true;
else
dst = gen_reg_rtx (mode);
This should become something like
if (!rtx_equal_p (dst, src)
|| (optimize && !optimize_size && !no_new_pseudos
&& !TARGET_READ_MODIFY))
dst = gen_reg_rtx (mode);
else
matching_memory = true;
Reversing the if allows to write the same test as above.
Does this make sense?
Paolo