Hi, this is improved patch I am testing. The basic idea is to remove push expanders for cases where we do not have push instruction anyway. emit_move_insns then resorts to unconditonally call move expander with push operand. I expended ix86_expand_vector_move to handle it gracefully and for that I borrowed emit_move_resolve_push function from expr.c since it seemed pointless to preserve duplicated logic in ix86_expand_push.
I can easily imagine that scheduling around function call sequences matters, so I also updated push/pop expanders to preserve memory attributes. Eventually I found the attributes to be blank because of logic in expr.c that clears alias info when sibcall is enabled. We can now do better by only disabling it in functions that actually do sibcalls. Bootstrap/regtest running on x86_64-linux, OK for the non-i386 parts if it passes? Honza * expr.c (emit_move_resolve_push): Export; be bit more selective on when to clear alias set. * expr.h (emit_move_resolve_push): Declare. * function.h (struct function): Add tail_call_marked. * tree-tailcall.c (optimize_tail_call): Set tail_call_marked. * config/i386/i386-protos.h (ix86_expand_push): Remove. * config/i386/i386.md (TImode move expander): De not call ix86_expand_push. (FP push expanders): Preserve memory attributes. * config/i386/sse.md (push<mode>1): Remove. * config/i386/i386.c (ix86_expand_vector_move): Handle push operation. (ix86_expand_push): Remove. * config/i386/mmx.md (push<mode>1): Remove. Index: expr.c =================================================================== --- expr.c (revision 206946) +++ expr.c (working copy) @@ -3221,7 +3221,7 @@ /* A subroutine of emit_move_insn_1. X is a push_operand in MODE. Return an equivalent MEM that does not use an auto-increment. */ -static rtx +rtx emit_move_resolve_push (enum machine_mode mode, rtx x) { enum rtx_code code = GET_CODE (XEXP (x, 0)); @@ -4070,7 +4070,7 @@ { set_mem_attributes (dest, type, 1); - if (flag_optimize_sibling_calls) + if (cfun->tail_call_marked) /* Function incoming arguments may overlap with sibling call outgoing arguments and we cannot allow reordering of reads from function arguments with stores to outgoing arguments Index: expr.h =================================================================== --- expr.h (revision 206946) +++ expr.h (working copy) @@ -413,6 +413,7 @@ extern rtx emit_move_complex_push (enum machine_mode, rtx, rtx); extern rtx emit_move_complex_parts (rtx, rtx); +extern rtx emit_move_resolve_push (enum machine_mode, rtx); /* Push a block of length SIZE (perhaps variable) and return an rtx to address the beginning of the block. */ Index: function.h =================================================================== --- function.h (revision 206946) +++ function.h (working copy) @@ -667,6 +667,9 @@ /* Nonzero if the current function contains any loops with nonzero value in loop->simduid. */ unsigned int has_simduid_loops : 1; + + /* Set when the tail call has been identified. */ + unsigned int tail_call_marked : 1; }; /* Add the decl D to the local_decls list of FUN. */ Index: tree-tailcall.c =================================================================== --- tree-tailcall.c (revision 206946) +++ tree-tailcall.c (working copy) @@ -909,6 +909,7 @@ gimple stmt = gsi_stmt (t->call_gsi); gimple_call_set_tail (stmt, true); + cfun->tail_call_marked = true; if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, "Found tail call "); Index: config/i386/i386-protos.h =================================================================== --- config/i386/i386-protos.h (revision 206946) +++ config/i386/i386-protos.h (working copy) @@ -84,7 +84,6 @@ extern void ix86_expand_move (enum machine_mode, rtx[]); extern void ix86_expand_vector_move (enum machine_mode, rtx[]); extern void ix86_expand_vector_move_misalign (enum machine_mode, rtx[]); -extern void ix86_expand_push (enum machine_mode, rtx); extern rtx ix86_fixup_binary_operands (enum rtx_code, enum machine_mode, rtx[]); extern void ix86_fixup_binary_operands_no_copy (enum rtx_code, Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 206946) +++ config/i386/i386.md (working copy) @@ -1818,8 +1818,6 @@ { if (TARGET_64BIT) ix86_expand_move (TImode, operands); - else if (push_operand (operands[0], TImode)) - ix86_expand_push (TImode, operands[1]); else ix86_expand_vector_move (TImode, operands); DONE; @@ -2665,7 +2663,11 @@ (match_operand:TF 1 "sse_reg_operand"))] "TARGET_SSE && reload_completed" [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16))) - (set (mem:TF (reg:P SP_REG)) (match_dup 1))]) + (set (match_dup 0) (match_dup 1))] +{ + /* Preserve memory attributes. */ + operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx); +}) (define_insn "*pushxf" [(set (match_operand:XF 0 "push_operand" "=<,<") @@ -2691,8 +2693,12 @@ (match_operand:XF 1 "fp_register_operand"))] "reload_completed" [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) - (set (mem:XF (reg:P SP_REG)) (match_dup 1))] - "operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode));") + (set (match_dup 0) (match_dup 1))] +{ + operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode)); + /* Preserve memory attributes. */ + operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx); +}) (define_insn "*pushdf" [(set (match_operand:DF 0 "push_operand" "=<,<,<,<") @@ -2713,7 +2719,11 @@ (match_operand:DF 1 "any_fp_register_operand"))] "reload_completed" [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8))) - (set (mem:DF (reg:P SP_REG)) (match_dup 1))]) + (set (match_dup 0) (match_dup 1))] +{ + /* Preserve memory attributes. */ + operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx); +}) (define_insn "*pushsf_rex64" [(set (match_operand:SF 0 "push_operand" "=X,X,X") @@ -2747,8 +2757,12 @@ (match_operand:SF 1 "any_fp_register_operand"))] "reload_completed" [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) - (set (mem:SF (reg:P SP_REG)) (match_dup 1))] - "operands[2] = GEN_INT (-<P:MODE_SIZE>);") + (set (match_dup 0) (match_dup 1))] +{ + operands[2] = GEN_INT (-<P:MODE_SIZE>); + /* Preserve memory attributes. */ + operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx); +}) (define_split [(set (match_operand:SF 0 "push_operand") Index: config/i386/sse.md =================================================================== --- config/i386/sse.md (revision 206946) +++ config/i386/sse.md (working copy) @@ -911,14 +911,6 @@ operands[2] = CONST0_RTX (DFmode); }) -(define_expand "push<mode>1" - [(match_operand:VMOVE 0 "register_operand")] - "TARGET_SSE" -{ - ix86_expand_push (<MODE>mode, operands[0]); - DONE; -}) - (define_expand "movmisalign<mode>" [(set (match_operand:VMOVE 0 "nonimmediate_operand") (match_operand:VMOVE 1 "nonimmediate_operand"))] Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 206946) +++ config/i386/i386.c (working copy) @@ -16795,6 +16795,9 @@ rtx op0 = operands[0], op1 = operands[1]; unsigned int align = GET_MODE_ALIGNMENT (mode); + if (push_operand (op0, VOIDmode)) + op0 = emit_move_resolve_push (mode, op0); + /* Force constants other than zero into memory. We do not know how the instructions used to build constants modify the upper 64 bits of the register, once we have that information we may be able @@ -17222,30 +17225,6 @@ gcc_unreachable (); } -/* Expand a push in MODE. This is some mode for which we do not support - proper push instructions, at least from the registers that we expect - the value to live in. */ - -void -ix86_expand_push (enum machine_mode mode, rtx x) -{ - rtx tmp; - - tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx, - GEN_INT (-GET_MODE_SIZE (mode)), - stack_pointer_rtx, 1, OPTAB_DIRECT); - if (tmp != stack_pointer_rtx) - emit_move_insn (stack_pointer_rtx, tmp); - - tmp = gen_rtx_MEM (mode, stack_pointer_rtx); - - /* When we push an operand onto stack, it has to be aligned at least - at the function argument boundary. However since we don't have - the argument type, we can't determine the actual argument - boundary. */ - emit_move_insn (tmp, x); -} - /* Helper function of ix86_fixup_binary_operands to canonicalize operand order. Returns true if the operands should be swapped. */ Index: config/i386/mmx.md =================================================================== --- config/i386/mmx.md (revision 206946) +++ config/i386/mmx.md (working copy) @@ -213,14 +213,6 @@ [(const_int 0)] "ix86_split_long_move (operands); DONE;") -(define_expand "push<mode>1" - [(match_operand:MMXMODE 0 "register_operand")] - "TARGET_MMX" -{ - ix86_expand_push (<MODE>mode, operands[0]); - DONE; -}) - (define_expand "movmisalign<mode>" [(set (match_operand:MMXMODE 0 "nonimmediate_operand") (match_operand:MMXMODE 1 "nonimmediate_operand"))]