Hi, This patch adds a small improvement for sibling tail-calls. Without this patch we produce a useless load of the address into a register. See the testcases sibcall-1.c and sibcall-3.c for this. The testcase sibcall-3.c is just a demonstration of other missed opportunities for sibcall tail-calls, and I added it to the testcases for completeness. I test only 32-bit here to keep the testcases simple. Nevertheless, all those tests apply in general to 64-bit, too.
This patch partially addresses the general issue shown in PR 60104. I will send follow-up patches for it. ChangeLog 2014-05-22 Kai Tietz <kti...@redhat.com> PR target/60104 * config/i386/i386-protos.h (x86_sibcall_memory_p): New prototype. * config/i386/i386.md (!SIBLING_CALL_P): Add alternative check for x86_sibcall_memory_p. * config/i386/i386.c (x86_sibcall_memory_p): New function. (x86_sibcall_memory_p_1): New helper function. (x86_output_mi_thunk): Allow memory if x86_sibcall_memory_p is true. 2014-05-22 Kai Tietz <kti...@redhat.com> PR target/60104 * gcc.target/i386/sibcall-1.c: New test. * gcc.target/i386/sibcall-2.c: New test. * gcc.target/i386/sibcall-3.c: New test. Regression-tested for x86_64-unknown-linux-gnu (multilib), x86_64-w64-mingw32, and i686-pc-cygwin. OK to apply? Index: gcc/config/i386/i386-protos.h =================================================================== --- gcc/config/i386/i386-protos.h (revision 210716) +++ gcc/config/i386/i386-protos.h (working copy) @@ -303,6 +303,7 @@ extern enum attr_cpu ix86_schedule; #endif extern const char * ix86_output_call_insn (rtx insn, rtx call_op); +extern bool x86_sibcall_memory_p (rtx insn); #ifdef RTX_CODE /* Target data for multipass lookahead scheduling. Index: gcc/config/i386/i386.c =================================================================== --- gcc/config/i386/i386.c (revision 210716) +++ gcc/config/i386/i386.c (working copy) @@ -38752,6 +38752,69 @@ x86_can_output_mi_thunk (const_tree thunk ATTRIBUT return true; } +/* Helper routine of x86_sibcall_memory_p. + + RFLAGS is used to prevent to recurse on PLUS, and MULT expressions. */ + +static bool +x86_sibcall_memory_p_1 (rtx op, int rflags) +{ + switch (GET_CODE (op)) + { + case SYMBOL_REF: + return true; + case MULT: + if (rflags & 2) + return false; + + rflags |= 2; /* Mark to prevent recursion on MULT. 
*/ + + if (x86_sibcall_memory_p_1 (XEXP (op, 0), rflags) + && x86_sibcall_memory_p_1 (XEXP (op, 1), rflags)) + return true; + break; + case PLUS: + if (rflags & 1) + return false; + + rflags |= 1; /* Makr to prevent recursion on PLUS. */ + + if (x86_sibcall_memory_p_1 (XEXP (op, 0), rflags) + && x86_sibcall_memory_p_1 (XEXP (op, 1), rflags)) + return true; + break; + default: + if (REG_P (op)) + { + op = GET_CODE (op) == SUBREG ? SUBREG_REG (op) : op; + return REGNO (op) == AX_REG; + } + else if (CONSTANT_P (op)) + return true; + break; + } + + return false; +} + +/* Function returns TRUE if operand OP is an memory based on a + symbol-reference or based on accumulator-register. + Otherwise it returns FALSE. */ + +bool +x86_sibcall_memory_p (rtx op) +{ + if (!MEM_P (op)) + return false; + + op = XEXP (op, 0); + + if (GET_CODE (op) == CONST) + op = XEXP (op, 0); + + return x86_sibcall_memory_p_1 (op, 0); +} + /* Output the assembler code for a thunk function. THUNK_DECL is the declaration for the thunk function itself, FUNCTION is the decl for the target function. DELTA is an immediate constant offset to be @@ -38891,7 +38954,16 @@ x86_output_mi_thunk (FILE *file, For our purposes here, we can get away with (ab)using a jump pattern, because we're going to do no optimization. 
*/ if (MEM_P (fnaddr)) - emit_jump_insn (gen_indirect_jump (fnaddr)); + { + if (x86_sibcall_memory_p (fnaddr)) + { + tmp = gen_rtx_CALL (VOIDmode, fnaddr, const0_rtx); + tmp = emit_call_insn (tmp); + SIBLING_CALL_P (tmp) = 1; + } + else + emit_jump_insn (gen_indirect_jump (fnaddr)); + } else { if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr)) Index: gcc/config/i386/i386.md =================================================================== --- gcc/config/i386/i386.md (revision 210716) +++ gcc/config/i386/i386.md (working copy) @@ -11362,7 +11362,7 @@ (define_insn "*call" [(call (mem:QI (match_operand:W 0 "call_insn_operand" "<c>zw")) (match_operand 1))] - "!SIBLING_CALL_P (insn)" + "!SIBLING_CALL_P (insn) || x86_sibcall_memory_p (operands[0])" "* return ix86_output_call_insn (insn, operands[0]);" [(set_attr "type" "call")]) @@ -11371,7 +11371,7 @@ [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rzw")) (match_operand 1)) (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)])] - "TARGET_64BIT && !SIBLING_CALL_P (insn)" + "TARGET_64BIT && (!SIBLING_CALL_P (insn) || x86_sibcall_memory_p (operands[0]))" "* return ix86_output_call_insn (insn, operands[0]);" [(set_attr "type" "call")]) @@ -11401,7 +11401,7 @@ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (match_operand:SI 2 "immediate_operand" "i")))] - "!TARGET_64BIT && !SIBLING_CALL_P (insn)" + "!TARGET_64BIT && (!SIBLING_CALL_P (insn) || x86_sibcall_memory_p (operands[0]))" "* return ix86_output_call_insn (insn, operands[0]);" [(set_attr "type" "call")]) @@ -11445,7 +11445,7 @@ [(set (match_operand 0) (call (mem:QI (match_operand:W 1 "call_insn_operand" "<c>zw")) (match_operand 2)))] - "!SIBLING_CALL_P (insn)" + "!SIBLING_CALL_P (insn) || x86_sibcall_memory_p (operands[1])" "* return ix86_output_call_insn (insn, operands[1]);" [(set_attr "type" "callv")]) @@ -11463,7 +11463,7 @@ (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rzw")) (match_operand 2))) (unspec [(const_int 0)] 
UNSPEC_MS_TO_SYSV_CALL)])] - "TARGET_64BIT && !SIBLING_CALL_P (insn)" + "TARGET_64BIT && (!SIBLING_CALL_P (insn) || x86_sibcall_memory_p (operands[1]))" "* return ix86_output_call_insn (insn, operands[1]);" [(set_attr "type" "callv")]) @@ -11488,7 +11488,7 @@ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (match_operand:SI 3 "immediate_operand" "i")))] - "!TARGET_64BIT && !SIBLING_CALL_P (insn)" + "!TARGET_64BIT && (!SIBLING_CALL_P (insn) || x86_sibcall_memory_p (operands[1]))" "* return ix86_output_call_insn (insn, operands[1]);" [(set_attr "type" "callv")]) Index: gcc.target/i386/sibcall-1.c =================================================================== --- gcc.target/i386/sibcall-1.c (revision 0) +++ gcc.target/i386/sibcall-1.c (working copy) @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target ia32 } */ +/* { dg-options "-O2" } */ + +extern int (*foo)(int); + +int boo (int a) +{ + return (*foo) (a); +} + +/* { dg-final { scan-assembler-not "mov" } } */ Index: gcc.target/i386/sibcall-2.c =================================================================== --- gcc.target/i386/sibcall-2.c (revision 0) +++ gcc.target/i386/sibcall-2.c (working copy) @@ -0,0 +1,16 @@ +/* { dg-do compile { xfail { *-*-* } } } */ +/* { dg-require-effective-target ia32 } */ +/* { dg-options "-O2" } */ + +extern int doo1 (int); +extern int doo2 (int); +extern void bar (char *); + +int foo (int a) +{ + char s[256]; + bar (s); + return (a < 0 ? 
doo1 : doo2) (a); +} + +/* { dg-final { scan-assembler-not "call[ \t]*.%eax" } } */ Index: gcc.target/i386/sibcall-3.c =================================================================== --- gcc.target/i386/sibcall-3.c (revision 0) +++ gcc.target/i386/sibcall-3.c (working copy) @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target ia32 } */ +/* { dg-options "-O2" } */ + +extern +#ifdef _WIN32 + __declspec (dllimport) +#endif + void foo (int a); + +void bar (int a) +{ + return foo (a); +} + +/* { dg-final { scan-assembler-not "jmp[ \t]*.%eax" } } */