This patch allows us to recognize that even if the argument to memcpy
lives across the call, we can allocate it to a call-used register by
reusing the return value of the function.
The patch sets the existing "fn spec" attribute for memcpy/memmove.
When the call is expanded, this is translated to a new kind of entry in
CALL_INSN_FUNCTION_USAGE, a (set (returnreg) (argreg)). IRA recognizes
this entry to adjust costs, and adds a REG_RETURNED note to tell
caller-save that the register can be restored cheaply from the return
value.
The optimization only triggers if the argument is passed in a register,
which should be the case in the majority of sane ABIs. The effect on the
new testcase:
pushq   %rbx                  |  subq    $8, %rsp
movslq  %edx, %rdx               movslq  %edx, %rdx
movq    %rdi, %rbx            <
call    memcpy                   call    memcpy
movq    %rbx, %rax            |  addq    $8, %rsp
popq    %rbx                  <
ret                              ret
Bootstrapped with all languages on i686-linux, and bootstrapped and
tested minus Ada on x86_64-linux. There's one Go test which seems to
fail randomly both with and without the patch:
FAIL: go.test/test/stack.go execution, -O2 -g
Ok?
Bernd
* attribs.c (decl_attributes): Avoid emitting a warning if
ATTR_FLAG_BUILT_IN.
* doc/rtl.texi (CALL_INSN_FUNCTION_USAGE): Use lowercase for rtx
codes. Document meaning of sets inside CALL_INSN_FUNCTION_USAGE.
* c-family/c-common.c (DEF_ATTR_STRING): Define and undefine as
necessary.
* ada/gcc-interface/utils.c (DEF_ATTR_STRING): Likewise.
* builtin-attrs.def (DEF_ATTR_FOR_STRING): Define. Use it to
define a string "1".
(ATTR_RET1_NOTHROW_NONNULL_LEAF): New attr definition.
* builtins.def (BUILT_IN_MEMCPY, BUILT_IN_MEMMOVE,
BUILT_IN_MEMCPY_CHK, BUILT_IN_MEMMOVE_CHK): Use it for these
functions.
* postreload.c (reload_combine): Deal with SETs inside
CALL_INSN_FUNCTION_USAGE.
* caller-save.c (setup_save_areas, save_call_clobbered_regs):
Look for REG_RETURNED notes and use a cheap restore if possible.
* ira-int.h (struct ira_allocno): New member cheap_calls_crossed_num.
(ALLOCNO_CHEAP_CALLS_CROSSED_NUM): New macro.
* ira-build.c (ira_create_allocno): Initialize
ALLOCNO_CHEAP_CALLS_CROSSED_NUM.
* ira-lives.c (pseudo_regno_single_word_and_live_p): New static
function.
(process_bb_node_lives): Look for SETs in CALL_INSN_FUNCTION_USAGE,
and set ALLOCNO_CHEAP_CALLS_CROSSED_NUM if possible. Also make
a REG_RETURNED note in that case.
* ira.c (setup_reg_renumber): Change assert to allow cases where
allocnos only cross calls for which they are cheap to restore.
* ira-costs.c (ira_tune_allocno_costs): Compare
ALLOCNO_CALLS_CROSSED_NUM to ALLOCNO_CHEAP_CALLS_CROSSED_NUM rather
than 0.
* reg-notes.def (REG_RETURNED): New note.
* cse.c (canonicalize_insn): Ignore SETs inside
CALL_INSN_FUNCTION_USAGE.
* sched-deps.c (sched_analyze_insn): Likewise.
* expr.c (init_block_move_fn): Set a "fn spec" attribute.
* calls.c (decl_return_flags): New static function.
(expand_call): Generate a SET in CALL_INSN_FUNCTION_USAGE for
functions that return one of their arguments.
* lto/lto-lang.c (handle_fnspec_attribute): New static function.
(lto_attribute_table): Add "fn spec".
(DEF_ATTR_STRING): Define and undefine along with the other macros.
* regcprop.c (struct kill_set_value_data): New.
(kill_set_value): Interpret data as a pointer to such a struct.
Do nothing if the caller wants the register to be ignored.
(copyprop_hardreg_forward_1): Handle SETs in CALL_INSN_FUNCTION_USAGE.
testsuite/
* gcc.target/i386/retarg.c: New test.
Index: gcc/attribs.c
===================================================================
--- gcc/attribs.c (revision 186712)
+++ gcc/attribs.c (working copy)
@@ -312,8 +312,9 @@ decl_attributes (tree *node, tree attrib
if (spec == NULL)
{
- warning (OPT_Wattributes, "%qE attribute directive ignored",
- name);
+ if (!(flags & (int) ATTR_FLAG_BUILT_IN))
+ warning (OPT_Wattributes, "%qE attribute directive ignored",
+ name);
continue;
}
else if (list_length (args) < spec->min_length
Index: gcc/doc/rtl.texi
===================================================================
--- gcc/doc/rtl.texi (revision 186712)
+++ gcc/doc/rtl.texi (working copy)
@@ -3455,20 +3455,26 @@ unpredictably.
@code{call_insn} insns have the same extra fields as @code{insn} insns,
accessed in the same way and in addition contain a field
@code{CALL_INSN_FUNCTION_USAGE}, which contains a list (chain of
-@code{expr_list} expressions) containing @code{use} and @code{clobber}
-expressions that denote hard registers and @code{MEM}s used or
-clobbered by the called function.
+@code{expr_list} expressions) containing @code{use}, @code{clobber} and
+sometimes @code{set} expressions that denote hard registers and
+@code{mem}s used or clobbered by the called function.
-A @code{MEM} generally points to a stack slots in which arguments passed
+A @code{mem} generally points to a stack slot in which arguments passed
to the libcall by reference (@pxref{Register Arguments,
TARGET_PASS_BY_REFERENCE}) are stored. If the argument is
caller-copied (@pxref{Register Arguments, TARGET_CALLEE_COPIES}),
-the stack slot will be mentioned in @code{CLOBBER} and @code{USE}
-entries; if it's callee-copied, only a @code{USE} will appear, and the
-@code{MEM} may point to addresses that are not stack slots.
+the stack slot will be mentioned in @code{clobber} and @code{use}
+entries; if it's callee-copied, only a @code{use} will appear, and the
+@code{mem} may point to addresses that are not stack slots.
-@code{CLOBBER}ed registers in this list augment registers specified in
-@code{CALL_USED_REGISTERS} (@pxref{Register Basics}).
+Registers occurring inside a @code{clobber} in this list augment
+registers specified in @code{CALL_USED_REGISTERS} (@pxref{Register
+Basics}).
+
+If the list contains a @code{set} involving two registers, it indicates
+that the function returns one of its arguments. Such a @code{set} may
+look like a no-op if the same register holds the argument and the return
+value.
@findex code_label
@findex CODE_LABEL_NUMBER
Index: gcc/c-family/c-common.c
===================================================================
--- gcc/c-family/c-common.c (revision 186712)
+++ gcc/c-family/c-common.c (working copy)
@@ -4570,11 +4570,13 @@ enum built_in_attribute
{
#define DEF_ATTR_NULL_TREE(ENUM) ENUM,
#define DEF_ATTR_INT(ENUM, VALUE) ENUM,
+#define DEF_ATTR_STRING(ENUM, VALUE) ENUM,
#define DEF_ATTR_IDENT(ENUM, STRING) ENUM,
#define DEF_ATTR_TREE_LIST(ENUM, PURPOSE, VALUE, CHAIN) ENUM,
#include "builtin-attrs.def"
#undef DEF_ATTR_NULL_TREE
#undef DEF_ATTR_INT
+#undef DEF_ATTR_STRING
#undef DEF_ATTR_IDENT
#undef DEF_ATTR_TREE_LIST
ATTR_LAST
@@ -5893,6 +5895,8 @@ c_init_attributes (void)
built_in_attributes[(int) ENUM] = NULL_TREE;
#define DEF_ATTR_INT(ENUM, VALUE) \
built_in_attributes[(int) ENUM] = build_int_cst (integer_type_node, VALUE);
+#define DEF_ATTR_STRING(ENUM, VALUE) \
+ built_in_attributes[(int) ENUM] = build_string (strlen (VALUE), VALUE);
#define DEF_ATTR_IDENT(ENUM, STRING) \
built_in_attributes[(int) ENUM] = get_identifier (STRING);
#define DEF_ATTR_TREE_LIST(ENUM, PURPOSE, VALUE, CHAIN) \
Index: gcc/postreload.c
===================================================================
--- gcc/postreload.c (revision 186712)
+++ gcc/postreload.c (working copy)
@@ -1357,8 +1357,10 @@ reload_combine (void)
for (link = CALL_INSN_FUNCTION_USAGE (insn); link;
link = XEXP (link, 1))
{
- rtx usage_rtx = XEXP (XEXP (link, 0), 0);
- if (REG_P (usage_rtx))
+ rtx setuse = XEXP (link, 0);
+ rtx usage_rtx = XEXP (setuse, 0);
+ if ((GET_CODE (setuse) == USE || GET_CODE (setuse) == CLOBBER)
+ && REG_P (usage_rtx))
{
unsigned int i;
unsigned int start_reg = REGNO (usage_rtx);
Index: gcc/builtin-attrs.def
===================================================================
--- gcc/builtin-attrs.def (revision 186712)
+++ gcc/builtin-attrs.def (working copy)
@@ -59,6 +59,14 @@ DEF_ATTR_FOR_INT (5)
DEF_ATTR_FOR_INT (6)
#undef DEF_ATTR_FOR_INT
+/* Construct a tree for a given string and a list containing it. */
+#define DEF_ATTR_FOR_STRING(ENUM, VALUE) \
+ DEF_ATTR_STRING (ATTR_##ENUM, VALUE) \
+ DEF_ATTR_TREE_LIST (ATTR_LIST_##ENUM, ATTR_NULL, \
+ ATTR_##ENUM, ATTR_NULL)
+DEF_ATTR_FOR_STRING (STR1, "1")
+#undef DEF_ATTR_FOR_STRING
+
/* Construct a tree for a list of two integers. */
#define DEF_LIST_INT_INT(VALUE1, VALUE2) \
DEF_ATTR_TREE_LIST (ATTR_LIST_##VALUE1##_##VALUE2, ATTR_NULL, \
@@ -84,6 +92,7 @@ DEF_ATTR_IDENT (ATTR_NONNULL, "nonnull")
DEF_ATTR_IDENT (ATTR_NORETURN, "noreturn")
DEF_ATTR_IDENT (ATTR_NOTHROW, "nothrow")
DEF_ATTR_IDENT (ATTR_LEAF, "leaf")
+DEF_ATTR_IDENT (ATTR_FNSPEC, "fn spec")
DEF_ATTR_IDENT (ATTR_PRINTF, "printf")
DEF_ATTR_IDENT (ATTR_ASM_FPRINTF, "asm_fprintf")
DEF_ATTR_IDENT (ATTR_GCC_DIAG, "gcc_diag")
@@ -170,6 +179,10 @@ DEF_ATTR_TREE_LIST (ATTR_NOTHROW_NONNULL
/* Nothrow const functions whose pointer parameter(s) are all nonnull. */
DEF_ATTR_TREE_LIST (ATTR_CONST_NOTHROW_NONNULL, ATTR_CONST, ATTR_NULL, \
ATTR_NOTHROW_NONNULL)
+/* Nothrow leaf functions whose pointer parameter(s) are all nonnull,
+ and which return their first argument. */
+DEF_ATTR_TREE_LIST (ATTR_RET1_NOTHROW_NONNULL_LEAF, ATTR_FNSPEC, ATTR_LIST_STR1, \
+ ATTR_NOTHROW_NONNULL_LEAF)
/* Nothrow const leaf functions whose pointer parameter(s) are all nonnull. */
DEF_ATTR_TREE_LIST (ATTR_CONST_NOTHROW_NONNULL_LEAF, ATTR_CONST, ATTR_NULL, \
ATTR_NOTHROW_NONNULL_LEAF)
Index: gcc/testsuite/gcc.target/i386/retarg.c
===================================================================
--- gcc/testsuite/gcc.target/i386/retarg.c (revision 0)
+++ gcc/testsuite/gcc.target/i386/retarg.c (revision 0)
@@ -0,0 +1,13 @@
+/* { dg-require-effective-target lp64 } */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <string.h>
+
+void *p (void *x, void *y, int z)
+{
+ memcpy (x, y, z);
+ return x;
+}
+
+/* { dg-final { scan-assembler-not "%rdi" } } */
Index: gcc/builtins.def
===================================================================
--- gcc/builtins.def (revision 186712)
+++ gcc/builtins.def (working copy)
@@ -532,8 +532,8 @@ DEF_EXT_LIB_BUILTIN (BUILT_IN_BZERO,
DEF_EXT_LIB_BUILTIN (BUILT_IN_INDEX, "index", BT_FN_STRING_CONST_STRING_INT, ATTR_PURE_NOTHROW_NONNULL_LEAF)
DEF_LIB_BUILTIN (BUILT_IN_MEMCHR, "memchr", BT_FN_PTR_CONST_PTR_INT_SIZE, ATTR_PURE_NOTHROW_NONNULL_LEAF)
DEF_LIB_BUILTIN (BUILT_IN_MEMCMP, "memcmp", BT_FN_INT_CONST_PTR_CONST_PTR_SIZE, ATTR_PURE_NOTHROW_NONNULL_LEAF)
-DEF_LIB_BUILTIN (BUILT_IN_MEMCPY, "memcpy", BT_FN_PTR_PTR_CONST_PTR_SIZE, ATTR_NOTHROW_NONNULL_LEAF)
-DEF_LIB_BUILTIN (BUILT_IN_MEMMOVE, "memmove", BT_FN_PTR_PTR_CONST_PTR_SIZE, ATTR_NOTHROW_NONNULL_LEAF)
+DEF_LIB_BUILTIN (BUILT_IN_MEMCPY, "memcpy", BT_FN_PTR_PTR_CONST_PTR_SIZE, ATTR_RET1_NOTHROW_NONNULL_LEAF)
+DEF_LIB_BUILTIN (BUILT_IN_MEMMOVE, "memmove", BT_FN_PTR_PTR_CONST_PTR_SIZE, ATTR_RET1_NOTHROW_NONNULL_LEAF)
DEF_EXT_LIB_BUILTIN (BUILT_IN_MEMPCPY, "mempcpy", BT_FN_PTR_PTR_CONST_PTR_SIZE, ATTR_NOTHROW_NONNULL_LEAF)
DEF_LIB_BUILTIN (BUILT_IN_MEMSET, "memset", BT_FN_PTR_PTR_INT_SIZE, ATTR_NOTHROW_NONNULL_LEAF)
DEF_EXT_LIB_BUILTIN (BUILT_IN_RINDEX, "rindex", BT_FN_STRING_CONST_STRING_INT, ATTR_PURE_NOTHROW_NONNULL_LEAF)
@@ -757,8 +757,8 @@ DEF_BUILTIN_STUB (BUILT_IN_ALLOCA_WITH_A
/* Object size checking builtins. */
DEF_GCC_BUILTIN (BUILT_IN_OBJECT_SIZE, "object_size", BT_FN_SIZE_CONST_PTR_INT, ATTR_PURE_NOTHROW_LEAF_LIST)
-DEF_EXT_LIB_BUILTIN (BUILT_IN_MEMCPY_CHK, "__memcpy_chk", BT_FN_PTR_PTR_CONST_PTR_SIZE_SIZE, ATTR_NOTHROW_NONNULL_LEAF)
-DEF_EXT_LIB_BUILTIN (BUILT_IN_MEMMOVE_CHK, "__memmove_chk", BT_FN_PTR_PTR_CONST_PTR_SIZE_SIZE, ATTR_NOTHROW_NONNULL_LEAF)
+DEF_EXT_LIB_BUILTIN (BUILT_IN_MEMCPY_CHK, "__memcpy_chk", BT_FN_PTR_PTR_CONST_PTR_SIZE_SIZE, ATTR_RET1_NOTHROW_NONNULL_LEAF)
+DEF_EXT_LIB_BUILTIN (BUILT_IN_MEMMOVE_CHK, "__memmove_chk", BT_FN_PTR_PTR_CONST_PTR_SIZE_SIZE, ATTR_RET1_NOTHROW_NONNULL_LEAF)
DEF_EXT_LIB_BUILTIN (BUILT_IN_MEMPCPY_CHK, "__mempcpy_chk", BT_FN_PTR_PTR_CONST_PTR_SIZE_SIZE, ATTR_NOTHROW_NONNULL_LEAF)
DEF_EXT_LIB_BUILTIN (BUILT_IN_MEMSET_CHK, "__memset_chk", BT_FN_PTR_PTR_INT_SIZE_SIZE, ATTR_NOTHROW_NONNULL_LEAF)
DEF_EXT_LIB_BUILTIN (BUILT_IN_STPCPY_CHK, "__stpcpy_chk", BT_FN_STRING_STRING_CONST_STRING_SIZE, ATTR_NOTHROW_NONNULL_LEAF)
Index: gcc/caller-save.c
===================================================================
--- gcc/caller-save.c (revision 186712)
+++ gcc/caller-save.c (working copy)
@@ -433,6 +433,8 @@ setup_save_areas (void)
/* Create hard reg saved regs. */
for (chain = reload_insn_chain; chain != 0; chain = next)
{
+ rtx cheap;
+
insn = chain->insn;
next = chain->next;
if (!CALL_P (insn)
@@ -466,6 +468,9 @@ setup_save_areas (void)
new_saved_hard_reg (regno, freq);
SET_HARD_REG_BIT (hard_regs_used, regno);
}
+ cheap = find_reg_note (insn, REG_RETURNED, NULL);
+ if (cheap)
+ cheap = XEXP (cheap, 0);
/* Look through all live pseudos, mark their hard registers. */
EXECUTE_IF_SET_IN_REG_SET
(&chain->live_throughout, FIRST_PSEUDO_REGISTER, regno, rsi)
@@ -473,7 +478,7 @@ setup_save_areas (void)
int r = reg_renumber[regno];
int bound;
- if (r < 0)
+ if (r < 0 || regno_reg_rtx[regno] == cheap)
continue;
bound = r + hard_regno_nregs[r][PSEUDO_REGNO_MODE (regno)];
@@ -508,12 +513,18 @@ setup_save_areas (void)
memset (saved_reg_conflicts, 0, saved_regs_num * saved_regs_num);
for (chain = reload_insn_chain; chain != 0; chain = next)
{
+ rtx cheap;
call_saved_regs_num = 0;
insn = chain->insn;
next = chain->next;
if (!CALL_P (insn)
|| find_reg_note (insn, REG_NORETURN, NULL))
continue;
+
+ cheap = find_reg_note (insn, REG_RETURNED, NULL);
+ if (cheap)
+ cheap = XEXP (cheap, 0);
+
REG_SET_TO_HARD_REG_SET (hard_regs_to_save,
&chain->live_throughout);
COPY_HARD_REG_SET (used_regs, call_used_reg_set);
@@ -546,7 +557,7 @@ setup_save_areas (void)
int r = reg_renumber[regno];
int bound;
- if (r < 0)
+ if (r < 0 || regno_reg_rtx[regno] == cheap)
continue;
bound = r + hard_regno_nregs[r][PSEUDO_REGNO_MODE (regno)];
@@ -796,6 +807,11 @@ save_call_clobbered_regs (void)
unsigned regno;
HARD_REG_SET hard_regs_to_save;
reg_set_iterator rsi;
+ rtx cheap;
+
+ cheap = find_reg_note (insn, REG_RETURNED, NULL);
+ if (cheap)
+ cheap = XEXP (cheap, 0);
/* Use the register life information in CHAIN to compute which
regs are live during the call. */
@@ -817,7 +833,7 @@ save_call_clobbered_regs (void)
int nregs;
enum machine_mode mode;
- if (r < 0)
+ if (r < 0 || regno_reg_rtx[regno] == cheap)
continue;
nregs = hard_regno_nregs[r][PSEUDO_REGNO_MODE (regno)];
mode = HARD_REGNO_CALLER_SAVE_MODE
@@ -851,6 +867,17 @@ save_call_clobbered_regs (void)
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (TEST_HARD_REG_BIT (hard_regs_saved, regno))
n_regs_saved++;
+
+ if (cheap
+ && HARD_REGISTER_P (cheap)
+ && TEST_HARD_REG_BIT (call_used_reg_set, REGNO (cheap)))
+ {
+ rtx call_set = single_set (insn);
+ rtx dest = SET_DEST (call_set);
+ rtx pat = gen_rtx_SET (VOIDmode, cheap,
+ copy_rtx (dest));
+ chain = insert_one_insn (chain, 0, -1, pat);
+ }
}
last = chain;
}
Index: gcc/ira-int.h
===================================================================
--- gcc/ira-int.h (revision 186712)
+++ gcc/ira-int.h (working copy)
@@ -376,6 +376,9 @@ struct ira_allocno
int call_freq;
/* Accumulated number of the intersected calls. */
int calls_crossed_num;
+ /* The number of calls across which it is live, but which should not
+ affect register preferences. */
+ int cheap_calls_crossed_num;
/* Array of usage costs (accumulated and the one updated during
coloring) for each hard register of the allocno class. The
member value can be NULL if all costs are the same and equal to
@@ -418,6 +421,7 @@ struct ira_allocno
#define ALLOCNO_HARD_REGNO(A) ((A)->hard_regno)
#define ALLOCNO_CALL_FREQ(A) ((A)->call_freq)
#define ALLOCNO_CALLS_CROSSED_NUM(A) ((A)->calls_crossed_num)
+#define ALLOCNO_CHEAP_CALLS_CROSSED_NUM(A) ((A)->cheap_calls_crossed_num)
#define ALLOCNO_MEM_OPTIMIZED_DEST(A) ((A)->mem_optimized_dest)
#define ALLOCNO_MEM_OPTIMIZED_DEST_P(A) ((A)->mem_optimized_dest_p)
#define ALLOCNO_SOMEWHERE_RENAMED_P(A) ((A)->somewhere_renamed_p)
Index: gcc/cse.c
===================================================================
--- gcc/cse.c (revision 186712)
+++ gcc/cse.c (working copy)
@@ -4312,7 +4312,8 @@ canonicalize_insn (rtx insn, struct set
if (CALL_P (insn))
{
for (tem = CALL_INSN_FUNCTION_USAGE (insn); tem; tem = XEXP (tem, 1))
- XEXP (tem, 0) = canon_reg (XEXP (tem, 0), insn);
+ if (GET_CODE (XEXP (tem, 0)) != SET)
+ XEXP (tem, 0) = canon_reg (XEXP (tem, 0), insn);
}
if (GET_CODE (x) == SET && GET_CODE (SET_SRC (x)) == CALL)
Index: gcc/expr.c
===================================================================
--- gcc/expr.c (revision 186877)
+++ gcc/expr.c (working copy)
@@ -1383,7 +1383,7 @@ init_block_move_fn (const char *asmspec)
{
if (!block_move_fn)
{
- tree args, fn;
+ tree args, fn, attrs, attr_args;
fn = get_identifier ("memcpy");
args = build_function_type_list (ptr_type_node, ptr_type_node,
@@ -1398,6 +1398,11 @@ init_block_move_fn (const char *asmspec)
DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
DECL_VISIBILITY_SPECIFIED (fn) = 1;
+ attr_args = build_tree_list (NULL_TREE, build_string (1, "1"));
+ attrs = tree_cons (get_identifier ("fn spec"), attr_args, NULL);
+
+ decl_attributes (&fn, attrs, ATTR_FLAG_BUILT_IN);
+
block_move_fn = fn;
}
@@ -8017,10 +8022,7 @@ expand_expr_real_2 (sepops ops, rtx targ
|| DECL_RTL (treeop1) == stack_pointer_rtx
|| DECL_RTL (treeop1) == arg_pointer_rtx))
{
- tree t = treeop1;
-
- treeop1 = TREE_OPERAND (treeop0, 0);
- TREE_OPERAND (treeop0, 0) = t;
+ gcc_unreachable ();
}
/* If the result is to be ptr_mode and we are adding an integer to
Index: gcc/ada/gcc-interface/utils.c
===================================================================
--- gcc/ada/gcc-interface/utils.c (revision 186712)
+++ gcc/ada/gcc-interface/utils.c (working copy)
@@ -5171,11 +5171,13 @@ enum built_in_attribute
{
#define DEF_ATTR_NULL_TREE(ENUM) ENUM,
#define DEF_ATTR_INT(ENUM, VALUE) ENUM,
+#define DEF_ATTR_STRING(ENUM, VALUE) ENUM,
#define DEF_ATTR_IDENT(ENUM, STRING) ENUM,
#define DEF_ATTR_TREE_LIST(ENUM, PURPOSE, VALUE, CHAIN) ENUM,
#include "builtin-attrs.def"
#undef DEF_ATTR_NULL_TREE
#undef DEF_ATTR_INT
+#undef DEF_ATTR_STRING
#undef DEF_ATTR_IDENT
#undef DEF_ATTR_TREE_LIST
ATTR_LAST
@@ -5191,6 +5193,8 @@ install_builtin_attributes (void)
built_in_attributes[(int) ENUM] = NULL_TREE;
#define DEF_ATTR_INT(ENUM, VALUE) \
built_in_attributes[(int) ENUM] = build_int_cst (NULL_TREE, VALUE);
+#define DEF_ATTR_STRING(ENUM, VALUE) \
+ built_in_attributes[(int) ENUM] = build_string (strlen (VALUE), VALUE);
#define DEF_ATTR_IDENT(ENUM, STRING) \
built_in_attributes[(int) ENUM] = get_identifier (STRING);
#define DEF_ATTR_TREE_LIST(ENUM, PURPOSE, VALUE, CHAIN) \
@@ -5201,6 +5205,7 @@ install_builtin_attributes (void)
#include "builtin-attrs.def"
#undef DEF_ATTR_NULL_TREE
#undef DEF_ATTR_INT
+#undef DEF_ATTR_STRING
#undef DEF_ATTR_IDENT
#undef DEF_ATTR_TREE_LIST
}
Index: gcc/ira-lives.c
===================================================================
--- gcc/ira-lives.c (revision 186712)
+++ gcc/ira-lives.c (working copy)
@@ -241,6 +241,24 @@ dec_register_pressure (enum reg_class pc
}
}
+/* Determine from the objects_live bitmap whether REGNO is currently live,
+ and occupies only one object. Return false if we have no information. */
+static bool
+pseudo_regno_single_word_and_live_p (int regno)
+{
+ ira_allocno_t a = ira_curr_regno_allocno_map[regno];
+ ira_object_t obj;
+
+ if (a == NULL)
+ return false;
+ if (ALLOCNO_NUM_OBJECTS (a) > 1)
+ return false;
+
+ obj = ALLOCNO_OBJECT (a, 0);
+
+ return sparseset_bit_p (objects_live, OBJECT_CONFLICT_ID (obj));
+}
+
/* Mark the pseudo register REGNO as live. Update all information about
live ranges and register pressure. */
static void
@@ -1185,6 +1203,62 @@ process_bb_node_lives (ira_loop_tree_nod
if (call_p)
{
+ /* Try to find a SET in the CALL_INSN_FUNCTION_USAGE, and from
+ there, try to find a pseudo that is live across the call but
+ can be cheaply reconstructed from the return value. */
+ rtx exp = CALL_INSN_FUNCTION_USAGE (insn);
+ rtx cheap_reg = NULL_RTX;
+ while (exp != NULL)
+ {
+ rtx x = XEXP (exp, 0);
+ if (GET_CODE (x) == SET)
+ {
+ exp = x;
+ break;
+ }
+ exp = XEXP (exp, 1);
+ }
+ if (exp != NULL)
+ {
+ rtx reg = SET_SRC (exp);
+ rtx prev = PREV_INSN (insn);
+ while (prev && !(INSN_P (prev)
+ && BLOCK_FOR_INSN (prev) != bb))
+ {
+ if (NONDEBUG_INSN_P (prev))
+ {
+ rtx set = single_set (prev);
+
+ if (set && rtx_equal_p (SET_DEST (set), reg))
+ {
+ rtx src = SET_SRC (set);
+ if (!REG_P (src) || HARD_REGISTER_P (src)
+ || !pseudo_regno_single_word_and_live_p (REGNO (src)))
+ break;
+ if (!modified_between_p (src, prev, insn))
+ cheap_reg = src;
+ break;
+ }
+ if (set && rtx_equal_p (SET_SRC (set), reg))
+ {
+ rtx dest = SET_DEST (set);
+ if (!REG_P (dest) || HARD_REGISTER_P (dest)
+ || !pseudo_regno_single_word_and_live_p (REGNO (dest)))
+ break;
+ if (!modified_between_p (dest, prev, insn))
+ cheap_reg = dest;
+ break;
+ }
+
+ if (reg_overlap_mentioned_p (reg, PATTERN (prev)))
+ break;
+ }
+ prev = PREV_INSN (prev);
+ }
+ }
+ if (cheap_reg != NULL_RTX)
+ add_reg_note (insn, REG_RETURNED, cheap_reg);
+
last_call_num++;
sparseset_clear (allocnos_processed);
/* The current set of live allocnos are live across the call. */
@@ -1226,6 +1300,9 @@ process_bb_node_lives (ira_loop_tree_nod
/* Mark it as saved at the next call. */
allocno_saved_at_call[num] = last_call_num + 1;
ALLOCNO_CALLS_CROSSED_NUM (a)++;
+ if (cheap_reg != NULL_RTX
+ && ALLOCNO_REGNO (a) == (int) REGNO (cheap_reg))
+ ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a)++;
}
}
Index: gcc/ira-build.c
===================================================================
--- gcc/ira-build.c (revision 186712)
+++ gcc/ira-build.c (working copy)
@@ -508,6 +508,7 @@ ira_create_allocno (int regno, bool cap_
ALLOCNO_HARD_REGNO (a) = -1;
ALLOCNO_CALL_FREQ (a) = 0;
ALLOCNO_CALLS_CROSSED_NUM (a) = 0;
+ ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a) = 0;
#ifdef STACK_REGS
ALLOCNO_NO_STACK_REG_P (a) = false;
ALLOCNO_TOTAL_NO_STACK_REG_P (a) = false;
Index: gcc/calls.c
===================================================================
--- gcc/calls.c (revision 186712)
+++ gcc/calls.c (working copy)
@@ -574,6 +574,41 @@ special_function_p (const_tree fndecl, i
return flags;
}
+/* Similar to special_function_p; return a set of ERF_ flags for the
+ function FNDECL. */
+static int
+decl_return_flags (tree fndecl)
+{
+ tree attr;
+ tree type = TREE_TYPE (fndecl);
+ if (!type)
+ return 0;
+
+ attr = lookup_attribute ("fn spec", TYPE_ATTRIBUTES (type));
+ if (!attr)
+ return 0;
+
+ attr = TREE_VALUE (TREE_VALUE (attr));
+ if (!attr || TREE_STRING_LENGTH (attr) < 1)
+ return 0;
+
+ switch (TREE_STRING_POINTER (attr)[0])
+ {
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ return ERF_RETURNS_ARG | (TREE_STRING_POINTER (attr)[0] - '1');
+
+ case 'm':
+ return ERF_NOALIAS;
+
+ case '.':
+ default:
+ return 0;
+ }
+}
+
/* Return nonzero when FNDECL represents a call to setjmp. */
int
@@ -2249,8 +2284,9 @@ expand_call (tree exp, rtx target, int i
(on machines that lack push insns), or 0 if space not preallocated. */
rtx argblock = 0;
- /* Mask of ECF_ flags. */
+ /* Masks of ECF_ flags and of ERF_ flags, respectively. */
int flags = 0;
+ int return_flags = 0;
#ifdef REG_PARM_STACK_SPACE
/* Define the boundary of the register parm stack space that needs to be
saved, if any. */
@@ -2295,6 +2331,7 @@ expand_call (tree exp, rtx target, int i
{
fntype = TREE_TYPE (fndecl);
flags |= flags_from_decl_or_type (fndecl);
+ return_flags |= decl_return_flags (fndecl);
}
else
{
@@ -3107,6 +3144,20 @@ expand_call (tree exp, rtx target, int i
VOIDmode, void_type_node,
true);
+ if (pass == 1 && (return_flags & ERF_RETURNS_ARG))
+ {
+ int arg_nr = return_flags & ERF_RETURN_ARG_MASK;
+ if (PUSH_ARGS_REVERSED)
+ arg_nr = num_actuals - arg_nr - 1;
+ if (args[arg_nr].reg
+ && valreg
+ && REG_P (valreg)
+ && GET_MODE (args[arg_nr].reg) == GET_MODE (valreg))
+ call_fusage
+ = gen_rtx_EXPR_LIST (TYPE_MODE (TREE_TYPE (args[arg_nr].tree_value)),
+ gen_rtx_SET (VOIDmode, valreg, args[arg_nr].reg),
+ call_fusage);
+ }
/* All arguments and registers used for the call must be set up by
now! */
Index: gcc/lto/lto-lang.c
===================================================================
--- gcc/lto/lto-lang.c (revision 186712)
+++ gcc/lto/lto-lang.c (working copy)
@@ -53,6 +53,7 @@ static tree handle_returns_twice_attribu
static tree ignore_attribute (tree *, tree, tree, int, bool *);
static tree handle_format_attribute (tree *, tree, tree, int, bool *);
+static tree handle_fnspec_attribute (tree *, tree, tree, int, bool *);
static tree handle_format_arg_attribute (tree *, tree, tree, int, bool *);
/* Table of machine-independent attributes supported in GIMPLE. */
@@ -83,6 +84,8 @@ const struct attribute_spec lto_attribut
handle_sentinel_attribute, false },
{ "type generic", 0, 0, false, true, true,
handle_type_generic_attribute, false },
+ { "fn spec", 1, 1, false, true, true,
+ handle_fnspec_attribute, false },
{ "transaction_pure", 0, 0, false, true, true,
handle_transaction_pure_attribute, false },
/* For internal use only. The leading '*' both prevents its usage in
@@ -110,11 +113,13 @@ enum built_in_attribute
{
#define DEF_ATTR_NULL_TREE(ENUM) ENUM,
#define DEF_ATTR_INT(ENUM, VALUE) ENUM,
+#define DEF_ATTR_STRING(ENUM, VALUE) ENUM,
#define DEF_ATTR_IDENT(ENUM, STRING) ENUM,
#define DEF_ATTR_TREE_LIST(ENUM, PURPOSE, VALUE, CHAIN) ENUM,
#include "builtin-attrs.def"
#undef DEF_ATTR_NULL_TREE
#undef DEF_ATTR_INT
+#undef DEF_ATTR_STRING
#undef DEF_ATTR_IDENT
#undef DEF_ATTR_TREE_LIST
ATTR_LAST
@@ -483,6 +488,20 @@ handle_format_arg_attribute (tree * ARG_
}
+/* Handle a "fn spec" attribute; arguments as in
+ struct attribute_spec.handler. */
+
+static tree
+handle_fnspec_attribute (tree *node ATTRIBUTE_UNUSED, tree ARG_UNUSED (name),
+ tree args, int ARG_UNUSED (flags),
+ bool *no_add_attrs ATTRIBUTE_UNUSED)
+{
+ gcc_assert (args
+ && TREE_CODE (TREE_VALUE (args)) == STRING_CST
+ && !TREE_CHAIN (args));
+ return NULL_TREE;
+}
+
/* Cribbed from c-common.c. */
static void
@@ -568,6 +587,8 @@ lto_init_attributes (void)
built_in_attributes[(int) ENUM] = NULL_TREE;
#define DEF_ATTR_INT(ENUM, VALUE) \
built_in_attributes[(int) ENUM] = build_int_cst (NULL_TREE, VALUE);
+#define DEF_ATTR_STRING(ENUM, VALUE) \
+ built_in_attributes[(int) ENUM] = build_string (strlen (VALUE), VALUE);
#define DEF_ATTR_IDENT(ENUM, STRING) \
built_in_attributes[(int) ENUM] = get_identifier (STRING);
#define DEF_ATTR_TREE_LIST(ENUM, PURPOSE, VALUE, CHAIN) \
@@ -578,6 +599,7 @@ lto_init_attributes (void)
#include "builtin-attrs.def"
#undef DEF_ATTR_NULL_TREE
#undef DEF_ATTR_INT
+#undef DEF_ATTR_STRING
#undef DEF_ATTR_IDENT
#undef DEF_ATTR_TREE_LIST
}
Index: gcc/ira.c
===================================================================
--- gcc/ira.c (revision 186875)
+++ gcc/ira.c (working copy)
@@ -1962,6 +1962,8 @@ setup_reg_renumber (void)
call_used_reg_set))
{
ira_assert (!optimize || flag_caller_saves
+ || (ALLOCNO_CALLS_CROSSED_NUM (a)
+ == ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a))
|| regno >= ira_reg_equiv_len
|| ira_reg_equiv_const[regno]
|| ira_reg_equiv_invariant_p[regno]);
Index: gcc/sched-deps.c
===================================================================
--- gcc/sched-deps.c (revision 186712)
+++ gcc/sched-deps.c (working copy)
@@ -2871,7 +2871,7 @@ sched_analyze_insn (struct deps_desc *de
{
if (GET_CODE (XEXP (link, 0)) == CLOBBER)
sched_analyze_1 (deps, XEXP (link, 0), insn);
- else
+ else if (GET_CODE (XEXP (link, 0)) != SET)
sched_analyze_2 (deps, XEXP (link, 0), insn);
}
/* Don't schedule anything after a tail call, tail call needs
Index: gcc/ira-costs.c
===================================================================
--- gcc/ira-costs.c (revision 186712)
+++ gcc/ira-costs.c (working copy)
@@ -2107,7 +2107,8 @@ ira_tune_allocno_costs (void)
mode = ALLOCNO_MODE (a);
n = ira_class_hard_regs_num[aclass];
min_cost = INT_MAX;
- if (ALLOCNO_CALLS_CROSSED_NUM (a) != 0)
+ if (ALLOCNO_CALLS_CROSSED_NUM (a)
+ != ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a))
{
ira_allocate_and_set_costs
(&ALLOCNO_HARD_REG_COSTS (a), aclass,
Index: gcc/reg-notes.def
===================================================================
--- gcc/reg-notes.def (revision 186712)
+++ gcc/reg-notes.def (working copy)
@@ -212,3 +212,8 @@ REG_NOTE (TM)
for pushed arguments. This will only be generated when
ACCUMULATE_OUTGOING_ARGS is false. */
REG_NOTE (ARGS_SIZE)
+
+/* Used for communication between IRA and caller-save.c, indicates
+ that the return value of a call can be used to reinitialize a
+ pseudo reg. */
+REG_NOTE (RETURNED)
Index: gcc/regcprop.c
===================================================================
--- gcc/regcprop.c (revision 186712)
+++ gcc/regcprop.c (working copy)
@@ -254,18 +254,27 @@ kill_clobbered_value (rtx x, const_rtx s
kill_value (x, vd);
}
+/* A structure passed as data to kill_set_value through note_stores. */
+struct kill_set_value_data
+{
+ struct value_data *vd;
+ rtx ignore_set_reg;
+};
+
/* Called through note_stores. If X is set, not clobbered, kill its
current value and install it as the root of its own value list. */
static void
kill_set_value (rtx x, const_rtx set, void *data)
{
- struct value_data *const vd = (struct value_data *) data;
+ struct kill_set_value_data *ksvd = (struct kill_set_value_data *) data;
+ if (rtx_equal_p (x, ksvd->ignore_set_reg))
+ return;
if (GET_CODE (set) != CLOBBER)
{
- kill_value (x, vd);
+ kill_value (x, ksvd->vd);
if (REG_P (x))
- set_value_regno (REGNO (x), GET_MODE (x), vd);
+ set_value_regno (REGNO (x), GET_MODE (x), ksvd->vd);
}
}
@@ -743,6 +752,7 @@ copyprop_hardreg_forward_1 (basic_block
rtx set;
bool replaced[MAX_RECOG_OPERANDS];
bool changed = false;
+ struct kill_set_value_data ksvd;
if (!NONDEBUG_INSN_P (insn))
{
@@ -976,14 +986,39 @@ copyprop_hardreg_forward_1 (basic_block
note_uses (&PATTERN (insn), cprop_find_used_regs, vd);
}
+ ksvd.vd = vd;
+ ksvd.ignore_set_reg = NULL_RTX;
+
/* Clobber call-clobbered registers. */
if (CALL_P (insn))
- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
- if (TEST_HARD_REG_BIT (regs_invalidated_by_call, i))
- kill_value_regno (i, 1, vd);
+ {
+ int set_regno = INVALID_REGNUM;
+ int set_nregs = 0;
+ rtx exp;
+ for (exp = CALL_INSN_FUNCTION_USAGE (insn); exp; exp = XEXP (exp, 1))
+ {
+ rtx x = XEXP (exp, 0);
+ if (GET_CODE (x) == SET)
+ {
+ rtx dest = SET_DEST (x);
+ kill_value (dest, vd);
+ set_value_regno (REGNO (dest), GET_MODE (dest), vd);
+ copy_value (dest, SET_SRC (x), vd);
+ ksvd.ignore_set_reg = dest;
+ set_regno = REGNO (dest);
+ set_nregs
+ = hard_regno_nregs[set_regno][GET_MODE (dest)];
+ break;
+ }
+ }
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (TEST_HARD_REG_BIT (regs_invalidated_by_call, i)
+ && (i < set_regno || i >= set_regno + set_nregs))
+ kill_value_regno (i, 1, vd);
+ }
/* Notice stores. */
- note_stores (PATTERN (insn), kill_set_value, vd);
+ note_stores (PATTERN (insn), kill_set_value, &ksvd);
/* Notice copies. */
if (set && REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))