Hello, this is an update on the LTO pass we've been working on. The optimization is called ipa-initcall-cp because it propagates constant values written to variables with static lifetimes (such as ones initialized in initialization functions).
This patch can be applied to: commit 3cce71b23f6ed221b4335b600e718d79903cc83d Date: Wed Dec 4 20:04:10 2019 +0000 git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@278975 138bc75d-0d04-0410-961f-82ee72b054a4 This patch can now successfully compile all benchmarks on the SPEC CPU2017 benchmarking suite. ~12 of the benchmarks have variables that can be eliminated with this transformation. I am pasting the patch at the bottom of the e-mail. I'll be working on adding some of the test cases onto gcc's testing suite. In my previous e-mail I asked some questions. Some of them still remain, others I have answered: > Question #1: Can someone please tell me what would be the best kind of clones > to use in this use case? I think that the only API which makes sense here is create_version_clone_with_body because we want to modify the body of the clones. > Question #2: I found out that clones produced with > create_version_clone_with_body are not themselves versionable. What is the > reason behind this? Still not sure why this is the case. > Question #3: I have added a flag to cgraph_nodes to skip ipa-cp. This flag is > true for the original functions which have been removed from the call graph. > If this flag is removed, then cgraph_nodes removed from the call graph will > trigger an assertion in ipa-cp (ipa-cp.c:1195). I would like to not have to > add this flag to cgraph_node but I am not sure what to do. Still not sure how to remove this flag. Ideally, ipa-cp would not trigger that assertion because it shouldn't try to propagate constants on cgraph_nodes that are not connected to the graph any more. 
diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 6b857bd..fb4ba09 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1370,6 +1370,7 @@ OBJS = \ internal-fn.o \ ipa-cp.o \ ipa-sra.o \ + ipa-initcall-cp.o \ ipa-devirt.o \ ipa-fnsummary.o \ ipa-polymorphic-call.o \ diff --git a/gcc/cgraph.c b/gcc/cgraph.c index 7288440..0d56149 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -288,6 +288,7 @@ symbol_table::create_empty (void) node->type = SYMTAB_FUNCTION; node->frequency = NODE_FREQUENCY_NORMAL; node->count_materialization_scale = REG_BR_PROB_BASE; + node->skip_ipa_cp = false; cgraph_count++; return node; @@ -3548,6 +3549,7 @@ cgraph_node::get_untransformed_body (void) name = lto_get_decl_name_mapping (file_data, name); struct lto_in_decl_state *decl_state = lto_get_function_in_decl_state (file_data, decl); + gcc_assert (decl_state != NULL); cgraph_node *origin = this; while (origin->clone_of) diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 9c086fe..806ed56 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -901,6 +901,8 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node { friend class symbol_table; + bool skip_ipa_cp; + /* Remove the node from cgraph and all inline clones inlined into it. Skip however removal of FORBIDDEN_NODE and return true if it needs to be removed. This allows to call the function from outer loop walking clone diff --git a/gcc/common.opt b/gcc/common.opt index 404b6aa..00c86dc 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -3344,4 +3344,10 @@ fipa-ra Common Report Var(flag_ipa_ra) Optimization Use caller save register across calls if possible. +finitcall-cp +Common Report Var(flag_initcall_cp) TBD. + +finitcall-cp-dryrun +Common Report Var(flag_initcall_cp_dryrun) TBD. + ; This comment is to ensure we retain the blank line above. 
diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index 693c7a2..4169dfb 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -1187,7 +1187,7 @@ initialize_node_lattices (struct cgraph_node *node) gcc_checking_assert (node->has_gimple_body_p ()); - if (!ipa_get_param_count (info)) + if (!ipa_get_param_count (info) || node->skip_ipa_cp) disable = true; else if (node->local) { diff --git a/gcc/ipa-initcall-cp.c b/gcc/ipa-initcall-cp.c new file mode 100644 index 0000000..264bf38 --- /dev/null +++ b/gcc/ipa-initcall-cp.c @@ -0,0 +1,1467 @@ +/* Initcall constant propagation pass. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "tree.h" +#include "tree-eh.h" +#include "gimple.h" +#include "gimple-expr.h" +#include "gimple-iterator.h" +#include "predict.h" +#include "alloc-pool.h" +#include "tree-pass.h" +#include "cgraph.h" +#include "diagnostic.h" +#include "fold-const.h" +#include "gimple-fold.h" +#include "symbol-summary.h" +#include "tree-vrp.h" +#include "ipa-prop.h" +#include "tree-pretty-print.h" +#include "tree-inline.h" +#include "ipa-fnsummary.h" +#include "ipa-utils.h" +#include "tree-ssa-ccp.h" +#include "stringpool.h" +#include "attribs.h" +#include "gimple-pretty-print.h" +#include "ssa.h" + +bool +reach_nodes_via_bb1 (cgraph_node *n2); +static bool +bb1_dominates_bb2 (basic_block, basic_block, cgraph_node *); +static bool +write_before_call (struct ipa_ref *, struct ipa_ref *); +static bool +call_before_read (struct ipa_ref *, struct ipa_ref *); +static hash_map<const char *, unsigned> *clone_num_suffixes1; +static hash_map<cgraph_node *, cgraph_node *> *func_to_clone; +static vec<struct cgraph_node *> +get_nondominated_callees (cgraph_node *caller, cgraph_node *callee, + bool *exitEarly = NULL); + +static bool +comdat_can_be_unshared_p_1 (symtab_node *node) +{ + if (!node->externally_visible) + return true; + if (node->address_can_be_compared_p ()) + { + struct ipa_ref *ref; + + for (unsigned int i = 0; 
node->iterate_referring (i, ref); i++) + if (ref->address_matters_p ()) + return false; + } + + /* If the symbol is used in some weird way, + * better to not touch it. */ + if (node->force_output) + return false; + + /* Explicit instantiations needs to be output when possibly + used externally. */ + if (node->forced_by_abi && TREE_PUBLIC (node->decl) + && (node->resolution != LDPR_PREVAILING_DEF_IRONLY + && !flag_whole_program)) + return false; + + /* Non-readonly and volatile variables cannot be duplicated. */ + if (is_a<varpool_node *> (node) + && (!TREE_READONLY (node->decl) || TREE_THIS_VOLATILE (node->decl))) + return false; + return true; +} + +/* COMDAT functions must be shared only if they have address taken, + otherwise we can produce our own private implementation with + -fwhole-program. + Return true when turning COMDAT function static cannot lead to wrong + code when the resulting object links with a library defining same + COMDAT. + + Virtual functions do have their addresses taken from the vtables, + but in C++ there is no way to compare their addresses + for equality. */ + +static bool +comdat_can_be_unshared_p (symtab_node *node) +{ + if (!comdat_can_be_unshared_p_1 (node)) + return false; + if (node->same_comdat_group) + { + symtab_node *next; + + /* If more than one function is in the same COMDAT group, it must + be shared even if just one function in the comdat group has + address taken. */ + for (next = node->same_comdat_group; next != node; + next = next->same_comdat_group) + if (!comdat_can_be_unshared_p_1 (next)) + return false; + } + return true; +} + +/* Return true when function NODE should + * be considered externally visible. 
*/ + +static bool +cgraph_externally_visible_p (struct cgraph_node *node, bool whole_program) +{ + while (node->transparent_alias && node->definition) + node = node->get_alias_target (); + if (!node->definition) + return false; + if (!TREE_PUBLIC (node->decl) || DECL_EXTERNAL (node->decl)) + return false; + + /* Do not try to localize built-in functions yet. + One of problems is that we + end up mangling their asm for + WHOPR that makes it impossible to call them + using the implicit built-in declarations + anymore. Similarly this enables + us to remove them as unreachable before + actual calls may appear during + expansion or folding. */ + if (fndecl_built_in_p (node->decl)) + return true; + + /* If linker counts on us, we must preserve the function. */ + if (node->used_from_object_file_p ()) + return true; + if (DECL_PRESERVE_P (node->decl)) + return true; + if (lookup_attribute ("externally_visible", DECL_ATTRIBUTES (node->decl))) + return true; + if (lookup_attribute ("noipa", DECL_ATTRIBUTES (node->decl))) + return true; + if (TARGET_DLLIMPORT_DECL_ATTRIBUTES + && lookup_attribute ("dllexport", DECL_ATTRIBUTES (node->decl))) + return true; + if (node->resolution == LDPR_PREVAILING_DEF_IRONLY) + return false; + /* When doing LTO or whole program, we can bring COMDAT functions + static. + This improves code quality and we know we will duplicate them at + most twice + (in the case that we are not using plugin and link with object + file implementing same COMDAT) */ + if (((in_lto_p || whole_program) && !flag_incremental_link) + && DECL_COMDAT (node->decl) && comdat_can_be_unshared_p (node)) + return false; + + /* When doing link time optimizations, + * hidden symbols become local. */ + if ((in_lto_p && !flag_incremental_link) + && (DECL_VISIBILITY (node->decl) == VISIBILITY_HIDDEN + || DECL_VISIBILITY (node->decl) == VISIBILITY_INTERNAL) + /* Be sure that node is defined in IR file, not in other object + file. 
In that case we don't set + used_from_other_object_file. */ + && node->definition) + ; + else if (!whole_program) + return true; + + if (MAIN_NAME_P (DECL_NAME (node->decl))) + return true; + + return false; +} + +typedef vec<struct ipa_ref *> ipa_ref_vec; + +// XXX: Move to ipa-ref.h +static const char * +stringify_ipa_ref_use (ipa_ref_use use) +{ + switch (use) + { + case IPA_REF_LOAD: + return "IPA_REF_LOAD"; + break; + case IPA_REF_STORE: + return "IPA_REF_STORE"; + break; + case IPA_REF_ADDR: + return "IPA_REF_ADDR"; + break; + case IPA_REF_ALIAS: + return "IPA_REF_ALIAS"; + break; + default: + return "<unknown>"; + } +} + +static void +load_function_body_of_ipa_ref (cgraph_node *node) +{ + if (in_lto_p) + { + cgraph_node *f_cnode2 = node->ultimate_alias_target (); + if (dump_file) + { + fprintf (dump_file, "%s: for function '%s'\n", __func__, + f_cnode2->dump_asm_name ()); + dump_node (f_cnode2->decl, TDF_DETAILS, dump_file); + } + f_cnode2->get_untransformed_body (); + } +} + +static void +load_function_body_of_ipa_ref (struct ipa_ref *ref) +{ + /* IPA passes don't get the function bodies during LTO. */ + if (in_lto_p) + { + symtab_node *f_node = ref->referring; + cgraph_node *f_cnode = dyn_cast<cgraph_node *> (f_node); + load_function_body_of_ipa_ref (f_cnode); + } +} + +static void +dump_vnode_ipa_ref (ipa_ref *ref) +{ + fprintf (dump_file, "Reference type: %s\n", stringify_ipa_ref_use (ref->use)); + + symtab_node *f_node = ref->referring; + fprintf (dump_file, "Reference function: %s\n", f_node->dump_asm_name ()); + + fprintf (dump_file, "Gimple stmt (@0x%lx, lto_stmt_uid: %u):\n", + (long) ref->stmt, ref->lto_stmt_uid); + load_function_body_of_ipa_ref (ref); + print_gimple_stmt (dump_file, ref->stmt, 0, TDF_NONE); +} + +// XXX: Move to tree.h +static bool +tree_code_is_cst (tree op) +{ + // XXX: use cprop_constant_p () here? 
+ int code = TREE_CODE (op); + if (code == INTEGER_CST || code == REAL_CST || code == COMPLEX_CST + || code == VECTOR_CST) + return true; + return false; +} + +/* Return true of all ops of assignment are constants. */ +static bool +gimple_assign_is_single_const (gimple *stmt) +{ + tree op; + + gcc_assert (gimple_code (stmt) == GIMPLE_ASSIGN); + + if (gimple_has_volatile_ops (stmt)) + { + if (dump_file) + fprintf (dump_file, "has volatile ops!\n"); + return false; + } + + if (gimple_num_ops (stmt) != 2) + { + if (dump_file) + fprintf (dump_file, "wrong num of ops: %u!\n", gimple_num_ops (stmt)); + return false; + } + + op = gimple_op (stmt, 1); + if (!tree_code_is_cst (op)) + { + if (dump_file) + fprintf (dump_file, "op is not cst!\n"); + return false; + } + + return true; +} + +// XXX: Move to ipa-utils.c +// XXX: from/to could be const +static bool +path_exists_dfs (hash_set<cgraph_node *> *visited, cgraph_node *current, + cgraph_node *target) +{ + if (current == target) + return true; + + visited->add (current); + + cgraph_edge *cs; + for (cs = current->callees; cs; cs = cs->next_callee) + { + cgraph_node *callee = cs->callee->function_symbol (); + if (!visited->contains (callee)) + { + if (path_exists_dfs (visited, callee, target)) + { + return true; + } + } + } + return false; +} + +// XXX: Move to ipa-utils.c +// XXX: from/to could be const +static bool +path_exists (cgraph_node *from, cgraph_node *to) +{ + hash_set<cgraph_node *> visited; + bool found_path = path_exists_dfs (&visited, from, to); + visited.empty (); + + if (dump_file) + { + fprintf (dump_file, "Found "); + if (!found_path) + fprintf (dump_file, "*no* "); + fprintf (dump_file, "path from %s to %s.\n", from->name (), to->name ()); + } + return found_path; +} + +static vec<struct cgraph_node *> +get_nondominated_callees (cgraph_node *caller, cgraph_node *callee, + bool *exitEarly) +{ + // assumptions: + // * there is a callsite from caller to callee + + if (in_lto_p) + 
caller->get_untransformed_body (); + + function *func = DECL_STRUCT_FUNCTION (caller->decl); + basic_block bb; + bool found = false; + FOR_EACH_BB_FN (bb, func) + { + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (gimple_code (stmt) != GIMPLE_CALL) + continue; + + // The question here is, what happens if we don't have + // a tree that is of the type + // var = call() + // and instead we have a tree of the type + // call() + // are these trees even possible? + // I would prefer if they are all the same... + if (dump_file) + print_gimple_stmt (dump_file, stmt, 0, TDF_NONE); + tree rhs = gimple_op (stmt, 1); + if (TREE_CODE (rhs) == RECORD_TYPE) + { + gcc_assert (exitEarly); + *exitEarly = true; + return vNULL; + } + tree callee_decl = gimple_call_fndecl (stmt); + if (!callee_decl) + { + gcc_assert (exitEarly); + *exitEarly = true; + return vNULL; + } + cgraph_node *callee_node = cgraph_node::get (callee_decl); + if (callee_node != callee) + continue; + + found = true; + } + if (found) + break; + } + + gcc_assert (found); + + // At this point in the program, we hold a valid bb. 
+ // The callee is located + vec<struct cgraph_node *> callees = vNULL; + basic_block bb_c; + FOR_EACH_BB_FN (bb_c, func) + { + bool self_dominates = bb == bb_c; + bool bb_dominates_bbc = bb1_dominates_bb2 (bb, bb_c, caller); + if (bb_dominates_bbc && !self_dominates) + continue; + + for (gimple_stmt_iterator gsi = gsi_start_bb (bb_c); !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (gimple_code (stmt) != GIMPLE_CALL) + continue; + + tree callee_decl = gimple_call_fndecl (stmt); + cgraph_node *callee_node = cgraph_node::get (callee_decl); + + if (fndecl_built_in_p (callee_node->decl)) + continue; + + if (self_dominates && callee_node == callee) + { + break; + } + + callees.safe_push (callee_node); + } + } + return callees; +} + +// XXX: Move to class symtab_node in cgraph.h +static ipa_ref * +symtab_node_iterate_address_uses (symtab_node *n, unsigned i, ipa_ref *&ref) +{ + ipa_ref *retval = NULL; + for (int i = 0; n->iterate_referring (i, retval); i++) + if (ref->use == IPA_REF_ADDR) + return ref; + return NULL; +} + +// XXX: Move to class symtab_node in cgraph.h +static bool +symtab_node_address_matters_p (symtab_node *n) +{ + struct ipa_ref *ref; + + if (!n->address_can_be_compared_p ()) + return false; + if (n->externally_visible || n->force_output) + return true; + + for (unsigned int i = 0; n->iterate_referring (i, ref); i++) + if (ref->address_matters_p ()) + return true; + return false; +} + +static bool +bb1_dominates_bb2 (basic_block bb1, basic_block bb2, cgraph_node *cnode) +{ + // self dominance + if (bb1 == bb2) + return true; + + push_cfun (DECL_STRUCT_FUNCTION (cnode->decl)); + + /* If dominator info is not available, we need to calculate it. */ + if (!dom_info_available_p (CDI_DOMINATORS)) + calculate_dominance_info (CDI_DOMINATORS); + + /* Check if the read is dominated by the write. */ + bool ret = dominated_by_p (CDI_DOMINATORS, bb2, bb1); + + /* Restore cfun. 
*/ + pop_cfun (); + + return ret; +} + +static bool +write_before_read_in_function (struct ipa_ref *write, struct ipa_ref *read) +{ + basic_block w_bb = gimple_bb (write->stmt); + basic_block r_bb = gimple_bb (read->stmt); + + if (w_bb != r_bb) + { + /* + * The dominator framework operates on cfun. + * Therefore we need to set cfun accordingly. + */ + symtab_node *w_node = write->referring; + cgraph_node *w_cnode = dyn_cast<cgraph_node *> (w_node); + return bb1_dominates_bb2 (w_bb, r_bb, w_cnode); + } + + gimple_stmt_iterator gsi; + for (gsi = gsi_start_bb (w_bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + if (gsi_stmt (gsi) == write->stmt) + return true; + if (gsi_stmt (gsi) == read->stmt) + return false; + } + + /* Neither write nor read found it BB. */ + gcc_assert (1); + + return false; +} + +/* + * DFS over callees and return if callee is a or b. + */ +static cgraph_node * +find_cgraph_in_callee_set (cgraph_node *n, hash_set<cgraph_node *> set, + cgraph_node *a, cgraph_node *b) +{ + cgraph_edge *cs; + for (cs = n->callees; cs; cs = cs->next_callee) + { + cgraph_node *callee = cs->callee->function_symbol (); + if (dump_file) + fprintf (dump_file, "Child of %s: %s\n", n->dump_asm_name (), + callee->dump_asm_name ()); + if (callee == a) + return a; + if (callee == b) + return b; + if (!set.contains (callee)) + continue; + return find_cgraph_in_callee_set (callee, set, a, b); + } + return NULL; +} + +/* Walks back along caller relations until main is found. 
*/ +static cgraph_node * +get_ancestor_main_dfs (hash_set<cgraph_node *> *visited, + vec<cgraph_node *> *path, cgraph_node *node) +{ + if (MAIN_NAME_P (DECL_NAME (node->decl))) + { + path->safe_push (node); + return node; + } + + visited->add (node); + + cgraph_edge *cs; + for (cs = node->callers; cs; cs = cs->next_caller) + { + cgraph_node *caller = cs->caller->function_symbol (); + if (!visited->contains (caller)) + { + cgraph_node *main = get_ancestor_main_dfs (visited, path, caller); + if (main) + { + path->safe_push (node); + return main; + } + } + } + return NULL; +} + +static cgraph_node * +get_path_from_main_to (cgraph_node *node, vec<cgraph_node *> *path) +{ + hash_set<cgraph_node *> visited; + cgraph_node *main = get_ancestor_main_dfs (&visited, path, node); + visited.empty (); + return main; +} + +/* + * Verifying that a stmt s1 is dominated by a stmt s2 + * across function borders is not trivial with the available + * infrastructure (dominator algorithm on bb level plus cgraph). + * If we rule out external calls/callbacks, then we still need + * to guarantee a write on each possible path to the read. + * + * The implemented solution to this problem, which is of course + * too strict, + * but also not too compute/memory intensive is as follows: + * + * - Check if write is reachable by main () by only looking into + * the first bb in each function on the path. + * - All call stmts between main () and write must not possibly + * reach a read. We consider indirect call statements as + * possible reaching read (unless we can prove opposite). + * + * The external calls/callbacks are ruled out as follows: + * + * - all possible ancestors of read must not be external visible + * - all possible ancestors of read must not be function pointers + * - Something else? 
+ */ +static bool +write_before_read_across_function_borders (struct ipa_ref *write, + struct ipa_ref *read) +{ + symtab_node *w_node = write->referring; + cgraph_node *w_cnode = dyn_cast<cgraph_node *> (w_node); + symtab_node *r_node = read->referring; + cgraph_node *r_cnode = dyn_cast<cgraph_node *> (r_node); + cgraph_node *main; + + /* Get main () function. */ + vec<cgraph_node *> pw = vNULL; + main = get_path_from_main_to (w_cnode, &pw); + if (!main) + { + if (dump_file) + fprintf (dump_file, "main () is not ancestor of write -> skipping.\n"); + return false; + } + + vec<cgraph_node *> pr = vNULL; + cgraph_node *main_to_read = get_path_from_main_to (r_cnode, &pr); + if (!main_to_read) + { + if (dump_file) + fprintf (dump_file, "main () is not ancestor of read -> skipping.\n"); + return false; + } + + // TODO: is write guaranteed to happen? + // if it is not guaranteed to happen, we shouldn't modify anything! + // The very first approximation. There should be a path from main to w_cnode + // such that only the first basic block is actually inspected. + // Future work will involve looking at unconditional paths. + // I think right now, all paths are reachable from first basic block + if (!reach_nodes_via_bb1 (w_cnode)) + return false; + + int i; + cgraph_node *node_in_pr; + FOR_EACH_VEC_ELT (pr, i, node_in_pr) + { + // I think main has to be externally visible. + if (node_in_pr == main) + continue; + + /* Assure that all paths to read are not externally visible. */ + if (cgraph_externally_visible_p (node_in_pr, flag_whole_program)) + { + if (dump_file) + fprintf (dump_file, "%s is externally visible... skipping\n", + node_in_pr->dump_asm_name ()); + return false; + } + + /* Assure that all paths to read are not + * used as function pointers. */ + if (node_in_pr->address_taken) + { + if (dump_file) + fprintf (dump_file, "%s might be function pointer... 
skipping\n", + node_in_pr->dump_asm_name ()); + return false; + } + } + + cgraph_node *caller = main; + cgraph_node *node_in_pw; + FOR_EACH_VEC_ELT (pw, i, node_in_pw) + { + gcc_assert (w_cnode != r_cnode); + if (node_in_pw == r_cnode && path_exists (r_cnode, w_cnode)) + { + return call_before_read (write, read); + } + + if (node_in_pw == w_cnode && path_exists (w_cnode, r_cnode)) + { + return write_before_call (write, read); + } + + if (node_in_pw == main) + { + continue; + } + + if (dump_file) + fprintf (dump_file, "get_nondominated_callees from %s to %s\n", + caller->dump_asm_name (), node_in_pw->dump_asm_name ()); + + bool exitEarly = false; + vec<cgraph_node *> non_dominated_callees + = get_nondominated_callees (caller, node_in_pw, &exitEarly); + if (exitEarly) + return false; + cgraph_node *non_dominated_callee; + int j; + FOR_EACH_VEC_ELT (non_dominated_callees, j, non_dominated_callee) + { + if (dump_file) + fprintf (dump_file, "callee %d %s\n", j, + non_dominated_callee->dump_asm_name ()); + if (path_exists (non_dominated_callee, r_cnode)) + { + return false; + } + } + + caller = node_in_pw; + } + return true; +} + +static vec<struct cgraph_node *> +get_bb1_callees (cgraph_node *c, cgraph_node *w_cnode) +{ + vec<struct cgraph_node *> callees = vNULL; + if (fndecl_built_in_p (c->decl)) + return vNULL; + + if (dump_file) + fprintf (dump_file, "before get_untransformed_body %s\n", + c->dump_asm_name ()); + + if (!c->definition) + return vNULL; + + push_cfun (DECL_STRUCT_FUNCTION (c->decl)); + + if (in_lto_p) + c->get_untransformed_body (); + + pop_cfun (); + + function *func = DECL_STRUCT_FUNCTION (c->decl); + /* Get first BB (after the fake entry BB). 
*/ + basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (func)->next_bb; + + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + + if (gimple_code (stmt) != GIMPLE_CALL) + continue; + + tree callee_decl = gimple_call_fndecl (stmt); + cgraph_node *callee_node = cgraph_node::get (callee_decl); + if (!path_exists (callee_node, w_cnode)) + continue; + + callees.safe_push (callee_node); + } + + return callees; +} + +static bool +reach_nodes_via_bb1_dfs (hash_set<cgraph_node *> *visited, cgraph_node *n1, + cgraph_node *n2) +{ + if (n1 == n2) + return true; + + visited->add (n1); + + vec<struct cgraph_node *> callees = get_bb1_callees (n1, n2); + + int i; + cgraph_node *callee; + FOR_EACH_VEC_ELT (callees, i, callee) + { + if (!visited->contains (callee)) + { + bool found = reach_nodes_via_bb1_dfs (visited, callee, n2); + if (found) + { + callees.release (); + return true; + } + } + } + + return false; +} + +bool +reach_nodes_via_bb1 (cgraph_node *n2) +{ + vec<cgraph_node *> pr = vNULL; + cgraph_node *main = get_path_from_main_to (n2, &pr); + hash_set<cgraph_node *> visited; + bool path_exists = reach_nodes_via_bb1_dfs (&visited, main, n2); + visited.empty (); + pr.release (); + return path_exists; +} + +static bool +write_before_call (struct ipa_ref *write, struct ipa_ref *read) +{ + symtab_node *w_node = write->referring; + cgraph_node *w_cnode = dyn_cast<cgraph_node *> (w_node); + symtab_node *r_node = read->referring; + cgraph_node *r_cnode = dyn_cast<cgraph_node *> (r_node); + + gcc_assert (path_exists (w_cnode, r_cnode)); + gcc_assert (w_cnode != r_cnode); + + basic_block w_bb = gimple_bb (write->stmt); + basic_block r_bb = gimple_bb (read->stmt); + + if (in_lto_p) + { + w_cnode->get_untransformed_body (); + } + + function *func = DECL_STRUCT_FUNCTION (w_cnode->decl); + basic_block c_bb; + vec<struct cgraph_node *> callees = vNULL; + FOR_EACH_BB_FN (c_bb, func) + { + bool self_dominates = w_bb == c_bb; 
+ bool w_bb_dominates_c_bb = bb1_dominates_bb2 (w_bb, c_bb, w_cnode); + if (w_bb_dominates_c_bb && !self_dominates) + continue; + + for (gimple_stmt_iterator gsi = gsi_start_bb (c_bb); !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + + if (stmt == write->stmt) + { + break; + } + + if (gimple_code (stmt) != GIMPLE_CALL) + { + continue; + } + if (dump_file) + print_gimple_stmt (dump_file, stmt, 0, TDF_NONE); + tree rhs = gimple_op (stmt, 1); + + if (rhs && TREE_CODE (rhs) == POINTER_TYPE) + return false; + + tree callee_decl = gimple_call_fndecl (stmt); + if (!callee_decl) + return false; + + cgraph_node *callee_node = cgraph_node::get (callee_decl); + const char *identifier + = IDENTIFIER_POINTER (DECL_NAME (callee_node->decl)); + const char *sigsetjmp = "sigsetjmp"; + if (strstr (identifier, sigsetjmp) != NULL) + return false; + + if (fndecl_built_in_p (callee_node->decl)) + continue; + + if (dump_file) + fprintf (dump_file, "found callee %s\n", + callee_node->dump_asm_name ()); + callees.safe_push (callee_node); + } + } + cgraph_node *callee; + int i; + FOR_EACH_VEC_ELT (callees, i, callee) + { + if (path_exists (callee, r_cnode)) + { + return false; + } + } + + return true; +} + +static bool +call_before_read (struct ipa_ref *write, struct ipa_ref *read) +{ + symtab_node *w_node = write->referring; + cgraph_node *w_cnode = dyn_cast<cgraph_node *> (w_node); + symtab_node *r_node = read->referring; + cgraph_node *r_cnode = dyn_cast<cgraph_node *> (r_node); + + gcc_assert (path_exists (r_cnode, w_cnode)); + gcc_assert (w_cnode != r_cnode); + + basic_block w_bb = gimple_bb (write->stmt); + basic_block r_bb = gimple_bb (read->stmt); + + if (in_lto_p) + r_cnode->get_untransformed_body (); + + function *func = DECL_STRUCT_FUNCTION (r_cnode->decl); + basic_block c_bb; + FOR_EACH_BB_FN (c_bb, func) + { + bool self_dominates = c_bb == r_bb; + bool call_dominates_read = bb1_dominates_bb2 (c_bb, r_bb, r_cnode); + if (!call_dominates_read && 
!self_dominates) + continue; + + for (gimple_stmt_iterator gsi = gsi_start_bb (c_bb); !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + + // self dominance + if (stmt == read->stmt) + break; + + if (gimple_code (stmt) != GIMPLE_CALL) + continue; + + if (dump_file) + print_gimple_stmt (dump_file, stmt, 0, TDF_NONE); + tree rhs = gimple_op (stmt, 1); + if (TREE_CODE (rhs) == POINTER_TYPE) + return false; + tree callee_decl = gimple_call_fndecl (stmt); + + cgraph_node *callee_node = cgraph_node::get (callee_decl); + const char *identifier + = IDENTIFIER_POINTER (DECL_NAME (callee_node->decl)); + const char *sigsetjmp = "sigsetjmp"; + if (strstr (identifier, sigsetjmp) != NULL) + return false; + + if (path_exists (callee_node, w_cnode)) + return true; + } + } + return false; +} + +static bool +write_before_read (struct ipa_ref *write, struct ipa_ref *read) +{ + symtab_node *w_node = write->referring; + cgraph_node *w_cnode = dyn_cast<cgraph_node *> (w_node); + symtab_node *r_node = read->referring; + cgraph_node *r_cnode = dyn_cast<cgraph_node *> (r_node); + + if (w_cnode == r_cnode) + /* Within the same function. */ + return write_before_read_in_function (write, read); + + /* Not within the same function. 
*/ + return write_before_read_across_function_borders (write, read); +} + +static bool +write_before_all_reads (struct ipa_ref *write, const ipa_ref_vec &reads) +{ + int i; + struct ipa_ref *read; + + FOR_EACH_VEC_ELT (reads, i, read) + { + load_function_body_of_ipa_ref (read); + if (!write_before_read (write, read)) + return false; + } + return true; +} + +static void +propagate_constant_to_read (tree write_val, struct ipa_ref *ref, + hash_set<cgraph_node *> *func) +{ + gcc_assert (ref->use == IPA_REF_LOAD); + load_function_body_of_ipa_ref (ref); + + gimple *read_stmt = ref->stmt; + + gcc_assert (gimple_code (read_stmt) == GIMPLE_ASSIGN); + gcc_assert (gimple_num_ops (read_stmt) == 2); + + tree old_lhs = gimple_op (read_stmt, 0); + symtab_node *r_node = ref->referring; + cgraph_node *r_cnode = dyn_cast<cgraph_node *> (r_node); + + // if (!func->contains (r_cnode)) func->add (r_cnode); + cgraph_node **possible_clone = func_to_clone->get (r_cnode); + if (!possible_clone) + { + const char *suffix = "test"; + // callers has to be vNULL, otherwise, we will be + // analyzing clones... + // and we don't want that... + // but that means that we will need to update the callers + // later... in update_callgraph + cgraph_node *clone + = r_cnode->create_version_clone_with_body (vNULL, NULL, NULL, NULL, + NULL, suffix, NULL); + clone->get_untransformed_body (); + func_to_clone->put (r_cnode, clone); + } + + possible_clone = func_to_clone->get (r_cnode); + cgraph_node *clone = *possible_clone; + + // Build new stmt and replace old. + gimple_stmt_iterator gsi; + basic_block bb; + // Let's create a clone with body here... + // The clone should not have callers as + // to not interfere with the current + // analysis. + // The callers will be set at the end... 
+ + push_cfun (DECL_STRUCT_FUNCTION (clone->decl)); + function *clone_func = DECL_STRUCT_FUNCTION (clone->decl); + bool found = false; + FOR_EACH_BB_FN (bb, clone_func) + { + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (gimple_code (stmt) != GIMPLE_ASSIGN) + continue; + tree old_val_clone = gimple_op (stmt, 1); + tree lhs = gimple_op (stmt, 0); + + // TODO: what happens when it is not a variable decl? + // This might actually be the source of imprecision + if (TREE_CODE (old_val_clone) != VAR_DECL) + continue; + + tree old_val = gimple_op (read_stmt, 1); + if (IDENTIFIER_POINTER (DECL_NAME (old_val_clone)) + != IDENTIFIER_POINTER (DECL_NAME (old_val))) + continue; + + found = true; + + // REPLACE!!! + gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt); + tree new_lhs = make_ssa_name (TREE_TYPE (lhs)); + gimple *new_read_stmt = gimple_build_assign (new_lhs, write_val); + gsi_insert_before (&gsi2, new_read_stmt, GSI_SAME_STMT); + + gimple *use_stmt; + imm_use_iterator use_iter; + FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs) + { + use_operand_p use_p; + FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter) + SET_USE (use_p, new_lhs); + update_stmt (use_stmt); + } + } + + if (found) + break; + } + gcc_assert (found); + pop_cfun (); +} + +static bool +ipa_initcall_get_writes_and_reads (varpool_node *vnode, ipa_ref_vec *writes, + ipa_ref_vec *reads) +{ + int i; + struct ipa_ref *ref; + + if (dump_file) + fprintf (dump_file, "%s for variable '%s'.\n", __func__, vnode->name ()); + + /* Only IPA_REF_STORE and IPA_REF_LOAD left. */ + for (i = 0; vnode->iterate_referring (i, ref); i++) + { + symtab_node *f_node = ref->referring; + cgraph_node *f_cnode = dyn_cast<cgraph_node *> (f_node); + + if (!f_cnode) + { + if (dump_file) + fprintf (dump_file, + "skipping variable %s due to static initialization\n", + vnode->name ()); + return false; + } + // it is possible that f_cnode is NULL if the dyn_cast fails. 
+ // If the dyn_cast fails, this is an example of static initialization. + const char *identifier = IDENTIFIER_POINTER (DECL_NAME (f_cnode->decl)); + // This is the suffix we are adding to our clones. + // Therefore, if we find the suffix in the identifier, + // that means that we found a variable in a clone and + // we would like to skip it. + // TODO: make this a global static for easier changes... + const char *suffix = "test.0"; + if (strstr (identifier, suffix) != NULL) + continue; + + if (ref->use == IPA_REF_STORE) + writes->safe_push (ref); + + if (ref->use == IPA_REF_LOAD) + reads->safe_push (ref); + } + return true; +} + +static void +propagate_constant_to_reads (tree write_val, const ipa_ref_vec &reads_original, + hash_set<cgraph_node *> *funcs) +{ + /* Iterate over reads and replace them by constant. */ + struct ipa_ref *ref; + int i; + FOR_EACH_VEC_ELT (reads_original, i, ref) + { + propagate_constant_to_read (write_val, ref, funcs); + } +} + +/* + * Extracts the assigned constant, iff the given statement + * is a constant assignment. Returns NULL_TREE otherwise. + */ +static tree +extract_constant_from_initcall_write (struct ipa_ref *write) +{ + symtab_node *f_node = write->referring; + cgraph_node *f_cnode = dyn_cast<cgraph_node *> (f_node); + + tree decl = f_cnode->decl; + if (TREE_CODE (decl) != FUNCTION_DECL) + { + if (dump_file) + fprintf (dump_file, "Not function decl -> skipping.\n"); + return NULL_TREE; + } + + if (!f_cnode->has_gimple_body_p () && dump_file) + fprintf (dump_file, "Does NOT have gimple body!\n"); + if (f_cnode->inlined_to && dump_file) + fprintf (dump_file, "Inlined To\n"); + + if (dump_file) + { + fprintf (dump_file, "%s: for writer gimple:\n", __func__); + dump_vnode_ipa_ref (write); + } + + /* Get the write function's body. */ + load_function_body_of_ipa_ref (write); + + gimple *stmt = write->stmt; + + /* Verify that we have an assignment. 
*/ + if (gimple_code (stmt) != GIMPLE_ASSIGN) + { + if (dump_file) + fprintf (dump_file, "Write stmt not assignment -> skipping.\n"); + return NULL_TREE; + } + + /* Check if write's LHS (vnode) is not volatile. */ + tree lhs = gimple_assign_lhs (stmt); + if (TREE_THIS_VOLATILE (TREE_TYPE (lhs))) + { + if (dump_file) + fprintf (dump_file, "Variable volatile -> skipping.\n"); + return NULL_TREE; + } + + /* Check if RHS of write stmt is constant. */ + if (!gimple_assign_is_single_const (stmt)) + { + if (dump_file) + { + fprintf (dump_file, "Found non-const operands.\n"); + } + return NULL_TREE; + } + + /* Extract the constant. */ + tree write_val = gimple_op (stmt, 1); + + if (dump_file) + { + fprintf (dump_file, "Const op:\n"); + dump_node (write_val, TDF_DETAILS, dump_file); + } + + return write_val; +} + +static void +ipa_initcall_cp_execute_for_var (varpool_node *vnode, + hash_set<cgraph_node *> *update_functions) +{ + ipa_ref_vec writes = vNULL; + ipa_ref_vec reads = vNULL; + struct ipa_ref *write; + tree write_val; + gimple_stmt_iterator gsi; + bool remove_permanently; + + if (dump_file) + fprintf (dump_file, "%s for variable '%s'.\n", __func__, vnode->name ()); + + if (dump_file) + { + dump_node (vnode->decl, TDF_DETAILS, dump_file); + } + + /* Variable must not be externally visible. */ + if (vnode->externally_visible_p ()) + { + if (dump_file) + fprintf (dump_file, "\texternally visible " + "-> skipping\n"); + return; + } + + /* All refs must be explicit. */ + if (!vnode->all_refs_explicit_p ()) + { + if (dump_file) + fprintf (dump_file, "Not explicit variable refs " + "-> skipping.\n"); + return; + } + + /* Check if any ref->use is IPA_REF_ALIAS. */ + if (vnode->has_aliases_p ()) + { + if (dump_file) + fprintf (dump_file, "Found IPA_REF_ALIAS -> skipping.\n"); + return; + } + + /* Check if any ref->use is IPA_REF_ADDR. 
*/ + if (symtab_node_address_matters_p (vnode)) + { + if (dump_file) + fprintf (dump_file, "Found IPA_REF_ADDR -> skipping.\n"); + return; + } + + /* We don't touch arrays. */ + if (TREE_CODE (TREE_TYPE (vnode->decl)) == ARRAY_TYPE) + { + if (dump_file) + fprintf (dump_file, "Variable is array -> skipping.\n"); + return; + } + + /* We don't touch structs. */ + if (TREE_CODE (TREE_TYPE (vnode->decl)) == RECORD_TYPE) + { + if (dump_file) + fprintf (dump_file, "Variable is struct -> skipping.\n"); + return; + } + + /* We don't touch unions. */ + if (TREE_CODE (TREE_TYPE (vnode->decl)) == UNION_TYPE) + { + if (dump_file) + fprintf (dump_file, "Variable is union -> skipping.\n"); + return; + } + + /* Get all refs (must be REF_STORE or REF_LOAD). */ + bool continue_work + = ipa_initcall_get_writes_and_reads (vnode, &writes, &reads); + if (!continue_work) + { + if (dump_file) + fprintf (dump_file, "Static initialization -> skipping.\n"); + writes.release (); + reads.release (); + return; + } + + if (writes.length () > 1) + { + /* More than one writer. */ + if (dump_file) + fprintf (dump_file, "More than one writer -> skipping.\n"); + writes.release (); + reads.release (); + return; + } + else if (writes.length () < 1) + { + /* No writer. */ + if (dump_file) + fprintf (dump_file, "No writer -> skipping.\n"); + writes.release (); + reads.release (); + return; + } + + /* + * Note: + * Limiting ourselfs to only one write is not necessary in general, + * but good enough to address the typical init () case. + * Big advantage is, that it makes the following code much easier. + * + * TODO: I believe that if we create clones, we might get two writes + * Investigate + */ + + /* Get (only) write ref. */ + write = writes.pop (); + + /* Check if write's RHS is a constant and get it. 
*/ + write_val = extract_constant_from_initcall_write (write); + if (write_val == NULL_TREE) + { + if (dump_file) + fprintf (dump_file, "Write's RHS is not constant" + " -> skipping.\n"); + writes.release (); + reads.release (); + return; + } + + /* Assure all reads are after the write. */ + if (!write_before_all_reads (write, reads)) + { + if (dump_file) + fprintf (dump_file, "Write not guaranteed " + "to be before read -> skipping.\n"); + writes.release (); + reads.release (); + return; + } + + /* Propagate constant to reads. */ + if (!flag_initcall_cp_dryrun) + propagate_constant_to_reads (write_val, reads, update_functions); + + /* Finally remove the write. */ + gsi = gsi_for_stmt (write->stmt); + remove_permanently = false; // XXX: fails with true? + // gsi_remove (&gsi, remove_permanently); + + if (dump_file) + fprintf (dump_file, "Eliminated variable '%s'.\n\n", vnode->name ()); + + writes.release (); + reads.release (); +} + +bool +update_callgraph (cgraph_node *const &r_cnode, cgraph_node **clone_ptr, void *) +{ + vec<cgraph_edge *> callers = r_cnode->collect_callers (); + cgraph_node *clone = *clone_ptr; + cgraph_edge *e; + int i; + profile_count count = r_cnode->count; + + FOR_EACH_VEC_ELT (callers, i, e) + e->redirect_callee (clone); + + /* + for (e = r_cnode->callees; e; e=e->next_callee) + e->clone (clone, e->call_stmt, e->lto_stmt_uid, count, count, true); + + for (e = r_cnode->indirect_calls; e; e=e->next_callee) + e->clone (clone, e->call_stmt, e->lto_stmt_uid, count, count, true); + */ + for (e = clone->callers; e; e = e->next_caller) + { + e->caller->get_untransformed_body (); + function *inner_function = DECL_STRUCT_FUNCTION (e->caller->decl); + gimple_call_set_fndecl (e->call_stmt, clone->decl); + maybe_clean_eh_stmt_fn (inner_function, e->call_stmt); + } + + r_cnode->skip_ipa_cp = true; + // r_cnode->remove (); + push_cfun (DECL_STRUCT_FUNCTION (r_cnode->decl)); + + if (dump_file) + fprintf (dump_file, "dumping function %s\n", 
r_cnode->dump_asm_name ()); + function *func = DECL_STRUCT_FUNCTION (r_cnode->decl); + basic_block bb; + FOR_EACH_BB_FN (bb, func) + { + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (dump_file) + print_gimple_stmt (dump_file, stmt, 0, TDF_NONE); + } + } + if (dom_info_available_p (CDI_DOMINATORS)) + free_dominance_info (CDI_DOMINATORS); + pop_cfun (); + return true; +} + +static unsigned int +ipa_initcall_cp_execute (void) +{ + varpool_node *vnode; + + clone_num_suffixes1 = new hash_map<const char *, unsigned>; + hash_set<cgraph_node *> update_functions; + func_to_clone = new hash_map<cgraph_node *, cgraph_node *>; + FOR_EACH_VARIABLE (vnode) + { + ipa_initcall_cp_execute_for_var (vnode, &update_functions); + } + + if (!flag_initcall_cp_dryrun) + func_to_clone->traverse<void *, update_callgraph> (NULL); + + delete clone_num_suffixes1; + delete func_to_clone; + + return 0; +} + +namespace { + +const pass_data pass_data_ipa_initcall_cp = { + IPA_PASS, + "initcall_cp", + OPTGROUP_NONE, + TV_NONE, + (PROP_cfg | PROP_ssa), + 0, + 0, + 0, + (TODO_update_ssa | TODO_cleanup_cfg | TODO_dump_symtab + | TODO_rebuild_cgraph_edges | TODO_discard_function), +}; + +class pass_ipa_initcall_cp : public ipa_opt_pass_d +{ +public: + pass_ipa_initcall_cp (gcc::context *ctxt) + : ipa_opt_pass_d (pass_data_ipa_initcall_cp, ctxt, NULL, NULL, NULL, NULL, + NULL, NULL, 0, NULL, NULL) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return flag_initcall_cp || flag_initcall_cp_dryrun; + } + + virtual unsigned int execute (function *) + { + return ipa_initcall_cp_execute (); + } + +}; // class pass_ipa_initcall_cp + +} // namespace + +ipa_opt_pass_d * +make_pass_ipa_initcall_cp (gcc::context *ctxt) +{ + return new pass_ipa_initcall_cp (ctxt); +} diff --git a/gcc/passes.def b/gcc/passes.def index 798a391..350a4fc 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -146,6 +146,7 @@ along 
with GCC; see the file COPYING3. If not see NEXT_PASS (pass_ipa_profile); NEXT_PASS (pass_ipa_icf); NEXT_PASS (pass_ipa_devirt); + NEXT_PASS (pass_ipa_initcall_cp); NEXT_PASS (pass_ipa_cp); NEXT_PASS (pass_ipa_sra); NEXT_PASS (pass_ipa_cdtor_merge); diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index a987661..6de60fe 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -498,6 +498,7 @@ extern ipa_opt_pass_d *make_pass_ipa_fn_summary (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_inline (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_free_lang_data (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_free_fn_summary (gcc::context *ctxt); +extern ipa_opt_pass_d *make_pass_ipa_initcall_cp (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_cp (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_sra (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_icf (gcc::context *ctxt); On 2019-11-29 4:21 p.m., Erick Ochoa wrote: > > Hello, > > my name is Erick and I am working in a link-time-optimization pass named > ipa-initcall-cp. It is called ipa-initcall-cp because it propagates constant > values written to variables with static lifetimes (such as ones initialized in > initialization functions). ipa-initcall-cp has to be located after > ipa-whole-program-visibility to determine which functions have external > visibility and before ipa-cp because ipa-cp can take advantage of our > transformation. > > I have attached a patch and test cases. 
I have applied and tested the patch > against: > > commit 17a2c588c29f089d3c2a36df47175bbdf376e399 > Date: Wed Nov 27 00:23:39 2019 +0000 > git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@278752 > 138bc75d-0d04-0410-961f-82ee72b054a4 > > In order to run the test cases: > * apply patch to gcc > * modify Makefile to point to modified gcc > * make > > The test cases test the order in which a write can happen regarding a read > (before or after) and whether the write/read happen in a function being called > by the function that reads/writes. The test cases succeed when the variables > prefixed with SUCCEED_* have been propagated and when the variables prefixed > with FAILED_* have NOT been propagated. To quickly determine which variables > have been propagated, you can do the following command: > > grep 'Eliminated' test.wpa.071i.initcall_cp > # SUCCESS_{0,1,2,3}* should be eliminated > > This compiler pass is able to compile non-trivial code, but it does not > compile > arbitrary valid code (sometimes linking fails). We are using > create_version_clone_with_body for creating the clones. The reasoning behind > this is that we want clones to have a body so that we can modify it. I have > been told that `create_version_clone_with_body` is used mainly in IPA passes > that are not part of INSERT_PASSES_AFTER (all_regular_ipa_passes). > > Question #1: Can someone please tell me what would be the best kind of clones > to use in this use case? > Question #2: I found out that clones produced with > create_version_clone_with_body are not themselves versionable. What is the > reason behind this? > Question #3: I have added a flag to cgraph_nodes to skip ipa-cp. This flag is > true for the original functions which have been removed from the call graph. > If this flag is removed, then cgraph_nodes removed from the call graph will > trigger an assertion in ipa-cp (ipa-cp.c:1195). I would like to not have to > add this flag to cgraph_node but I am not sure what to do. 
> > Please note: > * The patch is also included in the attachments file (because I've been having > issues with how e-mail clients format my content. Note, that this is a text > only html, but the patch might have rows longer than 80 characters (due to the > + sign) > and my e-mail client is splitting lines. > * The patch still does not meet the coding standards, I'll be working on that. > * The patch is included here (and the attachments) and the tests are included > as a attachments. I'll be working on integrating the tests as part of the > testing suite > > > Thanks! > > diff --git a/gcc/Makefile.in b/gcc/Makefile.in > index 7d3c132..ad8d998 100644 > --- a/gcc/Makefile.in > +++ b/gcc/Makefile.in > @@ -1370,6 +1370,7 @@ OBJS = \ > internal-fn.o \ > ipa-cp.o \ > ipa-sra.o \ > + ipa-initcall-cp.o \ > ipa-devirt.o \ > ipa-fnsummary.o \ > ipa-polymorphic-call.o \ > diff --git a/gcc/cgraph.c b/gcc/cgraph.c > index 1f7a5c5..bc51681 100644 > --- a/gcc/cgraph.c > +++ b/gcc/cgraph.c > @@ -287,6 +287,7 @@ symbol_table::create_empty (void) > node->type = SYMTAB_FUNCTION; > node->frequency = NODE_FREQUENCY_NORMAL; > node->count_materialization_scale = REG_BR_PROB_BASE; > + node->skip_ipa_cp = false; > cgraph_count++; > > return node; > @@ -3515,6 +3516,7 @@ cgraph_node::get_untransformed_body (void) > name = lto_get_decl_name_mapping (file_data, name); > struct lto_in_decl_state *decl_state > = lto_get_function_in_decl_state (file_data, decl); > + gcc_assert (decl_state != NULL); > > cgraph_node *origin = this; > while (origin->clone_of) > diff --git a/gcc/cgraph.h b/gcc/cgraph.h > index 0d2442c..82725ed 100644 > --- a/gcc/cgraph.h > +++ b/gcc/cgraph.h > @@ -899,6 +899,8 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : > public symtab_node > { > friend class symbol_table; > > + bool skip_ipa_cp; > + > /* Remove the node from cgraph and all inline clones inlined into it. > Skip however removal of FORBIDDEN_NODE and return true if it needs to be > removed. 
This allows to call the function from outer loop walking clone > diff --git a/gcc/common.opt b/gcc/common.opt > index 404b6aa..fb662ed 100644 > --- a/gcc/common.opt > +++ b/gcc/common.opt > @@ -3344,4 +3344,7 @@ fipa-ra > Common Report Var(flag_ipa_ra) Optimization > Use caller save register across calls if possible. > > +finitcall-cp > +Common Report Var(flag_initcall_cp) TBD. > + > ; This comment is to ensure we retain the blank line above. > diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c > index 31a98a3..72d1595 100644 > --- a/gcc/ipa-cp.c > +++ b/gcc/ipa-cp.c > @@ -1185,7 +1185,7 @@ initialize_node_lattices (struct cgraph_node *node) > > gcc_checking_assert (node->has_gimple_body_p ()); > > - if (!ipa_get_param_count (info)) > + if (!ipa_get_param_count (info) || node->skip_ipa_cp) > disable = true; > else if (node->local) > { > diff --git a/gcc/ipa-initcall-cp.c b/gcc/ipa-initcall-cp.c > new file mode 100644 > index 0000000..8f1c4ab > --- /dev/null > +++ b/gcc/ipa-initcall-cp.c > @@ -0,0 +1,1289 @@ > +/* Initcall constant propagation pass. 
*/ > + > +#include "config.h" > +#include "system.h" > +#include "coretypes.h" > +#include "backend.h" > +#include "tree.h" > +#include "tree-eh.h" > +#include "gimple.h" > +#include "gimple-expr.h" > +#include "gimple-iterator.h" > +#include "predict.h" > +#include "alloc-pool.h" > +#include "tree-pass.h" > +#include "cgraph.h" > +#include "diagnostic.h" > +#include "fold-const.h" > +#include "gimple-fold.h" > +#include "symbol-summary.h" > +#include "tree-vrp.h" > +#include "ipa-prop.h" > +#include "tree-pretty-print.h" > +#include "tree-inline.h" > +#include "ipa-fnsummary.h" > +#include "ipa-utils.h" > +#include "tree-ssa-ccp.h" > +#include "stringpool.h" > +#include "attribs.h" > +#include "gimple-pretty-print.h" > +#include "ssa.h" > + > +static bool bb1_dominates_bb2 (basic_block, basic_block, cgraph_node*); > +static bool write_before_call (struct ipa_ref *, struct ipa_ref *); > +static bool call_before_read(struct ipa_ref *, struct ipa_ref *); > +static hash_map<const char *, unsigned> *clone_num_suffixes1; > +static hash_map<cgraph_node*, cgraph_node*> *func_to_clone; > + > +static bool > +comdat_can_be_unshared_p_1 (symtab_node *node) > +{ > + if (!node->externally_visible) > + return true; > + if (node->address_can_be_compared_p ()) > + { > + struct ipa_ref *ref; > + > + for (unsigned int i = 0; node->iterate_referring (i, ref); i++) > + if (ref->address_matters_p ()) > + return false; > + } > + > + /* If the symbol is used in some weird way, > + * better to not touch it. */ > + if (node->force_output) > + return false; > + > + /* Explicit instantiations needs to be output when possibly > + used externally. */ > + if (node->forced_by_abi > + && TREE_PUBLIC (node->decl) > + && (node->resolution != LDPR_PREVAILING_DEF_IRONLY > + && !flag_whole_program)) > + return false; > + > + /* Non-readonly and volatile variables cannot be duplicated. 
*/ > + if (is_a <varpool_node *> (node) > + && (!TREE_READONLY (node->decl) > + || TREE_THIS_VOLATILE (node->decl))) > + return false; > + return true; > +} > + > +/* COMDAT functions must be shared only if they have address taken, > + otherwise we can produce our own private implementation with > + -fwhole-program. > + Return true when turning COMDAT function static cannot lead to wrong > + code when the resulting object links with a library defining same > + COMDAT. > + > + Virtual functions do have their addresses taken from the vtables, > + but in C++ there is no way to compare their addresses > + for equality. */ > + > +static bool > +comdat_can_be_unshared_p (symtab_node *node) > +{ > + if (!comdat_can_be_unshared_p_1 (node)) > + return false; > + if (node->same_comdat_group) > + { > + symtab_node *next; > + > + /* If more than one function is in the same COMDAT group, it must > + be shared even if just one function in the comdat group has > + address taken. */ > + for (next = node->same_comdat_group; > + next != node; next = next->same_comdat_group) > + if (!comdat_can_be_unshared_p_1 (next)) > + return false; > + } > + return true; > +} > + > +/* Return true when function NODE should > + * be considered externally visible. */ > + > +static bool > +cgraph_externally_visible_p (struct cgraph_node *node, > + bool whole_program) > +{ > + while (node->transparent_alias && node->definition) > + node = node->get_alias_target (); > + if (!node->definition) > + return false; > + if (!TREE_PUBLIC (node->decl) > + || DECL_EXTERNAL (node->decl)) > + return false; > + > + /* Do not try to localize built-in functions yet. > + One of problems is that we > + end up mangling their asm for > + WHOPR that makes it impossible to call them > + using the implicit built-in declarations > + anymore. Similarly this enables > + us to remove them as unreachable before > + actual calls may appear during > + expansion or folding. 
*/ > + if (fndecl_built_in_p (node->decl)) > + return true; > + > + /* If linker counts on us, we must preserve the function. */ > + if (node->used_from_object_file_p ()) > + return true; > + if (DECL_PRESERVE_P (node->decl)) > + return true; > + if (lookup_attribute ("externally_visible", > + DECL_ATTRIBUTES (node->decl))) > + return true; > + if (lookup_attribute ("noipa", DECL_ATTRIBUTES (node->decl))) > + return true; > + if (TARGET_DLLIMPORT_DECL_ATTRIBUTES > + && lookup_attribute ("dllexport", > + DECL_ATTRIBUTES (node->decl))) > + return true; > + if (node->resolution == LDPR_PREVAILING_DEF_IRONLY) > + return false; > + /* When doing LTO or whole program, we can bring COMDAT functions > + static. > + This improves code quality and we know we will duplicate them at > + most twice > + (in the case that we are not using plugin and link with object > + file implementing same COMDAT) */ > + if (((in_lto_p || whole_program) && !flag_incremental_link) > + && DECL_COMDAT (node->decl) > + && comdat_can_be_unshared_p (node)) > + return false; > + > + /* When doing link time optimizations, > + * hidden symbols become local. */ > + if ((in_lto_p && !flag_incremental_link) > + && (DECL_VISIBILITY (node->decl) == VISIBILITY_HIDDEN > + || DECL_VISIBILITY (node->decl) == VISIBILITY_INTERNAL) > + /* Be sure that node is defined in IR file, not in other object > + file. In that case we don't set > + used_from_other_object_file. 
*/ > + && node->definition) > + ; > + else if (!whole_program) > + return true; > + > + if (MAIN_NAME_P (DECL_NAME (node->decl))) > + return true; > + > + return false; > +} > + > +typedef vec <struct ipa_ref*> ipa_ref_vec; > + > +//XXX: Move to ipa-ref.h > +static const char* stringify_ipa_ref_use(ipa_ref_use use) > +{ > + switch (use) { > + case IPA_REF_LOAD: > + return "IPA_REF_LOAD"; > + break; > + case IPA_REF_STORE: > + return "IPA_REF_STORE"; > + break; > + case IPA_REF_ADDR: > + return "IPA_REF_ADDR"; > + break; > + case IPA_REF_ALIAS: > + return "IPA_REF_ALIAS"; > + break; > + default: > + return "<unknown>"; > + } > +} > + > +static void > +load_function_body_of_ipa_ref (cgraph_node* node) > +{ > + if (in_lto_p) > + { > + cgraph_node *f_cnode2 = node->ultimate_alias_target (); > + if (dump_file) > + { > + fprintf (dump_file, "%s: for function '%s'\n", __func__, \ > + f_cnode2->dump_asm_name ()); > + dump_node (f_cnode2->decl, TDF_DETAILS, dump_file); > + } > + f_cnode2->get_untransformed_body(); > + } > +} > + > +static void > +load_function_body_of_ipa_ref (struct ipa_ref* ref) > +{ > + /* IPA passes don't get the function bodies during LTO. */ > + if (in_lto_p) > + { > + symtab_node *f_node = ref->referring; > + cgraph_node *f_cnode = dyn_cast <cgraph_node *> (f_node); > + load_function_body_of_ipa_ref (f_cnode); > + } > +} > + > + > +static void > +dump_vnode_ipa_ref (ipa_ref* ref) > +{ > + fprintf (dump_file, \ > + "Reference type: %s\n", \ > + stringify_ipa_ref_use (ref->use)); > + > + symtab_node *f_node = ref->referring; > + fprintf (dump_file, \ > + "Reference function: %s\n", \ > + f_node->dump_asm_name ()); > + > + fprintf (dump_file, "Gimple stmt (@0x%lx, lto_stmt_uid: %u):\n", \ > + (long)ref->stmt, > + ref->lto_stmt_uid); > + load_function_body_of_ipa_ref (ref); > + print_gimple_stmt (dump_file, ref->stmt, 0, TDF_NONE); > +} > + > +//XXX: Move to tree.h > +static bool > +tree_code_is_cst (tree op) > +{ > + //XXX: use cprop_constant_p() here? 
> + int code = TREE_CODE (op); > + if (code == INTEGER_CST || > + code == REAL_CST || > + code == COMPLEX_CST || > + code == VECTOR_CST) > + return true; > + return false; > +} > + > +/* Return true of all ops of assignment are constants. */ > +static bool > +gimple_assign_is_single_const(gimple *stmt) > +{ > + tree op; > + > + gcc_assert (gimple_code (stmt) == GIMPLE_ASSIGN); > + > + if (gimple_has_volatile_ops (stmt)) > + { > + if (dump_file) > + fprintf (dump_file, "has volatile ops!\n"); > + return false; > + } > + > + if (gimple_num_ops (stmt) != 2) > + { > + if (dump_file) > + fprintf (dump_file, "wrong num of ops: %u!\n", \ > + gimple_num_ops (stmt)); > + return false; > + } > + > + op = gimple_op (stmt, 1); > + if (!tree_code_is_cst (op)) > + { > + if (dump_file) > + fprintf (dump_file, "op is not cst!\n"); > + return false; > + } > + > + return true; > +} > + > +//XXX: Move to ipa-utils.c > +//XXX: from/to could be const > +static bool > +path_exists_dfs (hash_set<cgraph_node*> *visited, > + cgraph_node* current, > + cgraph_node *target) > +{ > + if (current == target) > + return true; > + > + visited->add (current); > + > + cgraph_edge *cs; > + for (cs = current->callees; cs; cs = cs->next_callee) > + { > + cgraph_node *callee = cs->callee->function_symbol (); > + if (!visited->contains (callee)) > + { > + if (path_exists_dfs (visited, callee, target)) > + { > + return true; > + } > + } > + } > + return false; > +} > + > +//XXX: Move to ipa-utils.c > +//XXX: from/to could be const > +static bool > +path_exists (cgraph_node* from, cgraph_node *to) > +{ > + hash_set<cgraph_node*> visited; > + bool found_path = path_exists_dfs (&visited, from, to); > + visited.empty (); > + > + if (dump_file) > + { > + fprintf (dump_file, "Found "); > + if (!found_path) > + fprintf (dump_file, "*no* "); > + fprintf (dump_file, \ > + "path from %s to %s.\n", \ > + from->name (), \ > + to->name ()); > + } > + return found_path; > +} > + > +vec<struct cgraph_node*> > 
+get_nondominated_callees(cgraph_node* caller, cgraph_node* callee) > +{ > + // assumptions: > + // * there is a callsite from caller to callee > + > + if (in_lto_p) > + caller->get_untransformed_body (); > + > + function *func = DECL_STRUCT_FUNCTION (caller->decl); > + basic_block bb; > + bool found = false; > + FOR_EACH_BB_FN(bb, func) > + { > + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); \ > + !gsi_end_p (gsi); \ > + gsi_next (&gsi)) > + { > + gimple *stmt = gsi_stmt (gsi); > + if (gimple_code (stmt) != GIMPLE_CALL) > + continue; > + > + tree callee_decl = gimple_call_fndecl (stmt); > + cgraph_node *callee_node = cgraph_node::get (callee_decl); > + if (callee_node != callee) > + continue; > + > + found = true; > + } > + if (found) break; > + } > + > + gcc_assert (found); > + > + // At this point in the program, we hold a valid bb. > + // The callee is located > + vec<struct cgraph_node*> callees = vNULL; > + basic_block bb_c; > + FOR_EACH_BB_FN(bb_c, func) > + { > + bool self_dominates = bb == bb_c; > + bool bb_dominates_bbc = bb1_dominates_bb2(bb, bb_c, caller); > + if (bb_dominates_bbc && !self_dominates) continue; > + > + for (gimple_stmt_iterator gsi = gsi_start_bb (bb_c); \ > + !gsi_end_p (gsi); \ > + gsi_next (&gsi)) > + { > + gimple *stmt = gsi_stmt (gsi); > + if (gimple_code (stmt) != GIMPLE_CALL) > + continue; > + > + tree callee_decl = gimple_call_fndecl (stmt); > + cgraph_node *callee_node = cgraph_node::get (callee_decl); > + > + if (fndecl_built_in_p (callee_node->decl)) > + continue; > + > + if (self_dominates && callee_node == callee) { > + break; > + } > + > + callees.safe_push (callee_node); > + } > + > + } > + return callees; > +} > + > + > +//XXX: Move to class symtab_node in cgraph.h > +static ipa_ref* > +symtab_node_iterate_address_uses (const symtab_node *n, > + unsigned i, > + ipa_ref *&ref) > +{ > + n->ref_list.referring.iterate (i, &ref); > + > + if (ref && ref->use != IPA_REF_ADDR) > + return NULL; > + > + return ref; > +} > + > 
+//XXX: Move to class symtab_node in cgraph.h > +static bool > +symtab_node_address_matters_p(const symtab_node *n) > +{ > + ipa_ref *ref = NULL; > + > + return (symtab_node_iterate_address_uses (n, 0, ref) != NULL); > +} > + > +static bool > +bb1_dominates_bb2 (basic_block bb1, basic_block bb2, cgraph_node *cnode) > +{ > + // self dominance > + if (bb1 == bb2) return true; > + > + push_cfun (DECL_STRUCT_FUNCTION (cnode->decl)); > + > + /* If dominator info is not available, we need to calculate it. */ > + if (!dom_info_available_p (CDI_DOMINATORS)) > + calculate_dominance_info (CDI_DOMINATORS); > + > + /* Check if the read is dominated by the write. */ > + bool ret = dominated_by_p (CDI_DOMINATORS, bb2, bb1); > + > + /* Restore cfun. */ > + pop_cfun (); > + > + return ret; > +} > + > +static bool > +write_before_read_in_function (struct ipa_ref *write, struct ipa_ref *read) > +{ > + basic_block w_bb = gimple_bb (write->stmt); > + basic_block r_bb = gimple_bb (read->stmt); > + > + if (w_bb != r_bb) > + { > + /* > + * The dominator framework operates on cfun. > + * Therefore we need to set cfun accordingly. > + */ > + symtab_node *w_node = write->referring; > + cgraph_node *w_cnode = dyn_cast <cgraph_node *> (w_node); > + return bb1_dominates_bb2(w_bb, r_bb, w_cnode); > + } > + > + gimple_stmt_iterator gsi; > + for (gsi = gsi_start_bb (w_bb); !gsi_end_p (gsi); gsi_next(&gsi)) > + { > + if (gsi_stmt (gsi) == write->stmt) > + return true; > + if (gsi_stmt (gsi) == read->stmt) > + return false; > + } > + > + /* Neither write nor read found it BB. */ > + gcc_assert (1); > + > + return false; > +} > + > +/* > + * DFS over callees and return if callee is a or b. 
> + */ > +static cgraph_node* > +find_cgraph_in_callee_set (cgraph_node* n, > + hash_set<cgraph_node*> set, > + cgraph_node* a, > + cgraph_node* b) > +{ > + cgraph_edge *cs; > + for (cs = n->callees; cs; cs = cs->next_callee) > + { > + cgraph_node *callee = cs->callee->function_symbol (); > + if (dump_file) > + fprintf (dump_file, "Child of %s: %s\n", n->dump_asm_name (), \ > + callee->dump_asm_name ()); > + if (callee == a) > + return a; > + if (callee == b) > + return b; > + if (!set.contains (callee)) > + continue; > + return find_cgraph_in_callee_set (callee, set, a, b); > + } > + return NULL; > +} > + > +/* Walks back along caller relations until main is found. */ > +static cgraph_node* > +get_ancestor_main_dfs (hash_set<cgraph_node*>* visited, > + vec<cgraph_node*>* path, > + cgraph_node *node) > +{ > + if (MAIN_NAME_P (DECL_NAME (node->decl))) > + { > + path->safe_push(node); > + return node; > + } > + > + visited->add (node); > + > + cgraph_edge *cs; > + for (cs = node->callers; cs; cs = cs->next_caller) > + { > + cgraph_node *caller = cs->caller->function_symbol (); > + if (!visited->contains (caller)) > + { > + cgraph_node* main = > + get_ancestor_main_dfs (visited, path, caller); > + if (main) > + { > + path->safe_push(node); > + return main; > + } > + } > + } > + return NULL; > +} > + > +static cgraph_node* > +get_path_from_main_to(cgraph_node *node, vec<cgraph_node*>* path) > +{ > + hash_set<cgraph_node*> visited; > + cgraph_node* main = get_ancestor_main_dfs (&visited, path, node); > + visited.empty(); > + return main; > +} > + > + > +/* > + * Verifying that a stmt s1 is dominated by a stmt s2 > + * across function borders is not trivial with the available > + * infrastructure (dominator algorithm on bb level plus cgraph). > + * If we rule out external calls/callbacks, then we still need > + * to guarantee a write on each possible path to the read. 
> + * > + * The implemented solution to this problem, which is of course > + * too strict, > + * but also not too compute/memory intensive is as follows: > + * > + * - Check if write is reachable by main() by only looking into > + * the first bb in each function on the path. > + * - All call stmts between main() and write must not possibly > + * reach a read. We consider indirect call statements as > + * possible reaching read (unless we can prove opposite). > + * > + * The external calls/callbacks are ruled out as follows: > + * > + * - all possible ancestors of read must not be external visible > + * - all possible ancestors of read must not be function pointers > + * ??????????? > + * > + * > + */ > +static bool > +write_before_read_across_function_borders (struct ipa_ref *write, > + struct ipa_ref *read) > +{ > + symtab_node *w_node = write->referring; > + cgraph_node *w_cnode = dyn_cast <cgraph_node *> (w_node); > + symtab_node *r_node = read->referring; > + cgraph_node *r_cnode = dyn_cast <cgraph_node *> (r_node); > + cgraph_node *main; > + > + /* Get main() function. */ > + vec<cgraph_node*> pw = vNULL; > + main = get_path_from_main_to(w_cnode, &pw); > + if (!main) > + { > + if (dump_file) > + fprintf (dump_file, \ > + "main() is not ancestor of write -> skipping.\n"); > + return false; > + } > + > + vec<cgraph_node*> pr = vNULL; > + cgraph_node *main_to_read = get_path_from_main_to(r_cnode, &pr); > + if (!main_to_read) > + { > + if (dump_file) > + fprintf (dump_file, \ > + "main() is not ancestor of read -> skipping.\n"); > + return false; > + } > + > + int i; > + cgraph_node *node_in_pr; > + FOR_EACH_VEC_ELT (pr, i, node_in_pr) > + { > + // I think main has to be externally visible. > + if (node_in_pr == main) continue; > + > + /* Assure that all paths to read are not externally visible. */ > + if (cgraph_externally_visible_p (node_in_pr, flag_whole_program)) > + { > + if (dump_file) > + fprintf (dump_file, \ > + "%s is externally visible... 
skipping\n", \ > + node_in_pr->dump_asm_name ()); > + return false; > + } > + > + /* Assure that all paths to read are not > + * used as function pointers. */ > + if (node_in_pr->address_taken) > + { > + if (dump_file) > + fprintf (dump_file, \ > + "%s might be function pointer... skipping\n", \ > + node_in_pr->dump_asm_name ()); > + return false; > + } > + } > + > + cgraph_node* caller = main; > + cgraph_node* node_in_pw; > + FOR_EACH_VEC_ELT (pw, i, node_in_pw) > + { > + gcc_assert (w_cnode != r_cnode); > + if (node_in_pw == r_cnode && path_exists(r_cnode, w_cnode)) > + { > + return call_before_read(write, read); > + } > + > + if (node_in_pw == w_cnode && path_exists(w_cnode, r_cnode)) > + { > + return write_before_call(write, read); > + } > + > + if (node_in_pw == main) > + { > + continue; > + } > + > + if (dump_file) > + fprintf (dump_file, \ > + "get_nondominated_callees from %s to %s\n", \ > + caller->dump_asm_name(), \ > + node_in_pw->dump_asm_name()); > + > + vec<cgraph_node *> non_dominated_callees = \ > + get_nondominated_callees(caller, node_in_pw); > + cgraph_node* non_dominated_callee; > + int j; > + FOR_EACH_VEC_ELT(non_dominated_callees, j, non_dominated_callee) > + { > + if (dump_file) > + fprintf (dump_file, "callee %d %s\n", j, \ > + non_dominated_callee->dump_asm_name()); > + if(path_exists(non_dominated_callee, r_cnode)) { > + return false; > + } > + } > + > + > + caller = node_in_pw; > + } > + return true; > +} > + > + > +static bool > +write_before_call (struct ipa_ref *write, struct ipa_ref *read) > +{ > + symtab_node *w_node = write->referring; > + cgraph_node *w_cnode = dyn_cast <cgraph_node *> (w_node); > + symtab_node *r_node = read->referring; > + cgraph_node *r_cnode = dyn_cast <cgraph_node *> (r_node); > + > + gcc_assert (path_exists(w_cnode, r_cnode)); > + gcc_assert (w_cnode != r_cnode); > + > + basic_block w_bb = gimple_bb (write->stmt); > + basic_block r_bb = gimple_bb (read->stmt); > + > + if (in_lto_p) { > + 
w_cnode->get_untransformed_body (); > + } > + > + function *func = DECL_STRUCT_FUNCTION (w_cnode->decl); > + basic_block c_bb; > + vec<struct cgraph_node*> callees = vNULL; > + FOR_EACH_BB_FN(c_bb, func) > + { > + bool self_dominates = w_bb == c_bb; > + bool w_bb_dominates_c_bb = bb1_dominates_bb2(w_bb, c_bb, w_cnode); > + if (w_bb_dominates_c_bb && !self_dominates) continue; > + > + for (gimple_stmt_iterator gsi = gsi_start_bb (c_bb); \ > + !gsi_end_p (gsi); \ > + gsi_next (&gsi)) > + { > + gimple *stmt = gsi_stmt (gsi); > + > + if (stmt == write->stmt) { > + break; > + } > + > + if (gimple_code (stmt) != GIMPLE_CALL) { > + continue; > + } > + > + tree callee_decl = gimple_call_fndecl (stmt); > + cgraph_node *callee_node = cgraph_node::get (callee_decl); > + > + if (fndecl_built_in_p (callee_node->decl)) > + continue; > + > + if (dump_file) > + fprintf (dump_file, "found callee %s\n", \ > + callee_node->dump_asm_name()); > + callees.safe_push (callee_node); > + > + } > + } > + cgraph_node* callee; > + int i; > + FOR_EACH_VEC_ELT(callees, i, callee) > + { > + if(path_exists(callee, r_cnode)) > + { > + return false; > + } > + } > + > + return true; > +} > + > +static bool > +call_before_read (struct ipa_ref *write, struct ipa_ref *read) > +{ > + symtab_node *w_node = write->referring; > + cgraph_node *w_cnode = dyn_cast <cgraph_node *> (w_node); > + symtab_node *r_node = read->referring; > + cgraph_node *r_cnode = dyn_cast <cgraph_node *> (r_node); > + > + gcc_assert (path_exists(r_cnode, w_cnode)); > + gcc_assert (w_cnode != r_cnode); > + > + basic_block w_bb = gimple_bb (write->stmt); > + basic_block r_bb = gimple_bb (read->stmt); > + > + if (in_lto_p) > + r_cnode->get_untransformed_body (); > + > + function *func = DECL_STRUCT_FUNCTION (r_cnode->decl); > + basic_block c_bb; > + FOR_EACH_BB_FN(c_bb, func) > + { > + bool self_dominates = c_bb == r_bb; > + bool call_dominates_read = bb1_dominates_bb2(c_bb, r_bb, r_cnode); > + if (!call_dominates_read && 
!self_dominates) continue; > + > + for (gimple_stmt_iterator gsi = gsi_start_bb (c_bb); \ > + !gsi_end_p (gsi); \ > + gsi_next (&gsi)) > + { > + gimple *stmt = gsi_stmt (gsi); > + > + // self dominance > + if (stmt == read->stmt) { > + break; > + } > + > + if (gimple_code (stmt) != GIMPLE_CALL) { > + continue; > + } > + > + tree callee_decl = gimple_call_fndecl (stmt); > + cgraph_node *callee_node = cgraph_node::get (callee_decl); > + > + if(path_exists(callee_node, w_cnode)) { > + return true; > + } > + } > + } > + return false; > +} > + > +static bool > +write_before_read (struct ipa_ref *write, struct ipa_ref *read) > +{ > + symtab_node *w_node = write->referring; > + cgraph_node *w_cnode = dyn_cast <cgraph_node *> (w_node); > + symtab_node *r_node = read->referring; > + cgraph_node *r_cnode = dyn_cast <cgraph_node *> (r_node); > + > + if (w_cnode == r_cnode) > + /* Within the same function. */ > + return write_before_read_in_function (write, read); > + > + /* Not within the same function. 
*/ > + return write_before_read_across_function_borders (write, read); > +} > + > +static bool > +write_before_all_reads (struct ipa_ref* write, > + const ipa_ref_vec &reads) > +{ > + int i; > + struct ipa_ref* read; > + > + FOR_EACH_VEC_ELT (reads, i, read) > + { > + load_function_body_of_ipa_ref (read); > + if (!write_before_read (write, read)) > + return false; > + } > + return true; > +} > + > +static void > +propagate_constant_to_read (tree write_val, > + struct ipa_ref* ref, > + hash_set<cgraph_node*> *func) > +{ > + gcc_assert (ref->use == IPA_REF_LOAD); > + load_function_body_of_ipa_ref (ref); > + > + gimple *read_stmt = ref->stmt; > + > + gcc_assert (gimple_code (read_stmt) == GIMPLE_ASSIGN); > + gcc_assert (gimple_num_ops (read_stmt) == 2); > + > + tree old_lhs = gimple_op (read_stmt, 0); > + symtab_node *r_node = ref->referring; > + cgraph_node *r_cnode = dyn_cast <cgraph_node *> (r_node); > + > + //if (!func->contains(r_cnode)) func->add(r_cnode); > + cgraph_node **possible_clone = func_to_clone->get(r_cnode); > + if (!possible_clone) { > + const char* suffix = "test"; > + // callers has to be vNULL, otherwise, we will be > + // analyzing clones... > + // and we don't want that... > + // but that means that we will need to update the callers > + // later... in update_callgraph > + cgraph_node *clone = r_cnode->create_version_clone_with_body( > + vNULL, > + NULL, > + NULL, > + NULL, > + NULL, > + suffix, > + NULL); > + clone->get_untransformed_body(); > + func_to_clone->put(r_cnode, clone); > + } > + > + possible_clone = func_to_clone->get(r_cnode); > + cgraph_node *clone = *possible_clone; > + > + // Build new stmt and replace old. > + gimple_stmt_iterator gsi; > + basic_block bb; > + // Let's create a clone with body here... > + // The clone should not have callers as > + // to not interfere with the current > + // analysis. > + // The callers will be set at the end... 
> + > + push_cfun (DECL_STRUCT_FUNCTION (clone->decl)); > + function *clone_func = DECL_STRUCT_FUNCTION (clone->decl); > + bool found = false; > + FOR_EACH_BB_FN(bb, clone_func) > + { > + for (gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)) > + { > + gimple *stmt = gsi_stmt (gsi); > + if (gimple_code (stmt) != GIMPLE_ASSIGN) continue; > + tree old_val_clone = gimple_op (stmt, 1); > + tree lhs = gimple_op (stmt, 0); > + > + //TODO: what happens when it is not a variable decl? > + //This might actually be the source of imprecision > + if (TREE_CODE(old_val_clone) != VAR_DECL) continue; > + tree old_val = gimple_op (read_stmt, 1); > + if (IDENTIFIER_POINTER(DECL_NAME(old_val_clone)) != \ > + IDENTIFIER_POINTER(DECL_NAME(old_val))) continue; > + > + > + found = true; > + > + // REPLACE!!! > + gimple_stmt_iterator gsi2 = gsi_for_stmt(stmt); > + tree new_lhs = make_ssa_name(TREE_TYPE(lhs)); > + gimple *new_read_stmt = \ > + gimple_build_assign(new_lhs, write_val); > + gsi_insert_before (&gsi2, new_read_stmt, GSI_SAME_STMT); > + > + gimple *use_stmt; > + imm_use_iterator use_iter; > + FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs) > + { > + use_operand_p use_p; > + FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter) > + SET_USE (use_p, new_lhs); > + update_stmt (use_stmt); > + } > + } > + > + if (found) break; > + } > + pop_cfun(); > +} > + > + > + > +static void > +ipa_initcall_get_writes_and_reads(varpool_node *vnode, > + ipa_ref_vec *writes, > + ipa_ref_vec *reads) > +{ > + int i; > + struct ipa_ref *ref; > + > + if (dump_file) > + fprintf (dump_file, "%s for variable '%s'.\n", __func__, vnode->name ()); > + > + /* Only IPA_REF_STORE and IPA_REF_LOAD left. 
*/ > + for (i = 0; vnode->iterate_referring (i, ref); i++) > + { > + if (ref->use == IPA_REF_STORE) > + writes->safe_push (ref); > + if (ref->use == IPA_REF_LOAD) > + reads->safe_push (ref); > + } > +} > + > +static void > +propagate_constant_to_reads (tree write_val, > + const ipa_ref_vec &reads_original, > + hash_set<cgraph_node*> *funcs) > +{ > + /* Iterate over reads and replace them by constant. */ > + struct ipa_ref* ref; > + int i; > + FOR_EACH_VEC_ELT (reads_original, i, ref) > + { > + propagate_constant_to_read (write_val, ref, funcs); > + } > +} > + > +/* > + * Extracts the assigned constant, iff the given statement > + * is a constant assignment. Returns NULL_TREE otherwise. > + */ > +static tree > +extract_constant_from_initcall_write (struct ipa_ref *write) > +{ > + symtab_node *f_node = write->referring; > + cgraph_node *f_cnode = dyn_cast <cgraph_node *> (f_node); > + > + tree decl = f_cnode->decl; > + if (TREE_CODE (decl) != FUNCTION_DECL) > + { > + if (dump_file) > + fprintf (dump_file, "Not function decl -> skipping.\n"); > + return NULL_TREE; > + } > + > + if (!f_cnode->has_gimple_body_p () && dump_file) > + fprintf (dump_file, "Does NOT have gimple body!\n"); > + if (f_cnode->inlined_to && dump_file) > + fprintf (dump_file, "Inlined To\n"); > + > + if (dump_file) > + { > + fprintf (dump_file, "%s: for writer gimple:\n", __func__); > + dump_vnode_ipa_ref(write); > + } > + > + /* Get the write function's body. */ > + load_function_body_of_ipa_ref (write); > + > + gimple *stmt = write->stmt; > + > + /* Verify that we have an assignment. */ > + if (gimple_code (stmt) != GIMPLE_ASSIGN) > + { > + if (dump_file) > + fprintf (dump_file, "Write stmt not assignment -> skipping.\n"); > + return NULL_TREE; > + } > + > + /* Check if write's LHS (vnode) is not volatile. 
*/ > + tree lhs = gimple_assign_lhs (stmt); > + if (TREE_THIS_VOLATILE (TREE_TYPE (lhs))) > + { > + if (dump_file) > + fprintf (dump_file, "Variable volatile -> skipping.\n"); > + return NULL_TREE; > + } > + > + /* Check if RHS of write stmt is constant. */ > + if (!gimple_assign_is_single_const (stmt)) > + { > + if (dump_file) > + { > + fprintf (dump_file, "Found non-const operands.\n"); > + } > + return NULL_TREE; > + } > + > + /* Extract the constant. */ > + tree write_val = gimple_op (stmt, 1); > + > + if (dump_file) > + { > + fprintf (dump_file, "Const op:\n"); > + dump_node (write_val, TDF_DETAILS, dump_file); > + } > + > + return write_val; > +} > + > +static void > +ipa_initcall_cp_execute_for_var (varpool_node *vnode, > + hash_set<cgraph_node*> *update_functions) > +{ > + ipa_ref_vec writes = vNULL; > + ipa_ref_vec reads = vNULL; > + struct ipa_ref* write; > + tree write_val; > + gimple_stmt_iterator gsi; > + bool remove_permanently; > + > + if (dump_file) > + fprintf (dump_file, "%s for variable '%s'.\n", __func__, vnode->name ()); > + > + if (dump_file) > + { > + dump_node (vnode->decl, TDF_DETAILS, dump_file); > + } > + > + /* Variable must not be externally visible. */ > + if (vnode->externally_visible_p ()) > + { > + if (dump_file) > + fprintf (dump_file, \ > + "\texternally visible "\ > + "-> skipping\n"); > + return; > + } > + > + /* All refs must be explicit. */ > + if (!vnode->all_refs_explicit_p ()) > + { > + if (dump_file) > + fprintf (dump_file,\ > + "Not explicit variable refs "\ > + "-> skipping.\n"); > + return; > + } > + > + /* Check if any ref->use is IPA_REF_ALIAS. */ > + if (vnode->has_aliases_p ()) > + { > + if (dump_file) > + fprintf (dump_file, "Found IPA_REF_ALIAS -> skipping.\n"); > + return; > + } > + > + /* Check if any ref->use is IPA_REF_ADDR. */ > + if (symtab_node_address_matters_p (vnode)) > + { > + if (dump_file) > + fprintf (dump_file, "Found IPA_REF_ADDR -> skipping.\n"); > + return; > + } > + > + /* We don't touch arrays. 
*/ > + if (TREE_CODE (TREE_TYPE (vnode->decl)) == ARRAY_TYPE) > + { > + if (dump_file) > + fprintf (dump_file, "Variable is array -> skipping.\n"); > + return; > + } > + > + /* Get all refs (must be REF_STORE or REF_LOAD). */ > + ipa_initcall_get_writes_and_reads (vnode, &writes, &reads); > + > + if (writes.length () > 1) > + { > + /* More than one writer. */ > + if (dump_file) > + fprintf (dump_file, "More than one writer -> skipping.\n"); > + writes.release (); > + reads.release (); > + return; > + } > + else if (writes.length () < 1) > + { > + /* No writer. */ > + if (dump_file) > + fprintf (dump_file, "No writer -> skipping.\n"); > + writes.release (); > + reads.release (); > + return; > + } > + > + /* > + * Note: > + * Limiting ourselfs to only one write is not necessary in general, > + * but good enough to address the typical init() case. > + * Big advantage is, that it makes the following code much easier. > + * > + * TODO: I believe that if we create clones, we might get two writes > + * Investigate > + */ > + > + /* Get (only) write ref. */ > + write = writes.pop (); > + > + /* Check if write's RHS is a constant and get it. */ > + write_val = extract_constant_from_initcall_write (write); > + if (write_val == NULL_TREE) > + { > + if (dump_file) > + fprintf (dump_file, \ > + "Write's RHS is not constant"\ > + " -> skipping.\n"); > + writes.release (); > + reads.release (); > + return; > + } > + > + /* Assure all reads are after the write. */ > + if (!write_before_all_reads (write, reads)) > + { > + if (dump_file) > + fprintf (dump_file, \ > + "Write not guaranteed "\ > + "to be before read -> skipping.\n"); > + writes.release (); > + reads.release (); > + return; > + } > + > + /* Propagate constant to reads. */ > + propagate_constant_to_reads (write_val, reads, update_functions); > + > + /* Finally remove the write. */ > + gsi = gsi_for_stmt (write->stmt); > + remove_permanently = false; //XXX: fails with true??? 
> + //gsi_remove (&gsi, remove_permanently); > + > + if (dump_file) > + fprintf (dump_file, \ > + "Eliminated variable '%s'.\n\n", vnode->name ()); > + > + writes.release (); > + reads.release (); > +} > + > + > +bool > +update_callgraph(cgraph_node *const& r_cnode, > + cgraph_node **clone_ptr, > + void*) > +{ > + vec<cgraph_edge*> callers = r_cnode->collect_callers(); > + cgraph_node *clone = *clone_ptr; > + cgraph_edge *e; > + int i; > + profile_count count = r_cnode->count; > + > + FOR_EACH_VEC_ELT(callers, i, e) > + e->redirect_callee(clone); > + > +/* > + for (e = r_cnode->callees; e; e=e->next_callee) > + e->clone(clone, e->call_stmt, e->lto_stmt_uid, count, count, true); > + > + for (e = r_cnode->indirect_calls; e; e=e->next_callee) > + e->clone(clone, e->call_stmt, e->lto_stmt_uid, count, count, true); > + > + > +*/ > + for (e = clone->callers; e; e=e->next_caller) > + { > + e->caller->get_untransformed_body(); > + function *inner_function = \ > + DECL_STRUCT_FUNCTION (e->caller->decl); > + gimple_call_set_fndecl (e->call_stmt, clone->decl); > + maybe_clean_eh_stmt_fn (inner_function, e->call_stmt); > + } > + > + r_cnode->skip_ipa_cp = true; > + //r_cnode->remove(); > + return true; > +} > + > +static unsigned int > +ipa_initcall_cp_execute (void) > +{ > + > + varpool_node *vnode; > + > + clone_num_suffixes1 = new hash_map<const char *, unsigned>; > + hash_set<cgraph_node *> update_functions; > + func_to_clone = new hash_map<cgraph_node*, cgraph_node*>; > + FOR_EACH_VARIABLE (vnode) > + { > + ipa_initcall_cp_execute_for_var (vnode, &update_functions); > + } > + > + func_to_clone->traverse<void*, update_callgraph>(NULL); > + > + delete clone_num_suffixes1; > + delete func_to_clone; > + > + return 0; > + > +} > + > +namespace { > + > +const pass_data pass_data_ipa_initcall_cp = > +{ > + SIMPLE_IPA_PASS, /* type */ > + "initcall_cp", /* name */ > + OPTGROUP_NONE, /* optinfo_flags */ > + TV_NONE, /* tv_id */ > + ( PROP_cfg | PROP_ssa ), /* properties_required 
*/ > + 0, /* properties_provided */ > + 0, /* properties_destroyed */ > + 0, > + ( TODO_update_ssa | TODO_cleanup_cfg | TODO_dump_symtab \ > + | TODO_rebuild_cgraph_edges | TODO_discard_function ), > +}; > + > +class pass_ipa_initcall_cp : public simple_ipa_opt_pass > +{ > +public: > + pass_ipa_initcall_cp (gcc::context *ctxt) > + : simple_ipa_opt_pass (pass_data_ipa_initcall_cp, ctxt) > + {} > + > + /* opt_pass methods: */ > + virtual bool gate (function *) > + { > + return flag_initcall_cp; > + } > + > + virtual unsigned int execute (function *) > + { > + return ipa_initcall_cp_execute(); > + } > + > +}; // class pass_ipa_initcall_cp > + > +} // anon namespace > + > +simple_ipa_opt_pass * > +make_pass_ipa_initcall_cp (gcc::context *ctxt) > +{ > + return new pass_ipa_initcall_cp (ctxt); > +} > diff --git a/gcc/passes.def b/gcc/passes.def > index 798a391..350a4fc 100644 > --- a/gcc/passes.def > +++ b/gcc/passes.def > @@ -146,6 +146,7 @@ along with GCC; see the file COPYING3. If not see > NEXT_PASS (pass_ipa_profile); > NEXT_PASS (pass_ipa_icf); > NEXT_PASS (pass_ipa_devirt); > + NEXT_PASS (pass_ipa_initcall_cp); > NEXT_PASS (pass_ipa_cp); > NEXT_PASS (pass_ipa_sra); > NEXT_PASS (pass_ipa_cdtor_merge); > diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h > index a987661..9e64373 100644 > --- a/gcc/tree-pass.h > +++ b/gcc/tree-pass.h > @@ -498,6 +498,7 @@ extern ipa_opt_pass_d *make_pass_ipa_fn_summary > (gcc::context *ctxt); > extern ipa_opt_pass_d *make_pass_ipa_inline (gcc::context *ctxt); > extern simple_ipa_opt_pass *make_pass_ipa_free_lang_data (gcc::context > *ctxt); > extern simple_ipa_opt_pass *make_pass_ipa_free_fn_summary (gcc::context > *ctxt); > +extern simple_ipa_opt_pass *make_pass_ipa_initcall_cp (gcc::context *ctxt); > extern ipa_opt_pass_d *make_pass_ipa_cp (gcc::context *ctxt); > extern ipa_opt_pass_d *make_pass_ipa_sra (gcc::context *ctxt); > extern ipa_opt_pass_d *make_pass_ipa_icf (gcc::context *ctxt); >