Hello,

this is an update on the LTO pass we've been working on. The optimization is
called ipa-initcall-cp because it propagates constant values written to
variables with static lifetimes (such as ones initialized in initialization
functions).

This patch can be applied to:
commit 3cce71b23f6ed221b4335b600e718d79903cc83d
Date:   Wed Dec 4 20:04:10 2019 +0000
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@278975
138bc75d-0d04-0410-961f-82ee72b054a4

With this patch applied, GCC can now successfully compile all benchmarks in the
SPEC CPU2017 benchmarking suite. ~12 of the benchmarks have variables that can be
eliminated with this transformation. I am pasting the patch at the
bottom of the e-mail. I'll be working on adding some of the test cases onto
gcc's testing suite.

In my previous e-mail I asked some questions. Some of them still remain,
others I have answered:

> Question #1: Can someone please tell me what would be the best kind of clones
> to use in this use case?

I think that the only API which makes sense here is
create_version_clone_with_body because we want to modify the body of the
clones.

> Question #2: I found out that clones produced with
> create_version_clone_with_body are not themselves versionable. What is the
> reason behind this?

Still not sure why this is the case.

> Question #3: I have added a flag to cgraph_nodes to skip ipa-cp. This flag is
> true for the original functions which have been removed from the call graph.
> If this flag is removed, then cgraph_nodes removed from the call graph will
> trigger an assertion in ipa-cp (ipa-cp.c:1195). I would like to not have to
> add this flag to cgraph_node but I am not sure what to do.

Still not sure how to remove this flag. Ideally, ipa-cp would not
trigger that assertion because it shouldn't try to propagate constants
on cgraph_nodes that are not connected to the graph any more.

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 6b857bd..fb4ba09 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1370,6 +1370,7 @@ OBJS = \
        internal-fn.o \
        ipa-cp.o \
        ipa-sra.o \
+       ipa-initcall-cp.o \
        ipa-devirt.o \
        ipa-fnsummary.o \
        ipa-polymorphic-call.o \
diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index 7288440..0d56149 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -288,6 +288,7 @@ symbol_table::create_empty (void)
   node->type = SYMTAB_FUNCTION;
   node->frequency = NODE_FREQUENCY_NORMAL;
   node->count_materialization_scale = REG_BR_PROB_BASE;
+  node->skip_ipa_cp = false;
   cgraph_count++;
 
   return node;
@@ -3548,6 +3549,7 @@ cgraph_node::get_untransformed_body (void)
   name = lto_get_decl_name_mapping (file_data, name);
   struct lto_in_decl_state *decl_state
         = lto_get_function_in_decl_state (file_data, decl);
+  gcc_assert (decl_state != NULL);
 
   cgraph_node *origin = this;
   while (origin->clone_of)
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 9c086fe..806ed56 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -901,6 +901,8 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node
 {
   friend class symbol_table;
 
+  bool skip_ipa_cp;
+
   /* Remove the node from cgraph and all inline clones inlined into it.
      Skip however removal of FORBIDDEN_NODE and return true if it needs to be
      removed.  This allows to call the function from outer loop walking clone
diff --git a/gcc/common.opt b/gcc/common.opt
index 404b6aa..00c86dc 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3344,4 +3344,10 @@ fipa-ra
 Common Report Var(flag_ipa_ra) Optimization
 Use caller save register across calls if possible.
 
+finitcall-cp
+Common Report Var(flag_initcall_cp) TBD.
+
+finitcall-cp-dryrun
+Common Report Var(flag_initcall_cp_dryrun) TBD.
+
 ; This comment is to ensure we retain the blank line above.
diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
index 693c7a2..4169dfb 100644
--- a/gcc/ipa-cp.c
+++ b/gcc/ipa-cp.c
@@ -1187,7 +1187,7 @@ initialize_node_lattices (struct cgraph_node *node)
 
   gcc_checking_assert (node->has_gimple_body_p ());
 
-  if (!ipa_get_param_count (info))
+  if (!ipa_get_param_count (info) || node->skip_ipa_cp)
     disable = true;
   else if (node->local)
     {
diff --git a/gcc/ipa-initcall-cp.c b/gcc/ipa-initcall-cp.c
new file mode 100644
index 0000000..264bf38
--- /dev/null
+++ b/gcc/ipa-initcall-cp.c
@@ -0,0 +1,1467 @@
+/* Initcall constant propagation pass.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "tree.h"
+#include "tree-eh.h"
+#include "gimple.h"
+#include "gimple-expr.h"
+#include "gimple-iterator.h"
+#include "predict.h"
+#include "alloc-pool.h"
+#include "tree-pass.h"
+#include "cgraph.h"
+#include "diagnostic.h"
+#include "fold-const.h"
+#include "gimple-fold.h"
+#include "symbol-summary.h"
+#include "tree-vrp.h"
+#include "ipa-prop.h"
+#include "tree-pretty-print.h"
+#include "tree-inline.h"
+#include "ipa-fnsummary.h"
+#include "ipa-utils.h"
+#include "tree-ssa-ccp.h"
+#include "stringpool.h"
+#include "attribs.h"
+#include "gimple-pretty-print.h"
+#include "ssa.h"
+
+bool
+reach_nodes_via_bb1 (cgraph_node *n2);
+static bool
+bb1_dominates_bb2 (basic_block, basic_block, cgraph_node *);
+static bool
+write_before_call (struct ipa_ref *, struct ipa_ref *);
+static bool
+call_before_read (struct ipa_ref *, struct ipa_ref *);
+static hash_map<const char *, unsigned> *clone_num_suffixes1;
+static hash_map<cgraph_node *, cgraph_node *> *func_to_clone;
+static vec<struct cgraph_node *>
+get_nondominated_callees (cgraph_node *caller, cgraph_node *callee,
+                         bool *exitEarly = NULL);
+
+static bool
+comdat_can_be_unshared_p_1 (symtab_node *node)
+{
+  if (!node->externally_visible)
+    return true;
+  if (node->address_can_be_compared_p ())
+    {
+      struct ipa_ref *ref;
+
+      for (unsigned int i = 0; node->iterate_referring (i, ref); i++)
+       if (ref->address_matters_p ())
+         return false;
+    }
+
+  /* If the symbol is used in some weird way,
+   * better to not touch it.  */
+  if (node->force_output)
+    return false;
+
+  /* Explicit instantiations need to be output when possibly
+     used externally.  */
+  if (node->forced_by_abi && TREE_PUBLIC (node->decl)
+      && (node->resolution != LDPR_PREVAILING_DEF_IRONLY
+         && !flag_whole_program))
+    return false;
+
+  /* Non-readonly and volatile variables cannot be duplicated.  */
+  if (is_a<varpool_node *> (node)
+      && (!TREE_READONLY (node->decl) || TREE_THIS_VOLATILE (node->decl)))
+    return false;
+  return true;
+}
+
+/* COMDAT functions must be shared only if they have address taken,
+   otherwise we can produce our own private implementation with
+   -fwhole-program.
+   Return true when turning COMDAT function static cannot lead to wrong
+   code when the resulting object links with a library defining same
+   COMDAT.
+
+   Virtual functions do have their addresses taken from the vtables,
+   but in C++ there is no way to compare their addresses
+   for equality.  */
+
+static bool
+comdat_can_be_unshared_p (symtab_node *node)
+{
+  if (!comdat_can_be_unshared_p_1 (node))
+    return false;
+  if (node->same_comdat_group)
+    {
+      symtab_node *next;
+
+      /* If more than one function is in the same COMDAT group, it must
+        be shared even if just one function in the comdat group has
+        address taken.  */
+      for (next = node->same_comdat_group; next != node;
+          next = next->same_comdat_group)
+       if (!comdat_can_be_unshared_p_1 (next))
+         return false;
+    }
+  return true;
+}
+
+/* Return true when function NODE should
+ * be considered externally visible.  */
+
+static bool
+cgraph_externally_visible_p (struct cgraph_node *node, bool whole_program)
+{
+  while (node->transparent_alias && node->definition)
+    node = node->get_alias_target ();
+  if (!node->definition)
+    return false;
+  if (!TREE_PUBLIC (node->decl) || DECL_EXTERNAL (node->decl))
+    return false;
+
+  /* Do not try to localize built-in functions yet.
    One of the problems is that we
+     end up mangling their asm for
+     WHOPR that makes it impossible to call them
+     using the implicit built-in declarations
+     anymore.  Similarly this enables
+     us to remove them as unreachable before
+     actual calls may appear during
+     expansion or folding.  */
+  if (fndecl_built_in_p (node->decl))
+    return true;
+
+  /* If linker counts on us, we must preserve the function.  */
+  if (node->used_from_object_file_p ())
+    return true;
+  if (DECL_PRESERVE_P (node->decl))
+    return true;
+  if (lookup_attribute ("externally_visible", DECL_ATTRIBUTES (node->decl)))
+    return true;
+  if (lookup_attribute ("noipa", DECL_ATTRIBUTES (node->decl)))
+    return true;
+  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
+      && lookup_attribute ("dllexport", DECL_ATTRIBUTES (node->decl)))
+    return true;
+  if (node->resolution == LDPR_PREVAILING_DEF_IRONLY)
+    return false;
+  /* When doing LTO or whole program, we can bring COMDAT functions
+     static.
+     This improves code quality and we know we will duplicate them at
+     most twice
+     (in the case that we are not using plugin and link with object
+     file implementing same COMDAT)  */
+  if (((in_lto_p || whole_program) && !flag_incremental_link)
+      && DECL_COMDAT (node->decl) && comdat_can_be_unshared_p (node))
+    return false;
+
+  /* When doing link time optimizations,
+   * hidden symbols become local.  */
+  if ((in_lto_p && !flag_incremental_link)
+      && (DECL_VISIBILITY (node->decl) == VISIBILITY_HIDDEN
+         || DECL_VISIBILITY (node->decl) == VISIBILITY_INTERNAL)
+      /* Be sure that node is defined in IR file, not in other object
+        file.  In that case we don't set
+        used_from_other_object_file.  */
+      && node->definition)
+    ;
+  else if (!whole_program)
+    return true;
+
+  if (MAIN_NAME_P (DECL_NAME (node->decl)))
+    return true;
+
+  return false;
+}
+
+typedef vec<struct ipa_ref *> ipa_ref_vec;
+
+// XXX: Move to ipa-ref.h
+static const char *
+stringify_ipa_ref_use (ipa_ref_use use)
+{
+  switch (use)
+    {
+    case IPA_REF_LOAD:
+      return "IPA_REF_LOAD";
+      break;
+    case IPA_REF_STORE:
+      return "IPA_REF_STORE";
+      break;
+    case IPA_REF_ADDR:
+      return "IPA_REF_ADDR";
+      break;
+    case IPA_REF_ALIAS:
+      return "IPA_REF_ALIAS";
+      break;
+    default:
+      return "<unknown>";
+    }
+}
+
+static void
+load_function_body_of_ipa_ref (cgraph_node *node)
+{
+  if (in_lto_p)
+    {
+      cgraph_node *f_cnode2 = node->ultimate_alias_target ();
+      if (dump_file)
+       {
+         fprintf (dump_file, "%s: for function '%s'\n", __func__,
+                  f_cnode2->dump_asm_name ());
+         dump_node (f_cnode2->decl, TDF_DETAILS, dump_file);
+       }
+      f_cnode2->get_untransformed_body ();
+    }
+}
+
+static void
+load_function_body_of_ipa_ref (struct ipa_ref *ref)
+{
+  /* IPA passes don't get the function bodies during LTO.  */
+  if (in_lto_p)
+    {
+      symtab_node *f_node = ref->referring;
+      cgraph_node *f_cnode = dyn_cast<cgraph_node *> (f_node);
+      load_function_body_of_ipa_ref (f_cnode);
+    }
+}
+
+static void
+dump_vnode_ipa_ref (ipa_ref *ref)
+{
+  fprintf (dump_file, "Reference type: %s\n", stringify_ipa_ref_use (ref->use));
+
+  symtab_node *f_node = ref->referring;
+  fprintf (dump_file, "Reference function: %s\n", f_node->dump_asm_name ());
+
+  fprintf (dump_file, "Gimple stmt (@0x%lx, lto_stmt_uid: %u):\n",
+          (long) ref->stmt, ref->lto_stmt_uid);
+  load_function_body_of_ipa_ref (ref);
+  print_gimple_stmt (dump_file, ref->stmt, 0, TDF_NONE);
+}
+
+// XXX: Move to tree.h
+static bool
+tree_code_is_cst (tree op)
+{
+  // XXX: use cprop_constant_p () here?
+  int code = TREE_CODE (op);
+  if (code == INTEGER_CST || code == REAL_CST || code == COMPLEX_CST
+      || code == VECTOR_CST)
+    return true;
+  return false;
+}
+
+/* Return true if all ops of assignment are constants.  */
+static bool
+gimple_assign_is_single_const (gimple *stmt)
+{
+  tree op;
+
+  gcc_assert (gimple_code (stmt) == GIMPLE_ASSIGN);
+
+  if (gimple_has_volatile_ops (stmt))
+    {
+      if (dump_file)
+       fprintf (dump_file, "has volatile ops!\n");
+      return false;
+    }
+
+  if (gimple_num_ops (stmt) != 2)
+    {
+      if (dump_file)
+       fprintf (dump_file, "wrong num of ops: %u!\n", gimple_num_ops (stmt));
+      return false;
+    }
+
+  op = gimple_op (stmt, 1);
+  if (!tree_code_is_cst (op))
+    {
+      if (dump_file)
+       fprintf (dump_file, "op is not cst!\n");
+      return false;
+    }
+
+  return true;
+}
+
+// XXX: Move to ipa-utils.c
+// XXX: from/to could be const
+static bool
+path_exists_dfs (hash_set<cgraph_node *> *visited, cgraph_node *current,
+                cgraph_node *target)
+{
+  if (current == target)
+    return true;
+
+  visited->add (current);
+
+  cgraph_edge *cs;
+  for (cs = current->callees; cs; cs = cs->next_callee)
+    {
+      cgraph_node *callee = cs->callee->function_symbol ();
+      if (!visited->contains (callee))
+       {
+         if (path_exists_dfs (visited, callee, target))
+           {
+             return true;
+           }
+       }
+    }
+  return false;
+}
+
+// XXX: Move to ipa-utils.c
+// XXX: from/to could be const
+static bool
+path_exists (cgraph_node *from, cgraph_node *to)
+{
+  hash_set<cgraph_node *> visited;
+  bool found_path = path_exists_dfs (&visited, from, to);
+  visited.empty ();
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "Found ");
+      if (!found_path)
+       fprintf (dump_file, "*no* ");
+      fprintf (dump_file, "path from %s to %s.\n", from->name (), to->name ());
+    }
+  return found_path;
+}
+
+static vec<struct cgraph_node *>
+get_nondominated_callees (cgraph_node *caller, cgraph_node *callee,
+                         bool *exitEarly)
+{
+  // assumptions:
+  //  * there is a callsite from caller to callee
+
+  if (in_lto_p)
+    caller->get_untransformed_body ();
+
+  function *func = DECL_STRUCT_FUNCTION (caller->decl);
+  basic_block bb;
+  bool found = false;
+  FOR_EACH_BB_FN (bb, func)
+    {
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+          gsi_next (&gsi))
+       {
+         gimple *stmt = gsi_stmt (gsi);
+         if (gimple_code (stmt) != GIMPLE_CALL)
+           continue;
+
+         // The question here is, what happens if we don't have
+         // a tree that is of the type
+         // var = call()
+         // and instead we have a tree of the type
+         // call()
+         // are these trees even possible?
+         // I would prefer if they are all the same...
+         if (dump_file)
+           print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
+         tree rhs = gimple_op (stmt, 1);
+         if (TREE_CODE (rhs) == RECORD_TYPE)
+           {
+             gcc_assert (exitEarly);
+             *exitEarly = true;
+             return vNULL;
+           }
+         tree callee_decl = gimple_call_fndecl (stmt);
+         if (!callee_decl)
+           {
+             gcc_assert (exitEarly);
+             *exitEarly = true;
+             return vNULL;
+           }
+         cgraph_node *callee_node = cgraph_node::get (callee_decl);
+         if (callee_node != callee)
+           continue;
+
+         found = true;
+       }
+      if (found)
+       break;
+    }
+
+  gcc_assert (found);
+
+  // At this point in the program, we hold a valid bb.
+  // The callee is located
+  vec<struct cgraph_node *> callees = vNULL;
+  basic_block bb_c;
+  FOR_EACH_BB_FN (bb_c, func)
+    {
+      bool self_dominates = bb == bb_c;
+      bool bb_dominates_bbc = bb1_dominates_bb2 (bb, bb_c, caller);
+      if (bb_dominates_bbc && !self_dominates)
+       continue;
+
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bb_c); !gsi_end_p (gsi);
+          gsi_next (&gsi))
+       {
+         gimple *stmt = gsi_stmt (gsi);
+         if (gimple_code (stmt) != GIMPLE_CALL)
+           continue;
+
+         tree callee_decl = gimple_call_fndecl (stmt);
+         cgraph_node *callee_node = cgraph_node::get (callee_decl);
+
+         if (fndecl_built_in_p (callee_node->decl))
+           continue;
+
+         if (self_dominates && callee_node == callee)
+           {
+             break;
+           }
+
+         callees.safe_push (callee_node);
+       }
+    }
+  return callees;
+}
+
+// XXX: Move to class symtab_node in cgraph.h
+static ipa_ref *
+symtab_node_iterate_address_uses (symtab_node *n, unsigned i, ipa_ref *&ref)
+{
+  ipa_ref *retval = NULL;
+  for (int i = 0; n->iterate_referring (i, retval); i++)
+    if (ref->use == IPA_REF_ADDR)
+      return ref;
+  return NULL;
+}
+
+// XXX: Move to class symtab_node in cgraph.h
+static bool
+symtab_node_address_matters_p (symtab_node *n)
+{
+  struct ipa_ref *ref;
+
+  if (!n->address_can_be_compared_p ())
+    return false;
+  if (n->externally_visible || n->force_output)
+    return true;
+
+  for (unsigned int i = 0; n->iterate_referring (i, ref); i++)
+    if (ref->address_matters_p ())
+      return true;
+  return false;
+}
+
+static bool
+bb1_dominates_bb2 (basic_block bb1, basic_block bb2, cgraph_node *cnode)
+{
+  // self dominance
+  if (bb1 == bb2)
+    return true;
+
+  push_cfun (DECL_STRUCT_FUNCTION (cnode->decl));
+
+  /* If dominator info is not available, we need to calculate it.  */
+  if (!dom_info_available_p (CDI_DOMINATORS))
+    calculate_dominance_info (CDI_DOMINATORS);
+
+  /* Check if the read is dominated by the write.  */
+  bool ret = dominated_by_p (CDI_DOMINATORS, bb2, bb1);
+
+  /* Restore cfun.  */
+  pop_cfun ();
+
+  return ret;
+}
+
+static bool
+write_before_read_in_function (struct ipa_ref *write, struct ipa_ref *read)
+{
+  basic_block w_bb = gimple_bb (write->stmt);
+  basic_block r_bb = gimple_bb (read->stmt);
+
+  if (w_bb != r_bb)
+    {
+      /*
+       * The dominator framework operates on cfun.
+       * Therefore we need to set cfun accordingly.
+       */
+      symtab_node *w_node = write->referring;
+      cgraph_node *w_cnode = dyn_cast<cgraph_node *> (w_node);
+      return bb1_dominates_bb2 (w_bb, r_bb, w_cnode);
+    }
+
+  gimple_stmt_iterator gsi;
+  for (gsi = gsi_start_bb (w_bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      if (gsi_stmt (gsi) == write->stmt)
+       return true;
+      if (gsi_stmt (gsi) == read->stmt)
+       return false;
+    }
+
+  /* Neither write nor read was found in the BB.  */
+  gcc_assert (1);
+
+  return false;
+}
+
+/*
+ * DFS over callees and return if callee is a or b.
+ */
+static cgraph_node *
+find_cgraph_in_callee_set (cgraph_node *n, hash_set<cgraph_node *> set,
+                          cgraph_node *a, cgraph_node *b)
+{
+  cgraph_edge *cs;
+  for (cs = n->callees; cs; cs = cs->next_callee)
+    {
+      cgraph_node *callee = cs->callee->function_symbol ();
+      if (dump_file)
+       fprintf (dump_file, "Child of %s: %s\n", n->dump_asm_name (),
+                callee->dump_asm_name ());
+      if (callee == a)
+       return a;
+      if (callee == b)
+       return b;
+      if (!set.contains (callee))
+       continue;
+      return find_cgraph_in_callee_set (callee, set, a, b);
+    }
+  return NULL;
+}
+
+/* Walks back along caller relations until main is found.  */
+static cgraph_node *
+get_ancestor_main_dfs (hash_set<cgraph_node *> *visited,
+                      vec<cgraph_node *> *path, cgraph_node *node)
+{
+  if (MAIN_NAME_P (DECL_NAME (node->decl)))
+    {
+      path->safe_push (node);
+      return node;
+    }
+
+  visited->add (node);
+
+  cgraph_edge *cs;
+  for (cs = node->callers; cs; cs = cs->next_caller)
+    {
+      cgraph_node *caller = cs->caller->function_symbol ();
+      if (!visited->contains (caller))
+       {
+         cgraph_node *main = get_ancestor_main_dfs (visited, path, caller);
+         if (main)
+           {
+             path->safe_push (node);
+             return main;
+           }
+       }
+    }
+  return NULL;
+}
+
+static cgraph_node *
+get_path_from_main_to (cgraph_node *node, vec<cgraph_node *> *path)
+{
+  hash_set<cgraph_node *> visited;
+  cgraph_node *main = get_ancestor_main_dfs (&visited, path, node);
+  visited.empty ();
+  return main;
+}
+
+/*
+ * Verifying that a stmt s1 is dominated by a stmt s2
+ * across function borders is not trivial with the available
+ * infrastructure (dominator algorithm on bb level plus cgraph).
+ * If we rule out external calls/callbacks, then we still need
+ * to guarantee a write on each possible path to the read.
+ *
+ * The implemented solution to this problem, which is of course
+ * too strict,
+ * but also not too compute/memory intensive is as follows:
+ *
+ * - Check if write is reachable by main () by only looking into
+ *   the first bb in each function on the path.
+ * - All call stmts between main () and write must not possibly
+ *   reach a read.  We consider indirect call statements as
+ *   possible reaching read (unless we can prove opposite).
+ *
+ * The external calls/callbacks are ruled out as follows:
+ *
+ * - all possible ancestors of read must not be external visible
+ * - all possible ancestors of read must not be function pointers
+ * - Something else?
+ */
+static bool
+write_before_read_across_function_borders (struct ipa_ref *write,
+                                          struct ipa_ref *read)
+{
+  symtab_node *w_node = write->referring;
+  cgraph_node *w_cnode = dyn_cast<cgraph_node *> (w_node);
+  symtab_node *r_node = read->referring;
+  cgraph_node *r_cnode = dyn_cast<cgraph_node *> (r_node);
+  cgraph_node *main;
+
+  /* Get main () function.  */
+  vec<cgraph_node *> pw = vNULL;
+  main = get_path_from_main_to (w_cnode, &pw);
+  if (!main)
+    {
+      if (dump_file)
+       fprintf (dump_file, "main () is not ancestor of write -> skipping.\n");
+      return false;
+    }
+
+  vec<cgraph_node *> pr = vNULL;
+  cgraph_node *main_to_read = get_path_from_main_to (r_cnode, &pr);
+  if (!main_to_read)
+    {
+      if (dump_file)
+       fprintf (dump_file, "main () is not ancestor of read -> skipping.\n");
+      return false;
+    }
+
+  // TODO: is write guaranteed to happen?
+  // if it is not guaranteed to happen, we shouldn't modify anything!
+  // The very first approximation. There should be a path from main to w_cnode
+  // such that only the first basic block is actually inspected.
+  // Future work will involve looking at unconditional paths.
+  // I think right now, all paths are reachable from first basic block
+  if (!reach_nodes_via_bb1 (w_cnode))
+    return false;
+
+  int i;
+  cgraph_node *node_in_pr;
+  FOR_EACH_VEC_ELT (pr, i, node_in_pr)
+    {
+      // I think main has to be externally visible.
+      if (node_in_pr == main)
+       continue;
+
+      /* Assure that all paths to read are not externally visible.  */
+      if (cgraph_externally_visible_p (node_in_pr, flag_whole_program))
+       {
+         if (dump_file)
+           fprintf (dump_file, "%s is externally visible...  skipping\n",
+                    node_in_pr->dump_asm_name ());
+         return false;
+       }
+
+      /* Assure that all paths to read are not
+       * used as function pointers.  */
+      if (node_in_pr->address_taken)
+       {
+         if (dump_file)
+           fprintf (dump_file, "%s might be function pointer...  skipping\n",
+                    node_in_pr->dump_asm_name ());
+         return false;
+       }
+    }
+
+  cgraph_node *caller = main;
+  cgraph_node *node_in_pw;
+  FOR_EACH_VEC_ELT (pw, i, node_in_pw)
+    {
+      gcc_assert (w_cnode != r_cnode);
+      if (node_in_pw == r_cnode && path_exists (r_cnode, w_cnode))
+       {
+         return call_before_read (write, read);
+       }
+
+      if (node_in_pw == w_cnode && path_exists (w_cnode, r_cnode))
+       {
+         return write_before_call (write, read);
+       }
+
+      if (node_in_pw == main)
+       {
+         continue;
+       }
+
+      if (dump_file)
+       fprintf (dump_file, "get_nondominated_callees from %s to %s\n",
+                caller->dump_asm_name (), node_in_pw->dump_asm_name ());
+
+      bool exitEarly = false;
+      vec<cgraph_node *> non_dominated_callees
+       = get_nondominated_callees (caller, node_in_pw, &exitEarly);
+      if (exitEarly)
+       return false;
+      cgraph_node *non_dominated_callee;
+      int j;
+      FOR_EACH_VEC_ELT (non_dominated_callees, j, non_dominated_callee)
+       {
+         if (dump_file)
+           fprintf (dump_file, "callee %d %s\n", j,
+                    non_dominated_callee->dump_asm_name ());
+         if (path_exists (non_dominated_callee, r_cnode))
+           {
+             return false;
+           }
+       }
+
+      caller = node_in_pw;
+    }
+  return true;
+}
+
+static vec<struct cgraph_node *>
+get_bb1_callees (cgraph_node *c, cgraph_node *w_cnode)
+{
+  vec<struct cgraph_node *> callees = vNULL;
+  if (fndecl_built_in_p (c->decl))
+    return vNULL;
+
+  if (dump_file)
+    fprintf (dump_file, "before get_untransformed_body %s\n",
+            c->dump_asm_name ());
+
+  if (!c->definition)
+    return vNULL;
+
+  push_cfun (DECL_STRUCT_FUNCTION (c->decl));
+
+  if (in_lto_p)
+    c->get_untransformed_body ();
+
+  pop_cfun ();
+
+  function *func = DECL_STRUCT_FUNCTION (c->decl);
+  /* Get first BB (after the fake entry BB).  */
+  basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (func)->next_bb;
+
+  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+       gsi_next (&gsi))
+    {
+      gimple *stmt = gsi_stmt (gsi);
+
+      if (gimple_code (stmt) != GIMPLE_CALL)
+       continue;
+
+      tree callee_decl = gimple_call_fndecl (stmt);
+      cgraph_node *callee_node = cgraph_node::get (callee_decl);
+      if (!path_exists (callee_node, w_cnode))
+       continue;
+
+      callees.safe_push (callee_node);
+    }
+
+  return callees;
+}
+
+static bool
+reach_nodes_via_bb1_dfs (hash_set<cgraph_node *> *visited, cgraph_node *n1,
+                        cgraph_node *n2)
+{
+  if (n1 == n2)
+    return true;
+
+  visited->add (n1);
+
+  vec<struct cgraph_node *> callees = get_bb1_callees (n1, n2);
+
+  int i;
+  cgraph_node *callee;
+  FOR_EACH_VEC_ELT (callees, i, callee)
+    {
+      if (!visited->contains (callee))
+       {
+         bool found = reach_nodes_via_bb1_dfs (visited, callee, n2);
+         if (found)
+           {
+             callees.release ();
+             return true;
+           }
+       }
+    }
+
+  return false;
+}
+
+bool
+reach_nodes_via_bb1 (cgraph_node *n2)
+{
+  vec<cgraph_node *> pr = vNULL;
+  cgraph_node *main = get_path_from_main_to (n2, &pr);
+  hash_set<cgraph_node *> visited;
+  bool path_exists = reach_nodes_via_bb1_dfs (&visited, main, n2);
+  visited.empty ();
+  pr.release ();
+  return path_exists;
+}
+
+static bool
+write_before_call (struct ipa_ref *write, struct ipa_ref *read)
+{
+  symtab_node *w_node = write->referring;
+  cgraph_node *w_cnode = dyn_cast<cgraph_node *> (w_node);
+  symtab_node *r_node = read->referring;
+  cgraph_node *r_cnode = dyn_cast<cgraph_node *> (r_node);
+
+  gcc_assert (path_exists (w_cnode, r_cnode));
+  gcc_assert (w_cnode != r_cnode);
+
+  basic_block w_bb = gimple_bb (write->stmt);
+  basic_block r_bb = gimple_bb (read->stmt);
+
+  if (in_lto_p)
+    {
+      w_cnode->get_untransformed_body ();
+    }
+
+  function *func = DECL_STRUCT_FUNCTION (w_cnode->decl);
+  basic_block c_bb;
+  vec<struct cgraph_node *> callees = vNULL;
+  FOR_EACH_BB_FN (c_bb, func)
+    {
+      bool self_dominates = w_bb == c_bb;
+      bool w_bb_dominates_c_bb = bb1_dominates_bb2 (w_bb, c_bb, w_cnode);
+      if (w_bb_dominates_c_bb && !self_dominates)
+       continue;
+
+      for (gimple_stmt_iterator gsi = gsi_start_bb (c_bb); !gsi_end_p (gsi);
+          gsi_next (&gsi))
+       {
+         gimple *stmt = gsi_stmt (gsi);
+
+         if (stmt == write->stmt)
+           {
+             break;
+           }
+
+         if (gimple_code (stmt) != GIMPLE_CALL)
+           {
+             continue;
+           }
+         if (dump_file)
+           print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
+         tree rhs = gimple_op (stmt, 1);
+
+         if (rhs && TREE_CODE (rhs) == POINTER_TYPE)
+           return false;
+
+         tree callee_decl = gimple_call_fndecl (stmt);
+         if (!callee_decl)
+           return false;
+
+         cgraph_node *callee_node = cgraph_node::get (callee_decl);
+         const char *identifier
+           = IDENTIFIER_POINTER (DECL_NAME (callee_node->decl));
+         const char *sigsetjmp = "sigsetjmp";
+         if (strstr (identifier, sigsetjmp) != NULL)
+           return false;
+
+         if (fndecl_built_in_p (callee_node->decl))
+           continue;
+
+         if (dump_file)
+           fprintf (dump_file, "found callee %s\n",
+                    callee_node->dump_asm_name ());
+         callees.safe_push (callee_node);
+       }
+    }
+  cgraph_node *callee;
+  int i;
+  FOR_EACH_VEC_ELT (callees, i, callee)
+    {
+      if (path_exists (callee, r_cnode))
+       {
+         return false;
+       }
+    }
+
+  return true;
+}
+
+static bool
+call_before_read (struct ipa_ref *write, struct ipa_ref *read)
+{
+  symtab_node *w_node = write->referring;
+  cgraph_node *w_cnode = dyn_cast<cgraph_node *> (w_node);
+  symtab_node *r_node = read->referring;
+  cgraph_node *r_cnode = dyn_cast<cgraph_node *> (r_node);
+
+  gcc_assert (path_exists (r_cnode, w_cnode));
+  gcc_assert (w_cnode != r_cnode);
+
+  basic_block w_bb = gimple_bb (write->stmt);
+  basic_block r_bb = gimple_bb (read->stmt);
+
+  if (in_lto_p)
+    r_cnode->get_untransformed_body ();
+
+  function *func = DECL_STRUCT_FUNCTION (r_cnode->decl);
+  basic_block c_bb;
+  FOR_EACH_BB_FN (c_bb, func)
+    {
+      bool self_dominates = c_bb == r_bb;
+      bool call_dominates_read = bb1_dominates_bb2 (c_bb, r_bb, r_cnode);
+      if (!call_dominates_read && !self_dominates)
+       continue;
+
+      for (gimple_stmt_iterator gsi = gsi_start_bb (c_bb); !gsi_end_p (gsi);
+          gsi_next (&gsi))
+       {
+         gimple *stmt = gsi_stmt (gsi);
+
+         // self dominance
+         if (stmt == read->stmt)
+           break;
+
+         if (gimple_code (stmt) != GIMPLE_CALL)
+           continue;
+
+         if (dump_file)
+           print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
+         tree rhs = gimple_op (stmt, 1);
+         if (TREE_CODE (rhs) == POINTER_TYPE)
+           return false;
+         tree callee_decl = gimple_call_fndecl (stmt);
+
+         cgraph_node *callee_node = cgraph_node::get (callee_decl);
+         const char *identifier
+           = IDENTIFIER_POINTER (DECL_NAME (callee_node->decl));
+         const char *sigsetjmp = "sigsetjmp";
+         if (strstr (identifier, sigsetjmp) != NULL)
+           return false;
+
+         if (path_exists (callee_node, w_cnode))
+           return true;
+       }
+    }
+  return false;
+}
+
+static bool
+write_before_read (struct ipa_ref *write, struct ipa_ref *read)
+{
+  symtab_node *w_node = write->referring;
+  cgraph_node *w_cnode = dyn_cast<cgraph_node *> (w_node);
+  symtab_node *r_node = read->referring;
+  cgraph_node *r_cnode = dyn_cast<cgraph_node *> (r_node);
+
+  if (w_cnode == r_cnode)
+    /* Within the same function.  */
+    return write_before_read_in_function (write, read);
+
+  /* Not within the same function.  */
+  return write_before_read_across_function_borders (write, read);
+}
+
+static bool
+write_before_all_reads (struct ipa_ref *write, const ipa_ref_vec &reads)
+{
+  int i;
+  struct ipa_ref *read;
+
+  FOR_EACH_VEC_ELT (reads, i, read)
+    {
+      load_function_body_of_ipa_ref (read);
+      if (!write_before_read (write, read))
+       return false;
+    }
+  return true;
+}
+
+/* Replace the load REF of the analyzed variable with the constant
+   WRITE_VAL.  The replacement is not done in the reading function
+   itself but in a clone of it (created here on first use and cached
+   in FUNC_TO_CLONE); callers are redirected to the clone later in
+   update_callgraph.  FUNC is currently unused -- see the commented-out
+   bookkeeping below; NOTE(review): consider dropping the parameter or
+   re-enabling the bookkeeping.  */
+static void
+propagate_constant_to_read (tree write_val, struct ipa_ref *ref,
+                           hash_set<cgraph_node *> *func)
+{
+  gcc_assert (ref->use == IPA_REF_LOAD);
+  load_function_body_of_ipa_ref (ref);
+
+  gimple *read_stmt = ref->stmt;
+
+  /* The callers guarantee the load is a simple two-operand
+     assignment.  */
+  gcc_assert (gimple_code (read_stmt) == GIMPLE_ASSIGN);
+  gcc_assert (gimple_num_ops (read_stmt) == 2);
+
+  /* NOTE(review): old_lhs is never used below; the replacement works
+     on the corresponding statement found in the clone instead.  */
+  tree old_lhs = gimple_op (read_stmt, 0);
+  symtab_node *r_node = ref->referring;
+  cgraph_node *r_cnode = dyn_cast<cgraph_node *> (r_node);
+
+  // if (!func->contains (r_cnode)) func->add (r_cnode);
+  /* Create the clone lazily, one per reading function.  */
+  cgraph_node **possible_clone = func_to_clone->get (r_cnode);
+  if (!possible_clone)
+    {
+      const char *suffix = "test";
+      // callers has to be vNULL, otherwise, we will be
+      // analyzing clones...
+      // and we don't want that...
+      // but that means that we will need to update the callers
+      // later...  in update_callgraph
+      cgraph_node *clone
+       = r_cnode->create_version_clone_with_body (vNULL, NULL, NULL, NULL,
+                                                  NULL, suffix, NULL)
+      clone->get_untransformed_body ();
+      func_to_clone->put (r_cnode, clone);
+    }
+
+  /* Re-fetch so both the just-created and the cached case end up
+     here.  */
+  possible_clone = func_to_clone->get (r_cnode);
+  cgraph_node *clone = *possible_clone;
+
+  // Build new stmt and replace old.
+  gimple_stmt_iterator gsi;
+  basic_block bb;
+  // Let's create a clone with body here...
+  // The clone should not have callers as
+  // to not interfere with the current
+  // analysis.
+  // The callers will be set at the end...
+
+  /* Walk the clone's body looking for the statement that corresponds
+     to READ_STMT (matched by the name of the loaded variable).  */
+  push_cfun (DECL_STRUCT_FUNCTION (clone->decl));
+  function *clone_func = DECL_STRUCT_FUNCTION (clone->decl);
+  bool found = false;
+  FOR_EACH_BB_FN (bb, clone_func)
+    {
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+       {
+         gimple *stmt = gsi_stmt (gsi);
+         if (gimple_code (stmt) != GIMPLE_ASSIGN)
+           continue;
+         tree old_val_clone = gimple_op (stmt, 1);
+         tree lhs = gimple_op (stmt, 0);
+
+         // TODO: what happens when it is not a variable decl?
+         // This might actually be the source of imprecision
+         if (TREE_CODE (old_val_clone) != VAR_DECL)
+           continue;
+
+         /* NOTE(review): this relies on GCC identifiers being
+            interned, so comparing the IDENTIFIER_POINTERs is an
+            equality test on the names -- confirm, or compare the
+            DECL_NAME trees directly for clarity.  */
+         tree old_val = gimple_op (read_stmt, 1);
+         if (IDENTIFIER_POINTER (DECL_NAME (old_val_clone))
+             != IDENTIFIER_POINTER (DECL_NAME (old_val)))
+           continue;
+
+         found = true;
+
+         // REPLACE!!!
+         /* Insert "new_lhs = write_val" before the old load and
+            redirect every immediate use of the old LHS to the new
+            SSA name.  The old load itself becomes dead.  */
+         gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
+         tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
+         gimple *new_read_stmt = gimple_build_assign (new_lhs, write_val);
+         gsi_insert_before (&gsi2, new_read_stmt, GSI_SAME_STMT);
+
+         gimple *use_stmt;
+         imm_use_iterator use_iter;
+         FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
+           {
+             use_operand_p use_p;
+             FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
+               SET_USE (use_p, new_lhs);
+             update_stmt (use_stmt);
+           }
+       }
+
+      /* Stop at the first basic block containing a match; all
+         matching loads inside that block have been rewritten.  */
+      if (found)
+       break;
+    }
+  gcc_assert (found);
+  pop_cfun ();
+}
+
+/* Collect all references to variable VNODE, partitioning them into
+   WRITES (IPA_REF_STORE) and READS (IPA_REF_LOAD).  References coming
+   from clones created by this pass are skipped.  Returns false when a
+   reference does not come from a function (i.e. the variable has a
+   static initializer), in which case the variable must be skipped.  */
+static bool
+ipa_initcall_get_writes_and_reads (varpool_node *vnode, ipa_ref_vec *writes,
+                                  ipa_ref_vec *reads)
+{
+  int i;
+  struct ipa_ref *ref;
+
+  if (dump_file)
+    fprintf (dump_file, "%s for variable '%s'.\n", __func__, vnode->name ());
+
+  /* Only IPA_REF_STORE and IPA_REF_LOAD left.  */
+  for (i = 0; vnode->iterate_referring (i, ref); i++)
+    {
+      symtab_node *f_node = ref->referring;
+      /* The dyn_cast fails when the referring symbol is not a
+         function; that is an example of static initialization.  */
+      cgraph_node *f_cnode = dyn_cast<cgraph_node *> (f_node);
+
+      if (!f_cnode)
+       {
+         if (dump_file)
+           fprintf (dump_file,
+                    "skipping variable %s due to static initialization\n",
+                    vnode->name ());
+         return false;
+       }
+      // The referring symbol is a function; inspect its name so that
+      // references made from our own clones can be filtered out.
+      const char *identifier = IDENTIFIER_POINTER (DECL_NAME (f_cnode->decl));
+      // This is the suffix we are adding to our clones.
+      // Therefore, if we find the suffix in the identifier,
+      // that means that we found a variable in a clone and
+      // we would like to skip it.
+      // TODO: make this a global static for easier changes...
+      const char *suffix = "test.0";
+      if (strstr (identifier, suffix) != NULL)
+       continue;
+
+      if (ref->use == IPA_REF_STORE)
+       writes->safe_push (ref);
+
+      if (ref->use == IPA_REF_LOAD)
+       reads->safe_push (ref);
+    }
+  return true;
+}
+
+/* Propagate the constant WRITE_VAL into every load contained in
+   READS_ORIGINAL.  FUNCS is passed through to
+   propagate_constant_to_read.  */
+static void
+propagate_constant_to_reads (tree write_val, const ipa_ref_vec &reads_original,
+                            hash_set<cgraph_node *> *funcs)
+{
+  unsigned ix;
+  struct ipa_ref *load;
+
+  /* Replace each read by the constant.  */
+  FOR_EACH_VEC_ELT (reads_original, ix, load)
+    propagate_constant_to_read (write_val, load, funcs);
+}
+
+/*
+ * Extracts the assigned constant, iff the given statement
+ * is a constant assignment.  Returns NULL_TREE otherwise.
+ *
+ * WRITE is the single IPA_REF_STORE reference of the candidate
+ * variable; the function loads the body of the writing function
+ * (needed during LTO) and inspects the referenced statement.
+ */
+static tree
+extract_constant_from_initcall_write (struct ipa_ref *write)
+{
+  symtab_node *f_node = write->referring;
+  cgraph_node *f_cnode = dyn_cast<cgraph_node *> (f_node);
+
+  /* The writer must be an ordinary function.  */
+  tree decl = f_cnode->decl;
+  if (TREE_CODE (decl) != FUNCTION_DECL)
+    {
+      if (dump_file)
+       fprintf (dump_file, "Not function decl -> skipping.\n");
+      return NULL_TREE;
+    }
+
+  /* Diagnostics only; neither condition aborts the analysis.  */
+  if (!f_cnode->has_gimple_body_p () && dump_file)
+    fprintf (dump_file, "Does NOT have gimple body!\n");
+  if (f_cnode->inlined_to && dump_file)
+    fprintf (dump_file, "Inlined To\n");
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "%s: for writer gimple:\n", __func__);
+      dump_vnode_ipa_ref (write);
+    }
+
+  /* Get the write function's body.  */
+  load_function_body_of_ipa_ref (write);
+
+  gimple *stmt = write->stmt;
+
+  /* Verify that we have an assignment.  */
+  if (gimple_code (stmt) != GIMPLE_ASSIGN)
+    {
+      if (dump_file)
+       fprintf (dump_file, "Write stmt not assignment -> skipping.\n");
+      return NULL_TREE;
+    }
+
+  /* Check if write's LHS (vnode) is not volatile.
+     NOTE(review): this tests volatility of the LHS *type* only --
+     confirm whether TREE_THIS_VOLATILE should also be checked on the
+     decl itself.  */
+  tree lhs = gimple_assign_lhs (stmt);
+  if (TREE_THIS_VOLATILE (TREE_TYPE (lhs)))
+    {
+      if (dump_file)
+       fprintf (dump_file, "Variable volatile -> skipping.\n");
+      return NULL_TREE;
+    }
+
+  /* Check if RHS of write stmt is constant.  */
+  if (!gimple_assign_is_single_const (stmt))
+    {
+      if (dump_file)
+       {
+         fprintf (dump_file, "Found non-const operands.\n");
+       }
+      return NULL_TREE;
+    }
+
+  /* Extract the constant.  */
+  tree write_val = gimple_op (stmt, 1);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "Const op:\n");
+      dump_node (write_val, TDF_DETAILS, dump_file);
+    }
+
+  return write_val;
+}
+
+/* Try to eliminate the static-lifetime variable VNODE by constant
+   propagation.  The variable qualifies when it is not externally
+   visible, has only explicit LOAD/STORE references, is of scalar
+   (non-aggregate) type, and is written exactly once with a constant
+   that is guaranteed to be stored before every read.  The constant is
+   then propagated into clones of all reading functions.
+   UPDATE_FUNCTIONS collects the affected readers (currently unused
+   downstream -- see propagate_constant_to_read).  */
+static void
+ipa_initcall_cp_execute_for_var (varpool_node *vnode,
+                                hash_set<cgraph_node *> *update_functions)
+{
+  ipa_ref_vec writes = vNULL;
+  ipa_ref_vec reads = vNULL;
+  struct ipa_ref *write;
+  tree write_val;
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "%s for variable '%s'.\n", __func__, vnode->name ());
+      dump_node (vnode->decl, TDF_DETAILS, dump_file);
+    }
+
+  /* Variable must not be externally visible.  */
+  if (vnode->externally_visible_p ())
+    {
+      if (dump_file)
+       fprintf (dump_file, "\texternally visible "
+                           "-> skipping\n");
+      return;
+    }
+
+  /* All refs must be explicit.  */
+  if (!vnode->all_refs_explicit_p ())
+    {
+      if (dump_file)
+       fprintf (dump_file, "Not explicit variable refs "
+                           "-> skipping.\n");
+      return;
+    }
+
+  /* Check if any ref->use is IPA_REF_ALIAS.  */
+  if (vnode->has_aliases_p ())
+    {
+      if (dump_file)
+       fprintf (dump_file, "Found IPA_REF_ALIAS -> skipping.\n");
+      return;
+    }
+
+  /* Check if any ref->use is IPA_REF_ADDR.  */
+  if (symtab_node_address_matters_p (vnode))
+    {
+      if (dump_file)
+       fprintf (dump_file, "Found IPA_REF_ADDR -> skipping.\n");
+      return;
+    }
+
+  /* We don't touch aggregates (arrays, structs, unions).  */
+  switch (TREE_CODE (TREE_TYPE (vnode->decl)))
+    {
+    case ARRAY_TYPE:
+      if (dump_file)
+       fprintf (dump_file, "Variable is array -> skipping.\n");
+      return;
+    case RECORD_TYPE:
+      if (dump_file)
+       fprintf (dump_file, "Variable is struct -> skipping.\n");
+      return;
+    case UNION_TYPE:
+      if (dump_file)
+       fprintf (dump_file, "Variable is union -> skipping.\n");
+      return;
+    default:
+      break;
+    }
+
+  /* Get all refs (must be REF_STORE or REF_LOAD).  */
+  if (!ipa_initcall_get_writes_and_reads (vnode, &writes, &reads))
+    {
+      if (dump_file)
+       fprintf (dump_file, "Static initialization -> skipping.\n");
+      goto cleanup;
+    }
+
+  /*
+   * Note:
+   * Limiting ourselves to only one write is not necessary in general,
+   * but good enough to address the typical init () case.
+   * Big advantage is, that it makes the following code much easier.
+   *
+   * TODO: I believe that if we create clones, we might get two writes
+   * Investigate
+   */
+  if (writes.length () > 1)
+    {
+      /* More than one writer.  */
+      if (dump_file)
+       fprintf (dump_file, "More than one writer -> skipping.\n");
+      goto cleanup;
+    }
+  else if (writes.length () < 1)
+    {
+      /* No writer.  */
+      if (dump_file)
+       fprintf (dump_file, "No writer -> skipping.\n");
+      goto cleanup;
+    }
+
+  /* Get (only) write ref.  */
+  write = writes.pop ();
+
+  /* Check if write's RHS is a constant and get it.  */
+  write_val = extract_constant_from_initcall_write (write);
+  if (write_val == NULL_TREE)
+    {
+      if (dump_file)
+       fprintf (dump_file, "Write's RHS is not constant"
+                           " -> skipping.\n");
+      goto cleanup;
+    }
+
+  /* Assure all reads are after the write.  */
+  if (!write_before_all_reads (write, reads))
+    {
+      if (dump_file)
+       fprintf (dump_file, "Write not guaranteed "
+                           "to be before read -> skipping.\n");
+      goto cleanup;
+    }
+
+  /* Propagate constant to reads.  */
+  if (!flag_initcall_cp_dryrun)
+    propagate_constant_to_reads (write_val, reads, update_functions);
+
+  /* TODO: remove the now-dead write statement.  Removing it via
+     gsi_remove on gsi_for_stmt (write->stmt) with
+     remove_permanently == true used to fail; investigate before
+     enabling the removal.  */
+
+  if (dump_file)
+    fprintf (dump_file, "Eliminated variable '%s'.\n\n", vnode->name ());
+
+cleanup:
+  writes.release ();
+  reads.release ();
+}
+
+/* Traversal callback for FUNC_TO_CLONE.  Redirect all callers of the
+   original function R_CNODE to its constant-propagated clone
+   *CLONE_PTR, fix up the call statements in the callers' bodies, and
+   mark the now-bypassed original so that ipa-cp skips it.  Always
+   returns true so the traversal continues.  */
+bool
+update_callgraph (cgraph_node *const &r_cnode, cgraph_node **clone_ptr, void *)
+{
+  vec<cgraph_edge *> callers = r_cnode->collect_callers ();
+  cgraph_node *clone = *clone_ptr;
+  cgraph_edge *e;
+  int i;
+  /* NOTE(review): count is only used by the commented-out edge
+     cloning below.  */
+  profile_count count = r_cnode->count;
+
+  /* Point the callgraph edges at the clone.  */
+  FOR_EACH_VEC_ELT (callers, i, e)
+    e->redirect_callee (clone);
+
+  /*
+    for (e = r_cnode->callees; e; e=e->next_callee)
+      e->clone (clone, e->call_stmt, e->lto_stmt_uid, count, count, true);
+
+    for (e = r_cnode->indirect_calls; e; e=e->next_callee)
+      e->clone (clone, e->call_stmt, e->lto_stmt_uid, count, count, true);
+  */
+  /* Rewrite the actual call statements in each caller's body to call
+     the clone's decl.  */
+  for (e = clone->callers; e; e = e->next_caller)
+    {
+      e->caller->get_untransformed_body ();
+      function *inner_function = DECL_STRUCT_FUNCTION (e->caller->decl);
+      gimple_call_set_fndecl (e->call_stmt, clone->decl);
+      maybe_clean_eh_stmt_fn (inner_function, e->call_stmt);
+    }
+
+  /* The original is no longer reachable through the callgraph;
+     prevent ipa-cp from analyzing it (see the ipa-cp.c hunk and
+     question #3 in the cover letter).  */
+  r_cnode->skip_ipa_cp = true;
+  // r_cnode->remove ();
+  push_cfun (DECL_STRUCT_FUNCTION (r_cnode->decl));
+
+  /* Debugging aid: dump the original function's remaining body.  */
+  if (dump_file)
+    fprintf (dump_file, "dumping function %s\n", r_cnode->dump_asm_name ());
+  function *func = DECL_STRUCT_FUNCTION (r_cnode->decl);
+  basic_block bb;
+  FOR_EACH_BB_FN (bb, func)
+    {
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+          gsi_next (&gsi))
+       {
+         gimple *stmt = gsi_stmt (gsi);
+         if (dump_file)
+           print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
+       }
+    }
+  /* Dominance info computed during the analysis refers to the
+     pre-transformation bodies; discard it.  */
+  if (dom_info_available_p (CDI_DOMINATORS))
+    free_dominance_info (CDI_DOMINATORS);
+  pop_cfun ();
+  return true;
+}
+
+/* Main driver of the pass: attempt constant propagation for every
+   variable with static lifetime, then redirect callers from the
+   original functions to their modified clones.  */
+static unsigned int
+ipa_initcall_cp_execute (void)
+{
+  varpool_node *var;
+  hash_set<cgraph_node *> update_functions;
+
+  clone_num_suffixes1 = new hash_map<const char *, unsigned>;
+  func_to_clone = new hash_map<cgraph_node *, cgraph_node *>;
+
+  FOR_EACH_VARIABLE (var)
+    ipa_initcall_cp_execute_for_var (var, &update_functions);
+
+  /* In dry-run mode the callgraph is left untouched.  */
+  if (!flag_initcall_cp_dryrun)
+    func_to_clone->traverse<void *, update_callgraph> (NULL);
+
+  delete func_to_clone;
+  delete clone_num_suffixes1;
+
+  return 0;
+}
+
+namespace {
+
+/* Pass metadata; see pass_data in tree-pass.h for the field order.  */
+const pass_data pass_data_ipa_initcall_cp = {
+  IPA_PASS, /* type */
+  "initcall_cp", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  (PROP_cfg | PROP_ssa), /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  (TODO_update_ssa | TODO_cleanup_cfg | TODO_dump_symtab
+   | TODO_rebuild_cgraph_edges | TODO_discard_function), /* todo_flags_finish */
+};
+
+/* The initcall constant-propagation IPA pass.  All of the optional
+   IPA hooks (generate_summary, write/read_summary, ...) are unused.  */
+class pass_ipa_initcall_cp : public ipa_opt_pass_d
+{
+public:
+  pass_ipa_initcall_cp (gcc::context *ctxt)
+    : ipa_opt_pass_d (pass_data_ipa_initcall_cp, ctxt, NULL, NULL, NULL, NULL,
+                     NULL, NULL, 0, NULL, NULL)
+  {}
+
+  /* opt_pass methods: */
+  /* Run for real runs as well as dry runs (analysis only).  */
+  virtual bool gate (function *)
+  {
+    return flag_initcall_cp || flag_initcall_cp_dryrun;
+  }
+
+  virtual unsigned int execute (function *)
+  {
+    return ipa_initcall_cp_execute ();
+  }
+
+}; // class pass_ipa_initcall_cp
+
+} // namespace
+
+/* Factory function, referenced from passes.def and declared in
+   tree-pass.h.  */
+ipa_opt_pass_d *
+make_pass_ipa_initcall_cp (gcc::context *ctxt)
+{
+  return new pass_ipa_initcall_cp (ctxt);
+}
diff --git a/gcc/passes.def b/gcc/passes.def
index 798a391..350a4fc 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -146,6 +146,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_ipa_profile);
   NEXT_PASS (pass_ipa_icf);
   NEXT_PASS (pass_ipa_devirt);
+  NEXT_PASS (pass_ipa_initcall_cp);
   NEXT_PASS (pass_ipa_cp);
   NEXT_PASS (pass_ipa_sra);
   NEXT_PASS (pass_ipa_cdtor_merge);
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index a987661..6de60fe 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -498,6 +498,7 @@ extern ipa_opt_pass_d *make_pass_ipa_fn_summary 
(gcc::context *ctxt);
 extern ipa_opt_pass_d *make_pass_ipa_inline (gcc::context *ctxt);
 extern simple_ipa_opt_pass *make_pass_ipa_free_lang_data (gcc::context *ctxt);
 extern simple_ipa_opt_pass *make_pass_ipa_free_fn_summary (gcc::context *ctxt);
+extern ipa_opt_pass_d *make_pass_ipa_initcall_cp (gcc::context *ctxt);
 extern ipa_opt_pass_d *make_pass_ipa_cp (gcc::context *ctxt);
 extern ipa_opt_pass_d *make_pass_ipa_sra (gcc::context *ctxt);
 extern ipa_opt_pass_d *make_pass_ipa_icf (gcc::context *ctxt);




On 2019-11-29 4:21 p.m., Erick Ochoa wrote:
> 
> Hello,
> 
> my name is Erick and I am working in a link-time-optimization pass named
> ipa-initcall-cp. It is called ipa-initcall-cp because it propagates constant
> values written to variables with static lifetimes (such as ones initialized in
> initialization functions). ipa-initcall-cp has to be located after
> ipa-whole-program-visibility to determine which functions have external
> visibility and before ipa-cp because ipa-cp can take advantage of our
> transformation.
> 
> I have attached a patch and test cases. I have applied and tested the patch
> against:
> 
> commit 17a2c588c29f089d3c2a36df47175bbdf376e399
> Date:   Wed Nov 27 00:23:39 2019 +0000
> git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@278752
> 138bc75d-0d04-0410-961f-82ee72b054a4
> 
> In order to run the test cases:
> * apply patch to gcc
> * modify Makefile to point to modified gcc
> * make
> 
> The test cases test the order in which a write can happen regarding a read
> (before or after) and whether the write/read happen in a function being called
> by the function that reads/writes. The test cases succeed when the variables
> prefixed with SUCCEED_* have been propagated and when the variables prefixed
> with FAILED_* have NOT been propagated. To quickly determine which variables
> have been propagated, you can do the following command:
> 
> grep 'Eliminated' test.wpa.071i.initcall_cp
> # SUCCESS_{0,1,2,3}* should be eliminated
> 
> This compiler pass is able to compile non-trivial code, but it does not 
> compile
> arbitrary valid code (sometimes linking fails). We are using
> create_clone_version_with_body for creating the clones. The reasoning behind
> this is that we want clones to have a body so that we can modify it. I have
> been told that `create_version_clone_with_body` are used mainly in IPA passes
> that are not part of   INSERT_PASSES_AFTER (all_regular_ipa_passes).
> 
> Question #1: Can someone please tell me what would be the best kind of clones
> to use in this use case?
> Question #2: I found out that clones produced with
> create_clone_version_with_body are not themselves versionable. What is the
> reason behind this?
> Question #3: I have added a flag to cgraph_nodes to skip ipa-cp. This flag is
> true for the original functions which have been removed from the call graph.
> If this flag is removed, then cgraph_nodes removed from the call graph will
> trigger an assertion in ipa-cp (ipa-cp.c:1195). I would like to not have to
> add this flag to cgraph_node but I am not sure what to do.
> 
> Please note:
> * The patch is also included in the attachments file (because I've been having
> issues with how e-mail clients format my content. Note, that this is a text
> only html, but the patch might have rows longer than 80 characters (due to the
> + sign)
> and my e-mail client is splitting lines.
> * The patch still does not meet the coding standards, I'll be working on that.
> * The patch is included here (and the attachments) and the tests are included
> as a attachments. I'll be working on integrating the tests as part of the
> testing suite
> 
> 
> Thanks!
> 
> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> index 7d3c132..ad8d998 100644
> --- a/gcc/Makefile.in
> +++ b/gcc/Makefile.in
> @@ -1370,6 +1370,7 @@ OBJS = \
>       internal-fn.o \
>       ipa-cp.o \
>       ipa-sra.o \
> +     ipa-initcall-cp.o \
>       ipa-devirt.o \
>       ipa-fnsummary.o \
>       ipa-polymorphic-call.o \
> diff --git a/gcc/cgraph.c b/gcc/cgraph.c
> index 1f7a5c5..bc51681 100644
> --- a/gcc/cgraph.c
> +++ b/gcc/cgraph.c
> @@ -287,6 +287,7 @@ symbol_table::create_empty (void)
>    node->type = SYMTAB_FUNCTION;
>    node->frequency = NODE_FREQUENCY_NORMAL;
>    node->count_materialization_scale = REG_BR_PROB_BASE;
> +  node->skip_ipa_cp = false;
>    cgraph_count++;
>  
>    return node;
> @@ -3515,6 +3516,7 @@ cgraph_node::get_untransformed_body (void)
>    name = lto_get_decl_name_mapping (file_data, name);
>    struct lto_in_decl_state *decl_state
>        = lto_get_function_in_decl_state (file_data, decl);
> +  gcc_assert (decl_state != NULL);
>  
>    cgraph_node *origin = this;
>    while (origin->clone_of)
> diff --git a/gcc/cgraph.h b/gcc/cgraph.h
> index 0d2442c..82725ed 100644
> --- a/gcc/cgraph.h
> +++ b/gcc/cgraph.h
> @@ -899,6 +899,8 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : 
> public symtab_node
>  {
>    friend class symbol_table;
>  
> +  bool skip_ipa_cp;
> +
>    /* Remove the node from cgraph and all inline clones inlined into it.
>       Skip however removal of FORBIDDEN_NODE and return true if it needs to be
>       removed.  This allows to call the function from outer loop walking clone
> diff --git a/gcc/common.opt b/gcc/common.opt
> index 404b6aa..fb662ed 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -3344,4 +3344,7 @@ fipa-ra
>  Common Report Var(flag_ipa_ra) Optimization
>  Use caller save register across calls if possible.
>  
> +finitcall-cp
> +Common Report Var(flag_initcall_cp) TBD.
> +
>  ; This comment is to ensure we retain the blank line above.
> diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
> index 31a98a3..72d1595 100644
> --- a/gcc/ipa-cp.c
> +++ b/gcc/ipa-cp.c
> @@ -1185,7 +1185,7 @@ initialize_node_lattices (struct cgraph_node *node)
>  
>    gcc_checking_assert (node->has_gimple_body_p ());
>  
> -  if (!ipa_get_param_count (info))
> +  if (!ipa_get_param_count (info) || node->skip_ipa_cp)
>      disable = true;
>    else if (node->local)
>      {
> diff --git a/gcc/ipa-initcall-cp.c b/gcc/ipa-initcall-cp.c
> new file mode 100644
> index 0000000..8f1c4ab
> --- /dev/null
> +++ b/gcc/ipa-initcall-cp.c
> @@ -0,0 +1,1289 @@
> +/* Initcall constant propagation pass.  */
> +
> +#include "config.h"
> +#include "system.h"
> +#include "coretypes.h"
> +#include "backend.h"
> +#include "tree.h"
> +#include "tree-eh.h"
> +#include "gimple.h"
> +#include "gimple-expr.h"
> +#include "gimple-iterator.h"
> +#include "predict.h"
> +#include "alloc-pool.h"
> +#include "tree-pass.h"
> +#include "cgraph.h"
> +#include "diagnostic.h"
> +#include "fold-const.h"
> +#include "gimple-fold.h"
> +#include "symbol-summary.h"
> +#include "tree-vrp.h"
> +#include "ipa-prop.h"
> +#include "tree-pretty-print.h"
> +#include "tree-inline.h"
> +#include "ipa-fnsummary.h"
> +#include "ipa-utils.h"
> +#include "tree-ssa-ccp.h"
> +#include "stringpool.h"
> +#include "attribs.h"
> +#include "gimple-pretty-print.h"
> +#include "ssa.h"
> +
> +static bool bb1_dominates_bb2 (basic_block, basic_block, cgraph_node*);
> +static bool write_before_call (struct ipa_ref *, struct ipa_ref *);
> +static bool call_before_read(struct ipa_ref *, struct ipa_ref *);
> +static hash_map<const char *, unsigned> *clone_num_suffixes1;
> +static hash_map<cgraph_node*, cgraph_node*> *func_to_clone;
> +
> +static bool
> +comdat_can_be_unshared_p_1 (symtab_node *node)
> +{
> +  if (!node->externally_visible)
> +    return true;
> +  if (node->address_can_be_compared_p ())
> +    {
> +      struct ipa_ref *ref;
> +
> +      for (unsigned int i = 0; node->iterate_referring (i, ref); i++)
> +        if (ref->address_matters_p ())
> +          return false;
> +    }
> +
> +  /* If the symbol is used in some weird way, 
> +   * better to not touch it.  */
> +  if (node->force_output)
> +    return false;
> +
> +  /* Explicit instantiations needs to be output when possibly
> +     used externally.  */
> +  if (node->forced_by_abi
> +      && TREE_PUBLIC (node->decl)
> +      && (node->resolution != LDPR_PREVAILING_DEF_IRONLY
> +          && !flag_whole_program))
> +    return false;
> +
> +  /* Non-readonly and volatile variables cannot be duplicated.  */
> +  if (is_a <varpool_node *> (node)
> +      && (!TREE_READONLY (node->decl)
> +      || TREE_THIS_VOLATILE (node->decl)))
> +    return false;
> +  return true;
> +}
> +
> +/* COMDAT functions must be shared only if they have address taken,
> +   otherwise we can produce our own private implementation with
> +   -fwhole-program.
> +   Return true when turning COMDAT function static cannot lead to wrong
> +   code when the resulting object links with a library defining same
> +   COMDAT.
> +
> +   Virtual functions do have their addresses taken from the vtables,
> +   but in C++ there is no way to compare their addresses 
> +   for equality.  */
> +
> +static bool
> +comdat_can_be_unshared_p (symtab_node *node)
> +{
> +  if (!comdat_can_be_unshared_p_1 (node))
> +    return false;
> +  if (node->same_comdat_group)
> +    {
> +      symtab_node *next;
> +
> +      /* If more than one function is in the same COMDAT group, it must
> +         be shared even if just one function in the comdat group has
> +         address taken.  */
> +      for (next = node->same_comdat_group;
> +        next != node; next = next->same_comdat_group)
> +        if (!comdat_can_be_unshared_p_1 (next))
> +          return false;
> +    }
> +  return true;
> +}
> +
> +/* Return true when function NODE should 
> + * be considered externally visible.  */
> +
> +static bool
> +cgraph_externally_visible_p (struct cgraph_node *node,
> +                             bool whole_program)
> +{
> +  while (node->transparent_alias && node->definition)
> +    node = node->get_alias_target ();
> +  if (!node->definition)
> +    return false;
> +  if (!TREE_PUBLIC (node->decl)
> +      || DECL_EXTERNAL (node->decl))
> +    return false;
> +
> +  /* Do not try to localize built-in functions yet. 
> +    One of the problems is that we
> +     end up mangling their asm for
> +     WHOPR that makes it impossible to call them
> +     using the implicit built-in declarations 
> +     anymore.  Similarly this enables
> +     us to remove them as unreachable before 
> +     actual calls may appear during
> +     expansion or folding.  */
> +  if (fndecl_built_in_p (node->decl))
> +    return true;
> +
> +  /* If linker counts on us, we must preserve the function.  */
> +  if (node->used_from_object_file_p ())
> +    return true;
> +  if (DECL_PRESERVE_P (node->decl))
> +    return true;
> +  if (lookup_attribute ("externally_visible",
> +                        DECL_ATTRIBUTES (node->decl)))
> +    return true;
> +  if (lookup_attribute ("noipa", DECL_ATTRIBUTES (node->decl)))
> +    return true;
> +  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
> +      && lookup_attribute ("dllexport",
> +                           DECL_ATTRIBUTES (node->decl)))
> +    return true;
> +  if (node->resolution == LDPR_PREVAILING_DEF_IRONLY)
> +    return false;
> +  /* When doing LTO or whole program, we can bring COMDAT functions
> +     static.
> +     This improves code quality and we know we will duplicate them at
> +     most twice
> +     (in the case that we are not using plugin and link with object 
> +     file implementing same COMDAT)  */
> +  if (((in_lto_p || whole_program) && !flag_incremental_link)
> +      && DECL_COMDAT (node->decl)
> +      && comdat_can_be_unshared_p (node))
> +    return false;
> +
> +  /* When doing link time optimizations, 
> +  * hidden symbols become local.  */
> +  if ((in_lto_p && !flag_incremental_link)
> +      && (DECL_VISIBILITY (node->decl) == VISIBILITY_HIDDEN
> +      || DECL_VISIBILITY (node->decl) == VISIBILITY_INTERNAL)
> +      /* Be sure that node is defined in IR file, not in other object
> +         file.  In that case we don't set 
> +         used_from_other_object_file. */
> +      && node->definition)
> +    ;
> +  else if (!whole_program)
> +    return true;
> +
> +  if (MAIN_NAME_P (DECL_NAME (node->decl)))
> +    return true;
> +
> +  return false;
> +}
> +
> +typedef vec <struct ipa_ref*> ipa_ref_vec;
> +
> +//XXX: Move to ipa-ref.h
> +static const char* stringify_ipa_ref_use(ipa_ref_use use)
> +{
> +  switch (use) {
> +    case IPA_REF_LOAD:
> +      return "IPA_REF_LOAD";
> +      break;
> +    case IPA_REF_STORE:
> +      return "IPA_REF_STORE";
> +      break;
> +    case IPA_REF_ADDR:
> +      return "IPA_REF_ADDR";
> +      break;
> +    case IPA_REF_ALIAS:
> +      return "IPA_REF_ALIAS";
> +      break;
> +    default:
> +      return "<unknown>";
> +  }
> +}
> +
> +static void
> +load_function_body_of_ipa_ref (cgraph_node* node)
> +{
> +  if (in_lto_p)
> +    {
> +      cgraph_node *f_cnode2 = node->ultimate_alias_target ();
> +      if (dump_file)
> +        {
> +        fprintf (dump_file, "%s: for function '%s'\n", __func__, \
> +               f_cnode2->dump_asm_name ());
> +        dump_node (f_cnode2->decl, TDF_DETAILS, dump_file);
> +        }
> +      f_cnode2->get_untransformed_body();
> +    }
> +}
> +
> +static void
> +load_function_body_of_ipa_ref (struct ipa_ref* ref)
> +{
> +  /* IPA passes don't get the function bodies during LTO.  */
> +  if (in_lto_p)
> +    {
> +      symtab_node *f_node = ref->referring;
> +      cgraph_node *f_cnode = dyn_cast <cgraph_node *> (f_node);
> +      load_function_body_of_ipa_ref (f_cnode);
> +    }
> +}
> +
> +
> +static void
> +dump_vnode_ipa_ref (ipa_ref* ref)
> +{
> +  fprintf (dump_file, \
> +       "Reference type: %s\n", \
> +       stringify_ipa_ref_use (ref->use));
> +
> +  symtab_node *f_node = ref->referring;
> +  fprintf (dump_file, \
> +       "Reference function: %s\n", \
> +       f_node->dump_asm_name ());
> +
> +  fprintf (dump_file, "Gimple stmt (@0x%lx, lto_stmt_uid: %u):\n", \
> +     (long)ref->stmt,
> +     ref->lto_stmt_uid);
> +  load_function_body_of_ipa_ref (ref);
> +  print_gimple_stmt (dump_file, ref->stmt, 0, TDF_NONE);
> +}
> +
> +//XXX: Move to tree.h
> +static bool
> +tree_code_is_cst (tree op)
> +{
> +  //XXX: use cprop_constant_p() here?
> +  int code = TREE_CODE (op);
> +  if (code == INTEGER_CST ||
> +      code == REAL_CST ||
> +      code == COMPLEX_CST ||
> +      code == VECTOR_CST)
> +    return true;
> +  return false;
> +}
> +
> +/* Return true if all ops of the assignment are constants.  */
> +static bool
> +gimple_assign_is_single_const(gimple *stmt)
> +{
> +  tree op;
> +
> +  gcc_assert (gimple_code (stmt) == GIMPLE_ASSIGN);
> +
> +  if (gimple_has_volatile_ops (stmt))
> +    {
> +      if (dump_file)
> +        fprintf (dump_file, "has volatile ops!\n");
> +      return false;
> +    }
> +
> +  if (gimple_num_ops (stmt) != 2)
> +    {
> +      if (dump_file)
> +        fprintf (dump_file, "wrong num of ops: %u!\n", \
> +           gimple_num_ops (stmt));
> +      return false;
> +    }
> +
> +  op = gimple_op (stmt, 1);
> +  if (!tree_code_is_cst (op))
> +    {
> +      if (dump_file)
> +        fprintf (dump_file, "op is not cst!\n");
> +      return false;
> +    }
> +
> +  return true;
> +}
> +
> +//XXX: Move to ipa-utils.c
> +//XXX: from/to could be const
> +static bool
> +path_exists_dfs (hash_set<cgraph_node*> *visited,
> +             cgraph_node* current,
> +             cgraph_node *target)
> +{
> +  if (current == target)
> +    return true;
> +
> +  visited->add (current);
> +
> +  cgraph_edge *cs;
> +  for (cs = current->callees; cs; cs = cs->next_callee)
> +    {
> +      cgraph_node *callee = cs->callee->function_symbol ();
> +      if (!visited->contains (callee))
> +     {
> +       if (path_exists_dfs (visited, callee, target))
> +         {
> +           return true;
> +         }
> +     }
> +    }
> +  return false;
> +}
> +
> +//XXX: Move to ipa-utils.c
> +//XXX: from/to could be const
> +static bool
> +path_exists (cgraph_node* from, cgraph_node *to)
> +{
> +  hash_set<cgraph_node*> visited;
> +  bool found_path = path_exists_dfs (&visited, from, to);
> +  visited.empty ();
> +
> +  if (dump_file)
> +    {
> +      fprintf (dump_file, "Found ");
> +      if (!found_path)
> +     fprintf (dump_file, "*no* ");
> +      fprintf (dump_file, \
> +         "path from %s to %s.\n", \
> +         from->name (), \
> +         to->name ());
> +    }
> +  return found_path;
> +}
> +
> +/* bb1_dominates_bb2 is defined further below; declare it here so this
> +   function compiles in its current position in the file.  */
> +static bool bb1_dominates_bb2 (basic_block bb1, basic_block bb2,
> +                            cgraph_node *cnode);
> +
> +/* Return the callees reached by call statements in CALLER that are
> +   NOT dominated by the (first) call site of CALLEE in CALLER.  Within
> +   the call site's own block only calls preceding the call to CALLEE
> +   are collected.  The returned vector must be released by the caller.
> +   Assumption: there is a call site from CALLER to CALLEE.  */
> +vec<struct cgraph_node*>
> +get_nondominated_callees(cgraph_node* caller, cgraph_node* callee)
> +{
> +  if (in_lto_p)
> +    caller->get_untransformed_body ();
> +
> +  function *func = DECL_STRUCT_FUNCTION (caller->decl);
> +
> +  /* Locate the first basic block containing a direct call to CALLEE.  */
> +  basic_block bb;
> +  bool found = false;
> +  FOR_EACH_BB_FN (bb, func)
> +  {
> +     for (gimple_stmt_iterator gsi = gsi_start_bb (bb); \
> +              !gsi_end_p (gsi); \
> +              gsi_next (&gsi))
> +     {
> +         gimple *stmt = gsi_stmt (gsi);
> +         if (gimple_code (stmt) != GIMPLE_CALL)
> +            continue;
> +
> +         /* Indirect calls have no fndecl; they cannot be the direct
> +            call site we are looking for.  */
> +         tree callee_decl = gimple_call_fndecl (stmt);
> +         if (!callee_decl)
> +            continue;
> +
> +         if (cgraph_node::get (callee_decl) != callee)
> +            continue;
> +
> +         found = true;
> +         break;
> +     }
> +     if (found) break;
> +  }
> +
> +  gcc_assert (found);
> +
> +  /* BB now holds the call site.  Collect calls from every block that
> +     is not strictly dominated by BB; within BB itself, stop at the
> +     call to CALLEE.  */
> +  vec<struct cgraph_node*> callees = vNULL;
> +  basic_block bb_c;
> +  FOR_EACH_BB_FN (bb_c, func)
> +  {
> +     bool self_dominates = bb == bb_c;
> +     bool bb_dominates_bbc = bb1_dominates_bb2 (bb, bb_c, caller);
> +     if (bb_dominates_bbc && !self_dominates) continue;
> +
> +     for (gimple_stmt_iterator gsi = gsi_start_bb (bb_c); \
> +              !gsi_end_p (gsi); \
> +              gsi_next (&gsi))
> +     {
> +         gimple *stmt = gsi_stmt (gsi);
> +         if (gimple_code (stmt) != GIMPLE_CALL)
> +            continue;
> +
> +         /* FIXME: indirect calls (no fndecl) should probably be
> +            treated conservatively by the callers; skip them for now
> +            instead of ICEing in cgraph_node::get.  */
> +         tree callee_decl = gimple_call_fndecl (stmt);
> +         if (!callee_decl)
> +            continue;
> +
> +         cgraph_node *callee_node = cgraph_node::get (callee_decl);
> +         if (!callee_node || fndecl_built_in_p (callee_node->decl))
> +            continue;
> +
> +         if (self_dominates && callee_node == callee)
> +            break;
> +
> +         callees.safe_push (callee_node);
> +     }
> +  }
> +  return callees;
> +}
> +
> +
> +//XXX: Move to class symtab_node in cgraph.h
> +static ipa_ref*
> +symtab_node_iterate_address_uses (const symtab_node *n,
> +           unsigned i,
> +           ipa_ref *&ref)
> +{
> +  n->ref_list.referring.iterate (i, &ref);
> +
> +  if (ref && ref->use != IPA_REF_ADDR)
> +    return NULL;
> +
> +  return ref;
> +}
> +
> +//XXX: Move to class symtab_node in cgraph.h
> +/* Return true iff the address of N is taken anywhere, i.e. some
> +   referring ref is an IPA_REF_ADDR.  The previous implementation
> +   inspected only the first referring ref and therefore missed
> +   address uses appearing later in the list, which could let an
> +   address-taken variable through the filter.  */
> +static bool
> +symtab_node_address_matters_p(const symtab_node *n)
> +{
> +  ipa_ref *ref = NULL;
> +
> +  for (unsigned i = 0; n->ref_list.referring.iterate (i, &ref); i++)
> +    if (symtab_node_iterate_address_uses (n, i, ref) != NULL)
> +      return true;
> +
> +  return false;
> +}
> +
> +/* Return true iff BB1 dominates BB2 in CNODE's function.  A block
> +   trivially dominates itself.  cfun is temporarily switched because
> +   the dominance machinery operates on the current function.  */
> +static bool
> +bb1_dominates_bb2 (basic_block bb1, basic_block bb2, cgraph_node *cnode)
> +{
> +   // self dominance
> +   if (bb1 == bb2) return true;
> +
> +   push_cfun (DECL_STRUCT_FUNCTION (cnode->decl));
> +
> +   /* If dominator info is not available, we need to calculate it.  */
> +   if (!dom_info_available_p (CDI_DOMINATORS))
> +      calculate_dominance_info (CDI_DOMINATORS);
> +
> +   /* Check if the read is dominated by the write.  */
> +   bool ret = dominated_by_p (CDI_DOMINATORS, bb2, bb1);
> +
> +   /* Restore cfun.  */
> +   pop_cfun ();
> +
> +   return ret;
> +}
> +
> +/* Return true iff WRITE is executed before READ, both residing in the
> +   same function.  Different blocks are decided by dominance; within
> +   one block the first of the two statements wins.  */
> +static bool
> +write_before_read_in_function (struct ipa_ref *write, struct ipa_ref *read)
> +{
> +  basic_block w_bb = gimple_bb (write->stmt);
> +  basic_block r_bb = gimple_bb (read->stmt);
> +
> +  if (w_bb != r_bb)
> +    {
> +      /*
> +       * The dominator framework operates on cfun.
> +       * Therefore we need to set cfun accordingly.
> +       */
> +      symtab_node *w_node = write->referring;
> +      cgraph_node *w_cnode = dyn_cast <cgraph_node *> (w_node);
> +      return bb1_dominates_bb2 (w_bb, r_bb, w_cnode);
> +    }
> +
> +  /* Same block: walk the statements in order.  */
> +  gimple_stmt_iterator gsi;
> +  for (gsi = gsi_start_bb (w_bb); !gsi_end_p (gsi); gsi_next (&gsi))
> +  {
> +    if (gsi_stmt (gsi) == write->stmt)
> +      return true;
> +    if (gsi_stmt (gsi) == read->stmt)
> +      return false;
> +  }
> +
> +  /* Neither write nor read found in its BB -- impossible.  The
> +     previous code had gcc_assert (1), which can never fire.  */
> +  gcc_unreachable ();
> +
> +  return false;
> +}
> +
> +/*
> + * DFS over the callees of N restricted to SET; return A or B,
> + * whichever is reached first, or NULL if neither is reachable.
> + * SET is now passed by reference: the previous signature copied the
> + * whole hash_set on every recursion level.
> + */
> +static cgraph_node*
> +find_cgraph_in_callee_set (cgraph_node* n,
> +            hash_set<cgraph_node*> &set,
> +            cgraph_node* a,
> +            cgraph_node* b)
> +{
> +  cgraph_edge *cs;
> +  for (cs = n->callees; cs; cs = cs->next_callee)
> +    {
> +      cgraph_node *callee = cs->callee->function_symbol ();
> +      if (dump_file)
> +     fprintf (dump_file, "Child of %s: %s\n", n->dump_asm_name (), \
> +               callee->dump_asm_name ());
> +      if (callee == a)
> +     return a;
> +      if (callee == b)
> +     return b;
> +      if (!set.contains (callee))
> +     continue;
> +      /* Keep scanning the remaining siblings when this subtree does
> +         not contain A or B; the previous code returned the recursive
> +         result unconditionally and missed matches in later siblings.  */
> +      cgraph_node *found = find_cgraph_in_callee_set (callee, set, a, b);
> +      if (found)
> +     return found;
> +    }
> +  return NULL;
> +}
> +
> +/* Walks back along caller relations until main is found.  */
> +/* Recursive helper: return the cgraph node of main () if it is a
> +   (transitive) caller of NODE, NULL otherwise.  On success PATH
> +   receives the call chain ordered main () first, NODE last (entries
> +   are pushed while unwinding the recursion).  VISITED breaks cycles
> +   in the caller graph.  */
> +static cgraph_node*
> +get_ancestor_main_dfs (hash_set<cgraph_node*>* visited,
> +            vec<cgraph_node*>* path,
> +            cgraph_node *node)
> +{
> +  if (MAIN_NAME_P (DECL_NAME (node->decl)))
> +    {
> +    path->safe_push(node);
> +    return node;
> +    }
> +
> +  visited->add (node);
> +
> +  cgraph_edge *cs;
> +  for (cs = node->callers; cs; cs = cs->next_caller)
> +    {
> +      cgraph_node *caller = cs->caller->function_symbol ();
> +      if (!visited->contains (caller))
> +     {
> +       cgraph_node* main = 
> +                get_ancestor_main_dfs (visited, path, caller);
> +       if (main)
> +            {
> +            path->safe_push(node);
> +            return main;
> +            }
> +     }
> +    }
> +  return NULL;
> +}
> +
> +/* Return the cgraph node of main () if it can reach NODE through the
> +   caller graph, NULL otherwise.  PATH receives the chain from main ()
> +   down to NODE.  The caller owns and must release *PATH.  */
> +static cgraph_node*
> +get_path_from_main_to(cgraph_node *node, vec<cgraph_node*>* path)
> +{
> +   hash_set<cgraph_node*> visited;
> +   cgraph_node* main = get_ancestor_main_dfs (&visited, path, node); 
> +   /* NOTE(review): redundant -- the destructor releases the set.  */
> +   visited.empty();
> +   return main;
> +}
> +
> +
> +/*
> + * Verifying that a stmt s1 is dominated by a stmt s2
> + * across function borders is not trivial with the available
> + * infrastructure (dominator algorithm on bb level plus cgraph).
> + * If we rule out external calls/callbacks, then we still need
> + * to guarantee a write on each possible path to the read.
> + *
> + * The implemented solution to this problem, which is of course 
> + * too strict,
> + * but also not too compute/memory intensive is as follows:
> + *
> + * - Check if write is reachable by main() by only looking into
> + *   the first bb in each function on the path.
> + * - All call stmts between main() and write must not possibly
> + *   reach a read. We consider indirect call statements as
> + *   possible reaching read (unless we can prove opposite).
> + *
> + * The external calls/callbacks are ruled out as follows:
> + *
> + * - all possible ancestors of read must not be external visible
> + * - all possible ancestors of read must not be function pointers
> + *   ???????????
> + *
> + *
> + */
> +static bool
> +write_before_read_across_function_borders (struct ipa_ref *write,
> +          struct ipa_ref *read)
> +{
> +  symtab_node *w_node = write->referring;
> +  cgraph_node *w_cnode = dyn_cast <cgraph_node *> (w_node);
> +  symtab_node *r_node = read->referring;
> +  cgraph_node *r_cnode = dyn_cast <cgraph_node *> (r_node);
> +  cgraph_node *main;
> +
> +  /* Get main() function. */
> +  vec<cgraph_node*> pw = vNULL;
> +  main = get_path_from_main_to(w_cnode, &pw);
> +  if (!main)
> +    {
> +      if (dump_file)
> +     fprintf (dump_file, \
> +           "main() is not ancestor of write -> skipping.\n");
> +      return false;
> +    }
> +
> +  vec<cgraph_node*> pr = vNULL;
> +  cgraph_node *main_to_read = get_path_from_main_to(r_cnode, &pr);
> +  if (!main_to_read)
> +     {
> +      if (dump_file)
> +     fprintf (dump_file, \
> +         "main() is not ancestor of read -> skipping.\n");
> +      return false;
> +     }
> +
> +  int i;
> +  cgraph_node *node_in_pr;
> +  FOR_EACH_VEC_ELT (pr, i, node_in_pr)
> +     {
> +      // I think main has to be externally visible.
> +      if (node_in_pr == main) continue;
> +
> +      /* Assure that all paths to read are not externally visible.  */
> +      if (cgraph_externally_visible_p (node_in_pr, flag_whole_program)) 
> +        {
> +         if (dump_file)
> +         fprintf (dump_file, \
> +                 "%s is externally visible... skipping\n", \
> +                 node_in_pr->dump_asm_name ());
> +         return false;
> +        }
> +
> +      /* Assure that all paths to read are not 
> +       * used as function pointers. */
> +      if (node_in_pr->address_taken)
> +        {
> +         if (dump_file)
> +         fprintf (dump_file, \
> +                 "%s might be function pointer... skipping\n", \
> +                 node_in_pr->dump_asm_name ());
> +         return false;
> +        }
> +     }
> +
> +  cgraph_node* caller = main;
> +  cgraph_node* node_in_pw;
> +  FOR_EACH_VEC_ELT (pw, i, node_in_pw)
> +     {
> +     gcc_assert (w_cnode != r_cnode);
> +     if (node_in_pw == r_cnode && path_exists(r_cnode, w_cnode))
> +        {
> +        return call_before_read(write, read);
> +        }
> +
> +     if (node_in_pw == w_cnode && path_exists(w_cnode, r_cnode))
> +        {
> +        return write_before_call(write, read);
> +        }
> +
> +     if (node_in_pw == main)
> +        {
> +        continue;
> +        }
> +
> +     if (dump_file)
> +     fprintf (dump_file, \
> +               "get_nondominated_callees from %s to %s\n", \
> +               caller->dump_asm_name(), \
> +               node_in_pw->dump_asm_name());
> +
> +     vec<cgraph_node *> non_dominated_callees = \
> +                get_nondominated_callees(caller, node_in_pw);
> +     cgraph_node* non_dominated_callee;
> +     int j;
> +     FOR_EACH_VEC_ELT(non_dominated_callees, j, non_dominated_callee)
> +        {
> +        if (dump_file)
> +        fprintf (dump_file, "callee %d %s\n", j, \
> +                  non_dominated_callee->dump_asm_name());
> +        if(path_exists(non_dominated_callee, r_cnode)) {
> +           return false;
> +           }
> +        }
> +
> +
> +        caller = node_in_pw;
> +     }
> +   return true;
> +}
> +
> +
> +/* WRITE and READ are in different functions and the writing function
> +   can reach the reading function.  Return true iff no call that may
> +   execute before the write can reach the read.  */
> +static bool
> +write_before_call (struct ipa_ref *write, struct ipa_ref *read)
> +{
> +  symtab_node *w_node = write->referring;
> +  cgraph_node *w_cnode = dyn_cast <cgraph_node *> (w_node);
> +  symtab_node *r_node = read->referring;
> +  cgraph_node *r_cnode = dyn_cast <cgraph_node *> (r_node);
> +
> +  gcc_assert (path_exists (w_cnode, r_cnode));
> +  gcc_assert (w_cnode != r_cnode);
> +
> +  if (in_lto_p)
> +    w_cnode->get_untransformed_body ();
> +
> +  basic_block w_bb = gimple_bb (write->stmt);
> +
> +  /* Collect all calls NOT dominated by the write: calls in blocks not
> +     dominated by w_bb, and calls in w_bb preceding the write.  */
> +  function *func = DECL_STRUCT_FUNCTION (w_cnode->decl);
> +  basic_block c_bb;
> +  vec<struct cgraph_node*> callees = vNULL;
> +  FOR_EACH_BB_FN (c_bb, func)
> +  {
> +     bool self_dominates = w_bb == c_bb;
> +     bool w_bb_dominates_c_bb = bb1_dominates_bb2 (w_bb, c_bb, w_cnode);
> +     if (w_bb_dominates_c_bb && !self_dominates) continue;
> +
> +     for (gimple_stmt_iterator gsi = gsi_start_bb (c_bb); \
> +             !gsi_end_p (gsi); \
> +             gsi_next (&gsi))
> +     {
> +         gimple *stmt = gsi_stmt (gsi);
> +
> +         if (stmt == write->stmt)
> +            break;
> +
> +         if (gimple_code (stmt) != GIMPLE_CALL)
> +            continue;
> +
> +         /* FIXME: indirect calls (no fndecl) should be treated as
> +            possibly reaching the read; skip them for now instead of
> +            ICEing in cgraph_node::get.  */
> +         tree callee_decl = gimple_call_fndecl (stmt);
> +         if (!callee_decl)
> +            continue;
> +
> +         cgraph_node *callee_node = cgraph_node::get (callee_decl);
> +         if (!callee_node || fndecl_built_in_p (callee_node->decl))
> +            continue;
> +
> +         if (dump_file)
> +           fprintf (dump_file, "found callee %s\n", \
> +              callee_node->dump_asm_name ());
> +         callees.safe_push (callee_node);
> +     }
> +  }
> +
> +  /* Release the vector on every exit; the previous code leaked it on
> +     the early return.  */
> +  cgraph_node* callee;
> +  int i;
> +  FOR_EACH_VEC_ELT (callees, i, callee)
> +  {
> +     if (path_exists (callee, r_cnode))
> +     {
> +        callees.release ();
> +        return false;
> +     }
> +  }
> +
> +  callees.release ();
> +  return true;
> +}
> +
> +/* WRITE and READ are in different functions and the reading function
> +   can reach the writing function.  Return true iff some call that
> +   dominates (or precedes, in the read's own block) the read can reach
> +   the write, i.e. the write is executed before the read.  */
> +static bool
> +call_before_read (struct ipa_ref *write, struct ipa_ref *read)
> +{
> +  symtab_node *w_node = write->referring;
> +  cgraph_node *w_cnode = dyn_cast <cgraph_node *> (w_node);
> +  symtab_node *r_node = read->referring;
> +  cgraph_node *r_cnode = dyn_cast <cgraph_node *> (r_node);
> +
> +  gcc_assert (path_exists (r_cnode, w_cnode));
> +  gcc_assert (w_cnode != r_cnode);
> +
> +  if (in_lto_p)
> +    r_cnode->get_untransformed_body ();
> +
> +  basic_block r_bb = gimple_bb (read->stmt);
> +
> +  function *func = DECL_STRUCT_FUNCTION (r_cnode->decl);
> +  basic_block c_bb;
> +  FOR_EACH_BB_FN (c_bb, func)
> +  {
> +     /* Only blocks that dominate the read are guaranteed to execute
> +        before it.  */
> +     bool self_dominates = c_bb == r_bb;
> +     bool call_dominates_read = bb1_dominates_bb2 (c_bb, r_bb, r_cnode);
> +     if (!call_dominates_read && !self_dominates) continue;
> +
> +     for (gimple_stmt_iterator gsi = gsi_start_bb (c_bb); \
> +             !gsi_end_p (gsi); \
> +             gsi_next (&gsi))
> +     {
> +         gimple *stmt = gsi_stmt (gsi);
> +
> +         /* In the read's own block only calls before the read count.  */
> +         if (stmt == read->stmt)
> +            break;
> +
> +         if (gimple_code (stmt) != GIMPLE_CALL)
> +            continue;
> +
> +         /* Indirect calls (no fndecl) cannot be proven to reach the
> +            write; skipping them is conservative and avoids an ICE in
> +            cgraph_node::get.  */
> +         tree callee_decl = gimple_call_fndecl (stmt);
> +         if (!callee_decl)
> +            continue;
> +
> +         cgraph_node *callee_node = cgraph_node::get (callee_decl);
> +         if (callee_node && path_exists (callee_node, w_cnode))
> +            return true;
> +     }
> +  }
> +  return false;
> +}
> +
> +/* Return true iff WRITE is guaranteed to execute before READ.
> +   Dispatches to the intra- or inter-procedural analysis depending on
> +   whether both refs live in the same function.  */
> +static bool
> +write_before_read (struct ipa_ref *write, struct ipa_ref *read)
> +{
> +  symtab_node *w_node = write->referring;
> +  cgraph_node *w_cnode = dyn_cast <cgraph_node *> (w_node);
> +  symtab_node *r_node = read->referring;
> +  cgraph_node *r_cnode = dyn_cast <cgraph_node *> (r_node);
> +
> +  if (w_cnode == r_cnode)
> +    /* Within the same function.  */
> +    return write_before_read_in_function (write, read);
> +
> +  /* Not within the same function. */
> +  return write_before_read_across_function_borders (write, read);
> +}
> +
> +/* Return true iff WRITE is guaranteed to execute before every read in
> +   READS.  Loads each reader's body first so read->stmt is valid.  */
> +static bool
> +write_before_all_reads (struct ipa_ref* write,
> +            const ipa_ref_vec &reads)
> +{
> +  int i;
> +  struct ipa_ref* read;
> +
> +  FOR_EACH_VEC_ELT (reads, i, read)
> +    {
> +      load_function_body_of_ipa_ref (read);
> +      if (!write_before_read (write, read))
> +     return false;
> +    }
> +  return true;
> +}
> +
> +/* Replace the load of the global variable at REF by the constant
> +   WRITE_VAL.  The replacement happens inside a fresh clone of the
> +   reading function (created lazily, cached in func_to_clone); callers
> +   are redirected to the clone later in update_callgraph.
> +   NOTE(review): FUNC is unused (see the commented-out line below),
> +   and OLD_LHS is computed but never used.  */
> +static void
> +propagate_constant_to_read (tree write_val,
> +         struct ipa_ref* ref,
> +         hash_set<cgraph_node*> *func)
> +{
> +  gcc_assert (ref->use == IPA_REF_LOAD);
> +  load_function_body_of_ipa_ref (ref);
> +
> +  gimple *read_stmt = ref->stmt;
> +
> +  gcc_assert (gimple_code (read_stmt) == GIMPLE_ASSIGN);
> +  gcc_assert (gimple_num_ops (read_stmt) == 2);
> +
> +  tree old_lhs = gimple_op (read_stmt, 0);
> +  symtab_node *r_node = ref->referring;
> +  cgraph_node *r_cnode = dyn_cast <cgraph_node *> (r_node);
> +
> +  //if (!func->contains(r_cnode)) func->add(r_cnode);
> +  cgraph_node **possible_clone = func_to_clone->get(r_cnode);
> +  if (!possible_clone) {
> +     const char* suffix = "test";
> +     // callers has to be vNULL, otherwise, we will be 
> +     // analyzing clones...
> +     // and we don't want that...
> +     // but that means that we will need to update the callers
> +     // later... in update_callgraph
> +     cgraph_node *clone = r_cnode->create_version_clone_with_body(
> +         vNULL,
> +         NULL,
> +         NULL,
> +         NULL,
> +         NULL,
> +         suffix,
> +         NULL);
> +     clone->get_untransformed_body();
> +     func_to_clone->put(r_cnode, clone);
> +  }
> +
> +  possible_clone = func_to_clone->get(r_cnode);
> +  cgraph_node *clone = *possible_clone;
> +
> +  // Build new stmt and replace old.  
> +  gimple_stmt_iterator gsi;
> +  basic_block bb;
> +  // Let's create a clone with body here...
> +  // The clone should not have callers as 
> +  // to not interfere with the current
> +  // analysis.
> +  // The callers will be set at the end...
> +
> +  /* Find the corresponding load inside the clone by matching the
> +     loaded variable's name, then rewrite its uses.  */
> +  push_cfun (DECL_STRUCT_FUNCTION (clone->decl));
> +  function *clone_func = DECL_STRUCT_FUNCTION (clone->decl);
> +  bool found = false;
> +  FOR_EACH_BB_FN(bb, clone_func)
> +    {
> +         for (gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi))
> +         {
> +             gimple *stmt = gsi_stmt (gsi);
> +             if (gimple_code (stmt) != GIMPLE_ASSIGN) continue;
> +             tree old_val_clone = gimple_op (stmt, 1);
> +             tree lhs = gimple_op (stmt, 0);
> +
> +             //TODO: what happens when it is not a variable decl?
> +             //This might actually be the source of imprecision
> +             if (TREE_CODE(old_val_clone) != VAR_DECL) continue;
> +             tree old_val = gimple_op (read_stmt, 1);
> +             /* NOTE(review): this compares identifier string
> +                pointers; it only works because identifiers are
> +                interned -- comparing the DECL_NAME trees directly
> +                would be clearer.  TODO confirm.  */
> +             if (IDENTIFIER_POINTER(DECL_NAME(old_val_clone)) != \
> +                    IDENTIFIER_POINTER(DECL_NAME(old_val))) continue;
> +
> +
> +             found = true;
> +
> +             // REPLACE!!!
> +             gimple_stmt_iterator gsi2 = gsi_for_stmt(stmt);
> +             tree new_lhs = make_ssa_name(TREE_TYPE(lhs));
> +             gimple *new_read_stmt = \
> +                  gimple_build_assign(new_lhs, write_val);
> +             gsi_insert_before (&gsi2, new_read_stmt, GSI_SAME_STMT);
> +
> +             /* Redirect all uses of the old load result to the new
> +                constant-valued SSA name.  */
> +             gimple *use_stmt;
> +             imm_use_iterator use_iter;
> +             FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
> +             {
> +                use_operand_p use_p;
> +                FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
> +                  SET_USE (use_p, new_lhs);
> +                update_stmt (use_stmt);
> +             }
> +         }
> +
> +         if (found) break;
> +    }
> +    pop_cfun();
> +}
> +
> +
> +
> +/* Collect all store refs of VNODE into WRITES and all load refs into
> +   READS.  Callers are expected to have filtered out alias and
> +   address refs beforehand.  */
> +static void
> +ipa_initcall_get_writes_and_reads(varpool_node *vnode,
> +       ipa_ref_vec *writes,
> +       ipa_ref_vec *reads)
> +{
> +  int i;
> +  struct ipa_ref *ref;
> +
> +  if (dump_file)
> +    fprintf (dump_file, "%s for variable '%s'.\n", __func__, vnode->name ());
> +
> +  /* Only IPA_REF_STORE and IPA_REF_LOAD left.  */
> +  for (i = 0; vnode->iterate_referring (i, ref); i++)
> +    {
> +      if (ref->use == IPA_REF_STORE)
> +     writes->safe_push (ref);
> +      if (ref->use == IPA_REF_LOAD)
> +     reads->safe_push (ref);
> +    }
> +}
> +
> +/* Propagate the constant WRITE_VAL into every read in READS_ORIGINAL.
> +   FUNCS is forwarded to propagate_constant_to_read (currently
> +   unused there).  */
> +static void
> +propagate_constant_to_reads (tree write_val,
> +           const ipa_ref_vec &reads_original,
> +           hash_set<cgraph_node*> *funcs)
> +{
> +  /* Iterate over reads and replace them by constant.  */
> +  struct ipa_ref* ref;
> +  int i;
> +  FOR_EACH_VEC_ELT (reads_original, i, ref)
> +  {
> +    propagate_constant_to_read (write_val, ref, funcs);
> +  }
> +}
> +
> +/*
> + * Extracts the assigned constant, iff the given statement
> + * is a constant assignment. Returns NULL_TREE otherwise.
> + * Loads the writer's body as a side effect so WRITE->stmt is valid.
> + */
> +static tree
> +extract_constant_from_initcall_write (struct ipa_ref *write)
> +{
> +  symtab_node *f_node = write->referring;
> +  cgraph_node *f_cnode = dyn_cast <cgraph_node *> (f_node);
> +
> +  tree decl = f_cnode->decl;
> +  if (TREE_CODE (decl) != FUNCTION_DECL)
> +    {
> +      if (dump_file)
> +     fprintf (dump_file, "Not function decl -> skipping.\n");
> +      return NULL_TREE;
> +    }
> +
> +  /* Diagnostics only; neither condition aborts the analysis.  */
> +  if (!f_cnode->has_gimple_body_p () && dump_file)
> +    fprintf (dump_file, "Does NOT have gimple body!\n");
> +  if (f_cnode->inlined_to && dump_file)
> +    fprintf (dump_file, "Inlined To\n");
> +
> +  if (dump_file)
> +    {
> +      fprintf (dump_file, "%s: for writer gimple:\n", __func__);
> +      dump_vnode_ipa_ref(write);
> +    }
> +
> +  /* Get the write function's body.  */
> +  load_function_body_of_ipa_ref (write);
> +
> +  gimple *stmt = write->stmt;
> +
> +  /* Verify that we have an assignment.  */
> +  if (gimple_code (stmt) != GIMPLE_ASSIGN)
> +    {
> +      if (dump_file)
> +     fprintf (dump_file, "Write stmt not assignment -> skipping.\n");
> +      return NULL_TREE;
> +    }
> +
> +  /* Check if write's LHS (vnode) is not volatile.  */
> +  tree lhs = gimple_assign_lhs (stmt);
> +  if (TREE_THIS_VOLATILE (TREE_TYPE (lhs)))
> +    {
> +      if (dump_file)
> +     fprintf (dump_file, "Variable volatile -> skipping.\n");
> +      return NULL_TREE;
> +    }
> +
> +  /* Check if RHS of write stmt is constant. */
> +  if (!gimple_assign_is_single_const (stmt))
> +    {
> +      if (dump_file)
> +     {
> +       fprintf (dump_file, "Found non-const operands.\n");
> +     }
> +      return NULL_TREE;
> +    }
> +
> +  /* Extract the constant.  */
> +  tree write_val = gimple_op (stmt, 1);
> +
> +  if (dump_file)
> +    {
> +      fprintf (dump_file, "Const op:\n");
> +      dump_node (write_val, TDF_DETAILS, dump_file);
> +    }
> +
> +  return write_val;
> +}
> +
> +/* Analyze the static-lifetime variable VNODE and, when it is written
> +   exactly once with a constant before every read, propagate that
> +   constant into all readers (via clones recorded through
> +   UPDATE_FUNCTIONS/func_to_clone).  Bails out early on any condition
> +   that makes the transformation unsafe.  */
> +static void
> +ipa_initcall_cp_execute_for_var (varpool_node *vnode,
> +          hash_set<cgraph_node*> *update_functions)
> +{
> +  ipa_ref_vec writes = vNULL;
> +  ipa_ref_vec reads = vNULL;
> +  struct ipa_ref* write;
> +  tree write_val;
> +  gimple_stmt_iterator gsi;
> +  bool remove_permanently;
> +
> +  if (dump_file)
> +    fprintf (dump_file, "%s for variable '%s'.\n", __func__, vnode->name ());
> +
> +  if (dump_file)
> +    {
> +      dump_node (vnode->decl, TDF_DETAILS, dump_file);
> +    }
> +
> +  /* Variable must not be externally visible.  */
> +  if (vnode->externally_visible_p ())
> +    {
> +      if (dump_file)
> +     fprintf (dump_file, \
> +             "\texternally visible "\
> +             "-> skipping\n");
> +      return;
> +    }
> +
> +  /* All refs must be explicit.  */
> +  if (!vnode->all_refs_explicit_p ())
> +    {
> +      if (dump_file)
> +     fprintf (dump_file,\
> +            "Not explicit variable refs "\
> +            "-> skipping.\n");
> +      return;
> +    }
> +
> +  /* Check if any ref->use is IPA_REF_ALIAS.  */
> +  if (vnode->has_aliases_p ())
> +    {
> +      if (dump_file)
> +     fprintf (dump_file, "Found IPA_REF_ALIAS -> skipping.\n");
> +      return;
> +    }
> +
> +  /* Check if any ref->use is IPA_REF_ADDR.  */
> +  if (symtab_node_address_matters_p (vnode))
> +    {
> +      if (dump_file)
> +     fprintf (dump_file, "Found IPA_REF_ADDR -> skipping.\n");
> +      return;
> +    }
> +
> +  /* We don't touch arrays.  */
> +  if (TREE_CODE (TREE_TYPE (vnode->decl)) == ARRAY_TYPE)
> +    {
> +      if (dump_file)
> +     fprintf (dump_file, "Variable is array -> skipping.\n");
> +      return;
> +    }
> +
> +  /* Get all refs (must be REF_STORE or REF_LOAD).  */
> +  ipa_initcall_get_writes_and_reads (vnode, &writes, &reads);
> +
> +  if (writes.length () > 1)
> +    {
> +      /* More than one writer.  */
> +      if (dump_file)
> +     fprintf (dump_file, "More than one writer -> skipping.\n");
> +      writes.release ();
> +      reads.release ();
> +      return;
> +    }
> +  else if (writes.length () < 1)
> +    {
> +      /* No writer.  */
> +      if (dump_file)
> +     fprintf (dump_file, "No writer -> skipping.\n");
> +      writes.release ();
> +      reads.release ();
> +      return;
> +    }
> +
> +  /*
> +   * Note:
> +   * Limiting ourselfs to only one write is not necessary in general,
> +   * but good enough to address the typical init() case.
> +   * Big advantage is, that it makes the following code much easier.
> +   *
> +   * TODO: I believe that if we create clones, we might get two writes
> +   * Investigate
> +   */
> +
> +  /* Get (only) write ref.  */
> +  write = writes.pop ();
> +
> +  /* Check if write's RHS is a constant and get it.  */
> +  write_val = extract_constant_from_initcall_write (write);
> +  if (write_val == NULL_TREE)
> +    {
> +    if (dump_file)
> +       fprintf (dump_file, \
> +            "Write's RHS is not constant"\
> +            " -> skipping.\n");
> +    writes.release ();
> +    reads.release ();
> +    return;
> +    }
> +
> +  /* Assure all reads are after the write.  */
> +  if (!write_before_all_reads (write, reads))
> +    {
> +    if (dump_file)
> +       fprintf (dump_file, \
> +           "Write not guaranteed "\
> +           "to be before read -> skipping.\n");
> +    writes.release ();
> +    reads.release ();
> +    return;
> +    }
> +
> +  /* Propagate constant to reads.  */
> +  propagate_constant_to_reads (write_val, reads, update_functions);
> +
> +  /* Finally remove the write.  */
> +  /* NOTE(review): gsi_remove below is commented out, so the write is
> +     NOT actually removed and gsi/remove_permanently are dead; the
> +     "Eliminated" dump message overstates -- confirm intent.  */
> +  gsi = gsi_for_stmt (write->stmt);
> +  remove_permanently = false; //XXX: fails with true???
> +  //gsi_remove (&gsi, remove_permanently);
> +
> +  if (dump_file)
> +    fprintf (dump_file, \
> +        "Eliminated variable '%s'.\n\n", vnode->name ());
> +
> +  writes.release ();
> +  reads.release ();
> +}
> +
> +
> +/* Traversal callback for func_to_clone: redirect all callers of the
> +   original function R_CNODE to its constant-propagated clone
> +   *CLONE_PTR and mark the original so ipa-cp skips it.  Always
> +   returns true so the traversal continues.  */
> +bool
> +update_callgraph(cgraph_node *const& r_cnode,
> +        cgraph_node **clone_ptr,
> +        void*)
> +{
> +  vec<cgraph_edge*> callers = r_cnode->collect_callers();
> +  cgraph_node *clone = *clone_ptr;
> +  cgraph_edge *e;
> +  int i;
> +  /* NOTE(review): count is only referenced by the commented-out edge
> +     cloning code below.  */
> +  profile_count count = r_cnode->count;
> +
> +  FOR_EACH_VEC_ELT(callers, i, e)
> +     e->redirect_callee(clone);
> +
> +  /* collect_callers returns a heap vector; the previous code leaked
> +     it.  */
> +  callers.release ();
> +
> +/*
> +  for (e = r_cnode->callees; e; e=e->next_callee)
> +    e->clone(clone, e->call_stmt, e->lto_stmt_uid, count, count, true);
> +
> +  for (e = r_cnode->indirect_calls; e; e=e->next_callee)
> +    e->clone(clone, e->call_stmt, e->lto_stmt_uid, count, count, true);
> +  
> +
> +*/
> +  /* Rewrite the actual call statements to reference the clone's
> +     decl and clean up EH info if the callee can no longer throw.  */
> +  for (e = clone->callers; e; e=e->next_caller)
> +     {
> +      e->caller->get_untransformed_body();
> +      function *inner_function = \
> +            DECL_STRUCT_FUNCTION (e->caller->decl);
> +      gimple_call_set_fndecl (e->call_stmt, clone->decl);
> +      maybe_clean_eh_stmt_fn (inner_function, e->call_stmt);
> +     }
> +
> +  r_cnode->skip_ipa_cp = true;
> +  //r_cnode->remove();
> +  return true;
> +}
> +
> +/* Pass entry point: run the initcall constant propagation over every
> +   variable with static lifetime, then redirect callers of modified
> +   functions to their clones.  Always returns 0 (no TODOs).
> +   NOTE(review): clone_num_suffixes1 is keyed by const char * pointer
> +   identity and is not referenced in this function -- presumably it is
> +   consumed by the cloning machinery elsewhere in the patch; confirm.  */
> +static unsigned int
> +ipa_initcall_cp_execute (void)
> +{
> +
> +  varpool_node *vnode;
> +
> +  clone_num_suffixes1 = new hash_map<const char *, unsigned>;
> +  hash_set<cgraph_node *> update_functions;
> +  func_to_clone = new hash_map<cgraph_node*, cgraph_node*>;
> +  FOR_EACH_VARIABLE (vnode)
> +    {
> +      ipa_initcall_cp_execute_for_var (vnode, &update_functions);
> +    }
> +
> +  /* Redirect callers of every function we cloned.  */
> +  func_to_clone->traverse<void*, update_callgraph>(NULL);
> +
> +  delete clone_num_suffixes1;
> +  delete func_to_clone;
> +
> +  return 0;
> +
> +}
> +
> +namespace {
> +
> +/* Pass descriptor.  Runs as a simple IPA pass; requires CFG and SSA.  */
> +const pass_data pass_data_ipa_initcall_cp =
> +{
> +  SIMPLE_IPA_PASS, /* type */
> +  "initcall_cp", /* name */
> +  OPTGROUP_NONE, /* optinfo_flags */
> +  TV_NONE, /* tv_id */
> +  ( PROP_cfg | PROP_ssa ), /* properties_required */
> +  0, /* properties_provided */
> +  0, /* properties_destroyed */
> +  0, /* todo_flags_start */
> +  /* todo_flags_finish: */
> +  ( TODO_update_ssa | TODO_cleanup_cfg | TODO_dump_symtab \
> +  | TODO_rebuild_cgraph_edges | TODO_discard_function ),
> +};
> +
> +class pass_ipa_initcall_cp : public simple_ipa_opt_pass
> +{
> +public:
> +  pass_ipa_initcall_cp (gcc::context *ctxt)
> +    : simple_ipa_opt_pass (pass_data_ipa_initcall_cp, ctxt)
> +  {}
> +
> +  /* opt_pass methods: */
> +  /* Gate on the new -finitcall-cp flag (declared elsewhere in the
> +     patch).  */
> +  virtual bool gate (function *)
> +    {
> +      return flag_initcall_cp;
> +    }
> +
> +  virtual unsigned int execute (function *)
> +    {
> +      return ipa_initcall_cp_execute();
> +    }
> +
> +}; // class pass_ipa_initcall_cp
> +
> +} // anon namespace
> +
> +/* Factory used by passes.def to instantiate the pass.  */
> +simple_ipa_opt_pass *
> +make_pass_ipa_initcall_cp (gcc::context *ctxt)
> +{
> +  return new pass_ipa_initcall_cp (ctxt);
> +}
> diff --git a/gcc/passes.def b/gcc/passes.def
> index 798a391..350a4fc 100644
> --- a/gcc/passes.def
> +++ b/gcc/passes.def
> @@ -146,6 +146,7 @@ along with GCC; see the file COPYING3.  If not see
>    NEXT_PASS (pass_ipa_profile);
>    NEXT_PASS (pass_ipa_icf);
>    NEXT_PASS (pass_ipa_devirt);
> +  NEXT_PASS (pass_ipa_initcall_cp);
>    NEXT_PASS (pass_ipa_cp);
>    NEXT_PASS (pass_ipa_sra);
>    NEXT_PASS (pass_ipa_cdtor_merge);
> diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
> index a987661..9e64373 100644
> --- a/gcc/tree-pass.h
> +++ b/gcc/tree-pass.h
> @@ -498,6 +498,7 @@ extern ipa_opt_pass_d *make_pass_ipa_fn_summary 
> (gcc::context *ctxt);
>  extern ipa_opt_pass_d *make_pass_ipa_inline (gcc::context *ctxt);
>  extern simple_ipa_opt_pass *make_pass_ipa_free_lang_data (gcc::context 
> *ctxt);
>  extern simple_ipa_opt_pass *make_pass_ipa_free_fn_summary (gcc::context 
> *ctxt);
> +extern simple_ipa_opt_pass *make_pass_ipa_initcall_cp (gcc::context *ctxt);
>  extern ipa_opt_pass_d *make_pass_ipa_cp (gcc::context *ctxt);
>  extern ipa_opt_pass_d *make_pass_ipa_sra (gcc::context *ctxt);
>  extern ipa_opt_pass_d *make_pass_ipa_icf (gcc::context *ctxt);
> 

Reply via email to