On Wed, Jun 5, 2019 at 10:59 AM Feng Xue OS <f...@os.amperecomputing.com> wrote: > > IPA-CP can not identify a constant by-ref argument to a function, if > definition > of the argument is not in same basic block where the callsite lies in. This is > because IPA-CP only does local search in the callsite basic block.So this > patch > implemented an enhanced algorithm, which uses dominance information to guide > traverse in virtual SSA web, to find out constant definitions in dominating > basic > block. > > Feng > > --- > diff --git a/gcc/ChangeLog b/gcc/ChangeLog > index 526ed45be89..cc076c337af 100644 > --- a/gcc/ChangeLog > +++ b/gcc/ChangeLog > @@ -1,3 +1,14 @@ > +2019-06-04 Feng Xue <f...@os.amperecomputing.com> > + > + PR ipa/90401 > + * ipa-prop.c (add_to_agg_contents_list): New function. > + (clobber_by_agg_contents_list_p): Likewise. > + (extract_mem_content): Likewise. > + (strictly_dominated_by_ssa_p): Likewise. > + (get_place_in_agg_contents_list): Delete. > + (determine_known_aggregate_parts): Renamed from > + determine_locally_known_aggregate_parts. > + > 2019-06-04 Segher Boessenkool <seg...@kernel.crashing.org> > > * config/rs6000/constraints.md (define_register_constraint "wp"): > diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c > index d86c2f3db55..405cdf7730b 100644 > --- a/gcc/ipa-prop.c > +++ b/gcc/ipa-prop.c > @@ -1458,7 +1458,7 @@ get_ssa_def_if_simple_copy (tree rhs) > return rhs; > } > > -/* Simple linked list, describing known contents of an aggregate beforere > +/* Simple linked list, describing known contents of an aggregate before > call. */ > > struct ipa_known_agg_contents_list > @@ -1471,41 +1471,64 @@ struct ipa_known_agg_contents_list > struct ipa_known_agg_contents_list *next; > }; > > -/* Find the proper place in linked list of ipa_known_agg_contents_list > - structures where to put a new one with the given LHS_OFFSET and LHS_SIZE, > - unless there is a partial overlap, in which case return NULL, or such > - element is already there, in which case set *ALREADY_THERE to true. */ > +/* Add a known content item into a linked list of > ipa_known_agg_contents_list, > + in which all elements are sorted ascendingly by offset. When ALLOW_DUP is > + false, insert the item only if there is no duplicate one (with same offset > + and size) in the list. And if the item is added, return true. */ > > -static struct ipa_known_agg_contents_list ** > -get_place_in_agg_contents_list (struct ipa_known_agg_contents_list **list, > - HOST_WIDE_INT lhs_offset, > - HOST_WIDE_INT lhs_size, > - bool *already_there) > +static inline bool > +add_to_agg_contents_list (struct ipa_known_agg_contents_list **plist, > + struct ipa_known_agg_contents_list *item, > + bool allow_dup = true) > { > - struct ipa_known_agg_contents_list **p = list; > - while (*p && (*p)->offset < lhs_offset) > + struct ipa_known_agg_contents_list *list = *plist; > + > + for (; list; list = list->next) > { > - if ((*p)->offset + (*p)->size > lhs_offset) > - return NULL; > - p = &(*p)->next; > + if (list->offset > item->offset) > + break; > + > + if (list->offset == item->offset && list->size == item->size > + && !allow_dup) > + return false; > + > + plist = &list->next; > } > > - if (*p && (*p)->offset < lhs_offset + lhs_size) > + item->next = list; > + *plist = item; > + return true; > +} > + > +/* Check whether a given known content is clobbered by certain element in > + a linked list of ipa_known_agg_contents_list. A special case is that > + we can ignore those constant items completely same as the given item, > + that is they have same offset/size/value. */ > + > +static inline bool > +clobber_by_agg_contents_list_p (struct ipa_known_agg_contents_list *list, > + struct ipa_known_agg_contents_list *item) > +{ > + for (; list; list = list->next) > { > - if ((*p)->offset == lhs_offset && (*p)->size == lhs_size) > - /* We already know this value is subsequently overwritten with > - something else. */ > - *already_there = true; > - else > - /* Otherwise this is a partial overlap which we cannot > - represent. */ > - return NULL; > + if (list->offset > item->offset) > + return list->offset < item->offset + item->size; > + > + /* For the constant item, we can skip comparison with identical items > in > + the list, because its content remains unchanged after clobbering. */ > + if (list->offset == item->offset && list->size == item->size > + && list->constant && item->constant > + && operand_equal_p (list->constant, item->constant, 0)) > + continue; > + > + if (list->offset + list->size > item->offset) > + return true; > } > - return p; > + return false; > } > > /* Build aggregate jump function from LIST, assuming there are exactly > - CONST_COUNT constant entries there and that th offset of the passed > argument > + CONST_COUNT constant entries there and that offset of the passed argument > is ARG_OFFSET and store it into JFUNC. */ > > static void > @@ -1528,6 +1551,66 @@ build_agg_jump_func_from_list (struct > ipa_known_agg_contents_list *list, > } > } > > +/* If STMT is a memory store to the object whose address is BASE, extract > + information (offset, size, and value) into CONTENT, and return true, > + otherwise we conservatively assume the whole object is modified with > + unknown content, and return false. CHECK_REF means that access to object > + is expected to be in form of MEM_REF expression. */ > + > +static bool > +extract_mem_content (gimple *stmt, tree base, bool check_ref, > + struct ipa_known_agg_contents_list *content) > +{ > + HOST_WIDE_INT lhs_offset, lhs_size; > + tree lhs, rhs, lhs_base; > + bool reverse; > + > + if (!gimple_assign_single_p (stmt)) > + return false; > + > + lhs = gimple_assign_lhs (stmt); > + rhs = gimple_assign_rhs1 (stmt); > + > + if (!is_gimple_reg_type (TREE_TYPE (rhs)) > + || TREE_CODE (lhs) == BIT_FIELD_REF > + || contains_bitfld_component_ref_p (lhs)) > + return false; > + > + lhs_base = get_ref_base_and_extent_hwi (lhs, &lhs_offset, > + &lhs_size, &reverse); > + if (!lhs_base) > + return false; > + > + if (check_ref) > + { > + if (TREE_CODE (lhs_base) != MEM_REF > + || TREE_OPERAND (lhs_base, 0) != base > + || !integer_zerop (TREE_OPERAND (lhs_base, 1))) > + return false; > + } > + else if (lhs_base != base) > + return false; > + > + rhs = get_ssa_def_if_simple_copy (rhs); > + > + content->size = lhs_size; > + content->offset = lhs_offset; > + content->constant = is_gimple_ip_invariant (rhs) ? rhs : NULL_TREE; > + content->next = NULL; > + > + return true; > +} > + > +/* Return true if defintion of SSA name strictly dominates basic block BB. */ > + > +static inline bool > +strictly_dominated_by_ssa_p (basic_block bb, tree ssa) > +{ > + basic_block ssa_bb = gimple_bb (SSA_NAME_DEF_STMT (ssa)); > + > + return bb != ssa_bb && dominated_by_p (CDI_DOMINATORS, bb, ssa_bb); > +} > + > /* Traverse statements from CALL backwards, scanning whether an aggregate > given > in ARG is filled in with constant values. ARG can either be an aggregate > expression or a pointer to an aggregate. ARG_TYPE is the type of the > @@ -1535,19 +1618,22 @@ build_agg_jump_func_from_list (struct > ipa_known_agg_contents_list *list, > subsequently stored. */ > > static void > -determine_locally_known_aggregate_parts (gcall *call, tree arg, > - tree arg_type, > - struct ipa_jump_func *jfunc) > +determine_known_aggregate_parts (gcall *call, tree arg, > + tree arg_type, > + struct ipa_jump_func *jfunc) > { > - struct ipa_known_agg_contents_list *list = NULL; > + struct ipa_known_agg_contents_list *list = NULL, *all_list = NULL; > + basic_block call_bb = gimple_bb (call); > + hash_set <tree> visited; > + auto_vec <tree> worklist; > int item_count = 0, const_count = 0; > + int ipa_max_agg_items = PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS); > HOST_WIDE_INT arg_offset, arg_size; > - gimple_stmt_iterator gsi; > tree arg_base; > bool check_ref, by_ref; > ao_ref r; > > - if (PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS) == 0) > + if (ipa_max_agg_items == 0) > return; > > /* The function operates in three stages. First, we prepare check_ref, r, > @@ -1606,82 +1692,139 @@ determine_locally_known_aggregate_parts (gcall > *call, tree arg, > ao_ref_init (&r, arg); > } > > - /* Second stage walks back the BB, looks at individual statements and as > long > - as it is confident of how the statements affect contents of the > - aggregates, it builds a sorted linked list of ipa_agg_jf_list structures > - describing it. */ > - gsi = gsi_for_stmt (call); > - gsi_prev (&gsi); > - for (; !gsi_end_p (gsi); gsi_prev (&gsi)) > - { > - struct ipa_known_agg_contents_list *n, **p; > - gimple *stmt = gsi_stmt (gsi); > - HOST_WIDE_INT lhs_offset, lhs_size; > - tree lhs, rhs, lhs_base; > - bool reverse; > - > - if (!stmt_may_clobber_ref_p_1 (stmt, &r)) > - continue; > - if (!gimple_assign_single_p (stmt)) > - break; > - > - lhs = gimple_assign_lhs (stmt); > - rhs = gimple_assign_rhs1 (stmt); > - if (!is_gimple_reg_type (TREE_TYPE (rhs)) > - || TREE_CODE (lhs) == BIT_FIELD_REF > - || contains_bitfld_component_ref_p (lhs)) > - break; > - > - lhs_base = get_ref_base_and_extent_hwi (lhs, &lhs_offset, > - &lhs_size, &reverse); > - if (!lhs_base) > - break; > - > - if (check_ref) > - { > - if (TREE_CODE (lhs_base) != MEM_REF > - || TREE_OPERAND (lhs_base, 0) != arg_base > - || !integer_zerop (TREE_OPERAND (lhs_base, 1))) > - break; > - } > - else if (lhs_base != arg_base) > - { > - if (DECL_P (lhs_base)) > - continue; > - else > - break; > - } > - > - bool already_there = false; > - p = get_place_in_agg_contents_list (&list, lhs_offset, lhs_size, > - &already_there); > - if (!p) > - break; > - if (already_there) > - continue; > - > - rhs = get_ssa_def_if_simple_copy (rhs); > - n = XALLOCA (struct ipa_known_agg_contents_list); > - n->size = lhs_size; > - n->offset = lhs_offset; > - if (is_gimple_ip_invariant (rhs)) > - { > - n->constant = rhs; > - const_count++; > - } > - else > - n->constant = NULL_TREE; > - n->next = *p; > - *p = n; > - > - item_count++; > - if (const_count == PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS) > - || item_count == 2 * PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS)) > - break; > - } > + /* Second stage walks back from the call statement, looks at individual > + virtual operand and as long as it is confident of how definition > + statement of the virtual operand affects content of the aggregate, it > + builds a sorted linked list of ipa_agg_jf_list structures describing it. > + > + Actually, the stage involves kind of global reachability analysis on > + in-memory variables definitions. A complete resolution requires > iterative > + dataflow equation computation, which is somewhat complex and seems to be > + overkill to this collecting-constant task. > + > + So here we use a simpler algorithm that we believe can archive nearly > + same result in most situations. In the algorithm, we traverse virtual > SSA > + web backwards starting from the call, by stepping from one dominating > + virtual operand (its definition dominates the call) to another > dominating > + one. Before a dom virtual operand is processed, we will ensure all those > + non-dom virtual operands, which are backwards reachable from previous > dom > + virtual operand, have been processed. But only dom virtual operand is > + regarded as possible constant value provider to the call argument. And > + non-dom virtual operand is just checked to see whether it kills > + definitions of some dom virtual operands. */ > + > + for (tree dom_vuse = gimple_vuse (call); dom_vuse; ) > + { > + gimple *stmt = SSA_NAME_DEF_STMT (dom_vuse); > + bool exist = visited.add (dom_vuse); > + > + gcc_assert (!exist); > + > + if (gimple_code (stmt) != GIMPLE_PHI) > + { > + if (stmt_may_clobber_ref_p_1 (stmt, &r)) > + { > + struct ipa_known_agg_contents_list *content = > + XALLOCA (struct ipa_known_agg_contents_list); > + > + if (!extract_mem_content (stmt, arg_base, check_ref, content)) > + break; > + > + /* Now we get a dom virtual operand, and need to check whether > + its value can finally reach the call. And put it into the > + content list only if it is unique. */ > + if (!clobber_by_agg_contents_list_p (all_list, content) > + && add_to_agg_contents_list (&list, content, false)) > + { > + struct ipa_known_agg_contents_list *copy = > + XALLOCA (struct ipa_known_agg_contents_list); > + > + *copy = *content; > + content = copy; > + > + item_count +=1; > + const_count += !!(content->constant); > + > + if (item_count == 2 * ipa_max_agg_items > + || const_count == ipa_max_agg_items) > + break; > + } > + add_to_agg_contents_list (&all_list, content); > + } > + dom_vuse = gimple_vuse (stmt); > + continue; > + } > + > + /* We get to a merge point in virtual SSA web, then go through all > + non-dom virtual operands before handling next dom virtual operand. > */ > + worklist.safe_push (dom_vuse); > + dom_vuse = NULL_TREE; > + > + do > + { > + tree vuse = worklist.pop (); > + gimple *stmt = SSA_NAME_DEF_STMT (vuse); > + auto_vec<tree, 6> append; > + > + /* Non-dom virtual operand should not be the DEFAULT definition. */ > + gcc_assert (!SSA_NAME_IS_DEFAULT_DEF (vuse)); > + > + if (gimple_code (stmt) != GIMPLE_PHI) > + { > + if (stmt_may_clobber_ref_p_1 (stmt, &r)) > + { > + struct ipa_known_agg_contents_list *content = > + XALLOCA (struct ipa_known_agg_contents_list); > + > + if (!extract_mem_content (stmt, arg_base, check_ref, > content)) > + goto finish; > + > + add_to_agg_contents_list (&all_list, content);
I don't think your PHI handling works correct. If you have agg.part1 = 0; if () agg.part2 = 1; call (agg); then you seem to end up above for agg.part2 because you push that onto the worklist below. > + } > + append.safe_push (gimple_vuse (stmt)); > + } > + else > + { > + for (unsigned i = 0; i < gimple_phi_num_args (stmt); ++i) > + { > + tree phi_arg = gimple_phi_arg_def (stmt, i); > + > + if (SSA_NAME_IS_DEFAULT_DEF (phi_arg)) > + goto finish; > + > + append.safe_push (phi_arg); Not sure why you first push to append and then move parts to the worklist below - it seems to only complicate code. What you want to do is (probably) use get_continuation_for_phi which for a virtual PHI node and a reference computes a virtual operand you can continue your processing with. Thus sth like bitmap vistied = NULL; dom_vuse = get_continuation_for_phi (stmt, &ref, counter /* limit walking */, &visited, false, NULL, NULL); if (visited) BITMAP_FREE (visited); and have ref be ao_ref ref = ao_ref_init (arg); where you then can remove the worklist alltogether. You need to limit work you want to do, otherwise it becomes quite expensive - assign an overall limit of alias queries, get_continuation_for_phi will decrement counter for each query it does and return NULL if counter reaches zero. Each extract_mem_content call should be accounted as well. Richard. > + } > + } > + > + for (unsigned i = 0; i < append.length (); ++i) > + { > + if (visited.add (vuse = append[i])) > + continue; > + > + if (SSA_NAME_IS_DEFAULT_DEF (vuse) > + || strictly_dominated_by_ssa_p (call_bb, vuse)) > + { > + /* Found a new dom virtual operand, stop going further > until > + all pending non-dom virtual operands are processed. */ > + gcc_assert (!dom_vuse); > + dom_vuse = vuse; > + } > + else > + worklist.safe_push (vuse); > + } > + } while (!worklist.is_empty ()); > + > + gcc_assert (dom_vuse); > + > + /* It is asserted that unprocessed dom virtual operand should be in > + non-visited state. */ > + visited.remove (dom_vuse); > + } > + > +finish: > > /* Third stage just goes over the list and creates an appropriate vector of > - ipa_agg_jf_item structures out of it, of sourse only if there are > + ipa_agg_jf_item structures out of it, of course only if there are > any known constants to begin with. */ > > if (const_count) > @@ -1691,6 +1834,7 @@ determine_locally_known_aggregate_parts (gcall *call, > tree arg, > } > } > > + > /* Return the Ith param type of callee associated with call graph > edge E. */ > > @@ -1797,7 +1941,7 @@ ipa_set_jfunc_vr (ipa_jump_func *jf, enum > value_range_kind type, > jf->m_vr = ipa_get_value_range (type, min, max); > } > > -/* Assign to JF a pointer to a value_range just liek TMP but either fetch a > +/* Assign to JF a pointer to a value_range just like TMP but either fetch a > copy from ipa_vr_hash_table or allocate a new on in GC memory. */ > > static void > @@ -1978,7 +2122,7 @@ ipa_compute_jump_functions_for_edge (struct > ipa_func_body_info *fbi, > || !ipa_get_jf_ancestor_agg_preserved (jfunc)) > && (AGGREGATE_TYPE_P (TREE_TYPE (arg)) > || POINTER_TYPE_P (param_type))) > - determine_locally_known_aggregate_parts (call, arg, param_type, > jfunc); > + determine_known_aggregate_parts (call, arg, param_type, jfunc); > } > if (!useful_context) > vec_free (args->polymorphic_call_contexts); > diff --git a/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c > b/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c > new file mode 100644 > index 00000000000..131c4e265bb > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c > @@ -0,0 +1,78 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -fdump-ipa-cp-details -fno-inline" } */ > + > +int data1; > + > +int callee1(int *v) > +{ > + if (*v < 2) > + return 0; > + else > + { > + int t = data1; > + > + data1 = *v; > + *v = t; > + > + return 1; > + } > +} > + > +int __attribute((pure)) callee2(int *v) > +{ > + if (*v < 2) > + return 0; > + else > + { > + data1 = v[0] + v[2]; > + > + return 1; > + } > +} > + > +int caller1(int c, int *r) > +{ > + int a = 1; > + > + if (c) > + return callee1(&a); > + else > + { > + *r = 2; > + return callee1(r); > + } > +} > + > +int data2[200]; > +int data3; > + > +int __attribute((const)) gen_cond(int); > + > +int caller2(void) > +{ > + int i, j; > + int sum = 0; > + int a[8]; > + > + a[0] = 3; > + for (i = 0; i < 100; i++) > + { > + if (gen_cond (i)) > + continue; > + > + a[2] = 4; > + for (j = 0; j < 100; j++) > + { > + data2[i + j] = (i ^ j) + data3; > + > + sum += callee2(a); > + } > + } > + > + return sum; > +} > + > +/* { dg-final { scan-ipa-dump-times "offset: 0, cst: 1" 1 "cp" } } */ > +/* { dg-final { scan-ipa-dump-times "offset: 0, cst: 2" 1 "cp" } } */ > +/* { dg-final { scan-ipa-dump-times "offset: 0, cst: 3" 1 "cp" } } */ > +/* { dg-final { scan-ipa-dump-times "offset: 64, cst: 4" 1 "cp" } } */ >