Hi Feng, On 2019/10/17 16:23, Feng Xue OS wrote: > IPA does not allow constant propagation on parameter that is used to control > function recursion. > > recur_fn (i) > { > if ( !terminate_recursion (i)) > { > ... > recur_fn (i + 1); > ... > } > ... > } > > This patch is composed to enable multi-versioning for self-recursive function, > and versioning copies is limited by a specified option.
I noticed similar issue when analyzing the SPEC, self-recursive function is not versioned and posted my observations in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92074. Generally, this could be implemented well by your patch, while I am wondering whether it is OK to convert the recursive function to non-recursive function in a independent pass after ipa-cp and ipa-sra instead of reuse the ipa-cp framework? The reason is sometimes the argument is passed-by-reference, and ipa-sra runs after ipa-cp, so this kind of optimization may not be done in WPA. What's your idea about this, please? Thanks. Thanks Xiong Hu > > Feng > --- > diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c > index 045072e02ec..6255a766e4d 100644 > --- a/gcc/ipa-cp.c > +++ b/gcc/ipa-cp.c > @@ -229,7 +229,9 @@ public: > inline bool set_contains_variable (); > bool add_value (valtype newval, cgraph_edge *cs, > ipcp_value<valtype> *src_val = NULL, > - int src_idx = 0, HOST_WIDE_INT offset = -1); > + int src_idx = 0, HOST_WIDE_INT offset = -1, > + ipcp_value<valtype> **val_pos_p = NULL, > + bool unlimited = false); > void print (FILE * f, bool dump_sources, bool dump_benefits); > }; > > @@ -1579,22 +1581,37 @@ allocate_and_init_ipcp_value > (ipa_polymorphic_call_context source) > /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. > CS, > SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same > meaning. OFFSET -1 means the source is scalar and not a part of an > - aggregate. */ > + aggregate. If non-NULL, VAL_POS_P specifies position in value list, > + after which newly created ipcp_value will be inserted, and it is also > + used to record address of the added ipcp_value before function returns. > + UNLIMITED means whether value count should not exceed the limit given > + by PARAM_IPA_CP_VALUE_LIST_SIZE. */ > > template <typename valtype> > bool > ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs, > ipcp_value<valtype> *src_val, > - int src_idx, HOST_WIDE_INT offset) > + int src_idx, HOST_WIDE_INT offset, > + ipcp_value<valtype> **val_pos_p, > + bool unlimited) > { > ipcp_value<valtype> *val; > > + if (val_pos_p) > + { > + for (val = values; val && val != *val_pos_p; val = val->next); > + gcc_checking_assert (val); > + } > + > if (bottom) > return false; > > for (val = values; val; val = val->next) > if (values_equal_for_ipcp_p (val->value, newval)) > { > + if (val_pos_p) > + *val_pos_p = val; > + > if (ipa_edge_within_scc (cs)) > { > ipcp_value_source<valtype> *s; > @@ -1609,7 +1626,8 @@ ipcp_lattice<valtype>::add_value (valtype newval, > cgraph_edge *cs, > return false; > } > > - if (values_count == PARAM_VALUE (PARAM_IPA_CP_VALUE_LIST_SIZE)) > + if (!unlimited > + && values_count == PARAM_VALUE (PARAM_IPA_CP_VALUE_LIST_SIZE)) > { > /* We can only free sources, not the values themselves, because > sources > of other values in this SCC might point to them. */ > @@ -1623,6 +1641,9 @@ ipcp_lattice<valtype>::add_value (valtype newval, > cgraph_edge *cs, > } > } > > + if (val_pos_p) > + *val_pos_p = NULL; > + > values = NULL; > return set_to_bottom (); > } > @@ -1630,8 +1651,54 @@ ipcp_lattice<valtype>::add_value (valtype newval, > cgraph_edge *cs, > values_count++; > val = allocate_and_init_ipcp_value (newval); > val->add_source (cs, src_val, src_idx, offset); > - val->next = values; > - values = val; > + if (val_pos_p) > + { > + val->next = (*val_pos_p)->next; > + (*val_pos_p)->next = val; > + *val_pos_p = val; > + } > + else > + { > + val->next = values; > + values = val; > + } > + > + return true; > +} > + > +/* Return true, if a ipcp_value VAL is orginated from parameter value of > + self-feeding recursive function by applying non-passthrough arithmetic > + transformation. */ > + > +static bool > +self_recursively_generated_p (ipcp_value<tree> *val) > +{ > + class ipa_node_params *info = NULL; > + > + for (ipcp_value_source<tree> *src = val->sources; src; src = src->next) > + { > + cgraph_edge *cs = src->cs; > + > + if (!src->val || cs->caller != cs->callee->function_symbol () > + || src->val == val) > + return false; > + > + if (!info) > + info = IPA_NODE_REF (cs->caller); > + > + class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, > + src->index); > + ipcp_lattice<tree> *src_lat = src->offset == -1 ? &plats->itself > + : plats->aggs; > + ipcp_value<tree> *src_val; > + > + for (src_val = src_lat->values; src_val && src_val != val; > + src_val = src_val->next); > + > + if (!src_val) > + return false; > + } > + > return true; > } > > @@ -1649,20 +1716,72 @@ propagate_vals_across_pass_through (cgraph_edge *cs, > ipa_jump_func *jfunc, > ipcp_value<tree> *src_val; > bool ret = false; > > - /* Do not create new values when propagating within an SCC because if there > - are arithmetic functions with circular dependencies, there is infinite > - number of them and we would just make lattices bottom. If this > condition > - is ever relaxed we have to detect self-feeding recursive calls in > - cgraph_edge_brings_value_p in a smarter way. */ > + /* Due to circular dependencies, propagating within an SCC through > arithmetic > + transformation would create infinite number of values. But for > + self-feeding recursive function, we could allow propagation in a limited > + count, and this can enable a simple kind of recursive function > versioning. > + For other scenario, we would just make lattices bottom. */ > if ((ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR) > && ipa_edge_within_scc (cs)) > - ret = dest_lat->set_contains_variable (); > + { > + if (src_lat != dest_lat) > + return dest_lat->set_contains_variable (); > + > + auto_vec<ipcp_value<tree> *, 8> val_seeds; > + > + for (src_val = src_lat->values; src_val; src_val = src_val->next) > + { > + /* Now we do not use self-recursively generated value as propagation > + source, this is absolutely conservative, but could avoid explosion > + of lattice's value space, especially when one recursive function > + calls another recursive. */ > + if (self_recursively_generated_p (src_val)) > + { > + /* If the lattice has already been propagated for the call site, > + no need to do that again. */ > + for (ipcp_value_source<tree> *s = src_val->sources; s; > + s = s->next) > + if (s->cs == cs) > + return dest_lat->set_contains_variable (); > + } > + else > + val_seeds.safe_push (src_val); > + } > + > + int clone_copies = PARAM_VALUE (PARAM_IPA_CP_MAX_RECURSION_COPIES); > + int i; > + > + /* Recursively generate lattice values with a limited count. */ > + FOR_EACH_VEC_ELT (val_seeds, i, src_val) > + { > + for (int j = 1; j < clone_copies; j++) > + { > + tree cstval = ipa_get_jf_pass_through_result (jfunc, > + src_val->value, > + parm_type); > + if (!cstval) > + break; > + > + /* Try to place the new lattice value after its source, which > + can decrease iterations of propagate stage. */ > + ret |= dest_lat->add_value (cstval, cs, src_val, src_idx, > + -1, &src_val, true); > + gcc_checking_assert (src_val); > + } > + } > + ret |= dest_lat->set_contains_variable (); > + } > else > for (src_val = src_lat->values; src_val; src_val = src_val->next) > { > + /* Now we do not use self-recursively generated value as propagation > + source, otherwise it is easy to make value space of normal lattice > + overflow. */ > + if (self_recursively_generated_p (src_val)) > + continue; > + > tree cstval = ipa_get_jf_pass_through_result (jfunc, src_val->value, > parm_type); > - > if (cstval) > ret |= dest_lat->add_value (cstval, cs, src_val, src_idx); > else > @@ -3635,6 +3754,9 @@ get_info_about_necessary_edges (ipcp_value<valtype> > *val, cgraph_node *dest, > hot |= cs->maybe_hot_p (); > if (cs->caller != dest) > non_self_recursive = true; > + else if (src->val) > + gcc_assert (values_equal_for_ipcp_p (src->val->value, > + val->value)); > } > cs = get_next_cgraph_edge_clone (cs); > } > diff --git a/gcc/params.def b/gcc/params.def > index 322c37f8b96..3e242e09f01 100644 > --- a/gcc/params.def > +++ b/gcc/params.def > @@ -1135,6 +1135,11 @@ DEFPARAM (PARAM_IPA_CP_RECURSION_PENALTY, > "are evaluated for cloning.", > 40, 0, 100) > > +DEFPARAM (PARAM_IPA_CP_MAX_RECURSION_COPIES, > + "ipa-cp-max-recursion-copies", > + "Maximum function versioning copies for a self-recursive function.", > + 8, 0, 0) > + > DEFPARAM (PARAM_IPA_CP_SINGLE_CALL_PENALTY, > "ipa-cp-single-call-penalty", > "Percentage penalty functions containing a single call to another " > diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c > b/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c > new file mode 100644 > index 00000000000..7c1c01047c6 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c > @@ -0,0 +1,47 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -fdump-ipa-cp-details -fno-early-inlining" } */ > + > +int fn(); > + > +int data[100]; > + > +int recur_fn (int i) > +{ > + int j; > + > + if (i == 6) > + { > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + fn(); > + return 10; > + } > + > + data[i] = i; > + > + for (j = 0; j < 100; j++) > + recur_fn (i + 1); > + > + return i; > +} > + > +int main () > +{ > + int i; > + > + for (i = 0; i < 100; i++) > + recur_fn (1) + recur_fn (-5); > + > + return 1; > +} > + > +/* { dg-final { scan-ipa-dump-times "Creating a specialized node of > recur_fn/\[0-9\]*\\." 12 "cp" } } */ >