Hi, PR 50744 is an issue with an integer overflow when we propagate the estimated size and time effects from callees to callers. Because such paths in the parameter value graph can be arbitrarily long, we simply need to introduce an artificial cap on these values. This is what the patch below does. The cap should be more than enough for any reasonable values.
Moreover, I have looked at how we then process the accumulated estimates and decided to compute evaluation ratio in good_cloning_opportunity_p in HOST_WIDEST_INT. Call graph frequencies are numerators of fractions with denominator 1000 and therefore capping the size and cost estimate as well as the frequency sums so that their multiplication would not overflow an int seems too constraining on 32bit hosts. Bootstrapped and tested on x86_64-linux without any problems, OK for trunk? Thanks, Martin 2011-11-15 Martin Jambor <mjam...@suse.cz> PR tree-optimization/50744 * ipa-cp.c (good_cloning_opportunity_p): Assert size_cost is positive, compute evaluation in HOST_WIDEST_INT. (safe_add): New function (propagate_effects): Use safe_add to accumulate effects. * testsuite/gcc.dg/ipa/pr50744.c: New test. Index: src/gcc/testsuite/gcc.dg/ipa/pr50744.c =================================================================== --- /dev/null +++ src/gcc/testsuite/gcc.dg/ipa/pr50744.c @@ -0,0 +1,119 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-optimize-sibling-calls" } */ + +extern int use_data (void *p_01, void *p_02, void *p_03, void *p_04, void *p_05, + void *p_06, void *p_07, void *p_08, void *p_09, void *p_10, + void *p_11, void *p_12, void *p_13, void *p_14, void *p_15, + void *p_16, void *p_17, void *p_18, void *p_19, void *p_20, + void *p_21, void *p_22, void *p_23, void *p_24, void *p_25, + void *p_26, void *p_27, void *p_28, void *p_29, + void *p_30); + +extern int idx (int i, int j, int n); + +struct stuff +{ + int decision; + int *a, *b, *c; + int res; +}; + + +#define some_large_stuff(stuff, n) { \ + int i, j, k; \ + for (i = 0; i < n; i++) \ + for (j = 0; j < n; j++) \ + { \ + int v = stuff->c[idx(i, j, n)]; \ + for (k = 0; k < n; k++) \ + v += stuff->a[idx(i, k, n)] * stuff->b[idx(k,j,n)]; \ + stuff->c[idx(i, j, n)] = v; \ + } \ +} + +#define recursion if (iter > 0) \ + foo (stuff, iter - 1, (void *) -1, p_01, p_02, p_03, p_04, p_05, p_06, \ + p_07, p_08, p_09, p_10, p_11, p_12, p_13, p_14, p_15, p_16, p_17, \ + p_18, p_19, p_20, p_21, p_22, p_23, p_24, p_25, p_26, p_27, p_28, p_29); \ + else \ + foo (stuff, iter, p_01, p_02, p_03, p_04, p_05, p_06, p_07, p_08, p_09, \ + p_10, p_11, p_12, p_13, p_14, p_15, p_16, p_17, p_18, p_19, p_20, \ + p_21,p_22, p_23, p_24, p_25, p_26, p_27, p_28, p_29, p_30) + +void +foo (struct stuff *stuff, + int iter, + void *p_01, void *p_02, void *p_03, void *p_04, void *p_05, + void *p_06, void *p_07, void *p_08, void *p_09, void *p_10, + void *p_11, void *p_12, void *p_13, void *p_14, void *p_15, + void *p_16, void *p_17, void *p_18, void *p_19, void *p_20, + void *p_21, void *p_22, void *p_23, void *p_24, void *p_25, + void *p_26, void *p_27, void *p_28, void *p_29, void *p_30) +{ + switch (stuff->decision) + { + case 0: + some_large_stuff (stuff, 83); + stuff->res = + use_data (p_01, p_02, p_03, p_04, p_05, p_06, p_07, p_08, p_09, p_10, + p_11, p_12, p_13, p_14, p_15, p_16, p_17, p_18, p_19, p_20, + p_21, p_22, p_23, p_24, p_25, p_26, p_27, p_28, p_29, p_30); + recursion; + break; + + case 1: + some_large_stuff (stuff, 25); + stuff->res = + use_data (p_11, p_02, p_03, p_04, p_05, p_06, p_07, p_08, p_09, p_10, + p_21, p_12, p_13, p_14, p_15, p_16, p_17, p_18, p_19, p_20, + p_01, p_22, p_23, p_24, p_25, p_26, p_27, p_28, p_29, p_30); + recursion; + break; + + case 3: + some_large_stuff (stuff, 139); + stuff->res = + use_data (p_01, p_12, p_03, p_04, p_05, p_06, p_07, p_08, p_09, p_10, + p_11, p_22, p_13, p_14, p_15, p_16, p_17, p_18, p_19, p_20, + p_21, p_02, p_23, p_24, p_25, p_26, p_27, p_28, p_29, p_30); + recursion; + break; + + case 4: + some_large_stuff (stuff, 32); + stuff->res = + use_data (p_01, p_02, p_13, p_04, p_05, p_06, p_07, p_08, p_09, p_10, + p_11, p_12, p_23, p_14, p_15, p_16, p_17, p_18, p_19, p_20, + p_21, p_22, p_03, p_24, p_25, p_26, p_27, p_28, p_29, p_30); + recursion; + break; + + case 5: + some_large_stuff (stuff, 205); + stuff->res = + use_data (p_01, p_02, p_03, p_04, p_15, p_06, p_07, p_08, p_09, p_10, + p_11, p_12, p_13, p_14, p_25, p_16, p_17, p_18, p_19, p_20, + p_21, p_22, p_23, p_24, p_05, p_26, p_27, p_28, p_29, p_30); + recursion; + break; + + case 6: + some_large_stuff (stuff, 64); + stuff->res = + use_data (p_01, p_02, p_03, p_04, p_05, p_16, p_07, p_08, p_09, p_10, + p_11, p_12, p_13, p_14, p_15, p_26, p_17, p_18, p_19, p_20, + p_21, p_22, p_23, p_24, p_25, p_06, p_27, p_28, p_29, p_30); + recursion; + break; + } +} + +#define NULL (void *)0 + +void +bar (struct stuff *stuff, int iter) +{ + foo (stuff, iter, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); +} Index: src/gcc/ipa-cp.c =================================================================== --- src.orig/gcc/ipa-cp.c +++ src/gcc/ipa-cp.c @@ -1225,19 +1229,19 @@ good_cloning_opportunity_p (struct cgrap || !optimize_function_for_speed_p (DECL_STRUCT_FUNCTION (node->decl))) return false; - gcc_checking_assert (size_cost >= 0); + gcc_assert (size_cost > 0); - /* FIXME: These decisions need tuning. */ if (max_count) { - int evaluation, factor = (count_sum * 1000) / max_count; - - evaluation = (time_benefit * factor) / size_cost; + int factor = (count_sum * 1000) / max_count; + HOST_WIDEST_INT evaluation = (((HOST_WIDEST_INT) time_benefit * factor) + / size_cost); if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, " good_cloning_opportunity_p (time: %i, " "size: %i, count_sum: " HOST_WIDE_INT_PRINT_DEC - ") -> evaluation: %i, threshold: %i\n", + ") -> evaluation: " HOST_WIDEST_INT_PRINT_DEC + ", threshold: %i\n", time_benefit, size_cost, (HOST_WIDE_INT) count_sum, evaluation, 500); @@ -1245,11 +1249,13 @@ good_cloning_opportunity_p (struct cgrap } else { - int evaluation = (time_benefit * freq_sum) / size_cost; + HOST_WIDEST_INT evaluation = (((HOST_WIDEST_INT) time_benefit * freq_sum) + / size_cost); if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, " good_cloning_opportunity_p (time: %i, " - "size: %i, freq_sum: %i) -> evaluation: %i, threshold: %i\n", + "size: %i, freq_sum: %i) -> evaluation: " + HOST_WIDEST_INT_PRINT_DEC ", threshold: %i\n", time_benefit, size_cost, freq_sum, evaluation, CGRAPH_FREQ_BASE /2); @@ -1574,6 +1580,20 @@ propagate_constants_topo (struct topo_in } } + +/* Return the sum of A and B if none of them is bigger than INT_MAX/2, return + the bigger one otherwise. */ + +static int +safe_add (int a, int b) +{ + if (a > INT_MAX/2 || b > INT_MAX/2) + return a > b ? a : b; + else + return a + b; +} + + /* Propagate the estimated effects of individual values along the topological from the dependant values to those they depend on. */ @@ -1590,8 +1610,9 @@ propagate_effects (void) for (val = base; val; val = val->scc_next) { - time += val->local_time_benefit + val->prop_time_benefit; - size += val->local_size_cost + val->prop_size_cost; + time = safe_add (time, + val->local_time_benefit + val->prop_time_benefit); + size = safe_add (size, val->local_size_cost + val->prop_size_cost); } for (val = base; val; val = val->scc_next) @@ -1599,8 +1620,10 @@ propagate_effects (void) if (src->val && cgraph_maybe_hot_edge_p (src->cs)) { - src->val->prop_time_benefit += time; - src->val->prop_size_cost += size; + src->val->prop_time_benefit = safe_add (time, + src->val->prop_time_benefit); + src->val->prop_size_cost = safe_add (size, + src->val->prop_size_cost); } } }