Hi,

PR 50744 is an issue with an integer overflow when we propagate the
estimated size and time effects from callees to callers.  Because such
paths in the parameter value graph can be arbitrarily long, we simply
need to introduce an artificial cap on these values.  This is what the
patch below does.  The cap should be more than enough for any
reasonable values.

Moreover, I have looked at how we then process the accumulated
estimates and decided to compute evaluation ratio in
good_cloning_opportunity_p in HOST_WIDEST_INT.  Call graph frequencies
are numerators of fractions with denominator 1000 and therefore
capping the size and cost estimate as well as the frequency sums so
that their multiplication would not overflow an int seems too
constraining on 32bit hosts.

Bootstrapped and tested on x86_64-linux without any problems, OK for
trunk?

Thanks,

Martin



2011-11-15  Martin Jambor  <mjam...@suse.cz>

        PR tree-optimization/50744
        * ipa-cp.c (good_cloning_opportunity_p): Assert size_cost is positive,
        compute evaluation in HOST_WIDEST_INT.
        (safe_add): New function
        (propagate_effects): Use safe_add to accumulate effects.

        * testsuite/gcc.dg/ipa/pr50744.c: New test.


Index: src/gcc/testsuite/gcc.dg/ipa/pr50744.c
===================================================================
--- /dev/null
+++ src/gcc/testsuite/gcc.dg/ipa/pr50744.c
@@ -0,0 +1,119 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-optimize-sibling-calls" } */
+
+extern int use_data (void *p_01, void *p_02, void *p_03, void *p_04, void 
*p_05,
+                    void *p_06, void *p_07, void *p_08, void *p_09, void *p_10,
+                    void *p_11, void *p_12, void *p_13, void *p_14, void *p_15,
+                    void *p_16, void *p_17, void *p_18, void *p_19, void *p_20,
+                    void *p_21, void *p_22, void *p_23, void *p_24, void *p_25,
+                    void *p_26, void *p_27, void *p_28, void *p_29,
+                    void *p_30);
+
+extern int idx (int i, int j, int n);
+
+struct stuff
+{
+  int decision;
+  int *a, *b, *c;
+  int res;
+};
+
+
+#define some_large_stuff(stuff, n) { \
+  int i, j, k; \
+  for (i = 0; i < n; i++) \
+    for (j = 0; j < n; j++) \
+      { \
+       int v = stuff->c[idx(i, j, n)]; \
+       for (k = 0; k < n; k++) \
+         v += stuff->a[idx(i, k, n)] * stuff->b[idx(k,j,n)]; \
+       stuff->c[idx(i, j, n)] = v; \
+      } \
+}
+
+#define recursion if (iter > 0) \
+    foo (stuff, iter - 1, (void *) -1, p_01, p_02, p_03, p_04, p_05, p_06, \
+      p_07, p_08, p_09, p_10, p_11, p_12, p_13, p_14, p_15, p_16, p_17, \
+     p_18, p_19, p_20, p_21, p_22, p_23, p_24, p_25, p_26, p_27, p_28, p_29); \
+    else \
+      foo (stuff, iter, p_01, p_02, p_03, p_04, p_05, p_06, p_07, p_08, p_09, \
+       p_10, p_11, p_12, p_13, p_14, p_15, p_16, p_17, p_18, p_19, p_20, \
+        p_21,p_22, p_23, p_24, p_25, p_26, p_27, p_28, p_29, p_30)
+
+void
+foo (struct stuff *stuff,
+     int iter,
+     void *p_01, void *p_02, void *p_03, void *p_04, void *p_05,
+     void *p_06, void *p_07, void *p_08, void *p_09, void *p_10,
+     void *p_11, void *p_12, void *p_13, void *p_14, void *p_15,
+     void *p_16, void *p_17, void *p_18, void *p_19, void *p_20,
+     void *p_21, void *p_22, void *p_23, void *p_24, void *p_25,
+     void *p_26, void *p_27, void *p_28, void *p_29, void *p_30)
+{
+ switch (stuff->decision)
+   {
+   case 0:
+     some_large_stuff (stuff, 83);
+     stuff->res =
+       use_data (p_01, p_02, p_03, p_04, p_05, p_06, p_07, p_08, p_09, p_10,
+                p_11, p_12, p_13, p_14, p_15, p_16, p_17, p_18, p_19, p_20,
+                p_21, p_22, p_23, p_24, p_25, p_26, p_27, p_28, p_29, p_30);
+     recursion;
+     break;
+
+   case 1:
+     some_large_stuff (stuff, 25);
+     stuff->res =
+       use_data (p_11, p_02, p_03, p_04, p_05, p_06, p_07, p_08, p_09, p_10,
+                p_21, p_12, p_13, p_14, p_15, p_16, p_17, p_18, p_19, p_20,
+                p_01, p_22, p_23, p_24, p_25, p_26, p_27, p_28, p_29, p_30);
+     recursion;
+     break;
+
+   case 3:
+     some_large_stuff (stuff, 139);
+     stuff->res =
+       use_data (p_01, p_12, p_03, p_04, p_05, p_06, p_07, p_08, p_09, p_10,
+                p_11, p_22, p_13, p_14, p_15, p_16, p_17, p_18, p_19, p_20,
+                p_21, p_02, p_23, p_24, p_25, p_26, p_27, p_28, p_29, p_30);
+     recursion;
+     break;
+
+   case 4:
+     some_large_stuff (stuff, 32);
+     stuff->res =
+       use_data (p_01, p_02, p_13, p_04, p_05, p_06, p_07, p_08, p_09, p_10,
+                p_11, p_12, p_23, p_14, p_15, p_16, p_17, p_18, p_19, p_20,
+                p_21, p_22, p_03, p_24, p_25, p_26, p_27, p_28, p_29, p_30);
+     recursion;
+     break;
+
+   case 5:
+     some_large_stuff (stuff, 205);
+     stuff->res =
+       use_data (p_01, p_02, p_03, p_04, p_15, p_06, p_07, p_08, p_09, p_10,
+                p_11, p_12, p_13, p_14, p_25, p_16, p_17, p_18, p_19, p_20,
+                p_21, p_22, p_23, p_24, p_05, p_26, p_27, p_28, p_29, p_30);
+     recursion;
+     break;
+
+   case 6:
+     some_large_stuff (stuff, 64);
+     stuff->res =
+       use_data (p_01, p_02, p_03, p_04, p_05, p_16, p_07, p_08, p_09, p_10,
+                p_11, p_12, p_13, p_14, p_15, p_26, p_17, p_18, p_19, p_20,
+                p_21, p_22, p_23, p_24, p_25, p_06, p_27, p_28, p_29, p_30);
+     recursion;
+     break;
+   }
+}
+
+#define NULL (void *)0
+
+void
+bar (struct stuff *stuff, int iter)
+{
+  foo (stuff, iter, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+       NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+       NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
+}
Index: src/gcc/ipa-cp.c
===================================================================
--- src.orig/gcc/ipa-cp.c
+++ src/gcc/ipa-cp.c
@@ -1225,19 +1229,19 @@ good_cloning_opportunity_p (struct cgrap
       || !optimize_function_for_speed_p (DECL_STRUCT_FUNCTION (node->decl)))
     return false;
 
-  gcc_checking_assert (size_cost >= 0);
+  gcc_assert (size_cost > 0);
 
-  /* FIXME:  These decisions need tuning.  */
   if (max_count)
     {
-      int evaluation, factor = (count_sum * 1000) / max_count;
-
-      evaluation = (time_benefit * factor) / size_cost;
+      int factor = (count_sum * 1000) / max_count;
+      HOST_WIDEST_INT evaluation = (((HOST_WIDEST_INT) time_benefit * factor)
+                                   / size_cost);
 
       if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "     good_cloning_opportunity_p (time: %i, "
                 "size: %i, count_sum: " HOST_WIDE_INT_PRINT_DEC
-                ") -> evaluation: %i, threshold: %i\n",
+                ") -> evaluation: " HOST_WIDEST_INT_PRINT_DEC
+                ", threshold: %i\n",
                 time_benefit, size_cost, (HOST_WIDE_INT) count_sum,
                 evaluation, 500);
 
@@ -1245,11 +1249,13 @@ good_cloning_opportunity_p (struct cgrap
     }
   else
     {
-      int evaluation = (time_benefit * freq_sum) / size_cost;
+      HOST_WIDEST_INT evaluation = (((HOST_WIDEST_INT) time_benefit * freq_sum)
+                                   / size_cost);
 
       if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "     good_cloning_opportunity_p (time: %i, "
-                "size: %i, freq_sum: %i) -> evaluation: %i, threshold: %i\n",
+                "size: %i, freq_sum: %i) -> evaluation: "
+                HOST_WIDEST_INT_PRINT_DEC ", threshold: %i\n",
                 time_benefit, size_cost, freq_sum, evaluation,
                 CGRAPH_FREQ_BASE /2);
 
@@ -1574,6 +1580,20 @@ propagate_constants_topo (struct topo_in
     }
 }
 
+
+/* Return the sum of A and B if none of them is bigger than INT_MAX/2, return
+   the bigger one otherwise.  */
+
+static int
+safe_add (int a, int b)
+{
+  if (a > INT_MAX/2 || b > INT_MAX/2)
+    return a > b ? a : b;
+  else
+    return a + b;
+}
+
+
 /* Propagate the estimated effects of individual values along the topological
    from the dependant values to those they depend on.  */
 
@@ -1590,8 +1610,9 @@ propagate_effects (void)
 
       for (val = base; val; val = val->scc_next)
        {
-         time += val->local_time_benefit + val->prop_time_benefit;
-         size += val->local_size_cost + val->prop_size_cost;
+         time = safe_add (time,
+                          val->local_time_benefit + val->prop_time_benefit);
+         size = safe_add (size, val->local_size_cost + val->prop_size_cost);
        }
 
       for (val = base; val; val = val->scc_next)
@@ -1599,8 +1620,10 @@ propagate_effects (void)
          if (src->val
              && cgraph_maybe_hot_edge_p (src->cs))
            {
-             src->val->prop_time_benefit += time;
-             src->val->prop_size_cost += size;
+             src->val->prop_time_benefit = safe_add (time,
+                                               src->val->prop_time_benefit);
+             src->val->prop_size_cost = safe_add (size,
+                                                  src->val->prop_size_cost);
            }
     }
 }

Reply via email to