Hi,
this patch implements heuristics that increases inline limits (by the hints
mechanism) for inline functions that use builtin_constant_p on parameter. Those
are very likely intended to be always inlined and simplify after inlining.

The PR is about a function that we used to inline with
 --param inline-insns-single=200 but with new default of 70 for -O2 we no longer
do so.  Hints are currently configured to bump the bound up twice, so we
get limit of 140 that is still not enough to inline the particular testcase
but it should help in general.  I can implement a stronger bump if that seems
useful (maybe it is). The example is bit operation written as a decision chain
with 64 conditions:

return ( __builtin_constant_p((size) - 1) ? ( __builtin_constant_p((size) - 1)
? ( ((size) - 1) < 2 ? 0 : ((size) - 1) & (1ULL << 63) ? 63 : ((size) - 1) &
(1ULL << 62) ? 62 : ((size) - 1) & (1ULL << 61) ? 61 : ((size) - 1) & (1ULL <<
60) ? 60 : ((size) - 1) & (1ULL << 59) ? 59 : ((size) - 1) & (1ULL << 58) ? 58
: ((size) - 1) & (1ULL << 57) ? 57 : ((size) - 1) & (1ULL << 56) ? 56 : ((size)
- 1) & (1ULL << 55) ? 55 : ((size) - 1) & (1ULL << 54) ? 54 : ((size) - 1) &
(1ULL << 53) ? 53 : ((size) - 1) & (1ULL << 52) ? 52 : ((size) - 1) & (1ULL <<
51) ? 51 : ((size) - 1) & (1ULL << 50) ? 50 : ((size) - 1) & (1ULL << 49) ? 49
: ((size) - 1) & (1ULL << 48) ? 48 : ((size) - 1) & (1ULL << 47) ? 47 : ((size)
- 1) & (1ULL << 46) ? 46 : ((size) - 1) & (1ULL << 45) ? 45 : ((size) - 1) &
(1ULL << 44) ? 44 : ((size) - 1) & (1ULL << 43) ? 43 : ((size) - 1) & (1ULL <<
42) ? 42 : ((size) - 1) & (1ULL << 41) ? 41 : ((size) - 1) & (1ULL << 40) ? 40
: ((size) - 1) & (1ULL << 39) ? 39 : ((size) - 1) & (1ULL << 38) ? 38 : ((size)
- 1) & (1ULL << 37) ? 37 : ((size) - 1) & (1ULL << 36) ? 36 : ((size) - 1) &
(1ULL << 35) ? 35 : ((size) - 1) & (1ULL << 34) ? 34 : ((size) - 1) & (1ULL <<
33) ? 33 : ((size) - 1) & (1ULL << 32) ? 32 : ((size) - 1) & (1ULL << 31) ? 31
: ((size) - 1) & (1ULL << 30) ? 30 : ((size) - 1) & (1ULL << 29) ? 29 : ((size)
- 1) & (1ULL << 28) ? 28 : ((size) - 1) & (1ULL << 27) ? 27 : ((size) - 1) &
(1ULL << 26) ? 26 : ((size) - 1) & (1ULL << 25) ? 25 : ((size) - 1) & (1ULL <<
24) ? 24 : ((size) - 1) & (1ULL << 23) ? 23 : ((size) - 1) & (1ULL << 22) ? 22
: ((size) - 1) & (1ULL << 21) ? 21 : ((size) - 1) & (1ULL << 20) ? 20 : ((size)
- 1) & (1ULL << 19) ? 19 : ((size) - 1) & (1ULL << 18) ? 18 : ((size) - 1) &
(1ULL << 17) ? 17 : ((size) - 1) & (1ULL << 16) ? 16 : ((size) - 1) & (1ULL <<
15) ? 15 : ((size) - 1) & (1ULL << 14) ? 14 : ((size) - 1) & (1ULL << 13) ? 13
: ((size) - 1) & (1ULL << 12) ? 12 : ((size) - 1) & (1ULL << 11) ? 11 : ((size)
- 1) & (1ULL << 10) ? 10 : ((size) - 1) & (1ULL << 9) ? 9 : ((size) - 1) &
(1ULL << 8) ? 8 : ((size) - 1) & (1ULL << 7) ? 7 : ((size) - 1) & (1ULL << 6) ?
6 : ((size) - 1) & (1ULL << 5) ? 5 : ((size) - 1) & (1ULL << 4) ? 4 : ((size) -
1) & (1ULL << 3) ? 3 : ((size) - 1) & (1ULL << 2) ? 2 : 1) : -1) :
(sizeof((size) - 1) <= 4) ? __ilog2_u32((size) - 1) : __ilog2_u64((size) - 1) )
- 12 + 1;

This blows up the limit on number of conditions we track per funtion (which is
30) and thus the size/time estimates are not working that well.

Bootstrapped/regtsted x86_64-linux, will commit it after bit more testing.

gcc/ChangeLog:

2020-10-21  Jan Hubicka  <hubi...@ucw.cz>

        PR ipa/97445
        * ipa-fnsummary.c (ipa_dump_hints): Add INLINE_HINT_builtin_constant_p.
        (ipa_fn_summary::~ipa_fn_summary): Free builtin_constant_p_parms.
        (ipa_fn_summary_t::duplicate): Duplicate builtin_constant_p_parms.
        (ipa_dump_fn_summary): Dump builtin_constant_p_parms.
        (add_builtin_constant_p_parm): New function
        (set_cond_stmt_execution_predicate): Update builtin_constant_p_parms.
        (ipa_call_context::estimate_size_and_time): Set 
        INLINE_HINT_builtin_constant_p..
        (ipa_merge_fn_summary_after_inlining): Merge builtin_constant_p_parms.
        (inline_read_section): Read builtin_constant_p_parms.
        (ipa_fn_summary_write): Write builtin_constant_p_parms.
        * ipa-fnsummary.h (enum ipa_hints_vals): Add
        INLINE_HINT_builtin_constant_p.
        * ipa-inline.c (want_inline_small_function_p): Use
        INLINE_HINT_builtin_constant_p.
        (edge_badness): Use INLINE_HINT_builtin_constant_p.

gcc/testsuite/ChangeLog:

2020-10-21  Jan Hubicka  <hubi...@ucw.cz>

        PR ipa/97445
        * gcc.dg/ipa/inlinehint-5.c: New test.

diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c
index 9e3eda4d3cb..eb7467a8d52 100644
--- a/gcc/ipa-fnsummary.c
+++ b/gcc/ipa-fnsummary.c
@@ -141,6 +141,11 @@ ipa_dump_hints (FILE *f, ipa_hints hints)
       hints &= ~INLINE_HINT_known_hot;
       fprintf (f, " known_hot");
     }
+  if (hints & INLINE_HINT_builtin_constant_p)
+    {
+      hints &= ~INLINE_HINT_builtin_constant_p;
+      fprintf (f, " builtin_constant_p");
+    }
   gcc_assert (!hints);
 }
 
@@ -751,6 +756,7 @@ ipa_fn_summary::~ipa_fn_summary ()
   vec_free (call_size_time_table);
   vec_free (loop_iterations);
   vec_free (loop_strides);
+  vec_free (builtin_constant_p_parms);
 }
 
 void
@@ -908,6 +914,16 @@ ipa_fn_summary_t::duplicate (cgraph_node *src,
       info->loop_strides
        = remap_freqcounting_preds_after_dup (info->loop_strides,
                                              possible_truths);
+      if (info->builtin_constant_p_parms)
+       {
+         vec <int, va_gc> *parms = info->builtin_constant_p_parms;
+         int *ip;
+         info->builtin_constant_p_parms = NULL;
+         for (i = 0; vec_safe_iterate (parms,
+              i, &ip); i++)
+           if (avals.m_known_vals[*ip])
+             vec_safe_push (info->builtin_constant_p_parms, *ip);
+       }
 
       /* If inliner or someone after inliner will ever start producing
          non-trivial clones, we will get trouble with lack of information
@@ -921,6 +937,10 @@ ipa_fn_summary_t::duplicate (cgraph_node *src,
       info->loop_iterations = vec_safe_copy (info->loop_iterations);
       info->loop_strides = vec_safe_copy (info->loop_strides);
 
+      if (info->builtin_constant_p_parms)
+       info->builtin_constant_p_parms
+            = vec_safe_copy (info->builtin_constant_p_parms);
+
       ipa_freqcounting_predicate *f;
       for (int i = 0; vec_safe_iterate (info->loop_iterations, i, &f); i++)
        {
@@ -1066,6 +1086,13 @@ ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
            fprintf (f, " inlinable");
          if (s->fp_expressions)
            fprintf (f, " fp_expression");
+         if (s->builtin_constant_p_parms)
+           {
+             fprintf (f, " builtin_constant_p_parms");
+             for (unsigned int i = 0;
+                  i < s->builtin_constant_p_parms->length (); i++)
+               fprintf (f, " %i", (*s->builtin_constant_p_parms)[i]);
+           }
          fprintf (f, "\n  global time:     %f\n", s->time.to_double ());
          fprintf (f, "  self size:       %i\n", ss->self_size);
          fprintf (f, "  global size:     %i\n", ss->size);
@@ -1517,6 +1544,21 @@ fail:
   return false;
 }
 
+/* Record to SUMMARY that PARM is used by builtin_constant_p.  */
+
+static void
+add_builtin_constant_p_parm (class ipa_fn_summary *summary, int parm)
+{
+  int *ip;
+
+  /* Avoid duplicates.  */
+  for (unsigned int i = 0;
+       vec_safe_iterate (summary->builtin_constant_p_parms, i, &ip); i++)
+    if (*ip == parm)
+      return;
+  vec_safe_push (summary->builtin_constant_p_parms, parm);
+}
+
 /* If BB ends by a conditional we can turn into predicates, attach 
corresponding
    predicates to the CFG edges.   */
 
@@ -1598,6 +1640,8 @@ set_cond_stmt_execution_predicate (struct 
ipa_func_body_info *fbi,
   op2 = gimple_call_arg (set_stmt, 0);
   if (!decompose_param_expr (fbi, set_stmt, op2, &index, &param_type, &aggpos))
     return;
+  if (!aggpos.by_ref)
+    add_builtin_constant_p_parm (summary, index);
   FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
     {
       predicate p = add_condition (summary, params_summary, index,
@@ -3717,6 +3761,9 @@ ipa_call_context::estimate_size_and_time 
(ipa_call_estimates *estimates,
        hints |= INLINE_HINT_in_scc;
       if (DECL_DECLARED_INLINE_P (m_node->decl))
        hints |= INLINE_HINT_declared_inline;
+      if (info->builtin_constant_p_parms
+         && DECL_DECLARED_INLINE_P (m_node->decl))
+       hints |= INLINE_HINT_builtin_constant_p;
 
       ipa_freqcounting_predicate *fcp;
       for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++)
@@ -4044,8 +4091,14 @@ ipa_merge_fn_summary_after_inlining (struct cgraph_edge 
*edge)
          operand_map[i] = map;
          gcc_assert (map < ipa_get_param_count (params_summary));
        }
+
+      int *ip;
+      for (i = 0; vec_safe_iterate (callee_info->builtin_constant_p_parms,
+                                   i, &ip); i++)
+       if (*ip < count && operand_map[*ip] > 0)
+         add_builtin_constant_p_parm (info, operand_map[*ip]);
     }
-  sreal freq =  edge->sreal_frequency ();
+  sreal freq = edge->sreal_frequency ();
   for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++)
     {
       predicate p;
@@ -4443,6 +4496,15 @@ inline_read_section (struct lto_file_decl_data 
*file_data, const char *data,
              vec_safe_push (info->loop_strides, fcp);
            }
        }
+      count2 = streamer_read_uhwi (&ib);
+      if (info && count2)
+       vec_safe_reserve_exact (info->builtin_constant_p_parms, count2);
+      for (j = 0; j < count2; j++)
+       {
+         int parm = streamer_read_uhwi (&ib);
+         if (info)
+           info->builtin_constant_p_parms->quick_push (parm);
+       }
       for (e = node->callees; e; e = e->next_callee)
        read_ipa_call_summary (&ib, e, info != NULL);
       for (e = node->indirect_calls; e; e = e->next_callee)
@@ -4618,6 +4680,13 @@ ipa_fn_summary_write (void)
              fcp->predicate->stream_out (ob);
              fcp->freq.stream_out (ob);
            }
+         streamer_write_uhwi (ob,
+                              vec_safe_length
+                                (info->builtin_constant_p_parms));
+         int *ip;
+         for (i = 0; vec_safe_iterate (info->builtin_constant_p_parms, i, &ip);
+              i++)
+           streamer_write_uhwi (ob, *ip);
          for (edge = cnode->callees; edge; edge = edge->next_callee)
            write_ipa_call_summary (ob, edge);
          for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
diff --git a/gcc/ipa-fnsummary.h b/gcc/ipa-fnsummary.h
index f4dd5b85ab9..4c4a90dd622 100644
--- a/gcc/ipa-fnsummary.h
+++ b/gcc/ipa-fnsummary.h
@@ -49,7 +49,10 @@ enum ipa_hints_vals {
      Set by simple_edge_hints in ipa-inline-analysis.c.   */
   INLINE_HINT_cross_module = 64,
   /* We know that the callee is hot by profile.  */
-  INLINE_HINT_known_hot = 128
+  INLINE_HINT_known_hot = 128,
+  /* There is builtin_constant_p dependent on parameter which is usually
+     a strong hint to inline.  */
+  INLINE_HINT_builtin_constant_p = 256
 };
 
 typedef int ipa_hints;
@@ -123,10 +126,12 @@ public:
   ipa_fn_summary ()
     : min_size (0),
       inlinable (false), single_caller (false),
-      fp_expressions (false), estimated_stack_size (false),
+      fp_expressions (false),
+      estimated_stack_size (false),
       time (0), conds (NULL),
       size_time_table (NULL), call_size_time_table (NULL),
       loop_iterations (NULL), loop_strides (NULL),
+      builtin_constant_p_parms (NULL),
       growth (0), scc_no (0)
   {
   }
@@ -140,6 +145,7 @@ public:
     time (s.time), conds (s.conds), size_time_table (s.size_time_table),
     call_size_time_table (NULL),
     loop_iterations (s.loop_iterations), loop_strides (s.loop_strides),
+    builtin_constant_p_parms (s.builtin_constant_p_parms),
     growth (s.growth), scc_no (s.scc_no)
   {}
 
@@ -182,6 +188,8 @@ public:
   vec<ipa_freqcounting_predicate, va_gc> *loop_iterations;
   /* Predicates on when some loops in the function can have known strides.  */
   vec<ipa_freqcounting_predicate, va_gc> *loop_strides;
+  /* Parameters tested by builtin_constant_p.  */
+  vec<int, va_gc> * GTY((skip)) builtin_constant_p_parms;
   /* Estimated growth for inlining all copies of the function before start
      of small functions inlining.
      This value will get out of date as the callers are duplicated, but
diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c
index 225a0140725..99e6002149b 100644
--- a/gcc/ipa-inline.c
+++ b/gcc/ipa-inline.c
@@ -878,7 +878,8 @@ want_inline_small_function_p (struct cgraph_edge *e, bool 
report)
       bool apply_hints = (hints & (INLINE_HINT_indirect_call
                                   | INLINE_HINT_known_hot
                                   | INLINE_HINT_loop_iterations
-                                  | INLINE_HINT_loop_stride));
+                                  | INLINE_HINT_loop_stride
+                                  | INLINE_HINT_builtin_constant_p));
 
       if (growth <= opt_for_fn (to->decl,
                                param_max_inline_insns_size))
@@ -1314,7 +1315,8 @@ edge_badness (struct cgraph_edge *edge, bool dump)
     badness = badness.shift (badness > 0 ? 4 : -4);
   if ((hints & (INLINE_HINT_indirect_call
                | INLINE_HINT_loop_iterations
-               | INLINE_HINT_loop_stride))
+               | INLINE_HINT_loop_stride
+               | INLINE_HINT_builtin_constant_p))
       || callee_info->growth <= 0)
     badness = badness.shift (badness > 0 ? -2 : 2);
   if (hints & (INLINE_HINT_same_scc))
diff --git a/gcc/testsuite/gcc.dg/ipa/inlinehint-5.c 
b/gcc/testsuite/gcc.dg/ipa/inlinehint-5.c
new file mode 100644
index 00000000000..3dd3a11dd3e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/inlinehint-5.c
@@ -0,0 +1,33 @@
+/* { dg-options "-O2 -fdump-ipa-inline-details -fno-early-inlining " } */
+/* { dg-add-options bind_pic_locally } */
+int j,k,l;
+int test3(int);
+int test4(int);
+
+static inline int
+test2(int i)
+{
+  if (__builtin_constant_p (i))
+    {
+       switch (i)
+       {
+       case 1: return j;
+       case 2: return k;
+       case 3: return l;
+       }
+    }
+  else return test3(i)+test4(i);
+}
+
+static inline int
+test (int i)
+{
+  return test2(i) + test2(i+1) + test3 (i) + test3(i) + test3(i);
+}
+
+int
+run (int i)
+{
+   return test (i);
+}
+/* { dg-final { scan-ipa-dump-times "hints: declared_inline 
builtin_constant_p" 3 "inline"  } } */

Reply via email to