[PATCH] PR middle-end/38509: add -Wfree-nonheap-object warning option
This patch adds an option for enabling/disabling the warning for
attempting to free nonheap objects (PR/38509).  The warning is
imprecise and can issue false positives.

Bootstrapped on x86-64.  Ok for trunk?

Mark

2011-08-11  Mark Heffernan

	PR middle-end/38509
	* common.opt (Wfree-nonheap-object): New option.
	* doc/invoke.texi (Warning options): Document -Wfree-nonheap-object.
	* builtins.c (maybe_emit_free_warning): Add OPT_Wfree_nonheap_object
	to warning.

Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi	(revision 177684)
+++ gcc/doc/invoke.texi	(working copy)
@@ -244,7 +244,8 @@ Objective-C and Objective-C++ Dialects}.
 -Wfatal-errors -Wfloat-equal -Wformat -Wformat=2 @gol
 -Wno-format-contains-nul -Wno-format-extra-args -Wformat-nonliteral @gol
 -Wformat-security -Wformat-y2k @gol
--Wframe-larger-than=@var{len} -Wjump-misses-init -Wignored-qualifiers @gol
+-Wframe-larger-than=@var{len} -Wno-free-nonheap-object -Wjump-misses-init @gol
+-Wignored-qualifiers @gol
 -Wimplicit -Wimplicit-function-declaration -Wimplicit-int @gol
 -Winit-self -Winline -Wmaybe-uninitialized @gol
 -Wno-int-to-pointer-cast -Wno-invalid-offsetof @gol
@@ -3960,6 +3961,12 @@ via @code{alloca}, variable-length array
 is not included by the compiler when determining
 whether or not to issue a warning.
 
+@item -Wno-free-nonheap-object
+@opindex Wno-free-nonheap-object
+@opindex Wfree-nonheap-object
+Do not warn when attempting to free an object which was not allocated
+on the heap.
+
 @item -Wstack-usage=@var{len}
 @opindex Wstack-usage
 Warn if the stack usage of a function might be larger than @var{len} bytes.
Index: gcc/builtins.c
===================================================================
--- gcc/builtins.c	(revision 177684)
+++ gcc/builtins.c	(working copy)
@@ -6087,7 +6087,8 @@ expand_builtin (tree exp, rtx target, rt
       break;

     case BUILT_IN_FREE:
-      maybe_emit_free_warning (exp);
+      if (warn_free_nonheap_object)
+	maybe_emit_free_warning (exp);
       break;

     default:	/* just do library call, if unknown builtin */
@@ -11863,11 +11864,11 @@ maybe_emit_free_warning (tree exp)
     return;

   if (SSA_VAR_P (arg))
-    warning_at (tree_nonartificial_location (exp),
-		0, "%Kattempt to free a non-heap object %qD", exp, arg);
+    warning_at (tree_nonartificial_location (exp), OPT_Wfree_nonheap_object,
+		"%Kattempt to free a non-heap object %qD", exp, arg);
   else
-    warning_at (tree_nonartificial_location (exp),
-		0, "%Kattempt to free a non-heap object", exp);
+    warning_at (tree_nonartificial_location (exp), OPT_Wfree_nonheap_object,
+		"%Kattempt to free a non-heap object", exp);
 }

 /* Fold a call to __builtin_object_size with arguments PTR and OST,
Index: gcc/common.opt
===================================================================
--- gcc/common.opt	(revision 177684)
+++ gcc/common.opt	(working copy)
@@ -543,6 +543,10 @@ Wframe-larger-than=
 Common RejectNegative Joined UInteger
 -Wframe-larger-than=	Warn if a function's stack frame requires more than bytes
 
+Wfree-nonheap-object
+Common Var(warn_free_nonheap_object) Init(1) Warning
+Warn when attempting to free a non-heap object
+
 Winline
 Common Var(warn_inline) Warning
 Warn when an inlined function cannot be inlined
Re: [PATCH] PR middle-end/38509: add -Wfree-nonheap-object warning option
Ping?

Mark

On Fri, Aug 12, 2011 at 9:41 AM, Mark Heffernan wrote:
> This patch adds an option for enabling/disabling the warning for
> attempting to free nonheap objects (PR/38509).  The warning is
> imprecise and can issue false positives.
>
> Bootstrapped on x86-64.  Ok for trunk?
>
> Mark
>
> 2011-08-11  Mark Heffernan
>
> 	PR middle-end/38509
> 	* common.opt (Wfree-nonheap-object): New option.
> 	* doc/invoke.texi (Warning options): Document -Wfree-nonheap-object.
> 	* builtins.c (maybe_emit_free_warning): Add OPT_Wfree_nonheap_object
> 	to warning.
[google] With FDO/LIPO inline some cold callsites
The following patch changes the inliner callsite filter with FDO/LIPO.
Previously, cold callsites were unconditionally rejected.  Now the
callsite may still be inlined if the _caller_ is sufficiently hot (the
maximum count of any basic block in the function is above the hot
threshold).

This gives about 0.5 - 1% geomean performance on x86-64 (depending on
microarch) on internal benchmarks with < 1% average code size increase.

Bootstrapped and reg tested.  Ok for google/gcc-4_6?

Mark

2011-08-23  Mark Heffernan

	* basic-block.h (maybe_hot_frequency_p): Add prototype.
	* cgraph.c (dump_cgraph_node): Add field to dump.
	(cgraph_clone_node): Handle new field.
	* cgraph.h (cgraph_node): New field max_bb_count.
	* cgraphbuild.c (rebuild_cgraph_edges): Compute max_bb_count.
	* cgraphunit.c (cgraph_copy_node_for_versioning): Handle new field.
	* common.opt (finline-hot-caller): New option.
	* ipa-inline.c (cgraph_mark_inline_edge): Update max_bb_count.
	(edge_hot_enough_p): New function.
	(cgraph_decide_inlining_of_small_functions): Call edge_hot_enough_p.
	* predict.c (maybe_hot_frequency_p): Remove static keyword and
	guard with profile_info check.
	* testsuite/gcc.dg/tree-prof/inliner-1.c: Add flag.
	* testsuite/gcc.dg/tree-prof/lipo/inliner-1_0.c: Add flag.
Index: cgraphbuild.c
===================================================================
--- cgraphbuild.c	(revision 177964)
+++ cgraphbuild.c	(working copy)
@@ -591,9 +591,12 @@ rebuild_cgraph_edges (void)
   ipa_remove_all_references (&node->ref_list);

   node->count = ENTRY_BLOCK_PTR->count;
+  node->max_bb_count = 0;

   FOR_EACH_BB (bb)
     {
+      if (bb->count > node->max_bb_count)
+	node->max_bb_count = bb->count;
       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
Index: cgraph.c
===================================================================
--- cgraph.c	(revision 177964)
+++ cgraph.c	(working copy)
@@ -1904,6 +1904,9 @@ dump_cgraph_node (FILE *f, struct cgraph
   if (node->count)
     fprintf (f, " executed "HOST_WIDEST_INT_PRINT_DEC"x",
	     (HOST_WIDEST_INT)node->count);
+  if (node->max_bb_count)
+    fprintf (f, " hottest bb executed "HOST_WIDEST_INT_PRINT_DEC"x",
+	     (HOST_WIDEST_INT)node->max_bb_count);
   if (node->local.inline_summary.self_time)
     fprintf (f, " %i time, %i benefit", node->local.inline_summary.self_time,
	     node->local.inline_summary.time_inlining_benefit);
@@ -2234,6 +2237,9 @@ cgraph_clone_node (struct cgraph_node *n
   new_node->global = n->global;
   new_node->rtl = n->rtl;
   new_node->count = count;
+  new_node->max_bb_count = count;
+  if (n->count)
+    new_node->max_bb_count = count * n->max_bb_count / n->count;
   new_node->is_versioned_clone = n->is_versioned_clone;
   new_node->frequency = n->frequency;
   new_node->clone = n->clone;
@@ -2252,6 +2258,9 @@ cgraph_clone_node (struct cgraph_node *n
       n->count -= count;
       if (n->count < 0)
	n->count = 0;
+      n->max_bb_count -= new_node->max_bb_count;
+      if (n->max_bb_count < 0)
+	n->max_bb_count = 0;
     }

   FOR_EACH_VEC_ELT (cgraph_edge_p, redirect_callers, i, e)
Index: cgraph.h
===================================================================
--- cgraph.h	(revision 177964)
+++ cgraph.h	(working copy)
@@ -235,6 +235,8 @@ struct GTY((chain_next ("%h.next"), chai
   /* Expected number of executions: calculated in profile.c.  */
   gcov_type count;
+  /* Maximum count of any basic block in the function.  */
+  gcov_type max_bb_count;
   /* How to scale counts at materialization time; used to merge
      LTO units with different number of profile runs.  */
   int count_materialization_scale;
Index: cgraphunit.c
===================================================================
--- cgraphunit.c	(revision 177964)
+++ cgraphunit.c	(working copy)
@@ -2187,6 +2187,7 @@ cgraph_copy_node_for_versioning (struct
   new_version->rtl = old_version->rtl;
   new_version->reachable = true;
   new_version->count = old_version->count;
+  new_version->max_bb_count = old_version->max_bb_count;
   new_version->is_versioned_clone = true;

   for (e = old_version->callees; e; e=e->next_callee)
Index: testsuite/gcc.dg/tree-prof/inliner-1.c
===================================================================
--- testsuite/gcc.dg/tree-prof/inliner-1.c	(revision 177964)
+++ testsuite/gcc.dg/tree-prof/inliner-1.c	(working copy)
@@ -1,4 +1,4 @@
-/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fno-inline-hot-caller -fdump-tree-optimized" } */
 int a;
 int b[100];
 void abort (void);
@@ -34,7 +34,7 @@ main ()
   return 0;
 }

-/* cold fu
[google] Increase hotness count fraction
This patch bumps up the parameter 'hot-bb-count-fraction' from 1 to 4.
This results in about a 0.5% geomean performance improvement across
internal benchmarks for x86-64 LIPO.

The parameter change effectively increases the number of
functions/callsites which are considered hot.  The performance
improvement is likely due to increased inlining (more callsites are
considered hot and available for inlining).

Bootstrapped and reg-tested on x86-64.  OK for google/gcc-4_6?

Mark

2011-08-24  Mark Heffernan

	* params.def (hot-bb-count-fraction): Change default value.

Index: params.def
===================================================================
--- params.def	(revision 177964)
+++ params.def	(working copy)
@@ -382,7 +382,7 @@ DEFPARAM(PARAM_SMS_LOOP_AVERAGE_COUNT_TH
 DEFPARAM(HOT_BB_COUNT_FRACTION,
	 "hot-bb-count-fraction",
	 "Select fraction of the maximal count of repetitions of basic block in program given basic block needs to have to be considered hot",
-	 1, 0, 0)
+	 4, 0, 0)
 DEFPARAM(HOT_BB_FREQUENCY_FRACTION,
	 "hot-bb-frequency-fraction",
	 "Select fraction of the maximal frequency of executions of basic block in function given basic block needs to have to be considered hot",
Re: [google] Increase hotness count fraction
On Thu, Aug 25, 2011 at 6:49 AM, Ramana Radhakrishnan wrote:
> I know this is intended for the google branches but shouldn't such a
> change be in the x86_64 backend rather than such a general change to
> params.def.

I wouldn't consider this a backend-specific change.  The parameter
generally affects the performance-space tradeoff with FDO.  A higher
value means more code will be optimized for performance rather than
size (perhaps most significantly in the inliner).  The current value is
too conservative (too much optimizing for size) and leaves performance
on the table, at least for our benchmarks.

Though I've only tested x86-64, I'd imagine other arches would benefit,
especially those with relatively large I-caches where a larger code
footprint can be tolerated.  Of course YMMV...

Mark

> My 2 cents.
>
> cheers
> Ramana
[google] Pessimistic stack frame accounting during inlining
This patch pessimizes stack accounting during inlining.  This enables
setting a firm stack size limit (via parameters "large-stack-frame" and
"large-stack-frame-growth").  Without this patch the inliner is overly
optimistic about potential stack reuse, resulting in actual stack
frames much larger than the parameterized limits.

Internal benchmarks show minor performance differences with non-fdo
and lipo, but overall neutral.  Tested/bootstrapped on x86-64.

Ok for google-main?

Mark

2011-06-07  Mark Heffernan

	* cgraph.h (cgraph_global_info): Remove field.
	* ipa-inline.c (cgraph_clone_inlined_nodes): Change
	stack frame computation.
	(cgraph_check_inline_limits): Ditto.
	(compute_inline_parameters): Remove dead initialization.

Index: gcc/cgraph.h
===================================================================
--- gcc/cgraph.h	(revision 174512)
+++ gcc/cgraph.h	(working copy)
@@ -136,8 +136,6 @@ struct GTY(()) cgraph_local_info {
 struct GTY(()) cgraph_global_info {
   /* Estimated stack frame consumption by the function.  */
   HOST_WIDE_INT estimated_stack_size;
-  /* Expected offset of the stack frame of inlined function.  */
-  HOST_WIDE_INT stack_frame_offset;

   /* For inline clones this points to the function they will be
      inlined into.  */
Index: gcc/ipa-inline.c
===================================================================
--- gcc/ipa-inline.c	(revision 174512)
+++ gcc/ipa-inline.c	(working copy)
@@ -229,8 +229,6 @@ void
 cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
			    bool update_original)
 {
-  HOST_WIDE_INT peak;
-
   if (duplicate)
     {
       /* We may eliminate the need for out-of-line copy to be output.
@@ -279,13 +277,13 @@ cgraph_clone_inlined_nodes (struct cgrap
     e->callee->global.inlined_to = e->caller->global.inlined_to;
   else
     e->callee->global.inlined_to = e->caller;
-  e->callee->global.stack_frame_offset
-    = e->caller->global.stack_frame_offset
-      + inline_summary (e->caller)->estimated_self_stack_size;
-  peak = e->callee->global.stack_frame_offset
-    + inline_summary (e->callee)->estimated_self_stack_size;
-  if (e->callee->global.inlined_to->global.estimated_stack_size < peak)
-    e->callee->global.inlined_to->global.estimated_stack_size = peak;
+
+  /* Pessimistically assume no sharing of stack space.  That is, the
+     frame size of a function is estimated as the original frame size
+     plus the sum of the frame sizes of all inlined callees.  */
+  e->callee->global.inlined_to->global.estimated_stack_size +=
+    inline_summary (e->callee)->estimated_self_stack_size;
+
   cgraph_propagate_frequency (e->callee);

   /* Recursively clone all bodies.  */
@@ -430,8 +428,7 @@ cgraph_check_inline_limits (struct cgrap

   stack_size_limit += stack_size_limit * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100;

-  inlined_stack = (to->global.stack_frame_offset
-		   + inline_summary (to)->estimated_self_stack_size
+  inlined_stack = (to->global.estimated_stack_size
		   + what->global.estimated_stack_size);
   if (inlined_stack > stack_size_limit
       && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
@@ -2064,7 +2061,6 @@ compute_inline_parameters (struct cgraph
   self_stack_size = optimize ? estimated_stack_frame_size (node) : 0;
   inline_summary (node)->estimated_self_stack_size = self_stack_size;
   node->global.estimated_stack_size = self_stack_size;
-  node->global.stack_frame_offset = 0;

   /* Can this function be inlined at all?  */
   node->local.inlinable = tree_inlinable_function_p (node->decl);
Re: [google] pessimize stack accounting during inlining
On Wed, Jun 8, 2011 at 1:54 AM, Richard Guenther wrote:
> Well, it's still not a hard limit as we can't tell how many spill slots
> or extra call argument or return value slots we need.

Agreed.  It's not perfect.  But I've found this does a reasonable job
of preventing the inliner from pushing the frame size much beyond the
imposed limit, especially if the limit is large (eg, many K) relative
to the typical total size of spill slots, arguments, etc.

Mark

> Richard.
>
> > Thanks,
> >
> > David
> >
> > On Tue, Jun 7, 2011 at 4:29 PM, Mark Heffernan wrote:
> >> This patch pessimizes stack accounting during inlining.  This enables
> >> setting a firm stack size limit (via parameters "large-stack-frame" and
> >> "large-stack-frame-growth").  Without this patch the inliner is overly
> >> optimistic about potential stack reuse resulting in actual stack frames
> >> much larger than the parameterized limits.
> >>
> >> Internal benchmarks show minor performance differences with non-fdo and
> >> lipo, but overall neutral.  Tested/bootstrapped on x86-64.
> >>
> >> Ok for google-main?
> >>
> >> Mark
> >>
> >> 2011-06-07  Mark Heffernan
> >>
> >> 	* cgraph.h (cgraph_global_info): Remove field.
> >> 	* ipa-inline.c (cgraph_clone_inlined_nodes): Change
> >> 	stack frame computation.
> >> 	(cgraph_check_inline_limits): Ditto.
> >> 	(compute_inline_parameters): Remove dead initialization.
[google] Increase inlining limits with FDO/LIPO
This small patch greatly expands the function size limits for inlining
with FDO/LIPO.  With profile information, the inliner is much more
selective and precise, and so the limits can be increased with less
worry that functions and total code size will blow up.

This speeds up x86-64 internal benchmarks by about geomean 1.5% to 3%
with LIPO (depending on microarch), and 1% to 1.5% with FDO.  Size
increase is negligible (0.1% mean).

Bootstrapped and regression tested on x86-64.  Trunk testing to follow.

Ok for google/main?

Mark

2011-05-17  Mark Heffernan

	* opts.c (finish_options): Increase inlining limits with profile
	generate and use.

Index: opts.c
===================================================================
--- opts.c	(revision 173666)
+++ opts.c	(working copy)
@@ -828,6 +828,22 @@ finish_options (struct gcc_options *opts
	  opts->x_flag_split_stack = 0;
	}
     }
+
+  if (opts->x_flag_profile_use
+      || opts->x_profile_arc_flag
+      || opts->x_flag_profile_values)
+    {
+      /* With accurate profile information, inlining is much more
+	 selective and makes better decisions, so increase the
+	 inlining function size limits.  Changes must be added to both
+	 the generate and use builds to avoid profile mismatches.  */
+      maybe_set_param_value
+	(PARAM_MAX_INLINE_INSNS_SINGLE, 1000,
+	 opts->x_param_values, opts_set->x_param_values);
+      maybe_set_param_value
+	(PARAM_MAX_INLINE_INSNS_AUTO, 1000,
+	 opts->x_param_values, opts_set->x_param_values);
+    }
 }
Re: [google] Increase inlining limits with FDO/LIPO
On Tue, May 17, 2011 at 11:34 PM, Xinliang David Li wrote:
> flag_profile_arcs and flag_branch_probabilities.  -fprofile-use
> enables profile-arcs, and value profiling is enabled only when
> edge/branch profiling is enabled (so no need to be checked).

I changed the location where these parameters are set to someplace more
appropriate (to where the flags are set when profile gen/use is
indicated).  Verified identical binaries are generated.

OK as updated?

Mark

2011-05-18  Mark Heffernan

	* opts.c (set_profile_parameters): New function.

Index: opts.c
===================================================================
--- opts.c	(revision 173666)
+++ opts.c	(working copy)
@@ -1209,6 +1209,25 @@ print_specific_help (unsigned int includ
			opts->x_help_columns, opts, lang_mask);
 }

+
+/* Set parameters to more appropriate values when profile information
+   is available.  */
+static void
+set_profile_parameters (struct gcc_options *opts,
+			struct gcc_options *opts_set)
+{
+  /* With accurate profile information, inlining is much more
+     selective and makes better decisions, so increase the
+     inlining function size limits.  */
+  maybe_set_param_value
+    (PARAM_MAX_INLINE_INSNS_SINGLE, 1000,
+     opts->x_param_values, opts_set->x_param_values);
+  maybe_set_param_value
+    (PARAM_MAX_INLINE_INSNS_AUTO, 1000,
+     opts->x_param_values, opts_set->x_param_values);
+}
+
+
 /* Handle target- and language-independent options.  Return zero to
    generate an "unknown option" message.  Only options that need
    extra handling need to be listed here; if you simply want
@@ -1560,6 +1579,7 @@ common_handle_option (struct gcc_options
	opts->x_flag_unswitch_loops = value;
       if (!opts_set->x_flag_gcse_after_reload)
	opts->x_flag_gcse_after_reload = value;
+      set_profile_parameters (opts, opts_set);
       break;

     case OPT_fprofile_generate_:
@@ -1580,6 +1600,7 @@ common_handle_option (struct gcc_options
	 is done.  */
       if (!opts_set->x_flag_ipa_reference && in_lto_p)
	opts->x_flag_ipa_reference = false;
+      set_profile_parameters (opts, opts_set);
       break;

     case OPT_fshow_column:
Re: [google] Increase inlining limits with FDO/LIPO
On Wed, May 18, 2011 at 10:52 AM, Xinliang David Li wrote:
> The new change won't help those.  Your original place will be ok if you
> test profile_arcs and branch_probability flags.

Ah, yes.  I see your point now.  Reverted to the original change with
condition profile_arc_flag and flag_branch_probabilities.

Mark

> David
>
> On Wed, May 18, 2011 at 10:39 AM, Mark Heffernan wrote:
>> On Tue, May 17, 2011 at 11:34 PM, Xinliang David Li wrote:
>>>
>>> To make consistent inline decisions between profile-gen and
>>> profile-use, probably better to check these two:
>>>
>>> flag_profile_arcs and flag_branch_probabilities.  -fprofile-use
>>> enables profile-arcs, and value profiling is enabled only when
>>> edge/branch profiling is enabled (so no need to be checked).
>>
>> I changed the location where these parameters are set to someplace more
>> appropriate (to where the flags are set when profile gen/use is
>> indicated).  Verified identical binaries are generated.
>>
>> OK as updated?
>>
>> Mark
Re: [google] Increase inlining limits with FDO/LIPO
Verified identical binaries created and submitted.

Mark

On Wed, May 18, 2011 at 11:37 AM, Xinliang David Li wrote:
> Ok with that change to google/main with some retesting.
>
> David
>
> On Wed, May 18, 2011 at 11:34 AM, Mark Heffernan wrote:
>> On Wed, May 18, 2011 at 10:52 AM, Xinliang David Li wrote:
>>> The new change won't help those.  Your original place will be ok if you
>>> test profile_arcs and branch_probability flags.
>>
>> Ah, yes.  I see your point now.  Reverted to the original change with
>> condition profile_arc_flag and flag_branch_probabilities.
>>
>> Mark