Hi,
this patch further reduces memory use and time of the WPA stage, especially
without -g.  The time report goes from:
 phase opt and generate  :  75.66 (39%) usr   1.78 (14%) sys  77.44 (37%) wall  
855644 kB (21%) ggc
 phase stream in         :  34.62 (18%) usr   1.95 (16%) sys  36.57 (18%) wall 
3245604 kB (79%) ggc
 phase stream out        :  81.89 (42%) usr   8.49 (69%) sys  90.37 (44%) wall  
    50 kB ( 0%) ggc
 ipa dead code removal   :   4.33 ( 2%) usr   0.06 ( 0%) sys   4.24 ( 2%) wall  
     0 kB ( 0%) ggc
 ipa virtual call target :  25.15 (13%) usr   0.14 ( 1%) sys  25.42 (12%) wall  
     0 kB ( 0%) ggc
 ipa cp                  :   3.92 ( 2%) usr   0.21 ( 2%) sys   4.18 ( 2%) wall  
340698 kB ( 8%) ggc
 ipa inlining heuristics :  24.12 (12%) usr   0.38 ( 3%) sys  24.37 (12%) wall  
500427 kB (12%) ggc
 lto stream inflate      :   7.07 ( 4%) usr   0.38 ( 3%) sys   7.33 ( 4%) wall  
     0 kB ( 0%) ggc
 ipa lto gimple in       :   1.95 ( 1%) usr   0.61 ( 5%) sys   2.42 ( 1%) wall  
324875 kB ( 8%) ggc
 ipa lto gimple out      :   9.16 ( 5%) usr   1.64 (13%) sys  10.49 ( 5%) wall  
    50 kB ( 0%) ggc
 ipa lto decl in         :  21.25 (11%) usr   1.01 ( 8%) sys  22.37 (11%) wall 
2348869 kB (57%) ggc
 ipa lto decl out        :  67.33 (34%) usr   1.66 (13%) sys  68.96 (33%) wall  
     0 kB ( 0%) ggc
 ipa lto constructors out:   1.39 ( 1%) usr   0.38 ( 3%) sys   2.18 ( 1%) wall  
     0 kB ( 0%) ggc
 ipa lto decl merge      :   2.12 ( 2%) usr   0.00 ( 0%) sys   2.12 ( 2%) wall  
 13737 kB ( 0%) ggc
 ipa reference           :   2.14 ( 2%) usr   0.00 ( 0%) sys   2.13 ( 2%) wall  
     0 kB ( 0%) ggc
 ipa pure const          :   2.29 ( 2%) usr   0.01 ( 0%) sys   2.35 ( 2%) wall  
     0 kB ( 0%) ggc
 ipa icf                 :   9.02 ( 7%) usr   0.18 ( 2%) sys   9.72 ( 7%) wall  
 19203 kB ( 0%) ggc
 TOTAL                 : 195.27            12.37           207.64            
4103297 kB

to:

 phase setup             :   0.00 ( 0%) usr   0.00 ( 0%) sys   0.01 ( 0%) wall  
  1996 kB ( 0%) ggc
 phase opt and generate  :  77.17 (53%) usr   1.69 ( 9%) sys  79.45 (48%) wall  
856874 kB (26%) ggc
 phase stream in         :  25.92 (18%) usr   1.75 (10%) sys  27.66 (17%) wall 
2418654 kB (74%) ggc
 phase stream out        :  39.90 (27%) usr  14.74 (81%) sys  54.82 (33%) wall  
    50 kB ( 0%) ggc
 phase finalize          :   2.52 ( 2%) usr   0.11 ( 1%) sys   2.63 ( 2%) wall  
     0 kB ( 0%) ggc
 garbage collection      :   4.56 ( 3%) usr   0.01 ( 0%) sys   4.56 ( 3%) wall  
     0 kB ( 0%) ggc
 ipa dead code removal   :   4.32 ( 3%) usr   0.03 ( 0%) sys   4.59 ( 3%) wall  
     2 kB ( 0%) ggc
 ipa virtual call target :  23.19 (16%) usr   0.18 ( 1%) sys  23.31 (14%) wall  
     0 kB ( 0%) ggc
 ipa cp                  :   4.06 ( 3%) usr   0.18 ( 1%) sys   4.10 ( 2%) wall  
339974 kB (10%) ggc
 ipa inlining heuristics :  25.05 (17%) usr   0.32 ( 2%) sys  25.86 (16%) wall  
500986 kB (15%) ggc
 lto stream inflate      :   5.50 ( 4%) usr   0.42 ( 2%) sys   5.73 ( 3%) wall  
     0 kB ( 0%) ggc
 ipa lto gimple in       :   1.97 ( 1%) usr   0.51 ( 3%) sys   2.70 ( 2%) wall  
324937 kB (10%) ggc
 ipa lto gimple out      :   9.00 ( 6%) usr   1.59 ( 9%) sys  10.22 ( 6%) wall  
    50 kB ( 0%) ggc
 ipa lto decl in         :  14.29 (10%) usr   0.73 ( 4%) sys  15.18 ( 9%) wall 
1522854 kB (46%) ggc
 ipa lto decl out        :  25.35 (17%) usr   0.59 ( 3%) sys  25.91 (16%) wall  
     0 kB ( 0%) ggc
 ipa lto constructors out:   1.48 ( 1%) usr   0.51 ( 3%) sys   2.38 ( 1%) wall  
     0 kB ( 0%) ggc
 ipa lto cgraph I/O      :   0.74 ( 1%) usr   0.22 ( 1%) sys   0.97 ( 1%) wall  
408576 kB (12%) ggc
 ipa lto decl merge      :   1.94 ( 1%) usr   0.00 ( 0%) sys   1.95 ( 1%) wall  
 13556 kB ( 0%) ggc
 whopr wpa I/O           :   2.95 ( 2%) usr  12.03 (66%) sys  15.17 ( 9%) wall  
     0 kB ( 0%) ggc
 whopr partitioning      :   3.99 ( 3%) usr   0.03 ( 0%) sys   4.01 ( 2%) wall  
 13619 kB ( 0%) ggc
 ipa reference           :   2.45 ( 2%) usr   0.01 ( 0%) sys   2.46 ( 1%) wall  
     0 kB ( 0%) ggc
 ipa pure const          :   2.30 ( 2%) usr   0.03 ( 0%) sys   2.33 ( 1%) wall  
     0 kB ( 0%) ggc
 ipa icf                 :   8.30 ( 6%) usr   0.26 ( 1%) sys   8.37 ( 5%) wall  
 19276 kB ( 1%) ggc
 TOTAL                 : 145.51            18.29           164.57            
3277576 kB

With debug output the numbers are not that impressive, but decl in is still
about 17% down.
It also leads to about 63% code size reduction for global decl streams.

I built WPA with -flto-partition=max and looked into one of the partitions that
seemed most absurd.
We used about 180k type decls to produce about 700 lines of assembler that
mostly contained
calls to various methods. The thing is that each method brought in a lot of
declarations,
so I looked into why and noticed that TYPE_FIELDS contains TYPE_DECLs that are
mostly ignored
by the back-end except for dwarf2out, and dwarf2out actually ignores a good
portion of them, too.

I thus made a predicate to tell what decls are going to be useful for dwarf2out
and removed
the rest in free_lang_data.  Clearly with early debug, we will be able to remove
them all.

Honza


        * tree.c (free_lang_data_in_type): Skip irrelevant typedecls.
        (find_decls_types_r): Likewise.
        * tree.h (type_decl_relevant_for_debug_p): Declare.
        * dwarf2out.c (type_decl_relevant_for_debug_p): New function.
Index: tree.c
===================================================================
--- tree.c      (revision 231546)
+++ tree.c      (working copy)
@@ -5191,7 +5191,8 @@ free_lang_data_in_type (tree type)
       while (member)
        {
          if (TREE_CODE (member) == FIELD_DECL
-             || TREE_CODE (member) == TYPE_DECL)
+             || (TREE_CODE (member) == TYPE_DECL
+                 && type_decl_relevant_for_debug_p (member)))
            {
              if (prev)
                TREE_CHAIN (prev) = member;
@@ -5666,7 +5667,8 @@ find_decls_types_r (tree *tp, int *ws, v
          while (tem)
            {
              if (TREE_CODE (tem) == FIELD_DECL
-                 || TREE_CODE (tem) == TYPE_DECL)
+                 || (TREE_CODE (tem) == TYPE_DECL
+                     && type_decl_relevant_for_debug_p (tem)))
                fld_worklist_push (tem, fld);
              tem = TREE_CHAIN (tem);
            }
Index: tree.h
===================================================================
--- tree.h      (revision 231546)
+++ tree.h      (working copy)
@@ -5417,4 +5417,6 @@ desired_pro_or_demotion_p (const_tree to
   return to_type_precision <= TYPE_PRECISION (from_type);
 }
 
+extern bool type_decl_relevant_for_debug_p (const_tree);
+
 #endif  /* GCC_TREE_H  */
Index: dwarf2out.c
===================================================================
--- dwarf2out.c (revision 231546)
+++ dwarf2out.c (working copy)
@@ -21134,6 +21134,15 @@ is_redundant_typedef (const_tree decl)
   return 0;
 }
 
+/* Return true if TYPE_DECL DECL is going to be useful for debug output.  */
+bool
+type_decl_relevant_for_debug_p (const_tree decl)
+{
+  if (debug_info_level <= DINFO_LEVEL_TERSE)  /* Terse or lower: never.  */
+    return false;
+  return (!DECL_IGNORED_P (decl) && !is_redundant_typedef (decl));
+}
+
 /* Return TRUE if TYPE is a typedef that names a type for linkage
    purposes. This kind of typedefs is produced by the C++ FE for
    constructs like:

Reply via email to