Hi, this patch further reduces the memory use and time of the WPA stage, especially without -g. The timings go from: phase opt and generate : 75.66 (39%) usr 1.78 (14%) sys 77.44 (37%) wall 855644 kB (21%) ggc phase stream in : 34.62 (18%) usr 1.95 (16%) sys 36.57 (18%) wall 3245604 kB (79%) ggc phase stream out : 81.89 (42%) usr 8.49 (69%) sys 90.37 (44%) wall 50 kB ( 0%) ggc ipa dead code removal : 4.33 ( 2%) usr 0.06 ( 0%) sys 4.24 ( 2%) wall 0 kB ( 0%) ggc ipa virtual call target : 25.15 (13%) usr 0.14 ( 1%) sys 25.42 (12%) wall 0 kB ( 0%) ggc ipa cp : 3.92 ( 2%) usr 0.21 ( 2%) sys 4.18 ( 2%) wall 340698 kB ( 8%) ggc ipa inlining heuristics : 24.12 (12%) usr 0.38 ( 3%) sys 24.37 (12%) wall 500427 kB (12%) ggc lto stream inflate : 7.07 ( 4%) usr 0.38 ( 3%) sys 7.33 ( 4%) wall 0 kB ( 0%) ggc ipa lto gimple in : 1.95 ( 1%) usr 0.61 ( 5%) sys 2.42 ( 1%) wall 324875 kB ( 8%) ggc ipa lto gimple out : 9.16 ( 5%) usr 1.64 (13%) sys 10.49 ( 5%) wall 50 kB ( 0%) ggc ipa lto decl in : 21.25 (11%) usr 1.01 ( 8%) sys 22.37 (11%) wall 2348869 kB (57%) ggc ipa lto decl out : 67.33 (34%) usr 1.66 (13%) sys 68.96 (33%) wall 0 kB ( 0%) ggc ipa lto constructors out: 1.39 ( 1%) usr 0.38 ( 3%) sys 2.18 ( 1%) wall 0 kB ( 0%) ggc ipa lto decl merge : 2.12 ( 2%) usr 0.00 ( 0%) sys 2.12 ( 2%) wall 13737 kB ( 0%) ggc ipa reference : 2.14 ( 2%) usr 0.00 ( 0%) sys 2.13 ( 2%) wall 0 kB ( 0%) ggc ipa pure const : 2.29 ( 2%) usr 0.01 ( 0%) sys 2.35 ( 2%) wall 0 kB ( 0%) ggc ipa icf : 9.02 ( 7%) usr 0.18 ( 2%) sys 9.72 ( 7%) wall 19203 kB ( 0%) ggc TOTAL : 195.27 12.37 207.64 4103297 kB
to: phase setup : 0.00 ( 0%) usr 0.00 ( 0%) sys 0.01 ( 0%) wall 1996 kB ( 0%) ggc phase opt and generate : 77.17 (53%) usr 1.69 ( 9%) sys 79.45 (48%) wall 856874 kB (26%) ggc phase stream in : 25.92 (18%) usr 1.75 (10%) sys 27.66 (17%) wall 2418654 kB (74%) ggc phase stream out : 39.90 (27%) usr 14.74 (81%) sys 54.82 (33%) wall 50 kB ( 0%) ggc phase finalize : 2.52 ( 2%) usr 0.11 ( 1%) sys 2.63 ( 2%) wall 0 kB ( 0%) ggc garbage collection : 4.56 ( 3%) usr 0.01 ( 0%) sys 4.56 ( 3%) wall 0 kB ( 0%) ggc ipa dead code removal : 4.32 ( 3%) usr 0.03 ( 0%) sys 4.59 ( 3%) wall 2 kB ( 0%) ggc ipa virtual call target : 23.19 (16%) usr 0.18 ( 1%) sys 23.31 (14%) wall 0 kB ( 0%) ggc ipa cp : 4.06 ( 3%) usr 0.18 ( 1%) sys 4.10 ( 2%) wall 339974 kB (10%) ggc ipa inlining heuristics : 25.05 (17%) usr 0.32 ( 2%) sys 25.86 (16%) wall 500986 kB (15%) ggc lto stream inflate : 5.50 ( 4%) usr 0.42 ( 2%) sys 5.73 ( 3%) wall 0 kB ( 0%) ggc ipa lto gimple in : 1.97 ( 1%) usr 0.51 ( 3%) sys 2.70 ( 2%) wall 324937 kB (10%) ggc ipa lto gimple out : 9.00 ( 6%) usr 1.59 ( 9%) sys 10.22 ( 6%) wall 50 kB ( 0%) ggc ipa lto decl in : 14.29 (10%) usr 0.73 ( 4%) sys 15.18 ( 9%) wall 1522854 kB (46%) ggc ipa lto decl out : 25.35 (17%) usr 0.59 ( 3%) sys 25.91 (16%) wall 0 kB ( 0%) ggc ipa lto constructors out: 1.48 ( 1%) usr 0.51 ( 3%) sys 2.38 ( 1%) wall 0 kB ( 0%) ggc ipa lto cgraph I/O : 0.74 ( 1%) usr 0.22 ( 1%) sys 0.97 ( 1%) wall 408576 kB (12%) ggc ipa lto decl merge : 1.94 ( 1%) usr 0.00 ( 0%) sys 1.95 ( 1%) wall 13556 kB ( 0%) ggc whopr wpa I/O : 2.95 ( 2%) usr 12.03 (66%) sys 15.17 ( 9%) wall 0 kB ( 0%) ggc whopr partitioning : 3.99 ( 3%) usr 0.03 ( 0%) sys 4.01 ( 2%) wall 13619 kB ( 0%) ggc ipa reference : 2.45 ( 2%) usr 0.01 ( 0%) sys 2.46 ( 1%) wall 0 kB ( 0%) ggc ipa pure const : 2.30 ( 2%) usr 0.03 ( 0%) sys 2.33 ( 1%) wall 0 kB ( 0%) ggc ipa icf : 8.30 ( 6%) usr 0.26 ( 1%) sys 8.37 ( 5%) wall 19276 kB ( 1%) ggc TOTAL : 145.51 18.29 164.57 3277576 kB With debug output the numbers are 
not that impressive, but still about 17% down from decl in. It also leads to about 63% code size reduction for global decl streams. I built WPA with -flto-partition=max and looked into one of the partitions that seemed most absurd. We used about 180k type decls to produce about 700 lines of assembler that mostly contained calls to various methods. The thing is that each method brought in a lot of declarations, so I looked into why and noticed that TYPE_FIELDS contains TYPE_DECLS that are mostly ignored by the back-end except for dwarf2out, and dwarf2out actually ignores a good portion of them, too. I thus made a predicate to tell what decls are going to be useful for dwarf2out and removed the rest in free_lang_data. Clearly with early debug, we will be able to remove them all. Honza * tree.c (free_lang_data_in_type): Skip irrelevant typedecls. (find_decls_types_r): Likewise. * tree.h (type_decl_relevant_for_debug_p): Declare. * dwarf2out.c (type_decl_relevant_for_debug_p): New function. Index: tree.c =================================================================== --- tree.c (revision 231546) +++ tree.c (working copy) @@ -5191,7 +5191,8 @@ free_lang_data_in_type (tree type) while (member) { if (TREE_CODE (member) == FIELD_DECL - || TREE_CODE (member) == TYPE_DECL) + || (TREE_CODE (member) == TYPE_DECL + && type_decl_relevant_for_debug_p (member))) { if (prev) TREE_CHAIN (prev) = member; @@ -5666,7 +5667,8 @@ find_decls_types_r (tree *tp, int *ws, v while (tem) { if (TREE_CODE (tem) == FIELD_DECL - || TREE_CODE (tem) == TYPE_DECL) + || (TREE_CODE (tem) == TYPE_DECL + && type_decl_relevant_for_debug_p (tem))) fld_worklist_push (tem, fld); tem = TREE_CHAIN (tem); } Index: tree.h =================================================================== --- tree.h (revision 231546) +++ tree.h (working copy) @@ -5417,4 +5417,6 @@ desired_pro_or_demotion_p (const_tree to return to_type_precision <= TYPE_PRECISION (from_type); } +extern bool type_decl_relevant_for_debug_p (const_tree); 
+ #endif /* GCC_TREE_H */ Index: dwarf2out.c =================================================================== --- dwarf2out.c (revision 231546) +++ dwarf2out.c (working copy) @@ -21134,6 +21134,15 @@ is_redundant_typedef (const_tree decl) return 0; } +/* Return true if DECL is going to be useful for debug output. */ +bool +type_decl_relevant_for_debug_p (const_tree decl) +{ + if (debug_info_level <= DINFO_LEVEL_TERSE) + return false; + return (!DECL_IGNORED_P (decl) && !is_redundant_typedef (decl)); +} + /* Return TRUE if TYPE is a typedef that names a type for linkage purposes. This kind of typedefs is produced by the C++ FE for constructs like: