On 04/03/2014 10:40 PM, Jan Hubicka wrote:
Firefox:
cgraph.c:869 (cgraph_create_edge_1) 0: 0.0% 0:
0.0% 130358176: 6.9% 0: 0.0% 1253444
cgraph.c:510 (cgraph_allocate_node) 0: 0.0% 0:
0.0% 182236800: 9.7% 0: 0.0% 555600
toplev.c:960 (realloc_for_line_map) 0: 0.0% 89503888:
5.5% 268468240:14.3% 160: 0.0% 13
tree-streamer-in.c:621 (streamer_alloc_tree) 93089976:77.5%
972848816:59.6% 639230248:33.9% 21332480:32.3% 13496198
Total 120076578 1632997043
1883064062 65981723 24732501
source location Garbage Freed
Leak Overhead Times
Hi,
This is the linemap compression patch. For me it reduces realloc_for_line_map
to about 60MB (4%):
toplev.c:960 (realloc_for_line_map) 0: 0.0% 22395000:
1.5% 67141656: 4.1% 144: 0.0% 12
vec.h:626 ((null)) 0: 0.0% 134568360:
9.3% 75497528: 4.6% 1569368: 2.4% 1009306
vec.h:614 ((null)) 10659408: 8.3% 61265504:
4.2% 78711104: 4.8% 793720: 1.2% 632110
vec.h:698 ((null)) 908768: 0.7% 99564524:
6.9% 82255800: 5.0% 4201148: 6.5% 874628
vec.h:666 ((null)) 12840: 0.0% 73534476:
5.1% 92633604: 5.7% 2929176: 4.5% 776347
lto/lto.c:245 (lto_read_in_decl_state) 0: 0.0% 43115656:
3.0% 94137976: 5.8% 25396856:39.2% 2698570
cgraph.c:869 (cgraph_create_edge_1) 0: 0.0% 0:
0.0% 124069712: 7.6% 0: 0.0% 1192978
cgraph.c:510 (cgraph_allocate_node) 0: 0.0% 0:
0.0% 189855256:11.6% 0: 0.0% 578827
tree-streamer-in.c:621 (streamer_alloc_tree) 97891888:76.1%
892961808:61.6% 613594816:37.6% 22268544:34.3% 12574164
Total 128555402 1448954963
1633186049 64847582 24190936
source location Garbage Freed
Leak Overhead Times
So I get about 1GB of peak GGC memory and about 2.6GB for WPA in TOP (still on
the old tree). You may try to experiment with LOCATION_CACHE_SIZE. It should be
big enough so the locations get shared across different input files.
Hi!
The suggested patch looks fine. According to the following data:
https://drive.google.com/file/d/0B0pisUJ80pO1X05SMXdXS2ZScEE/edit?usp=sharing,
it saves about 1GB during the non-parallel WPA phase. Average memory
consumption during the parallel WPA phase is reduced by 1-2GB. It would be good
to apply the patch.
Martin
Honza
Index: lto-streamer-in.c
===================================================================
--- lto-streamer-in.c (revision 209047)
+++ lto-streamer-in.c (working copy)
@@ -145,21 +145,49 @@ canon_file_name (const char *string)
}
+/* location_cache is used at LTO read in to avoid too many duplicates in
+ the linemap tables. */
+
+#define LOCATION_CACHE_SIZE 524287
+struct location_cache_entry
+{
+ const char *file;
+ int line;
+ int col;
+ location_t location;
+};
+static struct location_cache_entry *location_cache;
+
+/* Return hash of FILE/LINE/COL. */
+
+int
+location_cache_hash (const char *file, int line, int col)
+{
+ return iterative_hash_hashval_t ((size_t)file,
+ iterative_hash_hashval_t (line, col)) %
LOCATION_CACHE_SIZE;
+}
+
+
/* Read a location bitpack from input block IB. */
location_t
lto_input_location (struct bitpack_d *bp, struct data_in *data_in)
{
- static const char *current_file;
- static int current_line;
+ static const char *current_file, *last_file;
+ static int current_line, last_line;
static int current_col;
bool file_change, line_change, column_change;
unsigned len;
- bool prev_file = current_file != NULL;
+ bool prev_file = last_file != NULL;
+ int hash;
+ const char *cfile;
if (bp_unpack_value (bp, 1))
return UNKNOWN_LOCATION;
+ if (!location_cache)
+ location_cache = XCNEWVEC (struct location_cache_entry,
LOCATION_CACHE_SIZE);
+
file_change = bp_unpack_value (bp, 1);
line_change = bp_unpack_value (bp, 1);
column_change = bp_unpack_value (bp, 1);
@@ -175,18 +203,32 @@ lto_input_location (struct bitpack_d *bp
if (column_change)
current_col = bp_unpack_var_len_unsigned (bp);
+ cfile = current_file;
+ hash = location_cache_hash (cfile, current_line, current_col);
- if (file_change)
+ if (location_cache[hash].file == cfile
+ && location_cache[hash].line == current_line
+ && location_cache[hash].col == current_col + 1)
+ return location_cache[hash].location;
+ location_cache[hash].file = cfile;
+ location_cache[hash].line = current_line;
+ location_cache[hash].col = current_col + 1;
+
+ if (current_file != last_file)
{
if (prev_file)
linemap_add (line_table, LC_LEAVE, false, NULL, 0);
linemap_add (line_table, LC_ENTER, false, current_file, current_line);
}
- else if (line_change)
+ else if (current_line != last_line)
linemap_line_start (line_table, current_line, current_col);
- return linemap_position_for_column (line_table, current_col);
+ location_cache[hash].location
+ = linemap_position_for_column (line_table, current_col);
+ last_file = current_file;
+ last_line = current_line;
+ return location_cache[hash].location;
}
@@ -981,6 +1023,27 @@ input_function (tree fn_decl, struct dat
}
bsi = gsi_start_bb (bb);
while (!gsi_end_p (bsi))
+ {
+ gimple stmt = gsi_stmt (bsi);
+ /* If we're recompiling LTO objects with debug stmts but
+ we're not supposed to have debug stmts, remove them now.
+ We can't remove them earlier because this would cause uid
+ mismatches in fixups, but we can do it at this point, as
+ long as debug stmts don't require fixups. */
+ if (!MAY_HAVE_DEBUG_STMTS && is_gimple_debug (stmt))
+ {
+ gimple_stmt_iterator gsi = bsi;
+ gsi_next (&bsi);
+ gsi_remove (&gsi, true);
+ }
+ else
+ {
+ gsi_next (&bsi);
+ stmts[gimple_uid (stmt)] = stmt;
+ }
+ }
+ bsi = gsi_start_bb (bb);
+ while (!gsi_end_p (bsi))
{
gimple stmt = gsi_stmt (bsi);
/* If we're recompiling LTO objects with debug stmts but