Hi,
with the attached patch that saves roughly 10 minutes of tree-into-ssa
pass, I can compile with -O3 -fno-tree-fre -fno-tree-pre.  Only without
checking-enabled since we do incredibly deep dominator walks running out
of stack space that can be considered as bug too. 
TER still manages to enfore few thousdand temporaries with overlapping
liveranges.

THe out-of-ssa pass spends most of time in calculate_live_on_exit
and calculate_live_on_entry that looks rather symmetric to problem cured
by the attached patch, but I don't see directly how to avoid the
quadratic behaviour there.

Honza

 garbage collection    :   1.22 ( 0%) usr   0.10 ( 1%) sys   8.40 ( 1%) wall    
   0 kB ( 0%) ggc
 callgraph construction:   0.14 ( 0%) usr   0.03 ( 0%) sys   0.18 ( 0%) wall    
1147 kB ( 0%) ggc
 callgraph optimization:   0.07 ( 0%) usr   0.01 ( 0%) sys   0.45 ( 0%) wall    
 533 kB ( 0%) ggc
 ipa reference         :   0.05 ( 0%) usr   0.00 ( 0%) sys   0.06 ( 0%) wall    
   0 kB ( 0%) ggc
 ipa pure const        :   0.01 ( 0%) usr   0.00 ( 0%) sys   0.01 ( 0%) wall    
   0 kB ( 0%) ggc
 ipa type escape       :   0.04 ( 0%) usr   0.00 ( 0%) sys   0.04 ( 0%) wall    
   0 kB ( 0%) ggc
 cfg cleanup           :   3.89 ( 1%) usr   0.01 ( 0%) sys   4.11 ( 0%) wall    
1576 kB ( 1%) ggc
 trivially dead code   :   0.46 ( 0%) usr   0.00 ( 0%) sys   0.53 ( 0%) wall    
   0 kB ( 0%) ggc
 life analysis         :  51.34 ( 9%) usr   2.65 (21%) sys  73.91 ( 5%) wall    
2653 kB ( 1%) ggc
 life info update      :  48.97 ( 9%) usr   0.14 ( 1%) sys  50.68 ( 4%) wall    
 641 kB ( 0%) ggc
 alias analysis        :   0.69 ( 0%) usr   0.00 ( 0%) sys   1.05 ( 0%) wall    
4139 kB ( 1%) ggc
 register scan         :   0.41 ( 0%) usr   0.00 ( 0%) sys   0.40 ( 0%) wall    
   0 kB ( 0%) ggc
 rebuild jump labels   :   0.14 ( 0%) usr   0.00 ( 0%) sys   0.12 ( 0%) wall    
   0 kB ( 0%) ggc
 preprocessing         :   0.37 ( 0%) usr   0.06 ( 0%) sys   0.34 ( 0%) wall    
 471 kB ( 0%) ggc
 lexical analysis      :   0.01 ( 0%) usr   0.05 ( 0%) sys   0.07 ( 0%) wall    
   0 kB ( 0%) ggc
 parser                :   0.09 ( 0%) usr   0.02 ( 0%) sys   0.18 ( 0%) wall    
3207 kB ( 1%) ggc
 inline heuristics     :  14.79 ( 3%) usr   0.02 ( 0%) sys  14.86 ( 1%) wall    
1118 kB ( 0%) ggc
 integration           :  17.07 ( 3%) usr   0.22 ( 2%) sys  17.36 ( 1%) wall   
79483 kB (27%) ggc
 tree gimplify         :   0.15 ( 0%) usr   0.01 ( 0%) sys   0.17 ( 0%) wall    
3341 kB ( 1%) ggc
 tree eh               :   0.00 ( 0%) usr   0.00 ( 0%) sys   0.01 ( 0%) wall    
   0 kB ( 0%) ggc
 tree CFG construction :   0.01 ( 0%) usr   0.00 ( 0%) sys   0.00 ( 0%) wall    
1338 kB ( 0%) ggc
 tree CFG cleanup      :   4.27 ( 1%) usr   0.00 ( 0%) sys   4.27 ( 0%) wall    
  20 kB ( 0%) ggc
 tree VRP              :   1.26 ( 0%) usr   0.03 ( 0%) sys   1.33 ( 0%) wall    
  14 kB ( 0%) ggc
 tree copy propagation :   0.85 ( 0%) usr   0.05 ( 0%) sys   0.94 ( 0%) wall    
 313 kB ( 0%) ggc
 tree store copy prop  :   0.27 ( 0%) usr   0.01 ( 0%) sys   0.28 ( 0%) wall    
   5 kB ( 0%) ggc
 tree find ref. vars   :   0.16 ( 0%) usr   0.03 ( 0%) sys   0.18 ( 0%) wall   
12044 kB ( 4%) ggc
 tree PTA              :   1.55 ( 0%) usr   0.06 ( 0%) sys   1.63 ( 0%) wall    
  57 kB ( 0%) ggc
 tree alias analysis   :   2.81 ( 0%) usr   0.29 ( 2%) sys   3.10 ( 0%) wall    
   0 kB ( 0%) ggc
 tree PHI insertion    :   0.57 ( 0%) usr   0.92 ( 7%) sys   1.52 ( 0%) wall    
3137 kB ( 1%) ggc
 tree SSA rewrite      :   2.33 ( 0%) usr   0.06 ( 0%) sys   5.02 ( 0%) wall   
21592 kB ( 7%) ggc
 tree SSA other        :   0.41 ( 0%) usr   0.16 ( 1%) sys   0.65 ( 0%) wall    
   0 kB ( 0%) ggc
 tree SSA incremental  :   4.18 ( 1%) usr   0.45 ( 4%) sys   4.72 ( 0%) wall    
 520 kB ( 0%) ggc
 tree operand scan     :   1.79 ( 0%) usr   0.69 ( 5%) sys  39.97 ( 3%) wall   
18374 kB ( 6%) ggc
 dominator optimization:   2.91 ( 1%) usr   0.05 ( 0%) sys   2.99 ( 0%) wall   
11155 kB ( 4%) ggc
 tree SRA              :   4.24 ( 1%) usr   0.15 ( 1%) sys   4.51 ( 0%) wall   
25568 kB ( 9%) ggc
 tree STORE-CCP        :   0.29 ( 0%) usr   0.01 ( 0%) sys   0.31 ( 0%) wall    
  18 kB ( 0%) ggc
 tree CCP              :   0.87 ( 0%) usr   0.01 ( 0%) sys   2.39 ( 0%) wall    
 154 kB ( 0%) ggc
 tree split crit edges :   0.11 ( 0%) usr   0.02 ( 0%) sys   0.14 ( 0%) wall    
9284 kB ( 3%) ggc
 tree reassociation    :   0.34 ( 0%) usr   0.00 ( 0%) sys   0.33 ( 0%) wall    
   0 kB ( 0%) ggc
 tree code sinking     :   0.32 ( 0%) usr   0.00 ( 0%) sys   0.32 ( 0%) wall    
   0 kB ( 0%) ggc
 tree linearize phis   :   0.12 ( 0%) usr   0.00 ( 0%) sys   0.12 ( 0%) wall    
   0 kB ( 0%) ggc
 tree forward propagate:   0.10 ( 0%) usr   0.00 ( 0%) sys   0.09 ( 0%) wall    
   0 kB ( 0%) ggc
 tree conservative DCE :   1.13 ( 0%) usr   0.00 ( 0%) sys   1.11 ( 0%) wall    
   0 kB ( 0%) ggc
 tree aggressive DCE   :   0.28 ( 0%) usr   0.00 ( 0%) sys   0.28 ( 0%) wall    
   0 kB ( 0%) ggc
 tree DSE              :   0.25 ( 0%) usr   0.00 ( 0%) sys   0.22 ( 0%) wall    
   1 kB ( 0%) ggc
 PHI merge             :   0.01 ( 0%) usr   0.00 ( 0%) sys   0.00 ( 0%) wall    
   0 kB ( 0%) ggc
 complete unrolling    :   0.01 ( 0%) usr   0.00 ( 0%) sys   0.00 ( 0%) wall    
   0 kB ( 0%) ggc
 tree loop init        :   0.14 ( 0%) usr   0.00 ( 0%) sys   0.15 ( 0%) wall    
   0 kB ( 0%) ggc
 tree copy headers     :   0.10 ( 0%) usr   0.00 ( 0%) sys   0.10 ( 0%) wall    
   0 kB ( 0%) ggc
 tree SSA uncprop      :   0.09 ( 0%) usr   0.00 ( 0%) sys   0.09 ( 0%) wall    
   0 kB ( 0%) ggc
 tree SSA to normal    : 228.94 (40%) usr   0.64 ( 5%) sys 337.06 (25%) wall   
10323 kB ( 4%) ggc
 tree rename SSA copies:   0.49 ( 0%) usr   0.03 ( 0%) sys   0.51 ( 0%) wall    
   0 kB ( 0%) ggc
 dominance frontiers   :   0.23 ( 0%) usr   0.00 ( 0%) sys   0.26 ( 0%) wall    
   0 kB ( 0%) ggc
 dominance computation :   2.63 ( 0%) usr   0.09 ( 1%) sys   2.85 ( 0%) wall    
   0 kB ( 0%) ggc
 control dependences   :   0.04 ( 0%) usr   0.00 ( 0%) sys   0.04 ( 0%) wall    
   0 kB ( 0%) ggc
 expand                :   6.10 ( 1%) usr   1.13 ( 9%) sys 192.49 (14%) wall   
35008 kB (12%) ggc
 jump                  :   0.09 ( 0%) usr   0.00 ( 0%) sys   0.08 ( 0%) wall    
   0 kB ( 0%) ggc
 CSE                   :   0.89 ( 0%) usr   0.01 ( 0%) sys   0.89 ( 0%) wall    
  53 kB ( 0%) ggc
 loop analysis         :   0.29 ( 0%) usr   0.00 ( 0%) sys   0.28 ( 0%) wall    
 930 kB ( 0%) ggc
 CPROP 1               :   0.01 ( 0%) usr   0.00 ( 0%) sys   0.00 ( 0%) wall    
   0 kB ( 0%) ggc
 CSE 2                 :   0.46 ( 0%) usr   0.00 ( 0%) sys   0.46 ( 0%) wall    
  29 kB ( 0%) ggc
 branch prediction     :   0.55 ( 0%) usr   0.00 ( 0%) sys   0.56 ( 0%) wall    
   0 kB ( 0%) ggc
 flow analysis         :  37.33 ( 6%) usr   0.10 ( 1%) sys  53.59 ( 4%) wall    
   0 kB ( 0%) ggc
 combiner              :   1.02 ( 0%) usr   0.02 ( 0%) sys   1.37 ( 0%) wall    
2685 kB ( 1%) ggc
 if-conversion         :   5.21 ( 1%) usr   0.00 ( 0%) sys   5.36 ( 0%) wall    
1614 kB ( 1%) ggc
 regmove               :   0.72 ( 0%) usr   0.01 ( 0%) sys   0.83 ( 0%) wall    
   4 kB ( 0%) ggc
 mode switching        :   0.01 ( 0%) usr   0.00 ( 0%) sys   0.02 ( 0%) wall    
   0 kB ( 0%) ggc
 local alloc           :   1.06 ( 0%) usr   0.02 ( 0%) sys   1.46 ( 0%) wall    
1045 kB ( 0%) ggc
 global alloc          :  86.33 (15%) usr   4.12 (32%) sys 452.97 (34%) wall    
8488 kB ( 3%) ggc
 reload CSE regs       :  24.86 ( 4%) usr   0.07 ( 1%) sys  28.13 ( 2%) wall    
3370 kB ( 1%) ggc
 load CSE after reload :   0.08 ( 0%) usr   0.00 ( 0%) sys   0.11 ( 0%) wall    
   0 kB ( 0%) ggc
 flow 2                :   0.36 ( 0%) usr   0.01 ( 0%) sys   1.19 ( 0%) wall    
5064 kB ( 2%) ggc
 if-conversion 2       :   0.22 ( 0%) usr   0.00 ( 0%) sys   0.24 ( 0%) wall    
   0 kB ( 0%) ggc
 peephole 2            :   0.22 ( 0%) usr   0.00 ( 0%) sys   0.24 ( 0%) wall    
   0 kB ( 0%) ggc
 rename registers      :   0.38 ( 0%) usr   0.05 ( 0%) sys   0.50 ( 0%) wall    
   1 kB ( 0%) ggc
 scheduling 2          :   2.10 ( 0%) usr   0.07 ( 1%) sys   2.40 ( 0%) wall    
4347 kB ( 1%) ggc
 machine dep reorg     :   0.31 ( 0%) usr   0.00 ( 0%) sys   0.31 ( 0%) wall    
  79 kB ( 0%) ggc
 reorder blocks        :   0.63 ( 0%) usr   0.01 ( 0%) sys   1.06 ( 0%) wall    
2738 kB ( 1%) ggc
 reg stack             :   1.07 ( 0%) usr   0.02 ( 0%) sys   1.53 ( 0%) wall   
11030 kB ( 4%) ggc
 final                 :   1.06 ( 0%) usr   0.04 ( 0%) sys   1.18 ( 0%) wall    
2182 kB ( 1%) ggc
 symout                :   0.00 ( 0%) usr   0.00 ( 0%) sys   0.11 ( 0%) wall    
   0 kB ( 0%) ggc
 TOTAL                 : 575.62            12.78          1351.48             
291955 kB
Index: tree-into-ssa.c
===================================================================
*** tree-into-ssa.c     (revision 115645)
--- tree-into-ssa.c     (working copy)
*************** compute_global_livein (bitmap livein, bi
*** 369,377 ****
--- 369,386 ----
    tos = worklist
      = (basic_block *) xmalloc (sizeof (basic_block) * (last_basic_block + 1));
  
+   /* This function tends to be CPU hog for large CFGs.  Avoid unnecesary
+      bitmap queries by caching the in aux blocks.  */
    EXECUTE_IF_SET_IN_BITMAP (livein, 0, i, bi)
      {
        *tos++ = BASIC_BLOCK (i);
+       gcc_assert (!BASIC_BLOCK (i)->aux);
+       BASIC_BLOCK (i)->aux = (void *)1;
+     }
+   EXECUTE_IF_SET_IN_BITMAP (def_blocks, 0, i, bi)
+     {
+       gcc_assert (!BASIC_BLOCK (i)->aux);
+       BASIC_BLOCK (i)->aux = (void *)1;
      }
  
    /* Iterate until the worklist is empty.  */
*************** compute_global_livein (bitmap livein, bi
*** 390,404 ****
          int pred_index = pred->index;
  
          /* None of this is necessary for the entry block.  */
!         if (pred != ENTRY_BLOCK_PTR
!             && ! bitmap_bit_p (livein, pred_index)
!             && ! bitmap_bit_p (def_blocks, pred_index))
            {
              *tos++ = pred;
              bitmap_set_bit (livein, pred_index);
            }
        }
      }
  
    free (worklist);
  }
--- 399,416 ----
          int pred_index = pred->index;
  
          /* None of this is necessary for the entry block.  */
!         if (pred != ENTRY_BLOCK_PTR && !bb->aux)
            {
              *tos++ = pred;
              bitmap_set_bit (livein, pred_index);
+             bb->aux = (void *)1;
            }
        }
      }
+   EXECUTE_IF_SET_IN_BITMAP (livein, 0, i, bi)
+     BASIC_BLOCK (i)->aux = NULL;
+   EXECUTE_IF_SET_IN_BITMAP (def_blocks, 0, i, bi)
+     BASIC_BLOCK (i)->aux = NULL;
  
    free (worklist);
  }

Reply via email to