Hi, this patch modifies profitable_jump_thread_path heuristics by enabling code expansion when the threaded path contains at least one hot path. The basic idea is that while we do not decrease instruction count on the non-duplicated path, we reduce number of entry edges and by this path separation we possibly enable later optimization.
We may try to get more careful about when optimization is enabled but it is hard to do and I don't think the number of cold paths that meet hot paths is large enough to make this matter too much. Bootstrapped/regtested x86_64-linux, OK? * tree-ssa-threadbackward.c (profitable_jump_thread_path): Adjust hot/cold heuristics. * gcc.dg/tree-ssa/pr77445-2.c: Update testcase. Index: tree-ssa-threadbackward.c =================================================================== --- tree-ssa-threadbackward.c (revision 244732) +++ tree-ssa-threadbackward.c (working copy) @@ -159,6 +159,7 @@ profitable_jump_thread_path (vec<basic_b bool threaded_through_latch = false; bool multiway_branch_in_path = false; bool threaded_multiway_branch = false; + bool contains_hot_bb = false; /* Count the number of instructions on the path: as these instructions will have to be duplicated, we will not record the path if there @@ -168,6 +169,9 @@ profitable_jump_thread_path (vec<basic_b { basic_block bb = (*path)[j]; + if (!contains_hot_bb && speed_p) + contains_hot_bb |= optimize_bb_for_speed_p (bb); + /* Remember, blocks in the path are stored in opposite order in the PATH array. The last entry in the array represents the block with an outgoing edge that we will redirect to the @@ -311,7 +315,11 @@ profitable_jump_thread_path (vec<basic_b return NULL; } - if (speed_p && optimize_edge_for_speed_p (taken_edge)) + /* Threading is profitable if the path duplicated is hot but also + in a case we separate cold path from hot path and permit optimization + of the hot path later. Be on the agressive side here. In some testcases, + as in PR 78407 this leads to noticeable improvements. */ + if (speed_p && (optimize_edge_for_speed_p (taken_edge) || contains_hot_bb)) { if (n_insns >= PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATH_INSNS)) { Index: testsuite/gcc.dg/tree-ssa/pr77445-2.c =================================================================== --- testsuite/gcc.dg/tree-ssa/pr77445-2.c (revision 244746) +++ testsuite/gcc.dg/tree-ssa/pr77445-2.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-thread1-details-blocks-stats" } */ +/* { dg-options "-O2 -fdump-tree-thread1-details-blocks-stats -fdump-tree-thread2-details-blocks-stats -fdump-tree-thread3-details-blocks-stats -fdump-tree-thread4-details-blocks-stats" } */ typedef enum STATES { START=0, INVALID, @@ -121,3 +121,7 @@ enum STATES FMS( u8 **in , u32 *transiti increase much. */ /* { dg-final { scan-tree-dump "Jumps threaded: 1[1-9]" "thread1" } } */ /* { dg-final { scan-tree-dump-times "Invalid sum" 2 "thread1" } } */ +/* { dg-final { scan-tree-dump-not "not considered" "thread1" } } */ +/* { dg-final { scan-tree-dump-not "not considered" "thread2" } } */ +/* { dg-final { scan-tree-dump-not "not considered" "thread3" } } */ +/* { dg-final { scan-tree-dump-not "not considered" "thread4" } } */ Index: testsuite/gcc.dg/tree-ssa/threadbackward-1.c =================================================================== --- testsuite/gcc.dg/tree-ssa/threadbackward-1.c (revision 0) +++ testsuite/gcc.dg/tree-ssa/threadbackward-1.c (working copy) @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-ethread" } */ +char *c; +int t() +{ + for (int i=0;i<5000;i++) + c[i]=i; +} +/* { dg-final { scan-tree-dump-times "Registering FSM jump thread" 1 "ethread"} } */