>> Hmm, maybe attack from the other direction? -- could we clear SCHED_GROUP_P
>> for each insn at the start of this loop in sched_analyze?
>>
>> It's not as clean in the sense that SCHED_GROUP_P "escapes" the scheduler,
>> but it might be an option.
>>
>>   for (insn = head;; insn = NEXT_INSN (insn))
>>     {
>>       if (INSN_P (insn))
>>         {
>>           /* And initialize deps_lists.  */
>>           sd_init_insn (insn);
>>         }
>>
>>       deps_analyze_insn (deps, insn);
>>
>>       if (insn == tail)
>>         {
>>           if (sched_deps_info->use_cselib)
>>             cselib_finish ();
>>           return;
>>         }
>>     }
>> Jeff
>
> Thanks for the suggestion. It looks workable. Then I need to move the
> SCHED_GROUP_P setting for macro-fusion from sched_init to a place inside
> sched_analyze, after the SCHED_GROUP_P cleanup. That is more consistent
> with the cc0 setter-user group and the call group, which are both set up
> inside sched_analyze.
> I am trying this method...
>
> Thanks,
> Wei.
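
To make the intended shape concrete, here is a rough sketch of what the loop
in sched_analyze looks like with the cleanup added (illustrative only, context
trimmed; the real change is the sched_analyze hunk in the patch below):

  for (insn = head;; insn = NEXT_INSN (insn))
    {
      if (INSN_P (insn))
        {
          /* And initialize deps_lists.  */
          sd_init_insn (insn);
          /* Clear any SCHED_GROUP_P left over from an earlier
             scheduling pass, before dependencies are recomputed.  */
          if (SCHED_GROUP_P (insn))
            SCHED_GROUP_P (insn) = 0;
        }

      deps_analyze_insn (deps, insn);

      if (insn == tail)
        {
          if (sched_deps_info->use_cselib)
            cselib_finish ();
          return;
        }
    }

Doing the clearing right next to sd_init_insn means every pass that recomputes
dependencies starts from a clean bit, before deps_analyze_insn can set
SCHED_GROUP_P again for the cc0, call, or macro-fusion groups.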
Here is the patch. It does the SCHED_GROUP_P cleanup in sched_analyze, before
deps_analyze_insn sets SCHED_GROUP_P and chains the insn with the previous
insns, and it moves try_group_insn for macro-fusion from sched_init to
sched_analyze_insn. Bootstrap and regression tests pass on x86_64-linux-gnu.
Is it ok?

Thanks,
Wei.

2013-11-27  Wei Mi  <w...@google.com>

	PR rtl-optimization/59020
	* sched-deps.c (try_group_insn): Move it here from haifa-sched.c.
	(sched_analyze_insn): Call try_group_insn.
	(sched_analyze): Clean up SCHED_GROUP_P before starting the analysis.
	* haifa-sched.c (try_group_insn): Move to sched-deps.c.
	(group_insns_for_macro_fusion): Remove.
	(sched_init): Remove call to group_insns_for_macro_fusion.

2013-11-27  Wei Mi  <w...@google.com>

	PR rtl-optimization/59020
	* testsuite/gcc.dg/pr59020.c: New.
	* testsuite/gcc.dg/macro-fusion-1.c: New.
	* testsuite/gcc.dg/macro-fusion-2.c: New.

Index: sched-deps.c
===================================================================
--- sched-deps.c	(revision 204923)
+++ sched-deps.c	(working copy)
@@ -2820,6 +2820,37 @@ sched_analyze_2 (struct deps_desc *deps,
     sched_deps_info->finish_rhs ();
 }
 
+/* Try to group comparison and the following conditional jump INSN if
+   they're already adjacent. This is to prevent scheduler from scheduling
+   them apart.  */
+
+static void
+try_group_insn (rtx insn)
+{
+  unsigned int condreg1, condreg2;
+  rtx cc_reg_1;
+  rtx prev;
+
+  if (!any_condjump_p (insn))
+    return;
+
+  targetm.fixed_condition_code_regs (&condreg1, &condreg2);
+  cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
+  prev = prev_nonnote_nondebug_insn (insn);
+  if (!reg_referenced_p (cc_reg_1, PATTERN (insn))
+      || !prev
+      || !modified_in_p (cc_reg_1, prev))
+    return;
+
+  /* Different microarchitectures support macro fusions for different
+     combinations of insn pairs.  */
+  if (!targetm.sched.macro_fusion_pair_p
+      || !targetm.sched.macro_fusion_pair_p (prev, insn))
+    return;
+
+  SCHED_GROUP_P (insn) = 1;
+}
+
 /* Analyze an INSN with pattern X to find all dependencies.  */
 static void
 sched_analyze_insn (struct deps_desc *deps, rtx x, rtx insn)
@@ -2843,6 +2874,11 @@ sched_analyze_insn (struct deps_desc *de
   can_start_lhs_rhs_p = (NONJUMP_INSN_P (insn)
			 && code == SET);
 
+  /* Group compare and branch insns for macro-fusion.  */
+  if (targetm.sched.macro_fusion_p
+      && targetm.sched.macro_fusion_p ())
+    try_group_insn (insn);
+
   if (may_trap_p (x))
     /* Avoid moving trapping instructions across function calls that might
        not always return.  */
@@ -3733,6 +3769,10 @@ sched_analyze (struct deps_desc *deps, r
 	{
 	  /* And initialize deps_lists.  */
 	  sd_init_insn (insn);
+	  /* Clean up SCHED_GROUP_P which may have been set by last
+	     scheduler pass.  */
+	  if (SCHED_GROUP_P (insn))
+	    SCHED_GROUP_P (insn) = 0;
 	}
 
       deps_analyze_insn (deps, insn);
Index: haifa-sched.c
===================================================================
--- haifa-sched.c	(revision 204923)
+++ haifa-sched.c	(working copy)
@@ -6554,50 +6554,6 @@ setup_sched_dump (void)
 		 ? stderr : dump_file);
 }
 
-/* Try to group comparison and the following conditional jump INSN if
-   they're already adjacent. This is to prevent scheduler from scheduling
-   them apart.  */
-
-static void
-try_group_insn (rtx insn)
-{
-  unsigned int condreg1, condreg2;
-  rtx cc_reg_1;
-  rtx prev;
-
-  if (!any_condjump_p (insn))
-    return;
-
-  targetm.fixed_condition_code_regs (&condreg1, &condreg2);
-  cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
-  prev = prev_nonnote_nondebug_insn (insn);
-  if (!reg_referenced_p (cc_reg_1, PATTERN (insn))
-      || !prev
-      || !modified_in_p (cc_reg_1, prev))
-    return;
-
-  /* Different microarchitectures support macro fusions for different
-     combinations of insn pairs.  */
-  if (!targetm.sched.macro_fusion_pair_p
-      || !targetm.sched.macro_fusion_pair_p (prev, insn))
-    return;
-
-  SCHED_GROUP_P (insn) = 1;
-}
-
-/* If the last cond jump and the cond register defining insn are consecutive
-   before scheduling, we want them to be in a schedule group. This is good
-   for performance on microarchitectures supporting macro-fusion.  */
-
-static void
-group_insns_for_macro_fusion ()
-{
-  basic_block bb;
-
-  FOR_EACH_BB (bb)
-    try_group_insn (BB_END (bb));
-}
-
 /* Initialize some global state for the scheduler.  This function works
    with the common data shared between all the schedulers.  It is called
    from the scheduler specific initialization routine.  */
@@ -6726,11 +6682,6 @@ sched_init (void)
     }
 
   curr_state = xmalloc (dfa_state_size);
-
-  /* Group compare and branch insns for macro-fusion.  */
-  if (targetm.sched.macro_fusion_p
-      && targetm.sched.macro_fusion_p ())
-    group_insns_for_macro_fusion ();
 }
 
 static void haifa_init_only_bb (basic_block, basic_block);
Index: testsuite/gcc.dg/macro-fusion-1.c
===================================================================
--- testsuite/gcc.dg/macro-fusion-1.c	(revision 0)
+++ testsuite/gcc.dg/macro-fusion-1.c	(revision 0)
@@ -0,0 +1,13 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -mtune=corei7 -fdump-rtl-sched2" } */
+/* { dg-final { scan-rtl-dump-not "compare.*insn.*jump_insn.*jump_insn" "sched2" } } */
+
+int a[100];
+
+double bar (double sum)
+{
+  int i;
+  for (i = 0; i < 1000000; i++)
+    sum += (0.5 + (a[i%100] - 128));
+  return sum;
+}
Index: testsuite/gcc.dg/macro-fusion-2.c
===================================================================
--- testsuite/gcc.dg/macro-fusion-2.c	(revision 0)
+++ testsuite/gcc.dg/macro-fusion-2.c	(revision 0)
@@ -0,0 +1,16 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -mtune=corei7-avx -fdump-rtl-sched2" } */
+/* { dg-final { scan-rtl-dump-not "compare.*insn.*jump_insn.*jump_insn" "sched2" } } */
+
+int a[100];
+
+double bar (double sum)
+{
+  int i = 100000;
+  while (i != 0)
+    {
+      sum += (0.5 + (a[i%100] - 128));
+      i--;
+    }
+  return sum;
+}
Index: testsuite/gcc.dg/pr59020.c
===================================================================
--- testsuite/gcc.dg/pr59020.c	(revision 0)
+++ testsuite/gcc.dg/pr59020.c	(revision 0)
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/59020 */
+
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -fmodulo-sched -fno-inline -march=corei7" } */
+
+int a, b, d;
+unsigned c;
+
+void f()
+{
+  unsigned q;
+  for(; a; a++)
+    if(((c %= d && 1) ? : 1) & 1)
+      for(; b; q++);
+}