Hi. This patch makes the scheduler prefer the instruction with the higher cost when two candidate instructions are otherwise equally good. Issuing the more restricted instructions first is particularly useful on in-order cores because it increases the number of dual-issue opportunities.
For example, on AArch64, instead of:

    add   x11, x2, 96
    mov   x4, x2
    mov   w10, 1
    ldrh  w5, [x0]
    ldrh  w13, [x0, 2]
    ldrh  w9, [x0, 4]
    ldrh  w12, [x0, 6]
    b     .L759

Generate:

    ldrh  w5, [x0]
    add   x11, x2, 96
    ldrh  w13, [x0, 2]
    mov   x4, x2
    ldrh  w9, [x0, 4]
    mov   w10, 1
    ldrh  w12, [x0, 6]
    b     .L759

Bootstrapped and regtested on aarch64-none-linux-gnu and there are no regressions.

Ok for trunk?

Thanks,
Vlad

gcc/
Changelog for gcc/Changelog

2018-09-11  Vlad Lazar  <vlad.la...@arm.com>

    * haifa-sched.c (rank_for_schedule): Schedule by INSN_COST.
    (rfs_decision): New scheduling decision.
diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c
index 4f0221f6f43..3095e0375b5 100644
--- a/gcc/haifa-sched.c
+++ b/gcc/haifa-sched.c
@@ -2542,7 +2542,7 @@ enum rfs_decision {
   RFS_SCHED_GROUP, RFS_PRESSURE_DELAY, RFS_PRESSURE_TICK,
   RFS_FEEDS_BACKTRACK_INSN, RFS_PRIORITY, RFS_SPECULATION,
   RFS_SCHED_RANK, RFS_LAST_INSN, RFS_PRESSURE_INDEX,
-  RFS_DEP_COUNT, RFS_TIE, RFS_FUSION, RFS_N };
+  RFS_DEP_COUNT, RFS_TIE, RFS_FUSION, RFS_COST, RFS_N };
 
 /* Corresponding strings for print outs. */
 static const char *rfs_str[RFS_N] = {
@@ -2550,7 +2550,7 @@ static const char *rfs_str[RFS_N] = {
   "RFS_SCHED_GROUP", "RFS_PRESSURE_DELAY", "RFS_PRESSURE_TICK",
   "RFS_FEEDS_BACKTRACK_INSN", "RFS_PRIORITY", "RFS_SPECULATION",
   "RFS_SCHED_RANK", "RFS_LAST_INSN", "RFS_PRESSURE_INDEX",
-  "RFS_DEP_COUNT", "RFS_TIE", "RFS_FUSION" };
+  "RFS_DEP_COUNT", "RFS_TIE", "RFS_FUSION", "RFS_COST" };
 
 /* Statistical breakdown of rank_for_schedule decisions. */
 struct rank_for_schedule_stats_t { unsigned stats[RFS_N]; };
@@ -2803,6 +2803,14 @@ rank_for_schedule (const void *x, const void *y)
   if (flag_sched_dep_count_heuristic && val != 0)
     return rfs_result (RFS_DEP_COUNT, val, tmp, tmp2);
 
+  /* Sort by INSN_COST rather than INSN_LUID. This means that instructions
+     which take longer to execute are prioritised and it leads to more
+     dual-issue opportunities on in-order cores which have this feature. */
+
+  if (INSN_COST (tmp) != INSN_COST (tmp2))
+    return rfs_result (RFS_COST, INSN_COST (tmp2) - INSN_COST (tmp),
+                       tmp, tmp2);
+
   /* If insns are equally good, sort by INSN_LUID (original insn order), so
      that we make the sort stable. This minimizes instruction movement,
      thus minimizing sched's effect on debugging and cross-jumping. */