Hi.

This patch makes the scheduler prefer the instruction with the higher cost
when two instructions are otherwise equally good. Issuing the more
restricted (higher-cost) instructions first is particularly useful on
in-order cores because it increases the number of dual-issue opportunities.

For example, on AArch64, instead of:

  add     x11, x2, 96
  mov     x4, x2
  mov     w10, 1
  ldrh    w5, [x0]
  ldrh    w13, [x0, 2]
  ldrh    w9, [x0, 4]
  ldrh    w12, [x0, 6]
  b       .L759

Generate:

  ldrh    w5, [x0]
  add     x11, x2, 96
  ldrh    w13, [x0, 2]
  mov     x4, x2
  ldrh    w9, [x0, 4]
  mov     w10, 1
  ldrh    w12, [x0, 6]
  b       .L759
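
To make the tie-break concrete, here is a small standalone sketch (plain C
with made-up types and names, not the GCC code itself) of a comparator that
prefers the higher-cost instruction and only falls back to original program
order when the costs are equal:

  #include <stdio.h>
  #include <stdlib.h>

  /* Hypothetical ready-list entry: each candidate carries a cost
     (e.g. its latency) and its original position in the insn stream.  */
  struct ready_insn
  {
    int cost;
    int luid;  /* original order, used only as the final tie-breaker */
  };

  /* qsort comparator: negative means A is scheduled before B.
     Higher cost wins; equal costs keep the original order, so the
     result is deterministic and code movement is minimized.  */
  static int
  rank_by_cost (const void *x, const void *y)
  {
    const struct ready_insn *a = (const struct ready_insn *) x;
    const struct ready_insn *b = (const struct ready_insn *) y;

    if (a->cost != b->cost)
      return b->cost - a->cost;  /* prefer the more expensive insn */

    return a->luid - b->luid;    /* fall back to original order */
  }

  int
  main (void)
  {
    /* Two cheap moves (cost 1) and two loads (cost 4),
       in original program order (luid 0..3).  */
    struct ready_insn ready[] = {
      { 1, 0 }, { 4, 1 }, { 1, 2 }, { 4, 3 }
    };

    qsort (ready, 4, sizeof ready[0], rank_by_cost);

    /* Prints the costly loads first (cost 4, then cost 1), with
       equal-cost insns keeping their relative order.  */
    for (int i = 0; i < 4; i++)
      printf ("cost %d (luid %d)\n", ready[i].cost, ready[i].luid);

    return 0;
  }

The actual patch does the equivalent inside rank_for_schedule, which is what
interleaves the expensive loads with the cheap moves in the example above.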

Bootstrapped and regtested on aarch64-none-linux-gnu with no regressions.
Ok for trunk?

Thanks,
Vlad

gcc/ChangeLog:
2018-09-11  Vlad Lazar  <vlad.la...@arm.com>

        * haifa-sched.c (rank_for_schedule): Schedule by INSN_COST.
        (enum rfs_decision, rfs_str): Add RFS_COST.
diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c
index 4f0221f6f43..3095e0375b5 100644
--- a/gcc/haifa-sched.c
+++ b/gcc/haifa-sched.c
@@ -2542,7 +2542,7 @@ enum rfs_decision {
   RFS_SCHED_GROUP, RFS_PRESSURE_DELAY, RFS_PRESSURE_TICK,
   RFS_FEEDS_BACKTRACK_INSN, RFS_PRIORITY, RFS_SPECULATION,
   RFS_SCHED_RANK, RFS_LAST_INSN, RFS_PRESSURE_INDEX,
-  RFS_DEP_COUNT, RFS_TIE, RFS_FUSION, RFS_N };
+  RFS_DEP_COUNT, RFS_TIE, RFS_FUSION, RFS_COST, RFS_N };
 
 /* Corresponding strings for print outs.  */
 static const char *rfs_str[RFS_N] = {
@@ -2550,7 +2550,7 @@ static const char *rfs_str[RFS_N] = {
   "RFS_SCHED_GROUP", "RFS_PRESSURE_DELAY", "RFS_PRESSURE_TICK",
   "RFS_FEEDS_BACKTRACK_INSN", "RFS_PRIORITY", "RFS_SPECULATION",
   "RFS_SCHED_RANK", "RFS_LAST_INSN", "RFS_PRESSURE_INDEX",
-  "RFS_DEP_COUNT", "RFS_TIE", "RFS_FUSION" };
+  "RFS_DEP_COUNT", "RFS_TIE", "RFS_FUSION", "RFS_COST" };
 
 /* Statistical breakdown of rank_for_schedule decisions.  */
 struct rank_for_schedule_stats_t { unsigned stats[RFS_N]; };
@@ -2803,6 +2803,14 @@ rank_for_schedule (const void *x, const void *y)
   if (flag_sched_dep_count_heuristic && val != 0)
     return rfs_result (RFS_DEP_COUNT, val, tmp, tmp2);
 
+  /* Sort by INSN_COST rather than INSN_LUID.  This prioritises instructions
+     which take longer to execute, which in turn creates more dual-issue
+     opportunities on in-order cores that support dual issue.  */
+
+  if (INSN_COST (tmp) != INSN_COST (tmp2))
+    return rfs_result (RFS_COST, INSN_COST (tmp2) - INSN_COST (tmp),
+		       tmp, tmp2);
+
   /* If insns are equally good, sort by INSN_LUID (original insn order),
      so that we make the sort stable.  This minimizes instruction movement,
      thus minimizing sched's effect on debugging and cross-jumping.  */
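
Since the patch adds "RFS_COST" to the rfs_str print-out strings, the new
decision should show up in the rank_for_schedule statistics of the scheduler
RTL dumps.  Something along these lines (exact dump file name and required
verbosity may differ) is one way to check:

  $ gcc -O2 -c -fdump-rtl-sched2 -fsched-verbose=2 test.c
  $ grep RFS_COST test.c.*r.sched2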
