Insn seqs before sched:
.L1:
a5 = insn-1 (a0)
a6 = insn-2 (a1)
a7 = insn-3 (a7, a5)
a8 = insn-4 (a8, a6)
Jmp .L1
Insn-3 and insn-4 are REG_DEP_TRUE dependent on insn-1 and insn-2,
so insn-3 and insn-4 will be placed at the end of the ready list.
This patch also places insns that have a 0 cost dependency due to a
bypass into the highest numbered class, when the target has a
forwarding feature between DEP_PRO and DEP_CON.
If the insns end up in the same cost class under
-fsched-last-insn-heuristic, scheduling then falls through to "prefer
the insn which has more later insns that depend on it".  However, the
value returned by dep_list_size() is not suitable there, because it
counts all dependences of the insn; we need to ignore the ones that
have a 0 cost dependency due to a bypass.
With this patch and pipeline description as below:
(define_bypass 0 "insn-1, insn-2" "insn-3, insn-4")
We can get better insn seqs after sched:
.L1:
a5 = insn-1 (a0)
a7 = insn-3 (a7, a5)
a6 = insn-2 (a1)
a8 = insn-4 (a8, a6)
Jmp .L1
I have tested this on the ck860 (C-SKY arch) and the T-Head C960 (RISC-V arch).
gcc/
* haifa-sched.c (dep_list_costs): New.
(rank_for_schedule): Replace dep_list_size with dep_list_costs.
Add 0 cost dependency due to bypass on -fsched-last-insn-heuristic.
---
gcc/haifa-sched.c | 49 +++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 45 insertions(+), 4 deletions(-)
diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c
index 350178c82b8..51c6d23d3a5 100644
--- a/gcc/haifa-sched.c
+++ b/gcc/haifa-sched.c
@@ -1584,6 +1584,44 @@ dep_list_size (rtx_insn *insn, sd_list_types_def list)
return nodbgcount;
}
+/* Get the bypass cost of dependence DEP. */
+
+HAIFA_INLINE static int
+dep_cost_bypass(dep_t dep)
+{
+ if (dep == NULL)
+ return -1;
+
+ if (INSN_CODE (DEP_PRO (dep)) >= 0
+ && bypass_p (DEP_PRO (dep))
+ && recog_memoized (DEP_CON (dep)) >= 0)
+ return dep_cost (dep);
+
+ return -1;
+}
+
+/* Compute the costs of nondebug deps in list LIST for INSN. */
+
+static int
+dep_list_costs (rtx_insn *insn, sd_list_types_def list)
+{
+ sd_iterator_def sd_it;
+ dep_t dep;
+ int costs = 0;
+
+ FOR_EACH_DEP (insn, list, sd_it, dep)
+ {
+ if (!DEBUG_INSN_P (DEP_CON (dep))
+ && !DEBUG_INSN_P (DEP_PRO (dep)))
+ {
+ if (dep_cost_bypass (dep) != 0)
+ costs++;
+ }
+ }
+
+ return costs;
+}
+
bool sched_fusion;
/* Compute the priority number for INSN. */
@@ -2758,10 +2796,12 @@ rank_for_schedule (const void *x, const void *y)
1) Data dependent on last schedule insn.
2) Anti/Output dependent on last scheduled insn.
3) Independent of last scheduled insn, or has latency of one.
+ 4) Bypass of last scheduled insn, with a latency of zero.
Choose the insn from the highest numbered class if different. */
dep1 = sd_find_dep_between (last, tmp, true);
- if (dep1 == NULL || dep_cost (dep1) == 1)
+ if (dep1 == NULL || dep_cost (dep1) == 1
+ || (dep_cost_bypass (dep1) == 0))
tmp_class = 3;
else if (/* Data dependence. */
DEP_TYPE (dep1) == REG_DEP_TRUE)
@@ -2771,7 +2811,8 @@ rank_for_schedule (const void *x, const void *y)
dep2 = sd_find_dep_between (last, tmp2, true);
- if (dep2 == NULL || dep_cost (dep2) == 1)
+ if (dep2 == NULL || dep_cost (dep2) == 1
+ || (dep_cost_bypass (dep2) == 0))
tmp2_class = 3;
else if (/* Data dependence. */
DEP_TYPE (dep2) == REG_DEP_TRUE)
@@ -2795,8 +2836,8 @@ rank_for_schedule (const void *x, const void *y)
This gives the scheduler more freedom when scheduling later
instructions at the expense of added register pressure. */
- val = (dep_list_size (tmp2, SD_LIST_FORW)
- - dep_list_size (tmp, SD_LIST_FORW));
+ val = (dep_list_costs (tmp2, SD_LIST_FORW)
+ - dep_list_costs (tmp, SD_LIST_FORW));
if (flag_sched_dep_count_heuristic && val != 0)
return rfs_result (RFS_DEP_COUNT, val, tmp, tmp2);
--
2.24.3 (Apple Git-128)