On 12/7/2021 10:54 PM, Xionghu Luo via Gcc-patches wrote:
In tree-ssa-loop-split.c, split_loop and split_loop_on_cond does two
kind of split. split_loop only works for single loop and insert edge at
exit when split, while split_loop_on_cond is not limited to single loop
and insert edge at latch when split.  Both split behavior should consider
loop count and probability update.  For split_loop, loop split condition
is moved in front of loop1 and loop2; But split_loop_on_cond moves the
condition between loop1 and loop2, this patch does:
  1) profile count proportion for both original loop and copied loop
without dropping down the true branch's count;
  2) probability update in the two loops and between the two loops.

Regression tested pass, OK for master?

Changes diff for split_loop and split_loop_on_cond cases:

1) diff base/loop-split.c.151t.lsplit patched/loop-split.c.152t.lsplit
...
    <bb 2> [local count: 118111600]:
    if (beg_5(D) < end_8(D))
      goto <bb 14>; [89.00%]
    else
      goto <bb 6>; [11.00%]

    <bb 14> [local count: 105119324]:
    if (beg2_6(D) < c_9(D))
-    goto <bb 15>; [100.00%]
+    goto <bb 15>; [33.00%]
    else
-    goto <bb 16>; [100.00%]
+    goto <bb 16>; [67.00%]

-  <bb 15> [local count: 105119324]:
+  <bb 15> [local count: 34689377]:
    _25 = beg_5(D) + 1;
    _26 = end_8(D) - beg_5(D);
    _27 = beg2_6(D) + _26;
    _28 = MIN_EXPR <c_9(D), _27>;

-  <bb 3> [local count: 955630225]:
+  <bb 3> [local count: 315357973]:
    # i_16 = PHI <i_11(8), beg_5(D)(15)>
    # j_17 = PHI <j_12(8), beg2_6(D)(15)>
    printf ("a: %d %d\n", i_16, j_17);
    i_11 = i_16 + 1;
    j_12 = j_17 + 1;
    if (j_12 < _28)
-    goto <bb 8>; [89.00%]
+    goto <bb 8>; [29.37%]
    else
-    goto <bb 17>; [11.00%]
+    goto <bb 17>; [70.63%]

-  <bb 8> [local count: 850510901]:
+  <bb 8> [local count: 280668596]:
    goto <bb 3>; [100.00%]

-  <bb 16> [local count: 105119324]:
+  <bb 16> [local count: 70429947]:
    # i_22 = PHI <beg_5(D)(14), i_29(17)>
    # j_23 = PHI <beg2_6(D)(14), j_30(17)>

    <bb 10> [local count: 955630225]:
    # i_2 = PHI <i_22(16), i_20(13)>
    # j_1 = PHI <j_23(16), j_21(13)>
    i_20 = i_2 + 1;
    j_21 = j_1 + 1;
    if (end_8(D) > i_20)
-    goto <bb 13>; [89.00%]
+    goto <bb 13>; [59.63%]
    else
-    goto <bb 9>; [11.00%]
+    goto <bb 9>; [40.37%]

-  <bb 13> [local count: 850510901]:
+  <bb 13> [local count: 569842305]:
    goto <bb 10>; [100.00%]

    <bb 17> [local count: 105119324]:
    # i_29 = PHI <i_11(3)>
    # j_30 = PHI <j_12(3)>
    if (end_8(D) > i_29)
      goto <bb 16>; [80.00%]
    else
      goto <bb 9>; [20.00%]

    <bb 9> [local count: 105119324]:

    <bb 6> [local count: 118111600]:
    return 0;

  }
    <bb 2> [local count: 118111600]:
-  if (beg_5(D) < end_8(D))
+  _1 = end_6(D) - beg_7(D);
+  j_9 = _1 + beg2_8(D);
+  if (end_6(D) > beg_7(D))
      goto <bb 14>; [89.00%]
    else
      goto <bb 6>; [11.00%]

    <bb 14> [local count: 105119324]:
-  if (beg2_6(D) < c_9(D))
-    goto <bb 15>; [100.00%]
+  if (j_9 >= c_11(D))
+    goto <bb 15>; [33.00%]
    else
-    goto <bb 16>; [100.00%]
+    goto <bb 16>; [67.00%]

-  <bb 15> [local count: 105119324]:
-  _25 = beg_5(D) + 1;
-  _26 = end_8(D) - beg_5(D);
-  _27 = beg2_6(D) + _26;
-  _28 = MIN_EXPR <c_9(D), _27>;
-
-  <bb 3> [local count: 955630225]:
-  # i_16 = PHI <i_11(8), beg_5(D)(15)>
-  # j_17 = PHI <j_12(8), beg2_6(D)(15)>
-  printf ("a: %d %d\n", i_16, j_17);
-  i_11 = i_16 + 1;
-  j_12 = j_17 + 1;
-  if (j_12 < _28)
-    goto <bb 8>; [89.00%]
+  <bb 15> [local count: 34689377]:
+  _27 = end_6(D) + -1;
+  _28 = beg_7(D) - end_6(D);
+  _29 = j_9 + _28;
+  _30 = _29 + 1;
+  _31 = MAX_EXPR <c_11(D), _30>;
+
+  <bb 3> [local count: 315357973]:
+  # i_18 = PHI <i_13(8), end_6(D)(15)>
+  # j_19 = PHI <j_14(8), j_9(15)>
+  printf ("a: %d %d\n", i_18, j_19);
+  i_13 = i_18 + -1;
+  j_14 = j_19 + -1;
+  if (j_14 >= _31)
+    goto <bb 8>; [29.37%]
    else
-    goto <bb 17>; [11.00%]
+    goto <bb 17>; [70.63%]

-  <bb 8> [local count: 850510901]:
+  <bb 8> [local count: 280668596]:
    goto <bb 3>; [100.00%]

-  <bb 16> [local count: 105119324]:
-  # i_22 = PHI <beg_5(D)(14), i_29(17)>
-  # j_23 = PHI <beg2_6(D)(14), j_30(17)>
+  <bb 16> [local count: 70429947]:
+  # i_24 = PHI <end_6(D)(14), i_32(17)>
+  # j_25 = PHI <j_9(14), j_33(17)>

    <bb 10> [local count: 955630225]:
-  # i_2 = PHI <i_22(16), i_20(13)>
-  # j_1 = PHI <j_23(16), j_21(13)>
-  i_20 = i_2 + 1;
-  j_21 = j_1 + 1;
-  if (end_8(D) > i_20)
+  # i_3 = PHI <i_24(16), i_22(13)>
+  # j_2 = PHI <j_25(16), j_23(13)>
+  i_22 = i_3 + -1;
+  j_23 = j_2 + -1;
+  if (beg_7(D) < i_22)
      goto <bb 13>; [89.00%]
    else
      goto <bb 9>; [11.00%]

-  <bb 13> [local count: 850510901]:
+  <bb 13> [local count: 569842305]:
    goto <bb 10>; [100.00%]

    <bb 17> [local count: 105119324]:
-  # i_29 = PHI <i_11(3)>
-  # j_30 = PHI <j_12(3)>
-  if (end_8(D) > i_29)
+  # i_32 = PHI <i_13(3)>
+  # j_33 = PHI <j_14(3)>
+  if (beg_7(D) < i_32)
      goto <bb 16>; [80.00%]
    else
      goto <bb 9>; [20.00%]

    <bb 9> [local count: 105119324]:

    <bb 6> [local count: 118111600]:
    return 0;

  }

2) diff base/loop-cond-split-1.c.151t.lsplit  
patched/loop-cond-split-1.c.151t.lsplit:
...
    <bb 2> [local count: 118111600]:
    if (n_7(D) > 0)
      goto <bb 4>; [89.00%]
    else
      goto <bb 3>; [11.00%]

    <bb 3> [local count: 118111600]:
    return;

    <bb 4> [local count: 105119324]:
    pretmp_3 = ga;

-  <bb 5> [local count: 955630225]:
+  <bb 5> [local count: 315357973]:
    # i_13 = PHI <i_10(20), 0(4)>
    # prephitmp_12 = PHI <prephitmp_5(20), pretmp_3(4)>
    if (prephitmp_12 != 0)
      goto <bb 6>; [33.00%]
    else
      goto <bb 7>; [67.00%]

    <bb 6> [local count: 315357972]:
    _2 = do_something ();
    ga = _2;

-  <bb 7> [local count: 955630225]:
+  <bb 7> [local count: 315357973]:
    # prephitmp_5 = PHI <prephitmp_12(5), _2(6)>
    i_10 = inc (i_13);
    if (n_7(D) > i_10)
      goto <bb 21>; [89.00%]
    else
      goto <bb 11>; [11.00%]

    <bb 11> [local count: 105119324]:
    goto <bb 3>; [100.00%]

-  <bb 21> [local count: 850510901]:
+  <bb 21> [local count: 280668596]:
    if (prephitmp_12 != 0)
-    goto <bb 20>; [100.00%]
+    goto <bb 20>; [33.00%]
    else
-    goto <bb 19>; [INV]
+    goto <bb 19>; [67.00%]

-  <bb 20> [local count: 850510901]:
+  <bb 20> [local count: 280668596]:
    goto <bb 5>; [100.00%]

-  <bb 19> [count: 0]:
+  <bb 19> [local count: 70429947]:
    # i_23 = PHI <i_10(21)>
    # prephitmp_25 = PHI <prephitmp_5(21)>

-  <bb 12> [local count: 955630225]:
+  <bb 12> [local count: 640272252]:
    # i_15 = PHI <i_23(19), i_22(16)>
    # prephitmp_16 = PHI <prephitmp_25(19), prephitmp_16(16)>
    i_22 = inc (i_15);
    if (n_7(D) > i_22)
      goto <bb 16>; [89.00%]
    else
      goto <bb 11>; [11.00%]

-  <bb 16> [local count: 850510901]:
+  <bb 16> [local count: 569842305]:
    goto <bb 12>; [100.00%]

  }

gcc/ChangeLog:

        * tree-ssa-loop-split.c (split_loop): Fix incorrect
        profile_count and probability.
        (do_split_loop_on_cond): Likewise.
---
  gcc/tree-ssa-loop-split.c | 85 +++++++++++++++++++++++++++++++++++----
  1 file changed, 77 insertions(+), 8 deletions(-)

diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
index 3f6ad046623..33128061aab 100644
--- a/gcc/tree-ssa-loop-split.c
+++ b/gcc/tree-ssa-loop-split.c

@@ -607,6 +610,38 @@ split_loop (class loop *loop1)
        tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1));
        patch_loop_exit (loop1, guard_stmt, guard_next, newend, initial_true);
+ update_ssa (TODO_update_ssa);
+
+       /* Proportion first loop's bb counts except those dominated by true
+          branch to avoid drop 1s down.  */
+       basic_block *bbs1, *bbs2;
+       bbs1 = get_loop_body (loop1);
+       unsigned j;
+       for (j = 0; j < loop1->num_nodes; j++)
+         if (bbs1[j] == loop1->latch
+             || !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest))
+           bbs1[j]->count
+             = bbs1[j]->count.apply_probability (true_edge->probability);
+       free (bbs1);
It looks like there's two copies of this code in this patch, one in split_loop and the other in do_split_loop_on_cond.  Would it make sense to factor it out into its own little function?


+
+       /* Proportion second loop's bb counts except those dominated by false
+          branch to avoid drop 1s down.  */
+       basic_block bbi_copy = get_bb_copy (false_edge->dest);
+       bbs2 = get_loop_body (loop2);
+       for (j = 0; j < loop2->num_nodes; j++)
+         if (bbs2[j] == loop2->latch
+             || !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy))
+           bbs2[j]->count = bbs2[j]->count.apply_probability (
+             true_edge->probability.invert ());
+       free (bbs2);
Similarly for this block of code.

If those can be reasonably factored out into two helper functions to be called from split_loop and do_split_loop_on_cond, then this is OK with the refactoring.

jeff

Reply via email to