Hi!

Unlike LOOP_VECTORIZED ifns, LOOP_DIST_ALIAS is added by the ldist pass
and needs to be maintained until the vectorizer, which means it also has
to survive parloops, which runs in between.  Earlier I added code to update
or drop orig_loop_num during move_sese_region_to_fn, but that is not
sufficient.  If we move the whole pair of loops together with the associated
LOOP_DIST_ALIAS call into the outlined loopfn, we need to update the call's
first argument, as orig_loop_num is likely changing.  If the whole triplet
(two loops with orig_loop_num and LOOP_DIST_ALIAS with the same first
argument) stays in the parent function, we don't need to adjust it.  In all
other cases, this patch folds the LOOP_DIST_ALIAS ifn to its second argument,
like the vectorizer does if it fails to vectorize the loop.

Bootstrapped/regtested on x86_64-linux, i686-linux, powerpc64le-linux,
bootstrapped on powerpc64-linux, regtest there pending.  Ok for trunk?

2017-12-11  Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/83359
        * tree-cfg.h (fold_loop_internal_call): Declare.
        * tree-vectorizer.c (fold_loop_internal_call): Moved to ...
        * tree-cfg.c (fold_loop_internal_call): ... here.  No longer static.
        (find_loop_dist_alias): New function.
        (move_sese_region_to_fn): If any dloop->orig_loop_num value is
        updated, also adjust any corresponding LOOP_DIST_ALIAS internal
        calls.

        * gcc.dg/graphite/pr83359.c: New test.

--- gcc/tree-cfg.h.jj   2017-09-05 23:28:14.000000000 +0200
+++ gcc/tree-cfg.h      2017-12-11 12:35:24.284777550 +0100
@@ -77,6 +77,7 @@ extern void gather_blocks_in_sese_region
                                          vec<basic_block> *bbs_p);
 extern void verify_sese (basic_block, basic_block, vec<basic_block> *);
 extern bool gather_ssa_name_hash_map_from (tree const &, tree const &, void *);
+extern void fold_loop_internal_call (gimple *, tree);
 extern basic_block move_sese_region_to_fn (struct function *, basic_block,
                                           basic_block, tree);
 extern void dump_function_to_file (tree, FILE *, dump_flags_t);
--- gcc/tree-vectorizer.c.jj    2017-09-01 09:26:37.000000000 +0200
+++ gcc/tree-vectorizer.c       2017-12-11 12:33:41.436055580 +0100
@@ -464,27 +464,6 @@ vect_loop_vectorized_call (struct loop *
   return NULL;
 }
 
-/* Fold loop internal call G like IFN_LOOP_VECTORIZED/IFN_LOOP_DIST_ALIAS
-   to VALUE and update any immediate uses of it's LHS.  */
-
-static void
-fold_loop_internal_call (gimple *g, tree value)
-{
-  tree lhs = gimple_call_lhs (g);
-  use_operand_p use_p;
-  imm_use_iterator iter;
-  gimple *use_stmt;
-  gimple_stmt_iterator gsi = gsi_for_stmt (g);
-
-  update_call_from_tree (&gsi, value);
-  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
-    {
-      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
-       SET_USE (use_p, value);
-      update_stmt (use_stmt);
-    }
-}
-
 /* If LOOP has been versioned during loop distribution, return the gurading
    internal call.  */
 
--- gcc/tree-cfg.c.jj   2017-12-07 18:05:30.000000000 +0100
+++ gcc/tree-cfg.c      2017-12-11 12:34:55.054140750 +0100
@@ -7337,6 +7337,47 @@ gather_ssa_name_hash_map_from (tree cons
   return true;
 }
 
+/* Return the LOOP_DIST_ALIAS call just before the cond ending BB, or NULL.  */
+
+static gimple *
+find_loop_dist_alias (basic_block bb)
+{
+  gimple *g = last_stmt (bb);
+  if (g == NULL || gimple_code (g) != GIMPLE_COND)
+    return NULL;
+
+  gimple_stmt_iterator gsi = gsi_for_stmt (g);
+  gsi_prev (&gsi);
+  if (gsi_end_p (gsi))
+    return NULL;
+
+  g = gsi_stmt (gsi);
+  if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS))
+    return g;
+  return NULL;
+}
+
+/* Fold loop internal call G like IFN_LOOP_VECTORIZED/IFN_LOOP_DIST_ALIAS
+   to VALUE and update any immediate uses of its LHS.  */
+
+void
+fold_loop_internal_call (gimple *g, tree value)
+{
+  tree lhs = gimple_call_lhs (g);
+  use_operand_p use_p;
+  imm_use_iterator iter;
+  gimple *use_stmt;
+  gimple_stmt_iterator gsi = gsi_for_stmt (g);
+
+  update_call_from_tree (&gsi, value);
+  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
+    {
+      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+       SET_USE (use_p, value);
+      update_stmt (use_stmt);
+    }
+}
+
 /* Move a single-entry, single-exit region delimited by ENTRY_BB and
    EXIT_BB to function DEST_CFUN.  The whole region is replaced by a
    single basic block in the original CFG and the new basic block is
@@ -7510,7 +7551,6 @@ move_sese_region_to_fn (struct function
          }
     }
 
-
   /* Adjust the number of blocks in the tree root of the outlined part.  */
   get_loop (dest_cfun, 0)->num_nodes = bbs.length () + 2;
 
@@ -7521,19 +7561,77 @@ move_sese_region_to_fn (struct function
   /* Fix up orig_loop_num.  If the block referenced in it has been moved
      to dest_cfun, update orig_loop_num field, otherwise clear it.  */
   struct loop *dloop;
+  signed char *moved_orig_loop_num = NULL;
   FOR_EACH_LOOP_FN (dest_cfun, dloop, 0)
     if (dloop->orig_loop_num)
       {
+       if (moved_orig_loop_num == NULL)
+         moved_orig_loop_num
+           = XCNEWVEC (signed char, vec_safe_length (larray));
        if ((*larray)[dloop->orig_loop_num] != NULL
            && get_loop (saved_cfun, dloop->orig_loop_num) == NULL)
-         dloop->orig_loop_num = (*larray)[dloop->orig_loop_num]->num;
+         {
+           if (moved_orig_loop_num[dloop->orig_loop_num] >= 0
+               && moved_orig_loop_num[dloop->orig_loop_num] < 2)
+             moved_orig_loop_num[dloop->orig_loop_num]++;
+           dloop->orig_loop_num = (*larray)[dloop->orig_loop_num]->num;
+         }
        else
-         dloop->orig_loop_num = 0;
+         {
+           moved_orig_loop_num[dloop->orig_loop_num] = -1;
+           dloop->orig_loop_num = 0;
+         }
       }
-  ggc_free (larray);
-
   pop_cfun ();
 
+  if (moved_orig_loop_num)
+    {
+      FOR_EACH_VEC_ELT (bbs, i, bb)
+       {
+         gimple *g = find_loop_dist_alias (bb);
+         if (g == NULL)
+           continue;
+
+         int orig_loop_num = tree_to_shwi (gimple_call_arg (g, 0));
+         gcc_assert (orig_loop_num
+                     && (unsigned) orig_loop_num < vec_safe_length (larray));
+         if (moved_orig_loop_num[orig_loop_num] == 2)
+           {
+             /* Both loops with this orig_loop_num and the LOOP_DIST_ALIAS
+                call are moved to dest_cfun: update the first argument.
+                Index by orig_loop_num; dloop belongs to the walk above.  */
+             gcc_assert ((*larray)[orig_loop_num] != NULL
+                         && (get_loop (saved_cfun, orig_loop_num)
+                             == NULL));
+             tree t = build_int_cst (integer_type_node,
+                                     (*larray)[orig_loop_num]->num);
+             gimple_call_set_arg (g, 0, t);
+             update_stmt (g);
+             /* Make sure the following loop will not update it.  */
+             moved_orig_loop_num[orig_loop_num] = 0;
+           }
+         else
+           /* Otherwise at least one of the loops stayed in saved_cfun.
+              Remove the LOOP_DIST_ALIAS call.  */
+           fold_loop_internal_call (g, gimple_call_arg (g, 1));
+       }
+      FOR_EACH_BB_FN (bb, saved_cfun)
+       {
+         gimple *g = find_loop_dist_alias (bb);
+         if (g == NULL)
+           continue;
+         int orig_loop_num = tree_to_shwi (gimple_call_arg (g, 0));
+         gcc_assert (orig_loop_num
+                     && (unsigned) orig_loop_num < vec_safe_length (larray));
+         if (moved_orig_loop_num[orig_loop_num])
+           /* LOOP_DIST_ALIAS call remained in saved_cfun, if at least one
+              of the corresponding loops was moved, remove it.  */
+           fold_loop_internal_call (g, gimple_call_arg (g, 1));
+       }
+      XDELETEVEC (moved_orig_loop_num);
+    }
+  ggc_free (larray);
+
   /* Move blocks from BBS into DEST_CFUN.  */
   gcc_assert (bbs.length () >= 2);
   after = dest_cfun->cfg->x_entry_block_ptr;
--- gcc/testsuite/gcc.dg/graphite/pr83359.c.jj  2017-12-11 11:43:10.433737382 +0100
+++ gcc/testsuite/gcc.dg/graphite/pr83359.c     2017-12-11 11:43:01.000000000 +0100
@@ -0,0 +1,40 @@
+/* PR tree-optimization/83359 */
+/* { dg-do compile { target pthread } } */
+/* { dg-options "-O3 -floop-parallelize-all -ftree-parallelize-loops=2" } */
+
+int a, b, c;
+
+void
+foo (int x, int y)
+{
+  int *d = &a;
+  int *e = &x;
+
+  for (a = 0; a < 1; ++a)
+    d = &x;
+
+  while (b < 10)
+    {
+      for (b = 0; b < 1; ++b)
+        if (x == 0)
+          while (x < 1)
+            ++x;
+        else
+          while (x < 1)
+            {
+              d = &y;
+              ++x;
+            }
+      ++b;
+    }
+
+  for (;;)
+    for (c = 0; c < 2; ++c)
+      {
+        if (*d != 0)
+          a = *e;
+
+        e = &b;
+        y = 0;
+      }
+}

        Jakub

Reply via email to