Hi!

The following patch implements what I've failed to do in time for 9.x,
in particular, if #pragma omp simd has if clause and the argument is
not constant zero (handled by the previous patch), or constant non-zero
(ignored, we want normal vectorization in that case as before), this patch
arranges for that information to be preserved until vectorization in the IL
through the .GOMP_SIMD_LANE ifn (whether it is used for some data
privatization or not) argument and during vectorization makes sure we
version such loop with that runtime condition (either solely or together
with say runtime checks for aliasing, alignment etc.).

Bootstrapped/regtested on x86_64-linux and i686-linux.  Richard, is this
approach ok with you?

2019-05-15  Jakub Jelinek  <ja...@redhat.com>

        * omp-low.c (lower_rec_input_clauses): If OMP_CLAUSE_IF
        has non-constant expression, force sctx.lane and use two
        argument IFN_GOMP_SIMD_LANE instead of single argument.
        * tree-ssa-dce.c (eliminate_unnecessary_stmts): Don't DCE
        two argument IFN_GOMP_SIMD_LANE without lhs.
        * tree-vectorizer.h (struct _loop_vec_info): Add simd_if_cond
        member.
        (LOOP_VINFO_SIMD_IF_COND, LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND):
        Define.
        (LOOP_REQUIRES_VERSIONING): Or in
        LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND.
        * tree-vect-loop.c (vect_determine_vectorization_factor): Punt
        for IFN_GOMP_SIMD_LANE with zero second argument.  Initialize
        LOOP_VINFO_SIMD_IF_COND from IFN_GOMP_SIMD_LANE second argument.
        (_loop_vec_info::_loop_vec_info): Clear simd_if_cond.
        * tree-vect-loop-manip.c (vect_loop_versioning): Add runtime check
        from simd if clause if needed.

        * gcc.dg/vect/vect-simd-1.c: New test.
        * gcc.dg/vect/vect-simd-2.c: New test.
        * gcc.dg/vect/vect-simd-3.c: New test.
        * gcc.dg/vect/vect-simd-4.c: New test.

--- gcc/omp-low.c.jj    2019-05-15 14:56:19.564136934 +0200
+++ gcc/omp-low.c       2019-05-15 16:59:38.686639583 +0200
@@ -3783,6 +3783,7 @@ lower_rec_input_clauses (tree clauses, g
   tree simt_lane = NULL_TREE, simtrec = NULL_TREE;
   tree ivar = NULL_TREE, lvar = NULL_TREE, uid = NULL_TREE;
   gimple_seq llist[3] = { };
+  tree nonconst_simd_if = NULL_TREE;
 
   copyin_seq = NULL;
   sctx.is_simt = is_simd && omp_find_clause (clauses, OMP_CLAUSE__SIMT_);
@@ -3814,6 +3815,8 @@ lower_rec_input_clauses (tree clauses, g
        case OMP_CLAUSE_IF:
          if (integer_zerop (OMP_CLAUSE_IF_EXPR (c)))
            sctx.max_vf = 1;
+         else if (TREE_CODE (OMP_CLAUSE_IF_EXPR (c)) != INTEGER_CST)
+           nonconst_simd_if = OMP_CLAUSE_IF_EXPR (c);
          break;
         case OMP_CLAUSE_SIMDLEN:
          if (integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (c)))
@@ -5190,6 +5193,17 @@ lower_rec_input_clauses (tree clauses, g
   if (known_eq (sctx.max_vf, 1U))
     sctx.is_simt = false;
 
+  if (nonconst_simd_if)
+    {
+      if (sctx.lane == NULL_TREE)
+       {
+         sctx.idx = create_tmp_var (unsigned_type_node);
+         sctx.lane = create_tmp_var (unsigned_type_node);
+       }
+      /* FIXME: For now.  */
+      sctx.is_simt = false;
+    }
+
   if (sctx.lane || sctx.is_simt)
     {
       uid = create_tmp_var (ptr_type_node, "simduid");
@@ -5219,8 +5233,9 @@ lower_rec_input_clauses (tree clauses, g
     }
   if (sctx.lane)
     {
-      gimple *g
-       = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 1, uid);
+      gimple *g = gimple_build_call_internal (IFN_GOMP_SIMD_LANE,
+                                             1 + (nonconst_simd_if != NULL),
+                                             uid, nonconst_simd_if);
       gimple_call_set_lhs (g, sctx.lane);
       gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (ctx->stmt));
       gsi_insert_before_without_update (&gsi, g, GSI_SAME_STMT);
--- gcc/tree-ssa-dce.c.jj       2019-05-03 09:27:00.236840002 +0200
+++ gcc/tree-ssa-dce.c  2019-05-15 17:22:31.176822544 +0200
@@ -1328,12 +1328,16 @@ eliminate_unnecessary_stmts (void)
                  update_stmt (stmt);
                  release_ssa_name (name);
 
-                 /* GOMP_SIMD_LANE or ASAN_POISON without lhs is not
-                    needed.  */
+                 /* GOMP_SIMD_LANE (unless two argument) or ASAN_POISON
+                    without lhs is not needed.  */
                  if (gimple_call_internal_p (stmt))
                    switch (gimple_call_internal_fn (stmt))
                      {
                      case IFN_GOMP_SIMD_LANE:
+                       if (gimple_call_num_args (stmt) >= 2
+                           && !integer_nonzerop (gimple_call_arg (stmt, 1)))
+                         break;
+                       /* FALLTHRU */
                      case IFN_ASAN_POISON:
                        remove_dead_stmt (&gsi, bb, to_remove_edges);
                        break;
--- gcc/tree-vectorizer.h.jj    2019-04-10 14:19:14.946896353 +0200
+++ gcc/tree-vectorizer.h       2019-05-15 18:26:16.176917741 +0200
@@ -428,6 +428,13 @@ typedef struct _loop_vec_info : public v
      loops.  */
   tree mask_compare_type;
 
+  /* For #pragma omp simd if (x) loops the x expression.  If constant 0,
+     the loop should not be vectorized, if constant non-zero, simd_if_cond
+     shouldn't be set and loop vectorized normally, if SSA_NAME, the loop
+     should be versioned on that condition, using scalar loop if the condition
+     is false and vectorized loop otherwise.  */
+  tree simd_if_cond;
+
   /* Unknown DRs according to which loop was peeled.  */
   struct dr_vec_info *unaligned_dr;
 
@@ -591,6 +598,7 @@ typedef struct _loop_vec_info : public v
 #define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec
 #define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) 
(L)->single_scalar_iteration_cost
 #define LOOP_VINFO_ORIG_LOOP_INFO(L)       (L)->orig_loop_info
+#define LOOP_VINFO_SIMD_IF_COND(L)         (L)->simd_if_cond
 
 #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L)      \
   ((L)->may_misalign_stmts.length () > 0)
@@ -600,10 +608,13 @@ typedef struct _loop_vec_info : public v
    || (L)->lower_bounds.length () > 0)
 #define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L)         \
   (LOOP_VINFO_NITERS_ASSUMPTIONS (L))
+#define LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND(L)   \
+  (LOOP_VINFO_SIMD_IF_COND (L))
 #define LOOP_REQUIRES_VERSIONING(L)                    \
   (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (L)          \
    || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (L)           \
-   || LOOP_REQUIRES_VERSIONING_FOR_NITERS (L))
+   || LOOP_REQUIRES_VERSIONING_FOR_NITERS (L)          \
+   || LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (L))
 
 #define LOOP_VINFO_NITERS_KNOWN_P(L)          \
   (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0)
--- gcc/tree-vect-loop.c.jj     2019-05-08 09:18:31.924685516 +0200
+++ gcc/tree-vect-loop.c        2019-05-15 18:53:59.257068299 +0200
@@ -357,6 +357,32 @@ vect_determine_vectorization_factor (loo
                                          &mask_producers);
          if (!res)
            return res;
+         if (loop->simduid)
+           {
+             gimple *g = gsi_stmt (si);
+             /* If .GOMP_SIMD_LANE call for the current loop has 2 arguments,
+                the second argument is the #pragma omp simd if (x) condition,
+                when 0, loop shouldn't be vectorized, when non-zero constant,
+                it should be vectorized normally, otherwise versioned with
+                vectorized loop done if the condition is non-zero at
+                runtime.  */
+             if (is_gimple_call (g)
+                 && gimple_call_internal_p (g)
+                 && gimple_call_internal_fn (g) == IFN_GOMP_SIMD_LANE
+                 && gimple_call_num_args (g) >= 2
+                 && TREE_CODE (gimple_call_arg (g, 0)) == SSA_NAME
+                 && (loop->simduid
+                     == SSA_NAME_VAR (gimple_call_arg (g, 0))))
+               {
+                 tree arg = gimple_call_arg (g, 1);
+                 if (integer_zerop (arg))
+                   return opt_result::failure_at (g,
+                                                  "not vectorized: "
+                                                  "simd if(0)\n");
+                 if (TREE_CODE (arg) == SSA_NAME)
+                   LOOP_VINFO_SIMD_IF_COND (loop_vinfo) = arg;
+               }
+           }
         }
     }
 
@@ -819,6 +845,7 @@ _loop_vec_info::_loop_vec_info (struct l
     max_vectorization_factor (0),
     mask_skip_niters (NULL_TREE),
     mask_compare_type (NULL_TREE),
+    simd_if_cond (NULL_TREE),
     unaligned_dr (NULL),
     peeling_for_alignment (0),
     ptr_mask (0),
--- gcc/tree-vect-loop-manip.c.jj       2019-03-11 13:43:47.361455501 +0100
+++ gcc/tree-vect-loop-manip.c  2019-05-15 18:36:21.068725974 +0200
@@ -3009,6 +3009,8 @@ vect_loop_versioning (loop_vec_info loop
   bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
   bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
   bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
+  tree version_simd_if_cond
+    = LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo);
 
   if (check_profitability)
     cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
@@ -3044,6 +3046,32 @@ vect_loop_versioning (loop_vec_info loop
       vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
     }
 
+  if (version_simd_if_cond)
+    {
+      gcc_assert (TREE_CODE (version_simd_if_cond) == SSA_NAME);
+      gcc_assert (dom_info_available_p (CDI_DOMINATORS));
+      if (basic_block bb
+         = gimple_bb (SSA_NAME_DEF_STMT (version_simd_if_cond)))
+       {
+         if (!dominated_by_p (CDI_DOMINATORS, loop->header, bb)
+             || (scalar_loop
+                 && !dominated_by_p (CDI_DOMINATORS, scalar_loop->header,
+                                     bb)))
+           version_simd_if_cond = boolean_false_node;
+       }
+      tree zero = build_int_cst (TREE_TYPE (version_simd_if_cond), 0);
+      tree c = fold_build2 (NE_EXPR, boolean_type_node,
+                           version_simd_if_cond, zero);
+      if (cond_expr)
+        cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
+                                c, cond_expr);
+      else
+        cond_expr = c;
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_NOTE, vect_location,
+                        "created versioning for simd if condition check.\n");
+    }
+
   cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
                                      &gimplify_stmt_list,
                                      is_gimple_condexpr, NULL_TREE);
--- gcc/testsuite/gcc.dg/vect/vect-simd-1.c.jj  2019-05-15 18:45:09.779910995 
+0200
+++ gcc/testsuite/gcc.dg/vect/vect-simd-1.c     2019-05-15 18:55:49.006235391 
+0200
@@ -0,0 +1,64 @@
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#include "tree-vect.h"
+
+#define N 1024
+int a[N];
+int x;
+
+__attribute__((noipa)) int
+bar (void)
+{
+  return x;
+}
+
+__attribute__((noipa)) void
+foo (void)
+{
+  #pragma omp simd if (bar ())
+  for (int i = 0; i < N; ++i)
+    a[i] = a[i] + 1;
+}
+
+__attribute__((noipa)) void
+baz (void)
+{
+  int c = 0;
+  #pragma omp simd if (c)
+  for (int i = 0; i < N; ++i)
+    a[i] = a[i] + 1;
+}
+
+__attribute__((noipa)) void
+qux (void)
+{
+  int c = 1;
+  #pragma omp simd if (c)
+  for (int i = 0; i < N; ++i)
+    a[i] = a[i] + 1;
+}
+
+int
+main ()
+{
+  check_vect ();
+  foo ();
+  for (int i = 0; i < N; ++i)
+    if (a[i] != 1)
+      abort ();
+  x = 1;
+  foo ();
+  for (int i = 0; i < N; ++i)
+    if (a[i] != 2)
+      abort ();
+  baz ();
+  for (int i = 0; i < N; ++i)
+    if (a[i] != 3)
+      abort ();
+  qux ();
+  for (int i = 0; i < N; ++i)
+    if (a[i] != 4)
+      abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.dg/vect/vect-simd-2.c.jj  2019-05-15 18:56:43.000333651 
+0200
+++ gcc/testsuite/gcc.dg/vect/vect-simd-2.c     2019-05-15 18:59:13.692816968 
+0200
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-fopenmp-simd" } */
+
+#define N 1024
+int a[N];
+int bar (void);
+
+void
+foo (void)
+{
+  #pragma omp simd if (bar ())
+  for (int i = 0; i < N; ++i)
+    a[i] = a[i] + 1;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "created versioning for simd if condition 
check" 1 "vect" } } */
--- gcc/testsuite/gcc.dg/vect/vect-simd-3.c.jj  2019-05-15 18:59:21.865680474 
+0200
+++ gcc/testsuite/gcc.dg/vect/vect-simd-3.c     2019-05-15 18:59:56.381104037 
+0200
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-fopenmp-simd" } */
+
+#define N 1024
+int a[N];
+
+void
+foo (void)
+{
+  int c = 0;
+  #pragma omp simd if (c)
+  for (int i = 0; i < N; ++i)
+    a[i] = a[i] + 1;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
--- gcc/testsuite/gcc.dg/vect/vect-simd-4.c.jj  2019-05-15 18:59:51.550184714 
+0200
+++ gcc/testsuite/gcc.dg/vect/vect-simd-4.c     2019-05-15 19:00:35.752446509 
+0200
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-fopenmp-simd" } */
+
+#define N 1024
+int a[N];
+
+void
+foo (void)
+{
+  int c = 1;
+  #pragma omp simd if (c)
+  for (int i = 0; i < N; ++i)
+    a[i] = a[i] + 1;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "created versioning for simd if condition 
check" "vect" } } */

        Jakub

Reply via email to