Hi! The following patch implements what I've failed to do in time for 9.x, in particular, if #pragma omp simd has if clause and the argument is not constant zero (handled by the previous patch), or constant non-zero (ignored, we want normal vectorization in that case as before), this patch arranges for that information to be preserved until vectorization in the IL through the .GOMP_SIMD_LANE ifn (whether it is used for some data privatization or not) argument and during vectorization makes sure we version such loop with that runtime condition (either solely or together with say runtime checks for aliasing, alignment etc.).
Bootstrapped/regtested on x86_64-linux and i686-linux. Richard, is this approach ok with you? 2019-05-15 Jakub Jelinek <ja...@redhat.com> * omp-low.c (lower_rec_input_clauses): If OMP_CLAUSE_IF has non-constant expression, force sctx.lane and use two argument IFN_GOMP_SIMD_LANE instead of single argument. * tree-ssa-dce.c (eliminate_unnecessary_stmts): Don't DCE two argument IFN_GOMP_SIMD_LANE without lhs. * tree-vectorizer.h (struct _loop_vec_info): Add simd_if_cond member. (LOOP_VINFO_SIMD_IF_COND, LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND): Define. (LOOP_REQUIRES_VERSIONING): Or in LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND. * tree-vect-loop.c (vect_determine_vectorization_factor): Punt for IFN_GOMP_SIMD_LANE with zero second argument. Initialize LOOP_VINFO_SIMD_IF_COND from IFN_GOMP_SIMD_LANE second argument. (_loop_vec_info::_loop_vec_info): Clear simd_if_cond. * tree-vect-loop-manip.c (vect_loop_versioning): Add runtime check from simd if clause if needed. * gcc.dg/vect/vect-simd-1.c: New test. * gcc.dg/vect/vect-simd-2.c: New test. * gcc.dg/vect/vect-simd-3.c: New test. * gcc.dg/vect/vect-simd-4.c: New test. --- gcc/omp-low.c.jj 2019-05-15 14:56:19.564136934 +0200 +++ gcc/omp-low.c 2019-05-15 16:59:38.686639583 +0200 @@ -3783,6 +3783,7 @@ lower_rec_input_clauses (tree clauses, g tree simt_lane = NULL_TREE, simtrec = NULL_TREE; tree ivar = NULL_TREE, lvar = NULL_TREE, uid = NULL_TREE; gimple_seq llist[3] = { }; + tree nonconst_simd_if = NULL_TREE; copyin_seq = NULL; sctx.is_simt = is_simd && omp_find_clause (clauses, OMP_CLAUSE__SIMT_); @@ -3814,6 +3815,8 @@ lower_rec_input_clauses (tree clauses, g case OMP_CLAUSE_IF: if (integer_zerop (OMP_CLAUSE_IF_EXPR (c))) sctx.max_vf = 1; + else if (TREE_CODE (OMP_CLAUSE_IF_EXPR (c)) != INTEGER_CST) + nonconst_simd_if = OMP_CLAUSE_IF_EXPR (c); break; case OMP_CLAUSE_SIMDLEN: if (integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (c))) @@ -5190,6 +5193,17 @@ lower_rec_input_clauses (tree clauses, g if (known_eq (sctx.max_vf, 1U)) sctx.is_simt = false; + if (nonconst_simd_if) + { + if (sctx.lane == NULL_TREE) + { + sctx.idx = create_tmp_var (unsigned_type_node); + sctx.lane = create_tmp_var (unsigned_type_node); + } + /* FIXME: For now. */ + sctx.is_simt = false; + } + if (sctx.lane || sctx.is_simt) { uid = create_tmp_var (ptr_type_node, "simduid"); @@ -5219,8 +5233,9 @@ lower_rec_input_clauses (tree clauses, g } if (sctx.lane) { - gimple *g - = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 1, uid); + gimple *g = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, + 1 + (nonconst_simd_if != NULL), + uid, nonconst_simd_if); gimple_call_set_lhs (g, sctx.lane); gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (ctx->stmt)); gsi_insert_before_without_update (&gsi, g, GSI_SAME_STMT); --- gcc/tree-ssa-dce.c.jj 2019-05-03 09:27:00.236840002 +0200 +++ gcc/tree-ssa-dce.c 2019-05-15 17:22:31.176822544 +0200 @@ -1328,12 +1328,16 @@ eliminate_unnecessary_stmts (void) update_stmt (stmt); release_ssa_name (name); - /* GOMP_SIMD_LANE or ASAN_POISON without lhs is not - needed. */ + /* GOMP_SIMD_LANE (unless two argument) or ASAN_POISON + without lhs is not needed. */ if (gimple_call_internal_p (stmt)) switch (gimple_call_internal_fn (stmt)) { case IFN_GOMP_SIMD_LANE: + if (gimple_call_num_args (stmt) >= 2 + && !integer_nonzerop (gimple_call_arg (stmt, 1))) + break; + /* FALLTHRU */ case IFN_ASAN_POISON: remove_dead_stmt (&gsi, bb, to_remove_edges); break; --- gcc/tree-vectorizer.h.jj 2019-04-10 14:19:14.946896353 +0200 +++ gcc/tree-vectorizer.h 2019-05-15 18:26:16.176917741 +0200 @@ -428,6 +428,13 @@ typedef struct _loop_vec_info : public v loops. */ tree mask_compare_type; + /* For #pragma omp simd if (x) loops the x expression. If constant 0, + the loop should not be vectorized, if constant non-zero, simd_if_cond + shouldn't be set and loop vectorized normally, if SSA_NAME, the loop + should be versioned on that condition, using scalar loop if the condition + is false and vectorized loop otherwise. */ + tree simd_if_cond; + /* Unknown DRs according to which loop was peeled. */ struct dr_vec_info *unaligned_dr; @@ -591,6 +598,7 @@ typedef struct _loop_vec_info : public v #define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec #define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) (L)->single_scalar_iteration_cost #define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info +#define LOOP_VINFO_SIMD_IF_COND(L) (L)->simd_if_cond #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ ((L)->may_misalign_stmts.length () > 0) @@ -600,10 +608,13 @@ typedef struct _loop_vec_info : public v || (L)->lower_bounds.length () > 0) #define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L) \ (LOOP_VINFO_NITERS_ASSUMPTIONS (L)) +#define LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND(L) \ + (LOOP_VINFO_SIMD_IF_COND (L)) #define LOOP_REQUIRES_VERSIONING(L) \ (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (L) \ || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (L) \ - || LOOP_REQUIRES_VERSIONING_FOR_NITERS (L)) + || LOOP_REQUIRES_VERSIONING_FOR_NITERS (L) \ + || LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (L)) #define LOOP_VINFO_NITERS_KNOWN_P(L) \ (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0) --- gcc/tree-vect-loop.c.jj 2019-05-08 09:18:31.924685516 +0200 +++ gcc/tree-vect-loop.c 2019-05-15 18:53:59.257068299 +0200 @@ -357,6 +357,32 @@ vect_determine_vectorization_factor (loo &mask_producers); if (!res) return res; + if (loop->simduid) + { + gimple *g = gsi_stmt (si); + /* If .GOMP_SIMD_LANE call for the current loop has 2 arguments, + the second argument is the #pragma omp simd if (x) condition, + when 0, loop shouldn't be vectorized, when non-zero constant, + it should be vectorized normally, otherwise versioned with + vectorized loop done if the condition is non-zero at + runtime. */ + if (is_gimple_call (g) + && gimple_call_internal_p (g) + && gimple_call_internal_fn (g) == IFN_GOMP_SIMD_LANE + && gimple_call_num_args (g) >= 2 + && TREE_CODE (gimple_call_arg (g, 0)) == SSA_NAME + && (loop->simduid + == SSA_NAME_VAR (gimple_call_arg (g, 0)))) + { + tree arg = gimple_call_arg (g, 1); + if (integer_zerop (arg)) + return opt_result::failure_at (g, + "not vectorized: " + "simd if(0)\n"); + if (TREE_CODE (arg) == SSA_NAME) + LOOP_VINFO_SIMD_IF_COND (loop_vinfo) = arg; + } + } } } @@ -819,6 +845,7 @@ _loop_vec_info::_loop_vec_info (struct l max_vectorization_factor (0), mask_skip_niters (NULL_TREE), mask_compare_type (NULL_TREE), + simd_if_cond (NULL_TREE), unaligned_dr (NULL), peeling_for_alignment (0), ptr_mask (0), --- gcc/tree-vect-loop-manip.c.jj 2019-03-11 13:43:47.361455501 +0100 +++ gcc/tree-vect-loop-manip.c 2019-05-15 18:36:21.068725974 +0200 @@ -3009,6 +3009,8 @@ vect_loop_versioning (loop_vec_info loop bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo); bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo); bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo); + tree version_simd_if_cond + = LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo); if (check_profitability) cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters, @@ -3044,6 +3046,32 @@ vect_loop_versioning (loop_vec_info loop vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr); } + if (version_simd_if_cond) + { + gcc_assert (TREE_CODE (version_simd_if_cond) == SSA_NAME); + gcc_assert (dom_info_available_p (CDI_DOMINATORS)); + if (basic_block bb + = gimple_bb (SSA_NAME_DEF_STMT (version_simd_if_cond))) + { + if (!dominated_by_p (CDI_DOMINATORS, loop->header, bb) + || (scalar_loop + && !dominated_by_p (CDI_DOMINATORS, scalar_loop->header, + bb))) + version_simd_if_cond = boolean_false_node; + } + tree zero = build_int_cst (TREE_TYPE (version_simd_if_cond), 0); + tree c = fold_build2 (NE_EXPR, boolean_type_node, + version_simd_if_cond, zero); + if (cond_expr) + cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, + c, cond_expr); + else + cond_expr = c; + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "created versioning for simd if condition check.\n"); + } + cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr), &gimplify_stmt_list, is_gimple_condexpr, NULL_TREE); --- gcc/testsuite/gcc.dg/vect/vect-simd-1.c.jj 2019-05-15 18:45:09.779910995 +0200 +++ gcc/testsuite/gcc.dg/vect/vect-simd-1.c 2019-05-15 18:55:49.006235391 +0200 @@ -0,0 +1,64 @@ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#include "tree-vect.h" + +#define N 1024 +int a[N]; +int x; + +__attribute__((noipa)) int +bar (void) +{ + return x; +} + +__attribute__((noipa)) void +foo (void) +{ + #pragma omp simd if (bar ()) + for (int i = 0; i < N; ++i) + a[i] = a[i] + 1; +} + +__attribute__((noipa)) void +baz (void) +{ + int c = 0; + #pragma omp simd if (c) + for (int i = 0; i < N; ++i) + a[i] = a[i] + 1; +} + +__attribute__((noipa)) void +qux (void) +{ + int c = 1; + #pragma omp simd if (c) + for (int i = 0; i < N; ++i) + a[i] = a[i] + 1; +} + +int +main () +{ + check_vect (); + foo (); + for (int i = 0; i < N; ++i) + if (a[i] != 1) + abort (); + x = 1; + foo (); + for (int i = 0; i < N; ++i) + if (a[i] != 2) + abort (); + baz (); + for (int i = 0; i < N; ++i) + if (a[i] != 3) + abort (); + qux (); + for (int i = 0; i < N; ++i) + if (a[i] != 4) + abort (); + return 0; +} --- gcc/testsuite/gcc.dg/vect/vect-simd-2.c.jj 2019-05-15 18:56:43.000333651 +0200 +++ gcc/testsuite/gcc.dg/vect/vect-simd-2.c 2019-05-15 18:59:13.692816968 +0200 @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-fopenmp-simd" } */ + +#define N 1024 +int a[N]; +int bar (void); + +void +foo (void) +{ + #pragma omp simd if (bar ()) + for (int i = 0; i < N; ++i) + a[i] = a[i] + 1; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "created versioning for simd if condition check" 1 "vect" } } */ --- gcc/testsuite/gcc.dg/vect/vect-simd-3.c.jj 2019-05-15 18:59:21.865680474 +0200 +++ gcc/testsuite/gcc.dg/vect/vect-simd-3.c 2019-05-15 18:59:56.381104037 +0200 @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-fopenmp-simd" } */ + +#define N 1024 +int a[N]; + +void +foo (void) +{ + int c = 0; + #pragma omp simd if (c) + for (int i = 0; i < N; ++i) + a[i] = a[i] + 1; +} + +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */ --- gcc/testsuite/gcc.dg/vect/vect-simd-4.c.jj 2019-05-15 18:59:51.550184714 +0200 +++ gcc/testsuite/gcc.dg/vect/vect-simd-4.c 2019-05-15 19:00:35.752446509 +0200 @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-fopenmp-simd" } */ + +#define N 1024 +int a[N]; + +void +foo (void) +{ + int c = 1; + #pragma omp simd if (c) + for (int i = 0; i < N; ++i) + a[i] = a[i] + 1; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-not "created versioning for simd if condition check" "vect" } } */ Jakub