On Thu, 16 May 2019, Jakub Jelinek wrote: > On Thu, May 16, 2019 at 11:30:38AM +0200, Richard Biener wrote: > > > note_simd_array_uses indeed does walk the IL and does look at the calls, > > > but I'd need some data structure where to store the argument; we don't > > > have > > > loop_vinfo yet (we don't have it even before the loop over vector sizes), > > > adding another tree to struct loop seems undesirable from memory usage > > > POV, > > > we'd need it just for the duration between note_simd_array_uses and > > > the actual loop_vinfo creation; so would you prefer some extra hash_map > > > for > > > that? > > > > Maybe that or move it to the _loop_vec_info constructor which also > > walks over the loop body for setting UIDs and creating stmt infos? > > Good idea. So, here is an updated patch that does that and does a fatal punt > in vect_analyze_loop_2 for if (0) which means we don't even try other vector > sizes in that case. > > > OK, I see. Indeed in theory something could sink the def which I'd > > call a bug - so maybe a gcc_checking_assert that this doesn't > > happen would be nice. > > I need to compute the bb, so I've used flag_checking guarded gcc_assert if > it is ok. > > > > > build_zero_cst (TREE_TYPE (version_simd_if_cond)) > > > > > > Is that better (build_zero_cst is a wrapper that will call build_int_cst > > > with 0)? A lot of code calls build_int_cst directly. Don't care much > > > though. > > > > it's just shorter... ;) > > And this too. > > Ok for trunk if it passes bootstrap/regtest?
OK with a slight adjustment below to the dominator test Thanks, Richard. > 2019-05-16 Jakub Jelinek <ja...@redhat.com> > > * omp-low.c (lower_rec_input_clauses): If OMP_CLAUSE_IF > has non-constant expression, force sctx.lane and use two > argument IFN_GOMP_SIMD_LANE instead of single argument. > * tree-ssa-dce.c (eliminate_unnecessary_stmts): Don't DCE > two argument IFN_GOMP_SIMD_LANE without lhs. > * tree-vectorizer.h (struct _loop_vec_info): Add simd_if_cond > member. > (LOOP_VINFO_SIMD_IF_COND, LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND): > Define. > (LOOP_REQUIRES_VERSIONING): Or in > LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND. > * tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize > simd_if_cond. > (vect_analyze_loop_2): Punt if LOOP_VINFO_SIMD_IF_COND is constant 0. > * tree-vect-loop-manip.c (vect_loop_versioning): Add runtime check > from simd if clause if needed. > > * gcc.dg/vect/vect-simd-1.c: New test. > * gcc.dg/vect/vect-simd-2.c: New test. > * gcc.dg/vect/vect-simd-3.c: New test. > * gcc.dg/vect/vect-simd-4.c: New test. > > --- gcc/omp-low.c.jj 2019-05-15 23:42:16.046859954 +0200 > +++ gcc/omp-low.c 2019-05-16 15:04:41.785179634 +0200 > @@ -3783,6 +3783,7 @@ lower_rec_input_clauses (tree clauses, g > tree simt_lane = NULL_TREE, simtrec = NULL_TREE; > tree ivar = NULL_TREE, lvar = NULL_TREE, uid = NULL_TREE; > gimple_seq llist[3] = { }; > + tree nonconst_simd_if = NULL_TREE; > > copyin_seq = NULL; > sctx.is_simt = is_simd && omp_find_clause (clauses, OMP_CLAUSE__SIMT_); > @@ -3814,6 +3815,8 @@ lower_rec_input_clauses (tree clauses, g > case OMP_CLAUSE_IF: > if (integer_zerop (OMP_CLAUSE_IF_EXPR (c))) > sctx.max_vf = 1; > + else if (TREE_CODE (OMP_CLAUSE_IF_EXPR (c)) != INTEGER_CST) > + nonconst_simd_if = OMP_CLAUSE_IF_EXPR (c); > break; > case OMP_CLAUSE_SIMDLEN: > if (integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (c))) > @@ -5190,6 +5193,17 @@ lower_rec_input_clauses (tree clauses, g > if (known_eq (sctx.max_vf, 1U)) > sctx.is_simt = false; > > + if (nonconst_simd_if) > + { > + if (sctx.lane == NULL_TREE) > + { > + sctx.idx = create_tmp_var (unsigned_type_node); > + sctx.lane = create_tmp_var (unsigned_type_node); > + } > + /* FIXME: For now. */ > + sctx.is_simt = false; > + } > + > if (sctx.lane || sctx.is_simt) > { > uid = create_tmp_var (ptr_type_node, "simduid"); > @@ -5219,8 +5233,9 @@ lower_rec_input_clauses (tree clauses, g > } > if (sctx.lane) > { > - gimple *g > - = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 1, uid); > + gimple *g = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, > + 1 + (nonconst_simd_if != NULL), > + uid, nonconst_simd_if); > gimple_call_set_lhs (g, sctx.lane); > gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr > (ctx->stmt)); > gsi_insert_before_without_update (&gsi, g, GSI_SAME_STMT); > --- gcc/tree-ssa-dce.c.jj 2019-05-15 23:36:35.696258741 +0200 > +++ gcc/tree-ssa-dce.c 2019-05-16 15:04:41.786179618 +0200 > @@ -1328,12 +1328,16 @@ eliminate_unnecessary_stmts (void) > update_stmt (stmt); > release_ssa_name (name); > > - /* GOMP_SIMD_LANE or ASAN_POISON without lhs is not > - needed. */ > + /* GOMP_SIMD_LANE (unless two argument) or ASAN_POISON > + without lhs is not needed. */ > if (gimple_call_internal_p (stmt)) > switch (gimple_call_internal_fn (stmt)) > { > case IFN_GOMP_SIMD_LANE: > + if (gimple_call_num_args (stmt) >= 2 > + && !integer_nonzerop (gimple_call_arg (stmt, 1))) > + break; > + /* FALLTHRU */ > case IFN_ASAN_POISON: > remove_dead_stmt (&gsi, bb, to_remove_edges); > break; > --- gcc/tree-vectorizer.h.jj 2019-05-15 23:36:36.770241348 +0200 > +++ gcc/tree-vectorizer.h 2019-05-16 15:04:41.787179601 +0200 > @@ -428,6 +428,13 @@ typedef struct _loop_vec_info : public v > loops. */ > tree mask_compare_type; > > + /* For #pragma omp simd if (x) loops the x expression. If constant 0, > + the loop should not be vectorized, if constant non-zero, simd_if_cond > + shouldn't be set and loop vectorized normally, if SSA_NAME, the loop > + should be versioned on that condition, using scalar loop if the > condition > + is false and vectorized loop otherwise. */ > + tree simd_if_cond; > + > /* Unknown DRs according to which loop was peeled. */ > struct dr_vec_info *unaligned_dr; > > @@ -591,6 +598,7 @@ typedef struct _loop_vec_info : public v > #define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec > #define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) > (L)->single_scalar_iteration_cost > #define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info > +#define LOOP_VINFO_SIMD_IF_COND(L) (L)->simd_if_cond > > #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ > ((L)->may_misalign_stmts.length () > 0) > @@ -600,10 +608,13 @@ typedef struct _loop_vec_info : public v > || (L)->lower_bounds.length () > 0) > #define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L) \ > (LOOP_VINFO_NITERS_ASSUMPTIONS (L)) > +#define LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND(L) \ > + (LOOP_VINFO_SIMD_IF_COND (L)) > #define LOOP_REQUIRES_VERSIONING(L) \ > (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (L) \ > || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (L) \ > - || LOOP_REQUIRES_VERSIONING_FOR_NITERS (L)) > + || LOOP_REQUIRES_VERSIONING_FOR_NITERS (L) \ > + || LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (L)) > > #define LOOP_VINFO_NITERS_KNOWN_P(L) \ > (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0) > --- gcc/tree-vect-loop.c.jj 2019-05-15 23:36:36.718242191 +0200 > +++ gcc/tree-vect-loop.c 2019-05-16 15:25:17.826832201 +0200 > @@ -819,6 +819,7 @@ _loop_vec_info::_loop_vec_info (struct l > max_vectorization_factor (0), > mask_skip_niters (NULL_TREE), > mask_compare_type (NULL_TREE), > + simd_if_cond (NULL_TREE), > unaligned_dr (NULL), > peeling_for_alignment (0), > ptr_mask (0), > @@ -862,6 +863,26 @@ _loop_vec_info::_loop_vec_info (struct l > gimple *stmt = gsi_stmt (si); > gimple_set_uid (stmt, 0); > add_stmt (stmt); > + /* If .GOMP_SIMD_LANE call for the current loop has 2 arguments, the > + second argument is the #pragma omp simd if (x) condition, when 0, > + loop shouldn't be vectorized, when non-zero constant, it should > + be vectorized normally, otherwise versioned with vectorized loop > + done if the condition is non-zero at runtime. */ > + if (loop_in->simduid > + && is_gimple_call (stmt) > + && gimple_call_internal_p (stmt) > + && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE > + && gimple_call_num_args (stmt) >= 2 > + && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME > + && (loop_in->simduid > + == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))) > + { > + tree arg = gimple_call_arg (stmt, 1); > + if (integer_zerop (arg) || TREE_CODE (arg) == SSA_NAME) > + simd_if_cond = arg; > + else > + gcc_assert (integer_nonzerop (arg)); > + } > } > } > } > @@ -1769,6 +1790,11 @@ vect_analyze_loop_2 (loop_vec_info loop_ > /* The first group of checks is independent of the vector size. */ > fatal = true; > > + if (LOOP_VINFO_SIMD_IF_COND (loop_vinfo) > + && integer_zerop (LOOP_VINFO_SIMD_IF_COND (loop_vinfo))) > + return opt_result::failure_at (vect_location, > + "not vectorized: simd if(0)\n"); > + > /* Find all data references in the loop (which correspond to vdefs/vuses) > and analyze their evolution in the loop. */ > > --- gcc/tree-vect-loop-manip.c.jj 2019-05-15 23:36:36.829240393 +0200 > +++ gcc/tree-vect-loop-manip.c 2019-05-16 15:36:39.222614532 +0200 > @@ -3009,6 +3009,8 @@ vect_loop_versioning (loop_vec_info loop > bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo); > bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo); > bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo); > + tree version_simd_if_cond > + = LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo); > > if (check_profitability) > cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters, > @@ -3044,6 +3046,29 @@ vect_loop_versioning (loop_vec_info loop > vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr); > } > > + if (version_simd_if_cond) > + { > + gcc_assert (dom_info_available_p (CDI_DOMINATORS)); > + if (flag_checking) > + if (basic_block bb > + = gimple_bb (SSA_NAME_DEF_STMT (version_simd_if_cond))) > + gcc_assert (dominated_by_p (CDI_DOMINATORS, loop->header, bb) > + && (scalar_loop == NULL > + || dominated_by_p (CDI_DOMINATORS, > + scalar_loop->header, bb))); given the dominator test returns true for loop->header == bb you need to check loop->header != bb && ... here since the insertion point for the conditional is the loop preheader (or do the dominator check on the preheader). > + tree zero = build_zero_cst (TREE_TYPE (version_simd_if_cond)); > + tree c = fold_build2 (NE_EXPR, boolean_type_node, > + version_simd_if_cond, zero); > + if (cond_expr) > + cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, > + c, cond_expr); > + else > + cond_expr = c; > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_NOTE, vect_location, > + "created versioning for simd if condition check.\n"); > + } > + > cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr), > &gimplify_stmt_list, > is_gimple_condexpr, NULL_TREE); > --- gcc/testsuite/gcc.dg/vect/vect-simd-1.c.jj 2019-05-16 > 15:04:42.214172571 +0200 > +++ gcc/testsuite/gcc.dg/vect/vect-simd-1.c 2019-05-16 15:04:42.214172571 > +0200 > @@ -0,0 +1,64 @@ > +/* { dg-additional-options "-fopenmp-simd" } */ > +/* { dg-additional-options "-mavx" { target avx_runtime } } */ > + > +#include "tree-vect.h" > + > +#define N 1024 > +int a[N]; > +int x; > + > +__attribute__((noipa)) int > +bar (void) > +{ > + return x; > +} > + > +__attribute__((noipa)) void > +foo (void) > +{ > + #pragma omp simd if (bar ()) > + for (int i = 0; i < N; ++i) > + a[i] = a[i] + 1; > +} > + > +__attribute__((noipa)) void > +baz (void) > +{ > + int c = 0; > + #pragma omp simd if (c) > + for (int i = 0; i < N; ++i) > + a[i] = a[i] + 1; > +} > + > +__attribute__((noipa)) void > +qux (void) > +{ > + int c = 1; > + #pragma omp simd if (c) > + for (int i = 0; i < N; ++i) > + a[i] = a[i] + 1; > +} > + > +int > +main () > +{ > + check_vect (); > + foo (); > + for (int i = 0; i < N; ++i) > + if (a[i] != 1) > + abort (); > + x = 1; > + foo (); > + for (int i = 0; i < N; ++i) > + if (a[i] != 2) > + abort (); > + baz (); > + for (int i = 0; i < N; ++i) > + if (a[i] != 3) > + abort (); > + qux (); > + for (int i = 0; i < N; ++i) > + if (a[i] != 4) > + abort (); > + return 0; > +} > --- gcc/testsuite/gcc.dg/vect/vect-simd-2.c.jj 2019-05-16 > 15:04:42.214172571 +0200 > +++ gcc/testsuite/gcc.dg/vect/vect-simd-2.c 2019-05-16 15:04:42.214172571 > +0200 > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-additional-options "-fopenmp-simd" } */ > + > +#define N 1024 > +int a[N]; > +int bar (void); > + > +void > +foo (void) > +{ > + #pragma omp simd if (bar ()) > + for (int i = 0; i < N; ++i) > + a[i] = a[i] + 1; > +} > + > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ > +/* { dg-final { scan-tree-dump-times "created versioning for simd if > condition check" 1 "vect" } } */ > --- gcc/testsuite/gcc.dg/vect/vect-simd-3.c.jj 2019-05-16 > 15:04:42.214172571 +0200 > +++ gcc/testsuite/gcc.dg/vect/vect-simd-3.c 2019-05-16 15:04:42.214172571 > +0200 > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-additional-options "-fopenmp-simd" } */ > + > +#define N 1024 > +int a[N]; > + > +void > +foo (void) > +{ > + int c = 0; > + #pragma omp simd if (c) > + for (int i = 0; i < N; ++i) > + a[i] = a[i] + 1; > +} > + > +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */ > --- gcc/testsuite/gcc.dg/vect/vect-simd-4.c.jj 2019-05-16 > 15:04:42.220172472 +0200 > +++ gcc/testsuite/gcc.dg/vect/vect-simd-4.c 2019-05-16 15:04:42.220172472 > +0200 > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-additional-options "-fopenmp-simd" } */ > + > +#define N 1024 > +int a[N]; > + > +void > +foo (void) > +{ > + int c = 1; > + #pragma omp simd if (c) > + for (int i = 0; i < N; ++i) > + a[i] = a[i] + 1; > +} > + > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ > +/* { dg-final { scan-tree-dump-not "created versioning for simd if condition > check" "vect" } } */ > > > Jakub > -- Richard Biener <rguent...@suse.de> SUSE Linux GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany; GF: Felix Imendörffer, Mary Higgins, Sri Rasiah; HRB 21284 (AG Nürnberg)