Hi! For now, this patch just makes sure that we don't vectorize loops that contain #pragma omp ordered simd, whether it appears directly in the loop body or is inlined into it. Later on we can teach the vectorizer to handle some of these cases (but presumably the markers would then need to become stronger barriers, so that earlier optimizations don't move any memory accesses across them; for now they are ok as is).
2015-09-10 Jakub Jelinek <ja...@redhat.com> * internal-fn.def (GOMP_SIMD_ORDERED_START, GOMP_SIMD_ORDERED_END): New internal functions. * omp-low.c (lower_omp_ordered): For ordered simd construct expand GOMP_SIMD_ORDERED_{START,END} internal calls around the body instead of GOMP_ordered_{start,end}, and set cfun->has_simduid_loops. * passes.def (pass_simduid_cleanup): Schedule another copy of the pass after all optimizations. * tree-inline.c (remap_gimple_stmt): Set has_simduid_loops if remapping GOMP_SIMD_ORDERED_{START,END} internal call. * tree-vectorizer.c (adjust_simduid_builtins): Remove GOMP_SIMD_ORDERED_{START,END} calls. (vectorize_loops, pass_simduid_cleanup::execute): Adjust comments. * internal-fn.c (expand_GOMP_SIMD_ORDERED_START, expand_GOMP_SIMD_ORDERED_END): New functions. * c-c++-common/gomp/ordered-1.c: New test. * c-c++-common/gomp/ordered-2.c: New test. --- gcc/internal-fn.def.jj 2015-04-29 10:59:41.000000000 +0200 +++ gcc/internal-fn.def 2015-09-10 13:49:08.053011729 +0200 @@ -44,6 +44,8 @@ DEF_INTERNAL_FN (STORE_LANES, ECF_CONST DEF_INTERNAL_FN (GOMP_SIMD_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (GOMP_SIMD_VF, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (GOMP_SIMD_LAST_LANE, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) +DEF_INTERNAL_FN (GOMP_SIMD_ORDERED_START, ECF_LEAF | ECF_NOTHROW, NULL) +DEF_INTERNAL_FN (GOMP_SIMD_ORDERED_END, ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (LOOP_VECTORIZED, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (MASK_LOAD, ECF_PURE | ECF_LEAF, NULL) DEF_INTERNAL_FN (MASK_STORE, ECF_LEAF, NULL) --- gcc/omp-low.c.jj 2015-09-07 14:42:28.000000000 +0200 +++ gcc/omp-low.c 2015-09-10 14:46:48.653692822 +0200 @@ -12327,10 +12327,13 @@ lower_omp_ordered (gimple_stmt_iterator { tree block; gimple stmt = gsi_stmt (*gsi_p); + gomp_ordered *ord_stmt = as_a <gomp_ordered *> (stmt); gcall *x; gbind *bind; + bool simd + = find_omp_clause (gimple_omp_ordered_clauses (ord_stmt), 
OMP_CLAUSE_SIMD); - lower_omp_ordered_clauses (as_a <gomp_ordered *> (stmt), ctx); + lower_omp_ordered_clauses (ord_stmt, ctx); push_gimplify_context (); @@ -12339,8 +12342,14 @@ lower_omp_ordered (gimple_stmt_iterator gsi_replace (gsi_p, bind, true); gimple_bind_add_stmt (bind, stmt); - x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ORDERED_START), - 0); + if (simd) + { + x = gimple_build_call_internal (IFN_GOMP_SIMD_ORDERED_START, 0); + cfun->has_simduid_loops = true; + } + else + x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ORDERED_START), + 0); gimple_bind_add_stmt (bind, x); lower_omp (gimple_omp_body_ptr (stmt), ctx); @@ -12348,7 +12357,11 @@ lower_omp_ordered (gimple_stmt_iterator gimple_bind_add_seq (bind, gimple_omp_body (stmt)); gimple_omp_set_body (stmt, NULL); - x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ORDERED_END), 0); + if (simd) + x = gimple_build_call_internal (IFN_GOMP_SIMD_ORDERED_END, 0); + else + x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ORDERED_END), + 0); gimple_bind_add_stmt (bind, x); gimple_bind_add_stmt (bind, gimple_build_omp_return (true)); --- gcc/passes.def.jj 2015-09-03 16:38:07.000000000 +0200 +++ gcc/passes.def 2015-09-10 13:50:21.220920366 +0200 @@ -340,6 +340,7 @@ along with GCC; see the file COPYING3. 
NEXT_PASS (pass_tm_memopt); NEXT_PASS (pass_tm_edges); POP_INSERT_PASSES () + NEXT_PASS (pass_simduid_cleanup); NEXT_PASS (pass_vtable_verify); NEXT_PASS (pass_lower_vaarg); NEXT_PASS (pass_lower_vector); --- gcc/tree-inline.c.jj 2015-09-03 16:40:30.000000000 +0200 +++ gcc/tree-inline.c 2015-09-10 14:21:08.638424534 +0200 @@ -1619,6 +1619,11 @@ remap_gimple_stmt (gimple stmt, copy_bod gimple_call_set_tail (call_stmt, false); if (gimple_call_from_thunk_p (call_stmt)) gimple_call_set_from_thunk (call_stmt, false); + if (gimple_call_internal_p (call_stmt) + && IN_RANGE (gimple_call_internal_fn (call_stmt), + IFN_GOMP_SIMD_ORDERED_START, + IFN_GOMP_SIMD_ORDERED_END)) + DECL_STRUCT_FUNCTION (id->dst_fn)->has_simduid_loops = true; } /* Remap the region numbers for __builtin_eh_{pointer,filter}, --- gcc/tree-vectorizer.c.jj 2015-09-03 16:39:22.000000000 +0200 +++ gcc/tree-vectorizer.c 2015-09-10 14:31:40.860067887 +0200 @@ -149,8 +149,9 @@ simd_array_to_simduid::equal (const simd return p1->decl == p2->decl; } -/* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF and IFN_GOMP_SIMD_LAST_LANE - into their corresponding constants. */ +/* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LAST_LANE, + into their corresponding constants and remove + IFN_GOMP_SIMD_ORDERED_{START,END}. 
*/ static void adjust_simduid_builtins (hash_table<simduid_to_vf> *htab) @@ -161,7 +162,7 @@ adjust_simduid_builtins (hash_table<simd { gimple_stmt_iterator i; - for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i)) + for (i = gsi_start_bb (bb); !gsi_end_p (i); ) { unsigned int vf = 1; enum internal_fn ifn; @@ -169,7 +170,10 @@ adjust_simduid_builtins (hash_table<simd tree t; if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt)) - continue; + { + gsi_next (&i); + continue; + } ifn = gimple_call_internal_fn (stmt); switch (ifn) { @@ -177,7 +181,13 @@ adjust_simduid_builtins (hash_table<simd case IFN_GOMP_SIMD_VF: case IFN_GOMP_SIMD_LAST_LANE: break; + case IFN_GOMP_SIMD_ORDERED_START: + case IFN_GOMP_SIMD_ORDERED_END: + gsi_remove (&i, true); + unlink_stmt_vdef (stmt); + continue; default: + gsi_next (&i); continue; } tree arg = gimple_call_arg (stmt, 0); @@ -206,6 +216,7 @@ adjust_simduid_builtins (hash_table<simd gcc_unreachable (); } update_call_from_tree (&i, t); + gsi_next (&i); } } } @@ -568,7 +579,7 @@ vectorize_loops (void) free_stmt_vec_info_vec (); - /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE} builtins. */ + /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */ if (cfun->has_simduid_loops) adjust_simduid_builtins (simduid_to_vf_htab); @@ -630,7 +641,7 @@ pass_simduid_cleanup::execute (function note_simd_array_uses (&simd_array_to_simduid_htab); - /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE} builtins. */ + /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */ adjust_simduid_builtins (NULL); /* Shrink any "omp array simd" temporary arrays to the --- gcc/internal-fn.c.jj 2015-09-03 16:36:22.000000000 +0200 +++ gcc/internal-fn.c 2015-09-10 14:45:02.454253630 +0200 @@ -175,6 +175,22 @@ expand_GOMP_SIMD_LAST_LANE (gcall *) gcc_unreachable (); } +/* This should get expanded in adjust_simduid_builtins. 
*/ + +static void +expand_GOMP_SIMD_ORDERED_START (gcall *) +{ + gcc_unreachable (); +} + +/* This should get expanded in adjust_simduid_builtins. */ + +static void +expand_GOMP_SIMD_ORDERED_END (gcall *) +{ + gcc_unreachable (); +} + /* This should get expanded in the sanopt pass. */ static void --- gcc/testsuite/c-c++-common/gomp/ordered-1.c.jj 2015-09-10 15:14:57.959090756 +0200 +++ gcc/testsuite/c-c++-common/gomp/ordered-1.c 2015-09-10 15:20:26.115259937 +0200 @@ -0,0 +1,91 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd" } */ +/* { dg-additional-options "-msse2" { target sse2_runtime } } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define N 1024 +extern +#ifdef __cplusplus +"C" +#endif +void abort (void); + +int last; + +void +bar (unsigned char *a, int i, int safelen) +{ + int j, k; + if (i != last++) + abort (); + for (j = i - safelen - 32; j < i; j++) + if (j >= 0 && a[j] != 2) + break; + if (j <= i - safelen || a[j] != 1) + abort (); + for (k = j; k < i + safelen + 32; k++) + if (k >= N || a[k] != 1) + break; + if (k <= i || k > j + safelen) + abort (); + if (k < N && a[k] != 0) + abort (); + for (; k < i + safelen + 32; k++) + if (k < N && a[k] != 0) + abort (); +} + +static inline void +foo (unsigned char *a, int i) +{ + #pragma omp ordered simd + bar (a, i, 64); +} + +int +main () +{ + unsigned char a[N], b[N]; + int i; + #pragma omp simd + for (i = 0; i < N; i++) + a[i] = 0; + #pragma omp simd safelen (64) + for (i = 0; i < N; i++) + { + a[i]++; + foo (a, i); + a[i]++; + } + #pragma omp simd + for (i = 0; i < N; i++) + { + a[i] = 0; + b[i] = 0; + } + last = 0; + #pragma omp simd safelen (32) + for (i = 0; i < N; i++) + { + a[i]++; + #pragma omp ordered simd + bar (a, i, 32); + a[i]++; + } + for (i = 0; i < N; i++) + if (a[i] != 2) + abort (); + #pragma omp simd safelen (32) + for (i = 1; i < N; i++) + { + #pragma omp ordered simd + b[i] = b[i - 1] + 1; + a[i]++; + #pragma omp ordered simd + a[i] += a[i - 1]; + } + 
for (i = 0; i < N; i++) + if (a[i] != (unsigned char) (2 + 3 * i) || b[i] != (unsigned char) i) + abort (); + return 0; +} --- gcc/testsuite/c-c++-common/gomp/ordered-2.c.jj 2015-09-10 15:16:02.527140243 +0200 +++ gcc/testsuite/c-c++-common/gomp/ordered-2.c 2015-09-10 15:15:58.345201806 +0200 @@ -0,0 +1,4 @@ +/* { dg-do run } */ +/* { dg-options "-O0 -fopenmp-simd" } */ + +#include "ordered-1.c" Jakub