Hi! I've committed the following patch to add C/C++ parsing of the simd: schedule clause modifier, and a very rough implementation of it for schedules with a chunk size and for the dynamic schedule kind. I have no idea what to do about the runtime schedule, because there we don't pass a chunk size to the library routine. And for static without a chunk size it will need more work (well, for static with a chunk size likely too). Best would be to arrange for the vectorizer to be able to communicate its decisions back into the static schedule decisions - the spec allows the first chunk to have even more than chunk_size rounded up to a multiple of the (estimated) vectorization factor. So if we e.g. decide to peel the loop for alignment etc., it would be best to schedule those peeled iterations in the first thread, followed by a full portion of chunk_size rounded up to vf; then the second up to the (last - 1)th thread would always run exactly chunk_size rounded up to vf iterations, and the last thread would do what is left. Any help with that would be appreciated.
Also, not sure if we shouldn't replace here omp_max_vf with the OMP_CLAUSE_SIMDLEN value if specified, that is the desired vectorization factor, so perhaps it is enough to use that. Also, omp_max_vf might be too high, it assumes the loop might contain some QImode types that would need vectorization, while if it is e.g. fully SImode+, the guess will be 4x higher than needed. Perhaps walk the loop and collect narrowest type used in there? 2015-06-12 Jakub Jelinek <ja...@redhat.com> * tree.h (OMP_CLAUSE_SCHEDULE_SIMD): Define. * omp-low.c (struct omp_for_data): Add simd_schedule field. (extract_omp_for_data): Initialize it. (omp_adjust_chunk_size): New function. (get_ws_args_for, expand_omp_for_generic, expand_omp_for_static_chunk): Use it. * tree-pretty-print.c (dump_omp_clause): Print simd: modifier on OMP_CLAUSE_SCHEDULE. c-family/ * c-omp.c (c_omp_split_clauses): Clear OMP_CLAUSE_SCHEDULE_SIMD when not combined with simd construct. c/ * c-parser.c (c_parser_omp_clause_schedule): Parse optional simd: modifier in schedule clause. cp/ * parser.c (cp_parser_omp_clause_schedule): Parse optional simd: modifier in schedule clause. testsuite/ * c-c++-common/gomp/schedule-simd-1.c: New test. --- gcc/tree.h.jj 2015-06-11 14:36:37.000000000 +0200 +++ gcc/tree.h 2015-06-11 18:22:28.413686564 +0200 @@ -1526,6 +1526,10 @@ extern void protected_set_expr_location #define OMP_CLAUSE_SCHEDULE_KIND(NODE) \ (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_SCHEDULE)->omp_clause.subcode.schedule_kind) +/* True if a SCHEDULE clause has the simd modifier on it. 
*/ +#define OMP_CLAUSE_SCHEDULE_SIMD(NODE) \ + (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_SCHEDULE)->base.public_flag) + #define OMP_CLAUSE_DEFAULT_KIND(NODE) \ (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_DEFAULT)->omp_clause.subcode.default_kind) --- gcc/omp-low.c.jj 2015-06-11 11:35:02.000000000 +0200 +++ gcc/omp-low.c 2015-06-12 12:23:06.857019167 +0200 @@ -251,7 +251,7 @@ struct omp_for_data gomp_for *for_stmt; tree pre, iter_type; int collapse; - bool have_nowait, have_ordered; + bool have_nowait, have_ordered, simd_schedule; enum omp_clause_schedule_kind sched_kind; struct omp_for_data_loop *loops; }; @@ -514,6 +514,7 @@ extract_omp_for_data (gomp_for *for_stmt fd->have_ordered = false; fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC; fd->chunk_size = NULL_TREE; + fd->simd_schedule = false; if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_CILKFOR) fd->sched_kind = OMP_CLAUSE_SCHEDULE_CILKFOR; collapse_iter = NULL; @@ -532,6 +533,7 @@ extract_omp_for_data (gomp_for *for_stmt gcc_assert (!distribute && !taskloop); fd->sched_kind = OMP_CLAUSE_SCHEDULE_KIND (t); fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t); + fd->simd_schedule = OMP_CLAUSE_SCHEDULE_SIMD (t); break; case OMP_CLAUSE_DIST_SCHEDULE: gcc_assert (distribute); @@ -870,6 +872,29 @@ workshare_safe_to_combine_p (basic_block } +static int omp_max_vf (void); + +/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier + presence (SIMD_SCHEDULE). */ + +static tree +omp_adjust_chunk_size (tree chunk_size, bool simd_schedule) +{ + if (!simd_schedule) + return chunk_size; + + int vf = omp_max_vf (); + if (vf == 1) + return chunk_size; + + tree type = TREE_TYPE (chunk_size); + chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size, + build_int_cst (type, vf - 1)); + return fold_build2 (BIT_AND_EXPR, type, chunk_size, + build_int_cst (type, -vf)); +} + + /* Collect additional arguments needed to emit a combined parallel+workshare call. WS_STMT is the workshare directive being expanded. 
*/ @@ -917,6 +942,7 @@ get_ws_args_for (gimple par_stmt, gimple if (fd.chunk_size) { t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size); + t = omp_adjust_chunk_size (t, fd.simd_schedule); ws_args->quick_push (t); } @@ -7019,6 +7045,7 @@ expand_omp_for_generic (struct omp_regio if (fd->chunk_size) { t = fold_convert (fd->iter_type, fd->chunk_size); + t = omp_adjust_chunk_size (t, fd->simd_schedule); t = build_call_expr (builtin_decl_explicit (start_fn), 6, t0, t1, t2, t, t3, t4); } @@ -7044,6 +7071,7 @@ expand_omp_for_generic (struct omp_regio { tree bfn_decl = builtin_decl_explicit (start_fn); t = fold_convert (fd->iter_type, fd->chunk_size); + t = omp_adjust_chunk_size (t, fd->simd_schedule); t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4); } else @@ -7830,9 +7858,11 @@ expand_omp_for_static_chunk (struct omp_ true, NULL_TREE, true, GSI_SAME_STMT); step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), true, NULL_TREE, true, GSI_SAME_STMT); - fd->chunk_size - = force_gimple_operand_gsi (&gsi, fold_convert (itype, fd->chunk_size), - true, NULL_TREE, true, GSI_SAME_STMT); + tree chunk_size = fold_convert (itype, fd->chunk_size); + chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule); + chunk_size + = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true, + GSI_SAME_STMT); t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? 
-1 : 1)); t = fold_build2 (PLUS_EXPR, itype, step, t); @@ -7866,7 +7896,7 @@ expand_omp_for_static_chunk (struct omp_ = gimple_build_assign (trip_init, build_int_cst (itype, 0)); gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); - t = fold_build2 (MULT_EXPR, itype, threadid, fd->chunk_size); + t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size); t = fold_build2 (MULT_EXPR, itype, t, step); if (POINTER_TYPE_P (type)) t = fold_build_pointer_plus (n1, t); @@ -7883,11 +7913,11 @@ expand_omp_for_static_chunk (struct omp_ t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads); t = fold_build2 (PLUS_EXPR, itype, t, threadid); - t = fold_build2 (MULT_EXPR, itype, t, fd->chunk_size); + t = fold_build2 (MULT_EXPR, itype, t, chunk_size); s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false, GSI_CONTINUE_LINKING); - t = fold_build2 (PLUS_EXPR, itype, s0, fd->chunk_size); + t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size); t = fold_build2 (MIN_EXPR, itype, t, n); e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false, GSI_CONTINUE_LINKING); --- gcc/tree-pretty-print.c.jj 2015-06-11 14:43:37.000000000 +0200 +++ gcc/tree-pretty-print.c 2015-06-11 18:25:40.975760680 +0200 @@ -439,6 +439,8 @@ dump_omp_clause (pretty_printer *pp, tre case OMP_CLAUSE_SCHEDULE: pp_string (pp, "schedule("); + if (OMP_CLAUSE_SCHEDULE_SIMD (clause)) + pp_string (pp, "simd:"); switch (OMP_CLAUSE_SCHEDULE_KIND (clause)) { case OMP_CLAUSE_SCHEDULE_STATIC: --- gcc/c-family/c-omp.c.jj 2015-06-08 10:50:52.000000000 +0200 +++ gcc/c-family/c-omp.c 2015-06-11 20:07:49.845720479 +0200 @@ -766,10 +766,14 @@ c_omp_split_clauses (location_t loc, enu s = C_OMP_CLAUSE_SPLIT_PARALLEL; break; case OMP_CLAUSE_ORDERED: - case OMP_CLAUSE_SCHEDULE: case OMP_CLAUSE_NOWAIT: s = C_OMP_CLAUSE_SPLIT_FOR; break; + case OMP_CLAUSE_SCHEDULE: + s = C_OMP_CLAUSE_SPLIT_FOR; + if (code != OMP_SIMD) + OMP_CLAUSE_SCHEDULE_SIMD (clauses) = 0; + break; case OMP_CLAUSE_SAFELEN: case OMP_CLAUSE_SIMDLEN: case 
OMP_CLAUSE_LINEAR: --- gcc/c/c-parser.c.jj 2015-06-11 17:00:21.000000000 +0200 +++ gcc/c/c-parser.c 2015-06-11 18:41:48.136095564 +0200 @@ -11112,7 +11112,13 @@ c_parser_omp_clause_reduction (c_parser schedule-kind: static | dynamic | guided | runtime | auto -*/ + + OpenMP 4.1: + schedule ( schedule-modifier : schedule-kind ) + schedule ( schedule-modifier : schedule-kind , expression ) + + schedule-modifier: + simd */ static tree c_parser_omp_clause_schedule (c_parser *parser, tree list) @@ -11127,6 +11133,19 @@ c_parser_omp_clause_schedule (c_parser * if (c_parser_next_token_is (parser, CPP_NAME)) { + tree kind = c_parser_peek_token (parser)->value; + const char *p = IDENTIFIER_POINTER (kind); + if (strcmp ("simd", p) == 0 + && c_parser_peek_2nd_token (parser)->type == CPP_COLON) + { + OMP_CLAUSE_SCHEDULE_SIMD (c) = 1; + c_parser_consume_token (parser); + c_parser_consume_token (parser); + } + } + + if (c_parser_next_token_is (parser, CPP_NAME)) + { tree kind = c_parser_peek_token (parser)->value; const char *p = IDENTIFIER_POINTER (kind); --- gcc/cp/parser.c.jj 2015-06-11 16:59:24.000000000 +0200 +++ gcc/cp/parser.c 2015-06-11 18:42:54.267093129 +0200 @@ -28707,7 +28707,14 @@ cp_parser_omp_clause_reduction (cp_parse schedule ( schedule-kind , expression ) schedule-kind: - static | dynamic | guided | runtime | auto */ + static | dynamic | guided | runtime | auto + + OpenMP 4.1: + schedule ( schedule-modifier : schedule-kind ) + schedule ( schedule-modifier : schedule-kind , expression ) + + schedule-modifier: + simd */ static tree cp_parser_omp_clause_schedule (cp_parser *parser, tree list, location_t location) @@ -28721,6 +28728,19 @@ cp_parser_omp_clause_schedule (cp_parser if (cp_lexer_next_token_is (parser->lexer, CPP_NAME)) { + tree id = cp_lexer_peek_token (parser->lexer)->u.value; + const char *p = IDENTIFIER_POINTER (id); + if (strcmp ("simd", p) == 0 + && cp_lexer_nth_token_is (parser->lexer, 2, CPP_COLON)) + { + OMP_CLAUSE_SCHEDULE_SIMD (c) = 1; + 
cp_lexer_consume_token (parser->lexer); + cp_lexer_consume_token (parser->lexer); + } + } + + if (cp_lexer_next_token_is (parser->lexer, CPP_NAME)) + { tree id = cp_lexer_peek_token (parser->lexer)->u.value; const char *p = IDENTIFIER_POINTER (id); --- gcc/testsuite/c-c++-common/gomp/schedule-simd-1.c.jj 2015-06-12 12:49:39.030398681 +0200 +++ gcc/testsuite/c-c++-common/gomp/schedule-simd-1.c 2015-06-12 12:49:25.000000000 +0200 @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-options "-fopenmp -O2" } */ +/* { dg-additional-options "-mavx512f" { target { x86_64-*-* i?86-*-* } } } */ + +#define N 1024 +int a[N], b[N], c[N]; + +void +f1 (void) +{ + int i; + #pragma omp parallel for simd schedule (simd:static) + for (i = 0; i < N; i++) + a[i] = b[i] + c[i]; +} + +void +f2 (void) +{ + int i; + #pragma omp parallel for simd schedule (simd: static, 7) + for (i = 0; i < N; i++) + a[i] = b[i] + c[i]; +} + +void +f3 (void) +{ + int i; + #pragma omp parallel for simd schedule (simd : dynamic, 7) + for (i = 0; i < N; i++) + a[i] = b[i] + c[i]; +} + +void +f4 (void) +{ + int i; + #pragma omp parallel for simd schedule ( simd:runtime) + for (i = 0; i < N; i++) + a[i] = b[i] + c[i]; +} + +void +f5 (void) +{ + int i; + #pragma omp parallel for simd schedule (simd:auto) + for (i = 0; i < N; i++) + a[i] = b[i] + c[i]; +} Jakub