Hi!

I've committed the following patch to add C/C++ parsing of the simd: schedule
clause modifier, and a very rough implementation of it for
schedules with a chunk size and for dynamic schedule kinds.
No idea what to do about runtime schedule, because there we don't pass
a chunk size to the library routine.
And for nochunk static it will need more work (well, likely for chunk static
too).  Best would be to arrange for the vectorizer to be able to
communicate its decisions back into the schedule static decisions
- the spec allows the first chunk to have even more than chunk_size
rounded up to a multiple of the (estimated) vectorization factor.  So if we
e.g. decide to peel the loop for alignment etc., it would be best to schedule
those peeled iterations in the first thread, followed by a full portion of
chunk_size rounded up to vf; then the second through (last - 1)th threads
that do anything would always run exactly chunk_size rounded up to vf
iterations, and the last thread would do what is left.  Any help with that
would be appreciated.

Also, I'm not sure whether we should replace omp_max_vf here with the
OMP_CLAUSE_SIMDLEN value if specified; that is the desired vectorization
factor, so perhaps it is enough to use that.  Also, omp_max_vf might be too
high: it assumes the loop might contain some QImode types that would need
vectorization, while if the loop is e.g. fully SImode or wider, the guess
will be 4x higher than needed.  Perhaps walk the loop and collect the
narrowest type used in there?

2015-06-12  Jakub Jelinek  <ja...@redhat.com>

        * tree.h (OMP_CLAUSE_SCHEDULE_SIMD): Define.
        * omp-low.c (struct omp_for_data): Add simd_schedule field.
        (extract_omp_for_data): Initialize it.
        (omp_adjust_chunk_size): New function.
        (get_ws_args_for, expand_omp_for_generic,
        expand_omp_for_static_chunk): Use it.
        * tree-pretty-print.c (dump_omp_clause): Print simd: modifier
        on OMP_CLAUSE_SCHEDULE.
c-family/
        * c-omp.c (c_omp_split_clauses): Clear OMP_CLAUSE_SCHEDULE_SIMD
        when not combined with simd construct.
c/
        * c-parser.c (c_parser_omp_clause_schedule): Parse optional
        simd: modifier in schedule clause.
cp/
        * parser.c (cp_parser_omp_clause_schedule): Parse optional
        simd: modifier in schedule clause.
testsuite/
        * c-c++-common/gomp/schedule-simd-1.c: New test.

--- gcc/tree.h.jj       2015-06-11 14:36:37.000000000 +0200
+++ gcc/tree.h  2015-06-11 18:22:28.413686564 +0200
@@ -1526,6 +1526,10 @@ extern void protected_set_expr_location
 #define OMP_CLAUSE_SCHEDULE_KIND(NODE) \
   (OMP_CLAUSE_SUBCODE_CHECK (NODE, 
OMP_CLAUSE_SCHEDULE)->omp_clause.subcode.schedule_kind)
 
+/* True if a SCHEDULE clause has the simd modifier on it.  */
+#define OMP_CLAUSE_SCHEDULE_SIMD(NODE) \
+  (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_SCHEDULE)->base.public_flag)
+
 #define OMP_CLAUSE_DEFAULT_KIND(NODE) \
   (OMP_CLAUSE_SUBCODE_CHECK (NODE, 
OMP_CLAUSE_DEFAULT)->omp_clause.subcode.default_kind)
 
--- gcc/omp-low.c.jj    2015-06-11 11:35:02.000000000 +0200
+++ gcc/omp-low.c       2015-06-12 12:23:06.857019167 +0200
@@ -251,7 +251,7 @@ struct omp_for_data
   gomp_for *for_stmt;
   tree pre, iter_type;
   int collapse;
-  bool have_nowait, have_ordered;
+  bool have_nowait, have_ordered, simd_schedule;
   enum omp_clause_schedule_kind sched_kind;
   struct omp_for_data_loop *loops;
 };
@@ -514,6 +514,7 @@ extract_omp_for_data (gomp_for *for_stmt
   fd->have_ordered = false;
   fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
   fd->chunk_size = NULL_TREE;
+  fd->simd_schedule = false;
   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
     fd->sched_kind = OMP_CLAUSE_SCHEDULE_CILKFOR;
   collapse_iter = NULL;
@@ -532,6 +533,7 @@ extract_omp_for_data (gomp_for *for_stmt
        gcc_assert (!distribute && !taskloop);
        fd->sched_kind = OMP_CLAUSE_SCHEDULE_KIND (t);
        fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t);
+       fd->simd_schedule = OMP_CLAUSE_SCHEDULE_SIMD (t);
        break;
       case OMP_CLAUSE_DIST_SCHEDULE:
        gcc_assert (distribute);
@@ -870,6 +872,29 @@ workshare_safe_to_combine_p (basic_block
 }
 
 
+static int omp_max_vf (void);
+
+/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
+   presence (SIMD_SCHEDULE).  */
+
+static tree
+omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
+{
+  if (!simd_schedule)
+    return chunk_size;
+
+  int vf = omp_max_vf ();
+  if (vf == 1)
+    return chunk_size;
+
+  tree type = TREE_TYPE (chunk_size);
+  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
+                           build_int_cst (type, vf - 1));
+  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
+                     build_int_cst (type, -vf));
+}
+
+
 /* Collect additional arguments needed to emit a combined
    parallel+workshare call.  WS_STMT is the workshare directive being
    expanded.  */
@@ -917,6 +942,7 @@ get_ws_args_for (gimple par_stmt, gimple
       if (fd.chunk_size)
        {
          t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
+         t = omp_adjust_chunk_size (t, fd.simd_schedule);
          ws_args->quick_push (t);
        }
 
@@ -7019,6 +7045,7 @@ expand_omp_for_generic (struct omp_regio
          if (fd->chunk_size)
            {
              t = fold_convert (fd->iter_type, fd->chunk_size);
+             t = omp_adjust_chunk_size (t, fd->simd_schedule);
              t = build_call_expr (builtin_decl_explicit (start_fn),
                                   6, t0, t1, t2, t, t3, t4);
            }
@@ -7044,6 +7071,7 @@ expand_omp_for_generic (struct omp_regio
            {
              tree bfn_decl = builtin_decl_explicit (start_fn);
              t = fold_convert (fd->iter_type, fd->chunk_size);
+             t = omp_adjust_chunk_size (t, fd->simd_schedule);
              t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
            }
          else
@@ -7830,9 +7858,11 @@ expand_omp_for_static_chunk (struct omp_
                                 true, NULL_TREE, true, GSI_SAME_STMT);
   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
                                   true, NULL_TREE, true, GSI_SAME_STMT);
-  fd->chunk_size
-    = force_gimple_operand_gsi (&gsi, fold_convert (itype, fd->chunk_size),
-                               true, NULL_TREE, true, GSI_SAME_STMT);
+  tree chunk_size = fold_convert (itype, fd->chunk_size);
+  chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
+  chunk_size
+    = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
+                               GSI_SAME_STMT);
 
   t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
   t = fold_build2 (PLUS_EXPR, itype, step, t);
@@ -7866,7 +7896,7 @@ expand_omp_for_static_chunk (struct omp_
     = gimple_build_assign (trip_init, build_int_cst (itype, 0));
   gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
 
-  t = fold_build2 (MULT_EXPR, itype, threadid, fd->chunk_size);
+  t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
   t = fold_build2 (MULT_EXPR, itype, t, step);
   if (POINTER_TYPE_P (type))
     t = fold_build_pointer_plus (n1, t);
@@ -7883,11 +7913,11 @@ expand_omp_for_static_chunk (struct omp_
 
   t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
   t = fold_build2 (PLUS_EXPR, itype, t, threadid);
-  t = fold_build2 (MULT_EXPR, itype, t, fd->chunk_size);
+  t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
   s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                                 false, GSI_CONTINUE_LINKING);
 
-  t = fold_build2 (PLUS_EXPR, itype, s0, fd->chunk_size);
+  t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
   t = fold_build2 (MIN_EXPR, itype, t, n);
   e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                                 false, GSI_CONTINUE_LINKING);
--- gcc/tree-pretty-print.c.jj  2015-06-11 14:43:37.000000000 +0200
+++ gcc/tree-pretty-print.c     2015-06-11 18:25:40.975760680 +0200
@@ -439,6 +439,8 @@ dump_omp_clause (pretty_printer *pp, tre
 
     case OMP_CLAUSE_SCHEDULE:
       pp_string (pp, "schedule(");
+      if (OMP_CLAUSE_SCHEDULE_SIMD (clause))
+       pp_string (pp, "simd:");
       switch (OMP_CLAUSE_SCHEDULE_KIND (clause))
        {
        case OMP_CLAUSE_SCHEDULE_STATIC:
--- gcc/c-family/c-omp.c.jj     2015-06-08 10:50:52.000000000 +0200
+++ gcc/c-family/c-omp.c        2015-06-11 20:07:49.845720479 +0200
@@ -766,10 +766,14 @@ c_omp_split_clauses (location_t loc, enu
          s = C_OMP_CLAUSE_SPLIT_PARALLEL;
          break;
        case OMP_CLAUSE_ORDERED:
-       case OMP_CLAUSE_SCHEDULE:
        case OMP_CLAUSE_NOWAIT:
          s = C_OMP_CLAUSE_SPLIT_FOR;
          break;
+       case OMP_CLAUSE_SCHEDULE:
+         s = C_OMP_CLAUSE_SPLIT_FOR;
+         if (code != OMP_SIMD)
+           OMP_CLAUSE_SCHEDULE_SIMD (clauses) = 0;
+         break;
        case OMP_CLAUSE_SAFELEN:
        case OMP_CLAUSE_SIMDLEN:
        case OMP_CLAUSE_LINEAR:
--- gcc/c/c-parser.c.jj 2015-06-11 17:00:21.000000000 +0200
+++ gcc/c/c-parser.c    2015-06-11 18:41:48.136095564 +0200
@@ -11112,7 +11112,13 @@ c_parser_omp_clause_reduction (c_parser
 
    schedule-kind:
      static | dynamic | guided | runtime | auto
-*/
+
+   OpenMP 4.1:
+   schedule ( schedule-modifier : schedule-kind )
+   schedule ( schedule-modifier : schedule-kind , expression )
+
+   schedule-modifier:
+     simd  */
 
 static tree
 c_parser_omp_clause_schedule (c_parser *parser, tree list)
@@ -11127,6 +11133,19 @@ c_parser_omp_clause_schedule (c_parser *
 
   if (c_parser_next_token_is (parser, CPP_NAME))
     {
+      tree kind = c_parser_peek_token (parser)->value;
+      const char *p = IDENTIFIER_POINTER (kind);
+      if (strcmp ("simd", p) == 0
+         && c_parser_peek_2nd_token (parser)->type == CPP_COLON)
+       {
+         OMP_CLAUSE_SCHEDULE_SIMD (c) = 1;
+         c_parser_consume_token (parser);
+         c_parser_consume_token (parser);
+       }
+    }
+
+  if (c_parser_next_token_is (parser, CPP_NAME))
+    {
       tree kind = c_parser_peek_token (parser)->value;
       const char *p = IDENTIFIER_POINTER (kind);
 
--- gcc/cp/parser.c.jj  2015-06-11 16:59:24.000000000 +0200
+++ gcc/cp/parser.c     2015-06-11 18:42:54.267093129 +0200
@@ -28707,7 +28707,14 @@ cp_parser_omp_clause_reduction (cp_parse
    schedule ( schedule-kind , expression )
 
    schedule-kind:
-     static | dynamic | guided | runtime | auto  */
+     static | dynamic | guided | runtime | auto
+
+   OpenMP 4.1:
+   schedule ( schedule-modifier : schedule-kind )
+   schedule ( schedule-modifier : schedule-kind , expression )
+
+   schedule-modifier:
+     simd  */
 
 static tree
 cp_parser_omp_clause_schedule (cp_parser *parser, tree list, location_t 
location)
@@ -28721,6 +28728,19 @@ cp_parser_omp_clause_schedule (cp_parser
 
   if (cp_lexer_next_token_is (parser->lexer, CPP_NAME))
     {
+      tree id = cp_lexer_peek_token (parser->lexer)->u.value;
+      const char *p = IDENTIFIER_POINTER (id);
+      if (strcmp ("simd", p) == 0
+         && cp_lexer_nth_token_is (parser->lexer, 2, CPP_COLON))
+       {
+         OMP_CLAUSE_SCHEDULE_SIMD (c) = 1;
+         cp_lexer_consume_token (parser->lexer);
+         cp_lexer_consume_token (parser->lexer);
+       }
+    }
+
+  if (cp_lexer_next_token_is (parser->lexer, CPP_NAME))
+    {
       tree id = cp_lexer_peek_token (parser->lexer)->u.value;
       const char *p = IDENTIFIER_POINTER (id);
 
--- gcc/testsuite/c-c++-common/gomp/schedule-simd-1.c.jj        2015-06-12 
12:49:39.030398681 +0200
+++ gcc/testsuite/c-c++-common/gomp/schedule-simd-1.c   2015-06-12 
12:49:25.000000000 +0200
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -O2" } */
+/* { dg-additional-options "-mavx512f" { target { x86_64-*-* i?86-*-* } } } */
+
+#define N 1024
+int a[N], b[N], c[N];
+
+void
+f1 (void)
+{
+  int i;
+  #pragma omp parallel for simd schedule (simd:static)
+  for (i = 0; i < N; i++)
+    a[i] = b[i] + c[i];
+}
+
+void
+f2 (void)
+{
+  int i;
+  #pragma omp parallel for simd schedule (simd: static, 7)
+  for (i = 0; i < N; i++)
+    a[i] = b[i] + c[i];
+}
+
+void
+f3 (void)
+{
+  int i;
+  #pragma omp parallel for simd schedule (simd : dynamic, 7)
+  for (i = 0; i < N; i++)
+    a[i] = b[i] + c[i];
+}
+
+void
+f4 (void)
+{
+  int i;
+  #pragma omp parallel for simd schedule ( simd:runtime)
+  for (i = 0; i < N; i++)
+    a[i] = b[i] + c[i];
+}
+
+void
+f5 (void)
+{
+  int i;
+  #pragma omp parallel for simd schedule (simd:auto)
+  for (i = 0; i < N; i++)
+    a[i] = b[i] + c[i];
+}

        Jakub

Reply via email to