Hi!

Our implementation of lastprivate(conditional:) uses two per-thread
variables: the privatized copy of the variable, and another private
variable (a counter) holding the highest iteration count that stored to the
privatized copy.  The two are merged only at the end of the construct.
This doesn't really work for non-monotonic loops (admittedly, at least right
now all loops are monotonic): while the private counter could still hold
the maximum iteration count properly, the privatized copy will hold the
value from whatever iteration the thread handled last and stored to the
variable, which is not necessarily the iteration with the highest count.
We'd need to do the merging and clearing also at every point where a
thread's iteration count goes non-monotonically down, which would be slow.

So, the following patch just forces the use of monotonic APIs when we have
lastprivate(conditional:).

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2019-06-05  Jakub Jelinek  <ja...@redhat.com>

        * omp-expand.c (struct omp_region): Add has_lastprivate_conditional
        member.
        (expand_parallel_call): If region->inner->has_lastprivate_conditional,
        treat it like explicit monotonic schedule modifier.
        (expand_omp_for): Initialize has_lastprivate_conditional.
        If fd.lastprivate_conditional != 0, treat it like explicit monotonic
        schedule modifier.

--- gcc/omp-expand.c.jj 2019-06-03 12:04:20.342277395 +0200
+++ gcc/omp-expand.c    2019-06-04 15:45:54.450078636 +0200
@@ -100,6 +100,9 @@ struct omp_region
   /* True if this is a combined parallel+workshare region.  */
   bool is_combined_parallel;
 
+  /* Copy of fd.lastprivate_conditional != 0.  */
+  bool has_lastprivate_conditional;
+
   /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
      a depend clause.  */
   gomp_ordered *ord_stmt;
@@ -601,8 +604,12 @@ expand_parallel_call (struct omp_region
          switch (region->inner->sched_kind)
            {
            case OMP_CLAUSE_SCHEDULE_RUNTIME:
-             if ((region->inner->sched_modifiers
-                  & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
+             /* For lastprivate(conditional:), our implementation
+                requires monotonic behavior.  */
+             if (region->inner->has_lastprivate_conditional != 0)
+               start_ix2 = 3;
+             else if ((region->inner->sched_modifiers
+                      & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
                start_ix2 = 6;
              else if ((region->inner->sched_modifiers
                        & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
@@ -613,7 +620,8 @@ expand_parallel_call (struct omp_region
            case OMP_CLAUSE_SCHEDULE_DYNAMIC:
            case OMP_CLAUSE_SCHEDULE_GUIDED:
              if ((region->inner->sched_modifiers
-                  & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
+                  & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
+                 && !region->inner->has_lastprivate_conditional)
                {
                  start_ix2 = 3 + region->inner->sched_kind;
                  break;
@@ -6228,6 +6236,7 @@ expand_omp_for (struct omp_region *regio
                        &fd, loops);
   region->sched_kind = fd.sched_kind;
   region->sched_modifiers = fd.sched_modifiers;
+  region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
 
   gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
   BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
@@ -6280,14 +6289,16 @@ expand_omp_for (struct omp_region *regio
       switch (fd.sched_kind)
        {
        case OMP_CLAUSE_SCHEDULE_RUNTIME:
-         if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
+         if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
+             && fd.lastprivate_conditional == 0)
            {
              gcc_assert (!fd.have_ordered);
              fn_index = 6;
              sched = 4;
            }
          else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
-                  && !fd.have_ordered)
+                  && !fd.have_ordered
+                  && fd.lastprivate_conditional == 0)
            fn_index = 7;
          else
            {
@@ -6298,7 +6309,8 @@ expand_omp_for (struct omp_region *regio
        case OMP_CLAUSE_SCHEDULE_DYNAMIC:
        case OMP_CLAUSE_SCHEDULE_GUIDED:
          if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
-             && !fd.have_ordered)
+             && !fd.have_ordered
+             && fd.lastprivate_conditional == 0)
            {
              fn_index = 3 + fd.sched_kind;
              sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;

        Jakub

Reply via email to