Hi! The way we implement lastprivate(conditional:) — where we have 2 per-thread variables, the privatized value of the variable and another private variable holding the highest iteration count that stored to the privatized variable, and only merge them at the end of the construct — doesn't really work for non-monotonic loops (admittedly, at least right now all loops are monotonic). While the counter private variable could hold the maximum iteration count properly, the other private variable will hold the value from whatever iteration was handled last by the thread and stored to the variable. We'd need to do the merging and clearing also at the point where the iteration goes nonmonotonically down, which would be slow.
So, the following patch just forces the use of monotonic APIs when we have lastprivate(conditional:). Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk. 2019-06-05 Jakub Jelinek <ja...@redhat.com> * omp-expand.c (struct omp_region): Add has_lastprivate_conditional member. (expand_parallel_call): If region->inner->has_lastprivate_conditional, treat it like explicit monotonic schedule modifier. (expand_omp_for): Initialize has_lastprivate_conditional. If fd.lastprivate_conditional != 0, treat it like explicit monotonic schedule modifier. --- gcc/omp-expand.c.jj 2019-06-03 12:04:20.342277395 +0200 +++ gcc/omp-expand.c 2019-06-04 15:45:54.450078636 +0200 @@ -100,6 +100,9 @@ struct omp_region /* True if this is a combined parallel+workshare region. */ bool is_combined_parallel; + /* Copy of fd.lastprivate_conditional != 0. */ + bool has_lastprivate_conditional; + /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has a depend clause. */ gomp_ordered *ord_stmt; @@ -601,8 +604,12 @@ expand_parallel_call (struct omp_region switch (region->inner->sched_kind) { case OMP_CLAUSE_SCHEDULE_RUNTIME: - if ((region->inner->sched_modifiers - & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0) + /* For lastprivate(conditional:), our implementation + requires monotonic behavior. 
*/ + if (region->inner->has_lastprivate_conditional != 0) + start_ix2 = 3; + else if ((region->inner->sched_modifiers + & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0) start_ix2 = 6; else if ((region->inner->sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0) @@ -613,7 +620,8 @@ expand_parallel_call (struct omp_region case OMP_CLAUSE_SCHEDULE_DYNAMIC: case OMP_CLAUSE_SCHEDULE_GUIDED: if ((region->inner->sched_modifiers - & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0) + & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0 + && !region->inner->has_lastprivate_conditional) { start_ix2 = 3 + region->inner->sched_kind; break; @@ -6228,6 +6236,7 @@ expand_omp_for (struct omp_region *regio &fd, loops); region->sched_kind = fd.sched_kind; region->sched_modifiers = fd.sched_modifiers; + region->has_lastprivate_conditional = fd.lastprivate_conditional != 0; gcc_assert (EDGE_COUNT (region->entry->succs) == 2); BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL; @@ -6280,14 +6289,16 @@ expand_omp_for (struct omp_region *regio switch (fd.sched_kind) { case OMP_CLAUSE_SCHEDULE_RUNTIME: - if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0) + if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0 + && fd.lastprivate_conditional == 0) { gcc_assert (!fd.have_ordered); fn_index = 6; sched = 4; } else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0 - && !fd.have_ordered) + && !fd.have_ordered + && fd.lastprivate_conditional == 0) fn_index = 7; else { @@ -6298,7 +6309,8 @@ expand_omp_for (struct omp_region *regio case OMP_CLAUSE_SCHEDULE_DYNAMIC: case OMP_CLAUSE_SCHEDULE_GUIDED: if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0 - && !fd.have_ordered) + && !fd.have_ordered + && fd.lastprivate_conditional == 0) { fn_index = 3 + fd.sched_kind; sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2; Jakub