From: Matthew Malcomson <[email protected]>
Apologies for the re-send: There is a flaky bug me and my collegues are
having w.r.t. emails having incorrect headers and getting rejected from
gcc-patches mailing list.
Re-sending including Cc's to target maintainers.
-------- >8 ----------------------- 8< -----------------
In PR122314 we noticed that our implementation of a barrier could
execute tasks from the next "Task scheduling" region. This was because
of a race condition where a barrier could be "completed", and some
thread raced ahead to schedule another task on the "next" barrier all
before some other thread checks for a bit on the generation number to
tell if there is a task pending.
The solution provided here is to check whether the generation number has
"incremented" past the state that this barrier was entered with. As it
happens the `state` variable already provided to
`gomp_barrier_handle_tasks` is enough for the targets to tell whether
the current global generation has incremented from the existing one.
This requires some changes in the two loops in bar.c that are waiting on
tasks being available. These loops now need to check for "generation
has incremented" rather than "generation is identical to one increment
forward". Without such an adjustment of the check a thread that is
refusing to execute tasks because they have been scheduled for the next
barrier will not continue into the next region until some other thread
has completed the task (and removed the BAR_TASK_PENDING flag).
This problem could be seen by a hang in testcases like
task-reduction-13.c.
--------------
While I've built each of the gcn/nvptx/rtems targets that I've changed,
I've not ran the testsuite with them. I would appreciate help from the
relevant target maintainers with this.
Testing done:
- Bootstrap & regtest on aarch64 and x86_64.
- With & without _LIBGOMP_CHECKING_.
- Testsuite with & without OMP_WAIT_POLICY=passive
- Cross compilation & regtest on arm.
- TSAN done on this as part of all my upstream patches.
libgomp/ChangeLog:
PR libgomp/122314
* config/gcn/bar.c (gomp_team_barrier_wait_end): Use
gomp_barrier_state_is_incremented.
(gomp_team_barrier_wait_cancel_end): Likewise.
* config/gcn/bar.h (gomp_barrier_state_is_incremented,
gomp_barrier_has_completed): New.
* config/linux/bar.c (gomp_team_barrier_wait_end): Use
gomp_barrier_state_is_incremented.
(gomp_team_barrier_wait_cancel_end): Likewise.
* config/linux/bar.h (gomp_barrier_state_is_incremented,
gomp_barrier_has_completed): New.
* config/nvptx/bar.h (gomp_barrier_state_is_incremented,
gomp_barrier_has_completed): New.
* config/posix/bar.c (gomp_team_barrier_wait_end): Use
gomp_barrier_state_is_incremented.
(gomp_team_barrier_wait_cancel_end): Likewise
* config/posix/bar.h (gomp_barrier_state_is_incremented,
gomp_barrier_has_completed): New.
* config/rtems/bar.h (gomp_barrier_state_is_incremented,
gomp_barrier_has_completed): New.
* task.c (gomp_barrier_handle_tasks): Use
gomp_barrier_has_completed.
* testsuite/libgomp.c/pr122314.c: New test.
Signed-off-by: Matthew Malcomson <[email protected]>
---
libgomp/config/gcn/bar.c | 4 +--
libgomp/config/gcn/bar.h | 16 ++++++++++++
libgomp/config/linux/bar.c | 4 +--
libgomp/config/linux/bar.h | 16 ++++++++++++
libgomp/config/nvptx/bar.h | 16 ++++++++++++
libgomp/config/posix/bar.c | 4 +--
libgomp/config/posix/bar.h | 16 ++++++++++++
libgomp/config/rtems/bar.h | 16 ++++++++++++
libgomp/task.c | 17 ++++++++++++
libgomp/testsuite/libgomp.c/pr122314.c | 36 ++++++++++++++++++++++++++
10 files changed, 139 insertions(+), 6 deletions(-)
create mode 100644 libgomp/testsuite/libgomp.c/pr122314.c
diff --git a/libgomp/config/gcn/bar.c b/libgomp/config/gcn/bar.c
index 57ac648477e..05daa8fcbbc 100644
--- a/libgomp/config/gcn/bar.c
+++ b/libgomp/config/gcn/bar.c
@@ -128,7 +128,7 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar,
gomp_barrier_state_t state)
gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
}
}
- while (gen != state + BAR_INCR);
+ while (!gomp_barrier_state_is_incremented (gen, state));
}
void
@@ -207,7 +207,7 @@ gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
gen = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
}
}
- while (gen != state + BAR_INCR);
+ while (!gomp_barrier_state_is_incremented (gen, state));
return false;
}
diff --git a/libgomp/config/gcn/bar.h b/libgomp/config/gcn/bar.h
index b62d3af6dee..8fdd6465822 100644
--- a/libgomp/config/gcn/bar.h
+++ b/libgomp/config/gcn/bar.h
@@ -165,4 +165,20 @@ gomp_team_barrier_done (gomp_barrier_t *bar,
gomp_barrier_state_t state)
bar->generation = (state & -BAR_INCR) + BAR_INCR;
}
+static inline bool
+gomp_barrier_state_is_incremented (gomp_barrier_state_t gen,
+ gomp_barrier_state_t state)
+{
+ unsigned next_state = (state & -BAR_INCR) + BAR_INCR;
+ return next_state > state ? gen >= next_state : gen < state;
+}
+
+static inline bool
+gomp_barrier_has_completed (gomp_barrier_state_t state, gomp_barrier_t *bar)
+{
+ /* Handling overflow in the generation. The "next" state could be less than
+ or greater than the current one. */
+ return gomp_barrier_state_is_incremented (bar->generation, state);
+}
+
#endif /* GOMP_BARRIER_H */
diff --git a/libgomp/config/linux/bar.c b/libgomp/config/linux/bar.c
index 1f4f9e4e6f5..2c9d1ce894d 100644
--- a/libgomp/config/linux/bar.c
+++ b/libgomp/config/linux/bar.c
@@ -118,7 +118,7 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar,
gomp_barrier_state_t state)
}
generation |= gen & BAR_WAITING_FOR_TASK;
}
- while (gen != state + BAR_INCR);
+ while (!gomp_barrier_state_is_incremented (gen, state));
}
void
@@ -185,7 +185,7 @@ gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
}
generation |= gen & BAR_WAITING_FOR_TASK;
}
- while (gen != state + BAR_INCR);
+ while (!gomp_barrier_state_is_incremented (gen, state));
return false;
}
diff --git a/libgomp/config/linux/bar.h b/libgomp/config/linux/bar.h
index 6bbaa9603ba..9fb514526c4 100644
--- a/libgomp/config/linux/bar.h
+++ b/libgomp/config/linux/bar.h
@@ -165,4 +165,20 @@ gomp_team_barrier_done (gomp_barrier_t *bar,
gomp_barrier_state_t state)
bar->generation = (state & -BAR_INCR) + BAR_INCR;
}
+static inline bool
+gomp_barrier_state_is_incremented (gomp_barrier_state_t gen,
+ gomp_barrier_state_t state)
+{
+ unsigned next_state = (state & -BAR_INCR) + BAR_INCR;
+ return next_state > state ? gen >= next_state : gen < state;
+}
+
+static inline bool
+gomp_barrier_has_completed (gomp_barrier_state_t state, gomp_barrier_t *bar)
+{
+ /* Handling overflow in the generation. The "next" state could be less than
+ or greater than the current one. */
+ return gomp_barrier_state_is_incremented (bar->generation, state);
+}
+
#endif /* GOMP_BARRIER_H */
diff --git a/libgomp/config/nvptx/bar.h b/libgomp/config/nvptx/bar.h
index a84b746978d..353795e9a59 100644
--- a/libgomp/config/nvptx/bar.h
+++ b/libgomp/config/nvptx/bar.h
@@ -169,4 +169,20 @@ gomp_team_barrier_done (gomp_barrier_t *bar,
gomp_barrier_state_t state)
bar->generation = (state & -BAR_INCR) + BAR_INCR;
}
+static inline bool
+gomp_barrier_state_is_incremented (gomp_barrier_state_t gen,
+ gomp_barrier_state_t state)
+{
+ unsigned next_state = (state & -BAR_INCR) + BAR_INCR;
+ return next_state > state ? gen >= next_state : gen < state;
+}
+
+static inline bool
+gomp_barrier_has_completed (gomp_barrier_state_t state, gomp_barrier_t *bar)
+{
+ /* Handling overflow in the generation. The "next" state could be less than
+ or greater than the current one. */
+ return gomp_barrier_state_is_incremented (bar->generation, state);
+}
+
#endif /* GOMP_BARRIER_H */
diff --git a/libgomp/config/posix/bar.c b/libgomp/config/posix/bar.c
index 3757dfb8fff..d728c9c0afe 100644
--- a/libgomp/config/posix/bar.c
+++ b/libgomp/config/posix/bar.c
@@ -156,7 +156,7 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar,
gomp_barrier_state_t state)
gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
}
}
- while (gen != state + BAR_INCR);
+ while (!gomp_barrier_state_is_incremented (gen, state));
#ifdef HAVE_SYNC_BUILTINS
n = __sync_add_and_fetch (&bar->arrived, -1);
@@ -228,7 +228,7 @@ gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
break;
}
}
- while (gen != state + BAR_INCR);
+ while (!gomp_barrier_state_is_incremented (gen, state));
#ifdef HAVE_SYNC_BUILTINS
n = __sync_add_and_fetch (&bar->arrived, -1);
diff --git a/libgomp/config/posix/bar.h b/libgomp/config/posix/bar.h
index c88f7588be4..a48e99488bb 100644
--- a/libgomp/config/posix/bar.h
+++ b/libgomp/config/posix/bar.h
@@ -155,4 +155,20 @@ gomp_team_barrier_done (gomp_barrier_t *bar,
gomp_barrier_state_t state)
bar->generation = (state & -BAR_INCR) + BAR_INCR;
}
+static inline bool
+gomp_barrier_state_is_incremented (gomp_barrier_state_t gen,
+ gomp_barrier_state_t state)
+{
+ unsigned next_state = (state & -BAR_INCR) + BAR_INCR;
+ return next_state > state ? gen >= next_state : gen < state;
+}
+
+static inline bool
+gomp_barrier_has_completed (gomp_barrier_state_t state, gomp_barrier_t *bar)
+{
+ /* Handling overflow in the generation. The "next" state could be less than
+ or greater than the current one. */
+ return gomp_barrier_state_is_incremented (bar->generation, state);
+}
+
#endif /* GOMP_BARRIER_H */
diff --git a/libgomp/config/rtems/bar.h b/libgomp/config/rtems/bar.h
index 0d10efa28b1..ea39679e36d 100644
--- a/libgomp/config/rtems/bar.h
+++ b/libgomp/config/rtems/bar.h
@@ -167,4 +167,20 @@ gomp_team_barrier_done (gomp_barrier_t *bar,
gomp_barrier_state_t state)
bar->generation = (state & -BAR_INCR) + BAR_INCR;
}
+static inline bool
+gomp_barrier_state_is_incremented (gomp_barrier_state_t gen,
+ gomp_barrier_state_t state)
+{
+ unsigned next_state = (state & -BAR_INCR) + BAR_INCR;
+ return next_state > state ? gen >= next_state : gen < state;
+}
+
+static inline bool
+gomp_barrier_has_completed (gomp_barrier_state_t state, gomp_barrier_t *bar)
+{
+ /* Handling overflow in the generation. The "next" state could be less than
+ or greater than the current one. */
+ return gomp_barrier_state_is_incremented (bar->generation, state);
+}
+
#endif /* GOMP_BARRIER_H */
diff --git a/libgomp/task.c b/libgomp/task.c
index 88e23aab816..b7f4b8220c2 100644
--- a/libgomp/task.c
+++ b/libgomp/task.c
@@ -1559,6 +1559,23 @@ gomp_barrier_handle_tasks (gomp_barrier_state_t state)
int do_wake = 0;
gomp_mutex_lock (&team->task_lock);
+ /* Avoid running tasks from next task scheduling region (PR122314).
+ N.b. we check that `team->task_count != 0` in order to avoid the
+ non-atomic read of `bar->generation` "conflicting" (in the C standard
+ sense) with the atomic write of `bar->generation` in
+ `gomp_team_barrier_wait_end`. That conflict would otherwise be a
+ data-race and hence UB. One alternate approach could have been to
+ atomically load `bar->generation` in `gomp_barrier_has_completed`.
+
+ When `task_count == 0` we're not going to perform tasks anyway, so the
+ problem of PR122314 is naturally avoided. */
+ if (team->task_count != 0
+ && gomp_barrier_has_completed (state, &team->barrier))
+ {
+ gomp_mutex_unlock (&team->task_lock);
+ return;
+ }
+
if (gomp_barrier_last_thread (state))
{
if (team->task_count == 0)
diff --git a/libgomp/testsuite/libgomp.c/pr122314.c
b/libgomp/testsuite/libgomp.c/pr122314.c
new file mode 100644
index 00000000000..60d06f5ea57
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/pr122314.c
@@ -0,0 +1,36 @@
+#include <omp.h>
+
+void abort ();
+
+#define NUM_THREADS 8
+unsigned full_data[NUM_THREADS] = {0};
+void
+test ()
+{
+#pragma omp parallel num_threads(8)
+ {
+#pragma omp barrier
+ /* Initialise so that if tasks are performed on the previous barrier their
+ updates get overridden. This is a key behaviour of this test. */
+ full_data[omp_get_thread_num ()] = 0;
+#pragma omp for
+ for (int i = 0; i < 10; i++)
+#pragma omp task
+ {
+ full_data[omp_get_thread_num ()] += 1;
+ }
+ }
+
+ unsigned total = 0;
+ for (int i = 0; i < NUM_THREADS; i++)
+ total += full_data[i];
+
+ if (total != 10)
+ abort ();
+}
+
+int
+main ()
+{
+ test ();
+}
--
2.43.0