Hi! In the implicit parallel region we have a NULL team and often a NULL task too. For task reductions we need both to be non-NULL, so this patch creates such a team in that case, like we already do for target nowait.
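For reference, the kind of code that ends up on this path looks roughly like the sketch below (illustrative only, not part of the patch; the function name orphaned_sum is made up): a taskgroup with a task_reduction clause used outside of any explicit parallel region, so the runtime sees a NULL team when GOMP_taskgroup_reduction_register runs.

/* Illustrative sketch, not from the patch: an orphaned taskgroup
   reduction, i.e. no enclosing explicit parallel region, which is
   the case where the implicit team now gets created.  */
int
orphaned_sum (int *a, int n)
{
  int s = 0;
  #pragma omp taskgroup task_reduction (+: s)
  for (int i = 0; i < n; i++)
    #pragma omp task in_reduction (+: s)
    s += a[i];
  return s;
}

The new testcase below exercises the same situation, plus nested taskgroups and the #pragma omp parallel / single variant.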
Tested on x86_64-linux, committed to gomp-5_0-branch.

2018-10-08  Jakub Jelinek  <ja...@redhat.com>

	* task.c (GOMP_taskgroup_reduction_register): If team is NULL,
	create implicit team with 1 thread and call GOMP_taskgroup_start
	again.  Don't mix declarations with statements.
	* team.c (gomp_team_end): Determine nesting by thr->ts.level != 0
	rather than thr->ts.team != NULL.
	* testsuite/libgomp.c-c++-common/task-reduction-4.c: New test.

--- libgomp/task.c.jj	2018-10-08 12:20:53.712125100 +0200
+++ libgomp/task.c	2018-10-08 18:29:51.410292170 +0200
@@ -1968,11 +1968,45 @@ GOMP_taskgroup_reduction_register (uintp
 {
   struct gomp_thread *thr = gomp_thread ();
   struct gomp_team *team = thr->ts.team;
-  struct gomp_task *task = thr->task;
-  unsigned nthreads = team ? team->nthreads : 1;
+  struct gomp_task *task;
+  if (__builtin_expect (team == NULL, 0))
+    {
+      /* The task reduction code needs a team and task, so for
+	 orphaned taskgroups just create the implicit team.  */
+      struct gomp_task_icv *icv;
+      team = gomp_new_team (1);
+      task = thr->task;
+      icv = task ? &task->icv : &gomp_global_icv;
+      team->prev_ts = thr->ts;
+      thr->ts.team = team;
+      thr->ts.team_id = 0;
+      thr->ts.work_share = &team->work_shares[0];
+      thr->ts.last_work_share = NULL;
+#ifdef HAVE_SYNC_BUILTINS
+      thr->ts.single_count = 0;
+#endif
+      thr->ts.static_trip = 0;
+      thr->task = &team->implicit_task[0];
+      gomp_init_task (thr->task, NULL, icv);
+      if (task)
+	{
+	  thr->task = task;
+	  gomp_end_task ();
+	  free (task);
+	  thr->task = &team->implicit_task[0];
+	}
+#ifdef LIBGOMP_USE_PTHREADS
+      else
+	pthread_setspecific (gomp_thread_destructor, thr);
+#endif
+      GOMP_taskgroup_start ();
+    }
+  unsigned nthreads = team->nthreads;
   size_t total_cnt = 0;
-  uintptr_t *d = data;
-  uintptr_t *old = task->taskgroup->reductions;
+  uintptr_t *d = data, *old;
+  struct htab *old_htab = NULL, *new_htab;
+  task = thr->task;
+  old = task->taskgroup->reductions;
   do
     {
       size_t sz = d[1] * nthreads;
@@ -1992,13 +2026,12 @@ GOMP_taskgroup_reduction_register (uintp
       d = (uintptr_t *) d[4];
     }
   while (1);
-  struct htab *old_htab = NULL;
   if (old && old[5])
     {
       old_htab = (struct htab *) old[5];
       total_cnt += htab_elements (old_htab);
     }
-  struct htab *new_htab = htab_create (total_cnt);
+  new_htab = htab_create (total_cnt);
   if (old_htab)
     {
       /* Copy old hash table, like in htab_expand.  */
--- libgomp/team.c.jj	2018-07-27 12:57:16.000000000 +0200
+++ libgomp/team.c	2018-10-08 19:05:58.135130888 +0200
@@ -945,7 +945,7 @@ gomp_team_end (void)
   gomp_end_task ();
   thr->ts = team->prev_ts;
 
-  if (__builtin_expect (thr->ts.team != NULL, 0))
+  if (__builtin_expect (thr->ts.level != 0, 0))
     {
 #ifdef HAVE_SYNC_BUILTINS
       __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
--- libgomp/testsuite/libgomp.c-c++-common/task-reduction-4.c.jj	2018-10-08 18:35:52.181268647 +0200
+++ libgomp/testsuite/libgomp.c-c++-common/task-reduction-4.c	2018-10-08 18:35:52.181268647 +0200
@@ -0,0 +1,70 @@
+extern
+#ifdef __cplusplus
+"C"
+#endif
+void abort (void);
+
+void
+bar (long long int *p)
+{
+  p[0] *= 2;
+  #pragma omp task in_reduction (*: p[0])
+  p[0] *= 3;
+}
+
+void
+foo (long long int *p, long long int *q)
+{
+  #pragma omp taskgroup task_reduction (*: p[0])
+  {
+    #pragma omp task in_reduction (*: p[0])
+    bar (p);
+    #pragma omp task in_reduction (*: p[0])
+    bar (p);
+    bar (p);
+    #pragma omp taskgroup task_reduction (*: q[0])
+    {
+      #pragma omp task in_reduction (*: q[0])
+      bar (q);
+      #pragma omp task in_reduction (*: q[0])
+      bar (q);
+      #pragma omp task in_reduction (*: q[0])
+      bar (q);
+      bar (q);
+      #pragma omp task in_reduction (*: p[0])
+      {
+	#pragma omp taskgroup task_reduction (*: p[0])
+	{
+	  #pragma omp task in_reduction (*: p[0])
+	  bar (p);
+	  p[0] *= 2;
+	  #pragma omp task in_reduction (*: p[0])
+	  bar (p);
+	}
+      }
+    }
+  }
+}
+
+int
+main ()
+{
+  long long int p = 1LL, q = 1LL;
+  foo (&p, &q);
+  if (p != 6LL * 6LL * 6LL * 6LL * 6LL * 2LL || q != 6LL * 6LL * 6LL * 6LL)
+    abort ();
+  p = 1LL;
+  q = 1LL;
+  #pragma omp taskgroup
+  foo (&p, &q);
+  if (p != 6LL * 6LL * 6LL * 6LL * 6LL * 2LL || q != 6LL * 6LL * 6LL * 6LL)
+    abort ();
+  p = 1LL;
+  q = 1LL;
+  #pragma omp parallel
+  #pragma omp single
+  foo (&p, &q);
+  if (p != 6LL * 6LL * 6LL * 6LL * 6LL * 2LL || q != 6LL * 6LL * 6LL * 6LL)
+    abort ();
+  return 0;
+}

	Jakub