Hi! struct gomp_team has a struct gomp_work_share array inside of it. If that latter structure has a 64-byte aligned member in the middle, the whole struct gomp_team needs to be 64-byte aligned as well, but we weren't allocating it using gomp_aligned_alloc.
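For illustration only (not part of the patch), here is a minimal standalone sketch of the alignment issue; struct ws and struct team below are just stand-ins for gomp_work_share and gomp_team:

#include <stdio.h>
#include <stdalign.h>

/* Stand-in for gomp_work_share: one member forces 64-byte alignment.  */
struct ws { _Alignas (64) int lock; };
/* Stand-in for gomp_team: embedding struct ws propagates that alignment.  */
struct team { int n; struct ws w[2]; };

int
main (void)
{
  /* Both print 64: the whole aggregate inherits the member's alignment,
     so plain malloc (which only guarantees max_align_t, typically 16)
     isn't enough; an aligned allocator has to be used instead.  */
  printf ("%zu %zu\n", alignof (struct ws), alignof (struct team));
  return 0;
}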
This patch fixes that, except that on gcn team_malloc is special, so at least for now I've instead decided to avoid the aligned member and use the padding instead on gcn. Bootstrapped/regtested on x86_64-linux and i686-linux, and in the PR Rainer mentioned testing on Solaris; committed to trunk.

2021-11-18  Jakub Jelinek  <ja...@redhat.com>

	PR libgomp/102838
	* libgomp.h (GOMP_USE_ALIGNED_WORK_SHARES): Define if
	GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC is defined and __AMDGCN__
	is not.
	(struct gomp_work_share): Use GOMP_USE_ALIGNED_WORK_SHARES
	instead of GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC.
	* work.c (alloc_work_share, gomp_work_share_start): Likewise.
	* team.c (gomp_new_team): If GOMP_USE_ALIGNED_WORK_SHARES, use
	gomp_aligned_alloc instead of team_malloc.

--- libgomp/libgomp.h.jj	2021-11-11 14:35:37.699347142 +0100
+++ libgomp/libgomp.h	2021-11-16 11:57:26.657271188 +0100
@@ -95,6 +95,10 @@ enum memmodel
 #define GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC 1
 #endif
 
+#if defined(GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC) && !defined(__AMDGCN__)
+#define GOMP_USE_ALIGNED_WORK_SHARES 1
+#endif
+
 extern void *gomp_malloc (size_t) __attribute__((malloc));
 extern void *gomp_malloc_cleared (size_t) __attribute__((malloc));
 extern void *gomp_realloc (void *, size_t);
@@ -348,7 +352,7 @@ struct gomp_work_share
      are in a different cache line.  */
 
   /* This lock protects the update of the following members.  */
-#ifdef GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC
+#ifdef GOMP_USE_ALIGNED_WORK_SHARES
   gomp_mutex_t lock __attribute__((aligned (64)));
 #else
   char pad[64 - offsetof (struct gomp_work_share_1st_cacheline, pad)];
--- libgomp/work.c.jj	2021-10-20 09:34:47.027331304 +0200
+++ libgomp/work.c	2021-11-16 11:58:10.136662003 +0100
@@ -78,7 +78,7 @@ alloc_work_share (struct gomp_team *team
       team->work_share_chunk *= 2;
       /* Allocating gomp_work_share structures aligned is just an
	 optimization, don't do it when using the fallback method.  */
-#ifdef GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC
+#ifdef GOMP_USE_ALIGNED_WORK_SHARES
       ws = gomp_aligned_alloc (__alignof (struct gomp_work_share),
			       team->work_share_chunk
			       * sizeof (struct gomp_work_share));
@@ -191,7 +191,7 @@ gomp_work_share_start (size_t ordered)
   /* Work sharing constructs can be orphaned.  */
   if (team == NULL)
     {
-#ifdef GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC
+#ifdef GOMP_USE_ALIGNED_WORK_SHARES
       ws = gomp_aligned_alloc (__alignof (struct gomp_work_share),
			       sizeof (*ws));
 #else
--- libgomp/team.c.jj	2021-11-11 14:35:37.699347142 +0100
+++ libgomp/team.c	2021-11-16 11:59:46.401311440 +0100
@@ -177,7 +177,12 @@ gomp_new_team (unsigned nthreads)
 {
   size_t extra = sizeof (team->ordered_release[0])
		 + sizeof (team->implicit_task[0]);
+#ifdef GOMP_USE_ALIGNED_WORK_SHARES
+  team = gomp_aligned_alloc (__alignof (struct gomp_team),
+			     sizeof (*team) + nthreads * extra);
+#else
   team = team_malloc (sizeof (*team) + nthreads * extra);
+#endif
 
 #ifndef HAVE_SYNC_BUILTINS
   gomp_mutex_init (&team->work_share_list_free_lock);

	Jakub