Hi!

The gomp_work_share struct is designed so that its first half is mostly read-only, set once, and its second half is meant for writes, with the boundary between the two halves 64-byte aligned.  The following patch uses the new gomp_aligned_alloc as an optimization, but only when it is not the fallback implementation.
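To illustrate the layout idea (this is only a sketch with made-up field names, not the real gomp_work_share definition), the write-heavy half can be forced onto its own cache line with an aligned attribute on its first field; whether the object as a whole then actually starts on a 64-byte boundary depends on the allocator, which is where gomp_aligned_alloc comes in:

#include <stddef.h>

/* Sketch only: a structure split into a read-mostly half and a
   write-heavy half.  The aligned attribute on the first written field
   places it at a 64-byte offset within the struct and pads the struct
   size to a multiple of 64, so consecutive array elements keep the
   same split.  */
struct split_ws
{
  /* Read-mostly part: set once when the work share is initialized.  */
  long start;
  long end;
  long incr;

  /* Write-heavy part: updated concurrently while iterations are
     handed out.  */
  long next __attribute__((aligned (64)));
  unsigned completed;
};

/* The boundary between the two halves is 64-byte aligned within the
   struct; the allocation itself must also be 64-byte aligned for the
   halves to land on distinct cache lines, hence gomp_aligned_alloc.  */
_Static_assert (offsetof (struct split_ws, next) % 64 == 0,
		"write-heavy half not on a 64-byte boundary");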
Tested on x86_64-linux, committed to trunk.

2018-10-26  Jakub Jelinek  <ja...@redhat.com>

	* libgomp.h (GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC): Define unless
	gomp_aligned_alloc uses fallback implementation.
	* alloc.c (NEED_SPECIAL_GOMP_ALIGNED_FREE): Don't define.
	(gomp_aligned_free): Use !defined(GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC)
	instead of defined(NEED_SPECIAL_GOMP_ALIGNED_FREE).
	* work.c (alloc_work_share): Use gomp_aligned_alloc instead of
	gomp_malloc if GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC is defined.

--- libgomp/libgomp.h.jj	2018-10-25 12:01:49.673340585 +0200
+++ libgomp/libgomp.h	2018-10-26 18:09:34.626281156 +0200
@@ -86,6 +86,15 @@ enum memmodel
 
 /* alloc.c */
 
+#if defined(HAVE_ALIGNED_ALLOC) \
+    || defined(HAVE__ALIGNED_MALLOC) \
+    || defined(HAVE_POSIX_MEMALIGN) \
+    || defined(HAVE_MEMALIGN)
+/* Defined if gomp_aligned_alloc doesn't use fallback version
+   and free can be used instead of gomp_aligned_free.  */
+#define GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC 1
+#endif
+
 extern void *gomp_malloc (size_t) __attribute__((malloc));
 extern void *gomp_malloc_cleared (size_t) __attribute__((malloc));
 extern void *gomp_realloc (void *, size_t);
--- libgomp/alloc.c.jj	2018-09-27 15:53:01.635671568 +0200
+++ libgomp/alloc.c	2018-10-26 18:10:36.745266239 +0200
@@ -87,7 +87,6 @@ gomp_aligned_alloc (size_t al, size_t si
 	  ((void **) ap)[-1] = p;
 	  ret = ap;
 	}
-#define NEED_SPECIAL_GOMP_ALIGNED_FREE
     }
 #endif
   if (ret == NULL)
@@ -98,10 +97,10 @@ gomp_aligned_alloc (size_t al, size_t si
 void
 gomp_aligned_free (void *ptr)
 {
-#ifdef NEED_SPECIAL_GOMP_ALIGNED_FREE
+#ifdef GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC
+  free (ptr);
+#else
   if (ptr)
     free (((void **) ptr)[-1]);
-#else
-  free (ptr);
 #endif
 }
--- libgomp/work.c.jj	2018-04-30 13:21:06.574866351 +0200
+++ libgomp/work.c	2018-10-26 18:12:02.324868021 +0200
@@ -76,7 +76,15 @@ alloc_work_share (struct gomp_team *team
 #endif
   team->work_share_chunk *= 2;
+  /* Allocating gomp_work_share structures aligned is just an
+     optimization, don't do it when using the fallback method.  */
+#ifdef GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC
+  ws = gomp_aligned_alloc (__alignof (struct gomp_work_share),
+			   team->work_share_chunk
+			   * sizeof (struct gomp_work_share));
+#else
   ws = gomp_malloc (team->work_share_chunk
 		    * sizeof (struct gomp_work_share));
+#endif
   ws->next_alloc = team->work_shares[0].next_alloc;
   team->work_shares[0].next_alloc = ws;
   team->work_share_list_alloc = &ws[1];

	Jakub
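For completeness, here is a standalone sketch (my own illustration, not a copy of alloc.c; the rounding expression is an assumption about how the fallback computes the aligned pointer) of the malloc-based scheme that GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC is meant to rule out: the block is over-allocated, the returned pointer is rounded up to the requested alignment, and the original malloc pointer is stashed just below it, which is why gomp_aligned_free has to look at ((void **) ptr)[-1] on that path and why plain free cannot be used there.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Illustration of the fallback scheme: over-allocate, round the
   pointer up to the requested alignment, and stash the original
   malloc result just below the aligned block.  al must be a power of
   two and at least sizeof (void *); since malloc returns at least
   pointer-aligned memory, there is always room for the stashed
   pointer between p and ap.  */
static void *
fallback_aligned_alloc (size_t al, size_t size)
{
  void *p = malloc (size + al);
  if (p == NULL)
    return NULL;
  void *ap = (void *) (((uintptr_t) p + al) & -(uintptr_t) al);
  ((void **) ap)[-1] = p;
  return ap;
}

static void
fallback_aligned_free (void *ptr)
{
  if (ptr)
    free (((void **) ptr)[-1]);	/* free what malloc really returned */
}

int
main (void)
{
  void *p = fallback_aligned_alloc (64, 128);
  if (p)
    printf ("%p is 64-byte aligned: %d\n", p,
	    (int) ((uintptr_t) p % 64 == 0));
  fallback_aligned_free (p);
  return 0;
}

With any of the native allocators the pointer handed back is the one the allocator itself returned, so plain free suffices; that is exactly the distinction the new macro captures.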