Ensure that GOMP_MAX_VF does the right thing for explicit schedules when offloading is enabled (that is, when "target" directives are present), and that it is inactive otherwise.
This requires enabling the offload-dump scanning features previously only used in the libgomp testsuite. The automake scheme used there isn't a good fit here, so we probe the known devices manually. gcc/testsuite/ChangeLog: * gcc.dg/gomp/gomp.exp: Load scanoffload.exp and scanoffloadtree.exp. Set offload_targets when available. * gcc.dg/gomp/max_vf-1.c: New test. * gcc.dg/gomp/max_vf-2.c: New test. * gcc.dg/gomp/max_vf-3.c: New test. --- gcc/testsuite/gcc.dg/gomp/gomp.exp | 14 +++++++++ gcc/testsuite/gcc.dg/gomp/max_vf-1.c | 37 ++++++++++++++++++++++ gcc/testsuite/gcc.dg/gomp/max_vf-2.c | 47 ++++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/gomp/max_vf-3.c | 21 +++++++++++++ 4 files changed, 119 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/gomp/max_vf-1.c create mode 100644 gcc/testsuite/gcc.dg/gomp/max_vf-2.c create mode 100644 gcc/testsuite/gcc.dg/gomp/max_vf-3.c diff --git a/gcc/testsuite/gcc.dg/gomp/gomp.exp b/gcc/testsuite/gcc.dg/gomp/gomp.exp index a563cf90c1b..9414fb092d9 100644 --- a/gcc/testsuite/gcc.dg/gomp/gomp.exp +++ b/gcc/testsuite/gcc.dg/gomp/gomp.exp @@ -20,11 +20,25 @@ # Load support procs. load_lib gcc-dg.exp +load_lib scanoffload.exp +load_lib scanoffloadtree.exp if ![check_effective_target_fopenmp] { return } +global offload_targets +if { ![info exists offload_targets] } { + set offload_targets_list {} + if [check_effective_target_offload_gcn] { + lappend offload_targets_list amdgcn-amdhsa + } + if [check_effective_target_offload_nvptx] { + lappend offload_targets_list nvptx-none + } + set offload_targets [join $offload_targets_list ","] +} + # Initialize `dg'. dg-init diff --git a/gcc/testsuite/gcc.dg/gomp/max_vf-1.c b/gcc/testsuite/gcc.dg/gomp/max_vf-1.c new file mode 100644 index 00000000000..0513aae226c --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/max_vf-1.c @@ -0,0 +1,37 @@ +/* Test that omp parallel simd schedule uses the correct max_vf for the + host system, when no target directives are present. 
*/ + +/* { dg-do compile } */ +/* { dg-options "-fopenmp -O2 -fdump-tree-ompexp" } */ + +/* Fix a max_vf size so we can scan for it. +{ dg-additional-options "-msse2" { target { x86_64-*-* i?86-*-* } } } */ + +#define N 1024 +int a[N], b[N], c[N]; + +void +f2 (void) +{ + int i; + #pragma omp parallel for simd schedule (simd: static, 7) + for (i = 0; i < N; i++) + a[i] = b[i] + c[i]; +} + +/* Make sure the max_vf is inlined as a number. + Hopefully there are no unrelated uses of these numbers ... +{ dg-final { scan-tree-dump-times {\* 16} 2 "ompexp" { target { x86_64-*-* } } } } +{ dg-final { scan-tree-dump-times {\+ 16} 1 "ompexp" { target { x86_64-*-* } } } } */ + +void +f3 (int *a, int *b, int *c) +{ + int i; + #pragma omp parallel for simd schedule (simd : dynamic, 7) + for (i = 0; i < N; i++) + a[i] = b[i] + c[i]; +} + +/* Make sure the max_vf is inlined as a number. +{ dg-final { scan-tree-dump-times {__builtin_GOMP_parallel_loop_nonmonotonic_dynamic \(.*, 16, 0\);} 1 "ompexp" { target { x86_64-*-* } } } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/max_vf-2.c b/gcc/testsuite/gcc.dg/gomp/max_vf-2.c new file mode 100644 index 00000000000..be900c565a3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/max_vf-2.c @@ -0,0 +1,47 @@ +/* Test that omp parallel simd schedule uses the correct max_vf for the + host system, when target directives are present. */ + +/* { dg-require-effective-target offloading_enabled } */ + +/* { dg-do link } */ +/* { dg-options "-fopenmp -O2 -fdump-tree-ompexp -foffload=-fdump-tree-optimized" } */ + +/* Fix a max_vf size so we can scan for it. +{ dg-additional-options "-msse2" { target { x86_64-*-* i?86-*-* } } } */ + +#define N 1024 +int a[N], b[N], c[N]; + +/* Test both static schedules and inline target directives. */ +void +f2 (void) +{ + int i; + #pragma omp target parallel for simd schedule (simd: static, 7) + for (i = 0; i < N; i++) + a[i] = b[i] + c[i]; +} + +/* Test both dynamic schedules and declare target functions. 
*/ +#pragma omp declare target +void +f3 (int *a, int *b, int *c) +{ + int i; + #pragma omp parallel for simd schedule (simd : dynamic, 7) + for (i = 0; i < N; i++) + a[i] = b[i] + c[i]; +} +#pragma omp end declare target + +/* Make sure that the max_vf is used as an IFN. +{ dg-final { scan-tree-dump-times {GOMP_MAX_VF} 2 "ompexp" { target { x86_64-*-* i?86-*-* } } } } */ + +/* Make sure the max_vf is passed as a temporary variable. +{ dg-final { scan-tree-dump-times {__builtin_GOMP_parallel_loop_nonmonotonic_dynamic \(.*, D\.[0-9]*, 0\);} 1 "ompexp" { target { x86_64-*-* i?86-*-* } } } } */ + +/* Test SIMD offload devices +{ dg-final { scan-offload-tree-dump-times {__builtin_GOMP_parallel_loop_nonmonotonic_dynamic \(.*, 64, 0\);} 1 "optimized" { target { offload_gcn } } } } +{ dg-final { scan-offload-tree-dump-times {__builtin_GOMP_parallel_loop_nonmonotonic_dynamic \(.*, 7, 0\);} 1 "optimized" { target { offload_nvptx } } } } */ + +int main() {} diff --git a/gcc/testsuite/gcc.dg/gomp/max_vf-3.c b/gcc/testsuite/gcc.dg/gomp/max_vf-3.c new file mode 100644 index 00000000000..91744c309df --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/max_vf-3.c @@ -0,0 +1,21 @@ +/* Ensure that the default safelen is set correctly for the larger of the host + and offload device, to prevent defeating the vectorizer. */ + +/* { dg-require-effective-target offloading_enabled } */ + +/* { dg-do link } */ +/* { dg-options "-fopenmp -O2 -fdump-tree-omplower" } */ + +int f(float *a, float *b, int n) +{ + float sum = 0; + #pragma omp target teams distribute parallel for simd map(tofrom: sum) reduction(+:sum) + for (int i = 0; i < n; i++) + sum += a[i] * b[i]; + return sum; +} + +/* Make sure that the max_vf used is suitable for the offload device. +{ dg-final { scan-tree-dump-times {omp simd safelen\(64\)} 1 "omplower" { target { offload_gcn } } } } */ + +int main() {} -- 2.46.0