On Fri, Jul 1, 2016 at 2:31 PM, H.J. Lu <hjl.to...@gmail.com> wrote: > On Thu, Jun 30, 2016 at 7:51 AM, Yuri Rumyantsev <ysrum...@gmail.com> wrote: >> Richard, >> >> Could you please review an additional simple fix for 70729 - we need to >> nullify the safelen field of loops containing simduid intrinsics like >> GOMP_SIMD_LANE (introduced e.g. for private variables). I checked >> that this fix cures the regression which was missed by me since an AVX2 >> machine is required for libgomp.fortran/examples-4/simd-2.f90. >> >> Regression testing and bootstrapping did not show any new failures. >> Is it OK for trunk? >> >> Patch: >> diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c >> index 2669813..9fbd183 100644 >> --- a/gcc/tree-vectorizer.c >> +++ b/gcc/tree-vectorizer.c >> @@ -204,6 +204,10 @@ adjust_simduid_builtins (hash_table<simduid_to_vf> >> *htab) >> gcc_assert (TREE_CODE (arg) == SSA_NAME); >> simduid_to_vf *p = NULL, data; >> data.simduid = DECL_UID (SSA_NAME_VAR (arg)); >> + /* Need to nullify loop safelen field since its value is not >> + valid after transformation. */ >> + if (bb->loop_father && bb->loop_father->safelen > 0) >> + bb->loop_father->safelen = 0; >> if (htab) >> { >> p = htab->find (&data); >> >> ChangeLog: >> 2016-06-30 Yuri Rumyantsev <ysrum...@gmail.com> >> >> PR tree-optimization/70729 >> * tree-vectorizer.c (adjust_simduid_builtins): Nullify safelen field >> of loop since it can be not valid after transformation. >> > > I still see > > FAIL: libgomp.fortran/simd3.f90 -O3 -fomit-frame-pointer > -funroll-loops -fpeel-loops -ftracer -finline-functions execution > test > FAIL: libgomp.fortran/simd3.f90 -O3 -g execution test > FAIL: libgomp.fortran/simd4.f90 -O3 -fomit-frame-pointer > -funroll-loops -fpeel-loops -ftracer -finline-functions execution > test > FAIL: libgomp.fortran/simd4.f90 -O3 -g execution test > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71734
Here are testcases: FAIL: libgomp.fortran/pr71734-1.f90 -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test FAIL: libgomp.fortran/pr71734-1.f90 -O3 -g execution test FAIL: libgomp.fortran/pr71734-2.f90 -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test FAIL: libgomp.fortran/pr71734-2.f90 -O3 -g execution test FAIL: libgomp.fortran/pr71734-1.f90 -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test FAIL: libgomp.fortran/pr71734-1.f90 -O3 -g execution test FAIL: libgomp.fortran/pr71734-2.f90 -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test FAIL: libgomp.fortran/pr71734-2.f90 -O3 -g execution test -- H.J.
From d120301ab113db895164289433fcc068611d3fba Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <hjl.to...@gmail.com> Date: Sun, 3 Jul 2016 08:54:24 -0700 Subject: [PATCH] Add tests for PR 71734 --- libgomp/testsuite/libgomp.fortran/pr71734-1.f90 | 110 ++++++++++++++++++++++++ libgomp/testsuite/libgomp.fortran/pr71734-2.f90 | 104 ++++++++++++++++++++++ 2 files changed, 214 insertions(+) create mode 100644 libgomp/testsuite/libgomp.fortran/pr71734-1.f90 create mode 100644 libgomp/testsuite/libgomp.fortran/pr71734-2.f90 diff --git a/libgomp/testsuite/libgomp.fortran/pr71734-1.f90 b/libgomp/testsuite/libgomp.fortran/pr71734-1.f90 new file mode 100644 index 0000000..a5617d7 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/pr71734-1.f90 @@ -0,0 +1,110 @@ +! { dg-do run { target avx_runtime } } +! { dg-additional-options "-msse2" } +! The same as simd3.f90, but compiled with -msse2. We run it only on +! AVX machine where simd3.f90 is compiled with -mavx. + + integer :: a(1024), b(1024), k, m, i, s, t + k = 4 + m = 2 + t = 1 + do i = 1, 1024 + a(i) = i - 513 + b(i) = modulo (i - 52, 39) + if (i.lt.52.and.b(i).ne.0) b(i) = b(i) - 39 + end do + s = foo (b) + do i = 1, 1024 + if (a(i).ne.((i - 513) * b(i))) call abort + if (i.lt.52.and.modulo (i - 52, 39).ne.0) then + if (b(i).ne.(modulo (i - 52, 39) - 39)) call abort + else + if (b(i).ne.(modulo (i - 52, 39))) call abort + end if + a(i) = i - 513 + end do + if (k.ne.(4 + 3 * 1024).or.s.ne.1596127) call abort + k = 4 + m = 2 + t = 1 + s = bar (b) + do i = 1, 1024 + if (a(i).ne.((i - 513) * b(i))) call abort + if (i.lt.52.and.modulo (i - 52, 39).ne.0) then + if (b(i).ne.(modulo (i - 52, 39) - 39)) call abort + else + if (b(i).ne.(modulo (i - 52, 39))) call abort + end if + a(i) = i - 513 + end do + if (k.ne.(4 + 3 * 1024).or.s.ne.1596127) call abort + k = 4 + m = 2 + t = 1 + s = baz (b) + do i = 1, 1024 + if (a(i).ne.((i - 513) * b(i))) call abort + if (i.lt.52.and.modulo (i - 52, 39).ne.0) then + if (b(i).ne.(modulo (i - 52, 39) 
- 39)) call abort + else + if (b(i).ne.(modulo (i - 52, 39))) call abort + end if + end do + if (k.ne.(4 + 3 * 1024).or.s.ne.1596127) call abort +contains + function foo (p) + integer :: p(1024), u, v, i, s, foo + s = 0 + !$omp parallel + !$omp do simd linear(k : m + 1) reduction(+: s) lastprivate(u, v) & + !$omp & schedule (static, 32) + do i = 1, 1024 + a(i) = a(i) * p(i) + u = p(i) + k + k = k + m + 1 + v = p(i) + k + s = s + p(i) + k + end do + !$omp end do simd + !$omp end parallel + if (i.ne.1025) call abort + if (u.ne.(36 + 4 + 3 * 1023).or.v.ne.(36 + 4 + 3 * 1024)) call abort + foo = s + end function foo + function bar (p) + integer :: p(1024), u, v, i, s, bar + s = 0 + !$omp parallel + !$omp do simd linear(k : m + 1) reduction(+: s) lastprivate(u, v) & + !$omp & schedule (dynamic, 32) + do i = 1, 1024, t + a(i) = a(i) * p(i) + u = p(i) + k + k = k + m + 1 + v = p(i) + k + s = s + p(i) + k + end do + !$omp end do simd + !$omp endparallel + if (i.ne.1025) call abort + if (u.ne.(36 + 4 + 3 * 1023).or.v.ne.(36 + 4 + 3 * 1024)) call abort + bar = s + end function bar + function baz (p) + integer :: p(1024), u, v, i, s, baz + s = 0 + !$omp parallel + !$omp do simd linear(k : m + 1) reduction(+: s) lastprivate(u, v) & + !$omp & linear(i : t) schedule (static, 8) + do i = 1, 1024, t + a(i) = a(i) * p(i) + u = p(i) + k + k = k + m + 1 + v = p(i) + k + s = s + p(i) + k + end do + !$omp end parallel + if (i.ne.1025) call abort + if (u.ne.(36 + 4 + 3 * 1023).or.v.ne.(36 + 4 + 3 * 1024)) call abort + baz = s + end function baz +end diff --git a/libgomp/testsuite/libgomp.fortran/pr71734-2.f90 b/libgomp/testsuite/libgomp.fortran/pr71734-2.f90 new file mode 100644 index 0000000..1cad4c1 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/pr71734-2.f90 @@ -0,0 +1,104 @@ +! { dg-do run { target avx_runtime } } +! { dg-additional-options "-msse2" } +! The same as simd4.f90, but compiled with -msse2. we run it only on +! AVX machine where simd4.f90 is compiled with -mavx. 
+ + integer :: a(1024), b(1024), k, m, i, s, t + k = 4 + m = 2 + t = 1 + do i = 1, 1024 + a(i) = i - 513 + b(i) = modulo (i - 52, 39) + if (i.lt.52.and.b(i).ne.0) b(i) = b(i) - 39 + end do + s = foo (b) + do i = 1, 1024 + if (a(i).ne.((i - 513) * b(i))) call abort + if (i.lt.52.and.modulo (i - 52, 39).ne.0) then + if (b(i).ne.(modulo (i - 52, 39) - 39)) call abort + else + if (b(i).ne.(modulo (i - 52, 39))) call abort + end if + a(i) = i - 513 + end do + if (k.ne.(4 + 3 * 1024).or.s.ne.1596127) call abort + k = 4 + m = 2 + t = 1 + s = bar (b) + do i = 1, 1024 + if (a(i).ne.((i - 513) * b(i))) call abort + if (i.lt.52.and.modulo (i - 52, 39).ne.0) then + if (b(i).ne.(modulo (i - 52, 39) - 39)) call abort + else + if (b(i).ne.(modulo (i - 52, 39))) call abort + end if + a(i) = i - 513 + end do + if (k.ne.(4 + 3 * 1024).or.s.ne.1596127) call abort + k = 4 + m = 2 + t = 1 + s = baz (b) + do i = 1, 1024 + if (a(i).ne.((i - 513) * b(i))) call abort + if (i.lt.52.and.modulo (i - 52, 39).ne.0) then + if (b(i).ne.(modulo (i - 52, 39) - 39)) call abort + else + if (b(i).ne.(modulo (i - 52, 39))) call abort + end if + end do + if (k.ne.(4 + 3 * 1024).or.s.ne.1596127) call abort +contains + function foo (p) + integer :: p(1024), u, v, i, s, foo + s = 0 + !$omp parallel do simd linear(k : m + 1) reduction(+: s) & + !$omp & lastprivate(u, v) schedule (static, 32) + do i = 1, 1024 + a(i) = a(i) * p(i) + u = p(i) + k + k = k + m + 1 + v = p(i) + k + s = s + p(i) + k + end do + !$omp end parallel do simd + if (i.ne.1025) call abort + if (u.ne.(36 + 4 + 3 * 1023).or.v.ne.(36 + 4 + 3 * 1024)) call abort + foo = s + end function foo + function bar (p) + integer :: p(1024), u, v, i, s, bar + s = 0 + !$omp parallel do simd linear(k : m + 1) reduction(+: s) & + !$omp & lastprivate(u, v) schedule (dynamic, 32) + do i = 1, 1024, t + a(i) = a(i) * p(i) + u = p(i) + k + k = k + m + 1 + v = p(i) + k + s = s + p(i) + k + end do + !$omp endparalleldosimd + if (i.ne.1025) call abort + if 
(u.ne.(36 + 4 + 3 * 1023).or.v.ne.(36 + 4 + 3 * 1024)) call abort + bar = s + end function bar + function baz (p) + integer :: p(1024), u, v, i, s, baz + s = 0 + !$omp parallel do simd linear(k : m + 1) reduction(+: s) & + !$omp & lastprivate(u, v) linear(i : t) schedule (static, 8) + do i = 1, 1024, t + a(i) = a(i) * p(i) + u = p(i) + k + k = k + m + 1 + v = p(i) + k + s = s + p(i) + k + end do + if (i.ne.1025) call abort + if (u.ne.(36 + 4 + 3 * 1023).or.v.ne.(36 + 4 + 3 * 1024)) call abort + baz = s + end function baz +end -- 2.7.4