The following fixes an oversight in the SLP reduction permutation optimization, which will happily delete permutations required for the handling of gaps.
Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Richard. 2016-02-22 Richard Biener <rguent...@suse.de> PR tree-optimization/69882 * tree-vect-slp.c (vect_attempt_slp_rearrange_stmts): Properly preserve permutations present because of gaps. (vect_supported_load_permutation_p): Always continue checking permutations after vect_attempt_slp_rearrange_stmts. * gfortran.dg/vect/pr69882.f90: New testcase. Index: gcc/tree-vect-slp.c =================================================================== *** gcc/tree-vect-slp.c (revision 233598) --- gcc/tree-vect-slp.c (working copy) *************** vect_attempt_slp_rearrange_stmts (slp_in *** 1332,1339 **** node->load_permutation); /* We are done, no actual permutations need to be generated. */ FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) ! SLP_TREE_LOAD_PERMUTATION (node).release (); return true; } --- 1332,1350 ---- node->load_permutation); /* We are done, no actual permutations need to be generated. */ + unsigned int unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn); FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) ! { ! gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0]; ! first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)); ! /* But we have to keep those permutations that are required because ! of handling of gaps. */ ! if (unrolling_factor == 1 ! || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt)) ! && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)) ! SLP_TREE_LOAD_PERMUTATION (node).release (); ! } ! return true; } *************** vect_supported_load_permutation_p (slp_i *** 1381,1392 **** In reduction chain the order of the loads is not important. */ if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)) && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) ! { ! if (vect_attempt_slp_rearrange_stmts (slp_instn)) ! return true; ! ! /* Fallthru to general load permutation handling. */ ! 
} /* In basic block vectorization we allow any subchain of an interleaving chain. --- 1392,1398 ---- In reduction chain the order of the loads is not important. */ if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)) && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) ! vect_attempt_slp_rearrange_stmts (slp_instn); /* In basic block vectorization we allow any subchain of an interleaving chain. Index: gcc/testsuite/gfortran.dg/vect/pr69882.f90 =================================================================== *** gcc/testsuite/gfortran.dg/vect/pr69882.f90 (revision 0) --- gcc/testsuite/gfortran.dg/vect/pr69882.f90 (working copy) *************** *** 0 **** --- 1,41 ---- + ! { dg-additional-options "-Ofast" } + ! { dg-additional-options "-mavx" { target avx_runtime } } + + subroutine foo(a, x) + implicit none + + integer, parameter :: XX=4, YY=26 + integer, intent(in) :: x + real *8, intent(in) :: a(XX,YY) + real *8 :: c(XX) + + integer i, k + + c = 0 + + do k=x,YY + do i=1,2 + c(i) = max(c(i), a(i,k)) + end do + end do + + PRINT *, "c=", c + + IF (c(1) .gt. 0.0) THEN + CALL ABORT + END IF + + IF (c(2) .gt. 0.0) THEN + CALL ABORT + END IF + end subroutine foo + + PROGRAM MAIN + real *8 a(4, 26) + + a = 0 + a(3,1) = 100.0 + a(4,1) = 100.0 + + CALL FOO(a, 1) + END PROGRAM