I am testing the following patch to fix the qsort intransitiveness of dr_group_sort_cmp. The patch removes the overly powerful operand_equal_p checks (handling commutativity ) because those do not mix well with the sorting constraints.
I am also testing a followup to address the missed equalities by canonicalization -- the interesting trees happen to be all built by split_constant_offset where we can do some elaborate canonicalization in linear complexity (as opposed to operand_equal_p's exponential handling or trying to handle it in data_ref_compare_tree where it would be done at least multiple times during qsort invocation). Bootstrapped on x86_64-unknown-linux-gnu, testing still in progress (so is a quick SPEC 2k6 build where the issue showed up in multiple places). Richard. 2017-10-06 Richard Biener <rguent...@suse.de> PR tree-optimization/82397 * tree-vect-data-refs.c (dr_group_sort_cmp): Do not use operand_equal_p but rely on data_ref_compare_tree for detecting equalities. (vect_analyze_data_ref_accesses): Use data_ref_compare_tree to match up with dr_group_sort_cmp. * gfortran.dg/pr82397.f: New testcase. Index: gcc/tree-vect-data-refs.c =================================================================== --- gcc/tree-vect-data-refs.c (revision 253475) +++ gcc/tree-vect-data-refs.c (working copy) @@ -2727,43 +2727,30 @@ dr_group_sort_cmp (const void *dra_, con return loopa->num < loopb->num ? -1 : 1; /* Ordering of DRs according to base. */ - if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)) - { - cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra), - DR_BASE_ADDRESS (drb)); - if (cmp != 0) - return cmp; - } + cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra), + DR_BASE_ADDRESS (drb)); + if (cmp != 0) + return cmp; /* And according to DR_OFFSET. */ - if (!dr_equal_offsets_p (dra, drb)) - { - cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)); - if (cmp != 0) - return cmp; - } + cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)); + if (cmp != 0) + return cmp; /* Put reads before writes. */ if (DR_IS_READ (dra) != DR_IS_READ (drb)) return DR_IS_READ (dra) ? -1 : 1; /* Then sort after access size. */ - if (!operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))), - TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))), 0)) - { - cmp = data_ref_compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))), - TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)))); - if (cmp != 0) - return cmp; - } + cmp = data_ref_compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))), + TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)))); + if (cmp != 0) + return cmp; /* And after step. */ - if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0)) - { - cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)); - if (cmp != 0) - return cmp; - } + cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)); + if (cmp != 0) + return cmp; /* Then sort after DR_INIT. In case of identical DRs sort after stmt UID. */ cmp = tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)); @@ -2835,9 +2822,9 @@ vect_analyze_data_ref_accesses (vec_info and they are both either store or load (not load and store, not masked loads or stores). */ if (DR_IS_READ (dra) != DR_IS_READ (drb) - || !operand_equal_p (DR_BASE_ADDRESS (dra), - DR_BASE_ADDRESS (drb), 0) - || !dr_equal_offsets_p (dra, drb) + || data_ref_compare_tree (DR_BASE_ADDRESS (dra), + DR_BASE_ADDRESS (drb)) != 0 + || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0 || !gimple_assign_single_p (DR_STMT (dra)) || !gimple_assign_single_p (DR_STMT (drb))) break; @@ -2851,7 +2838,7 @@ vect_analyze_data_ref_accesses (vec_info break; /* Check that the data-refs have the same step. */ - if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0)) + if (data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)) != 0) break; /* Do not place the same access in the interleaving chain twice. */ Index: gcc/testsuite/gfortran.dg/pr82397.f =================================================================== --- gcc/testsuite/gfortran.dg/pr82397.f (nonexistent) +++ gcc/testsuite/gfortran.dg/pr82397.f (working copy) @@ -0,0 +1,32 @@ +! { dg-do compile } +! { dg-options "-Ofast" } + + subroutine foo(U,V,R,N,A) + integer N + real*8 U(N,N,N),V(N,N,N),R(N,N,N),A(0:3) + integer I3, I2, I1 +C + do I3=2,N-1 + do I2=2,N-1 + do I1=2,N-1 + R(I1,I2,I3)=V(I1,I2,I3) + * -A(0)*( U(I1, I2, I3 ) ) + * -A(1)*( U(I1-1,I2, I3 ) + U(I1+1,I2, I3 ) + * + U(I1, I2-1,I3 ) + U(I1, I2+1,I3 ) + * + U(I1, I2, I3-1) + U(I1, I2, I3+1) ) + * -A(2)*( U(I1-1,I2-1,I3 ) + U(I1+1,I2-1,I3 ) + * + U(I1-1,I2+1,I3 ) + U(I1+1,I2+1,I3 ) + * + U(I1, I2-1,I3-1) + U(I1, I2+1,I3-1) + * + U(I1, I2-1,I3+1) + U(I1, I2+1,I3+1) + * + U(I1-1,I2, I3-1) + U(I1-1,I2, I3+1) + * + U(I1+1,I2, I3-1) + U(I1+1,I2, I3+1) ) + * -A(3)*( U(I1-1,I2-1,I3-1) + U(I1+1,I2-1,I3-1) + * + U(I1-1,I2+1,I3-1) + U(I1+1,I2+1,I3-1) + * + U(I1-1,I2-1,I3+1) + U(I1+1,I2-1,I3+1) + * + U(I1-1,I2+1,I3+1) + U(I1+1,I2+1,I3+1) ) + enddo + enddo + enddo + return + end +