On Tue, Jan 28, 2014 at 01:14:32PM +0100, Richard Biener wrote: > > I admit I fully don't understand why exactly, but my experimentation so far > > showed that for read/write and write/read ddrs it is ok and desirable to > > ignore the dist > 0 && DDR_REVERSED_P (ddr) cases, but for write/write > > ddrs it is undesirable. See the PR for further tests, perhaps I could > > turn them into further testcases. > > Please.
New testcase in the patch. > > - if (dist > 0 && DDR_REVERSED_P (ddr)) > > + if (dist > 0 && DDR_REVERSED_P (ddr) > > + && (DR_IS_READ (dra) || DR_IS_READ (drb))) > > I think that'snot sufficient. It depends > on the order of the stmts whether the dependence distance is really > negative - we are trying to catch write-after-read negative distance > here I think. We can't rely on the DDRs being formed in stmt order > (anymore, at least since 4.9 where we start to arbitrary re-order > the vector of DRs). As discussed on IRC, the actual bug was that vect_analyze_data_ref_accesses reordered the data references before DDRs were constructed, thus DDR_A wasn't necessarily before DDR_B lexically in the loop and thus using DDR_REVERSED_P bit didn't really reflect whether it is negative or positive distance. Fixed by making sure the data refs aren't reordered (well, they are reordered on a copy of the vector only for the purposes of vect_analyze_data_ref_accesses). Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2014-01-28 Jakub Jelinek <ja...@redhat.com> PR tree-optimization/59594 * tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Sort a copy of the datarefs vector rather than the vector itself. * gcc.dg/vect/no-vfa-vect-depend-2.c: New test. * gcc.dg/vect/no-vfa-vect-depend-3.c: New test. * gcc.dg/vect/pr59594.c: New test. --- gcc/tree-vect-data-refs.c.jj 2014-01-23 10:52:26.766346677 +0100 +++ gcc/tree-vect-data-refs.c 2014-01-28 16:11:34.371698307 +0100 @@ -2484,19 +2484,21 @@ vect_analyze_data_ref_accesses (loop_vec return true; /* Sort the array of datarefs to make building the interleaving chains - linear. */ - qsort (datarefs.address (), datarefs.length (), + linear. Don't modify the original vector's order, it is needed for + determining what dependencies are reversed. */ + vec<data_reference_p> datarefs_copy = datarefs.copy (); + qsort (datarefs_copy.address (), datarefs_copy.length (), sizeof (data_reference_p), dr_group_sort_cmp); /* Build the interleaving chains. */ - for (i = 0; i < datarefs.length () - 1;) + for (i = 0; i < datarefs_copy.length () - 1;) { - data_reference_p dra = datarefs[i]; + data_reference_p dra = datarefs_copy[i]; stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra)); stmt_vec_info lastinfo = NULL; - for (i = i + 1; i < datarefs.length (); ++i) + for (i = i + 1; i < datarefs_copy.length (); ++i) { - data_reference_p drb = datarefs[i]; + data_reference_p drb = datarefs_copy[i]; stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb)); /* ??? Imperfect sorting (non-compatible types, non-modulo @@ -2573,7 +2575,7 @@ vect_analyze_data_ref_accesses (loop_vec } } - FOR_EACH_VEC_ELT (datarefs, i, dr) + FOR_EACH_VEC_ELT (datarefs_copy, i, dr) if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) && !vect_analyze_data_ref_access (dr)) { @@ -2588,9 +2590,13 @@ vect_analyze_data_ref_accesses (loop_vec continue; } else - return false; + { + datarefs_copy.release (); + return false; + } } + datarefs_copy.release (); return true; } --- gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c.jj 2014-01-28 14:06:10.818303424 +0100 +++ gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c 2014-01-28 14:06:10.818303424 +0100 @@ -0,0 +1,55 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 17 + +int ia[N] = {48,45,42,39,36,33,30,27,24,21,18,15,12,9,6,3,0}; +int ib[N] = {48,45,42,39,36,33,30,27,24,21,18,15,12,9,6,3,0}; +int res[N] = {48,192,180,168,156,144,132,120,108,96,84,72,60,48,36,24,12}; + +__attribute__ ((noinline)) +int main1 () +{ + int i; + + /* Not vectorizable due to data dependence: dependence distance 1. */ + for (i = N - 1; i >= 0; i--) + { + ia[i] = ia[i+1] * 4; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (ia[i] != 0) + abort (); + } + + /* Vectorizable. Dependence distance -1. */ + for (i = N - 1; i >= 0; i--) + { + ib[i+1] = ib[i] * 4; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (ib[i] != res[i]) + abort (); + } + + return 0; +} + +int main (void) +{ + check_vect (); + + return main1 (); +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {xfail vect_no_align } } } */ +/* { dg-final { scan-tree-dump-times "dependence distance negative" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ --- gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-3.c.jj 2014-01-28 14:12:08.235470812 +0100 +++ gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-3.c 2014-01-28 16:18:44.876376404 +0100 @@ -0,0 +1,187 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 64 + +int ia[N + 1]; +int ib[N + 1]; + +/* Vectorizable. Dependence distance -1. */ +__attribute__((noinline)) void +f1 (void) +{ + int i; + for (i = 0; i < N; i++) + { + ia[i + 1] = 1; + ib[i] = ia[i]; + } +} + +/* Not vectorizable due to data dependence: dependence distance 1. */ +__attribute__((noinline)) void +f2 (void) +{ + int i; + for (i = 0; i < N; i++) + { + ia[i] = 1; + ib[i] = ia[i + 1]; + } +} + +/* Not vectorizable due to data dependence: dependence distance 1. */ +__attribute__((noinline)) void +f3 (void) +{ + int i; + for (i = N - 1; i >= 0; i--) + { + ia[i + 1] = 1; + ib[i] = ia[i]; + } +} + +/* Vectorizable. Dependence distance -1. */ +__attribute__((noinline)) void +f4 (void) +{ + int i; + for (i = N - 1; i >= 0; i--) + { + ia[i] = 1; + ib[i] = ia[i + 1]; + } +} + +/* Vectorizable. Dependence distance -1. */ +__attribute__((noinline)) void +f5 (void) +{ + int i; + for (i = 0; i < N; i++) + { + ia[i + 1] = 1; + ia[i] = 2; + } +} + +/* Not vectorizable due to data dependence: dependence distance 1. */ +__attribute__((noinline)) void +f6 (void) +{ + int i; + for (i = 0; i < N; i++) + { + ia[i] = 1; + ia[i + 1] = 2; + } +} + +/* Not vectorizable due to data dependence: dependence distance 1. */ +__attribute__((noinline)) void +f7 (void) +{ + int i; + for (i = N - 1; i >= 0; i--) + { + ia[i + 1] = 1; + ia[i] = 2; + } +} + +/* Vectorizable. Dependence distance -1. */ +__attribute__((noinline)) void +f8 (void) +{ + int i; + for (i = N - 1; i >= 0; i--) + { + ia[i] = 1; + ia[i + 1] = 2; + } +} + +__attribute__ ((noinline)) int +main1 (void) +{ + int i, j; + + for (j = 0; j < 8; j++) + { + for (i = 0; i <= N; i++) + { + ia[i] = i + 3; + ib[i] = i + N + 3; + asm (""); + } + + switch (j) + { + case 0: f1 (); break; + case 1: f2 (); break; + case 2: f3 (); break; + case 3: f4 (); break; + case 4: f5 (); break; + case 5: f6 (); break; + case 6: f7 (); break; + case 7: f8 (); break; + } + + for (i = 0; i <= N; i++) + { + int ea = i + 3; + int eb = i + N + 3; + switch (j) + { + case 0: + if (i) ea = 1; + if (i == 0) eb = 3; + else if (i != N) eb = 1; + break; + case 1: + if (i != N) ea = 1; + if (i != N) eb = i + 4; + break; + case 2: + if (i) ea = 1; + if (i != N) eb = i + 3; + break; + case 3: + if (i != N) ea = 1; + if (i < N - 1) eb = 1; + else if (i == N - 1) eb = 67; + break; + case 4: + ea = 1 + (i != N); + break; + case 5: + ea = 2 - (i != N); + break; + case 6: + ea = 1 + (i == 0); + break; + case 7: + ea = 2 - (i == 0); + break; + } + if (ia[i] != ea || ib[i] != eb) + abort (); + } + } + + return 0; +} + +int main () +{ + check_vect (); + + return main1 (); +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" {xfail vect_no_align } } } */ +/* { dg-final { scan-tree-dump-times "dependence distance negative" 4 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ --- gcc/testsuite/gcc.dg/vect/pr59594.c.jj 2014-01-28 14:06:10.819303452 +0100 +++ gcc/testsuite/gcc.dg/vect/pr59594.c 2014-01-28 14:06:10.818303424 +0100 @@ -0,0 +1,31 @@ +/* PR tree-optimization/59594 */ + +#include "tree-vect.h" + +#define N 1024 +int b[N + 1]; + +int +main () +{ + int i; + check_vect (); + for (i = 0; i < N + 1; i++) + { + b[i] = i; + asm (""); + } + for (i = N; i >= 0; i--) + { + b[i + 1] = b[i]; + b[i] = 1; + } + if (b[0] != 1) + __builtin_abort (); + for (i = 0; i < N; i++) + if (b[i + 1] != i) + __builtin_abort (); + return 0; +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ Jakub