http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49616
Summary: REGRESSION vectorization fails in case of runtime dimensioned vector Product: gcc Version: 4.7.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: tree-optimization AssignedTo: unassig...@gcc.gnu.org ReportedBy: vincenzo.innoce...@cern.ch GCC 4.7 fails to vectorize some of the loops, while 4.6.1 succeeds. Test case at the end of the message: I was not able to reduce it further (the one that fails is at the bottom) gcc version 4.7.0 20110702 (experimental) (GCC) c++ -O3 -std=c++0x -ftree-vectorizer-verbose=2 -c vectBug.cc vectBug.cc:68: note: LOOP VECTORIZED. vectBug.cc:68: note: LOOP VECTORIZED. vectBug.cc:71: note: vectorized 2 loops in function. vectBug.cc:35: note: not vectorized: data ref analysis failed *bs$__b_46[k_320] = D.2547_242; vectBug.cc:48: note: not vectorized: data ref analysis failed *bs$__b_46[k_187] = D.2556_271; vectBug.cc:45: note: not vectorized: data ref analysis failed *bs$__b_46[k_70] = D.2553_262; vectBug.cc:40: note: not vectorized: data ref analysis failed *bs$__b_46[k_317] = D.2550_251; vectBug.cc:35: note: LOOP VECTORIZED. vectBug.cc:48: note: LOOP VECTORIZED. vectBug.cc:45: note: LOOP VECTORIZED. vectBug.cc:40: note: LOOP VECTORIZED. vectBug.cc:16: note: LOOP VECTORIZED. vectBug.cc:28: note: LOOP VECTORIZED. vectBug.cc:25: note: LOOP VECTORIZED. vectBug.cc:20: note: LOOP VECTORIZED. vectBug.cc:16: note: LOOP VECTORIZED. vectBug.cc:28: note: LOOP VECTORIZED. vectBug.cc:25: note: LOOP VECTORIZED. vectBug.cc:20: note: LOOP VECTORIZED. vectBug.cc:94: note: vectorized 12 loops in function. while c++ -O3 -std=c++0x -ftree-vectorizer-verbose=2 -c vectBug.cc -DFIXED vectBug.cc:68: note: LOOP VECTORIZED. vectBug.cc:68: note: LOOP VECTORIZED. vectBug.cc:71: note: vectorized 2 loops in function. vectBug.cc:35: note: LOOP VECTORIZED. vectBug.cc:48: note: LOOP VECTORIZED. vectBug.cc:45: note: LOOP VECTORIZED. vectBug.cc:40: note: LOOP VECTORIZED. vectBug.cc:35: note: LOOP VECTORIZED. vectBug.cc:48: note: LOOP VECTORIZED. vectBug.cc:45: note: LOOP VECTORIZED. 
vectBug.cc:40: note: LOOP VECTORIZED. vectBug.cc:16: note: LOOP VECTORIZED. vectBug.cc:28: note: LOOP VECTORIZED. vectBug.cc:25: note: LOOP VECTORIZED. vectBug.cc:20: note: LOOP VECTORIZED. vectBug.cc:16: note: LOOP VECTORIZED. vectBug.cc:28: note: LOOP VECTORIZED. vectBug.cc:25: note: LOOP VECTORIZED. vectBug.cc:20: note: LOOP VECTORIZED. vectBug.cc:94: note: vectorized 16 loops in function. and gcc version 4.6.1 20110520 (prerelease) (GCC) c++ -O3 -std=c++0x -ftree-vectorizer-verbose=2 -c vectBug.cc vectBug.cc:68: note: LOOP VECTORIZED. vectBug.cc:68: note: LOOP VECTORIZED. vectBug.cc:71: note: vectorized 2 loops in function. vectBug.cc:35: note: LOOP VECTORIZED. vectBug.cc:48: note: LOOP VECTORIZED. vectBug.cc:45: note: LOOP VECTORIZED. vectBug.cc:40: note: LOOP VECTORIZED. vectBug.cc:35: note: LOOP VECTORIZED. vectBug.cc:48: note: LOOP VECTORIZED. vectBug.cc:45: note: LOOP VECTORIZED. vectBug.cc:40: note: LOOP VECTORIZED. vectBug.cc:16: note: LOOP VECTORIZED. vectBug.cc:28: note: LOOP VECTORIZED. vectBug.cc:25: note: LOOP VECTORIZED. vectBug.cc:20: note: LOOP VECTORIZED. vectBug.cc:16: note: LOOP VECTORIZED. vectBug.cc:28: note: LOOP VECTORIZED. vectBug.cc:25: note: LOOP VECTORIZED. vectBug.cc:20: note: LOOP VECTORIZED. vectBug.cc:94: note: vectorized 16 loops in function. 
test case cat vectBug.cc const int arraySize=512; struct Bar { int __attribute__ ((aligned(16))) c[arraySize]; int last; Bar() : last(0) { refresh();} void refresh(); void loop0(int N, float * f) { int k=0; int lead = arraySize-last; if (N<=lead) { for (int i=0; i!=N; ++i) f[k++] = c[last++]; return; } for (int i=last; i!=arraySize; ++i) f[k++] = c[i]; int outLoop = (N-lead)/arraySize; last = N -lead - outLoop*arraySize; for (int j=0; j!=outLoop; ++j) { refresh(); for (int i=0; i!=arraySize; ++i) f[k++] = c[i]; } refresh(); for (int i=0; i!=last; ++i) f[k++] = c[i]; } template<typename F> void loop(int N, F f) { int lead = arraySize-last; if (N<=lead) { for (int i=0; i!=N; ++i) f(c[last+i]); last +=N; return; } for (int i=last; i!=arraySize; ++i) f(c[i]); int outLoop = (N-lead)/arraySize; last = N -lead - outLoop*arraySize; for (int j=0; j!=outLoop; ++j) { refresh(); for (int i=0; i!=arraySize; ++i) f(c[i]); } refresh(); for (int i=0; i!=last; ++i) f(c[i]); } }; float __attribute__ ((aligned(16))) z[4096]; void refresh(); int j=0; void fun(float const *, float const *, int); template<typename F> inline void loop(int N, F f) { if (j+N>4096) { j=0; refresh(); } for (int i=0; i!=N; ++i) f(z[j++]); } void foo(int N) { float __attribute__ ((aligned(16))) x[N]; float __attribute__ ((aligned(16))) y[N]; int k=0; auto xs = [&x, &k](float r) { x[k++]= 1.5f*r;}; auto ys = [&y, &k](float r) { y[k++]= r+1.f;}; k=0; loop(N,xs); // for (int i=0; i!=N; ++i) xs(z[j++]); // x[k++] = z[j++]; k=0; loop(N,ys); // for (int i=0; i!=N; ++i) ys(z[j++]); // y[k++] = z[j++]; fun(x,y,N); } void load(int N) { float __attribute__ ((aligned(16))) a[N]; #ifndef FIXED float __attribute__ ((aligned(16))) b[N]; #else float __attribute__ ((aligned(16))) b[1024]; #endif static Bar bar; bar.loop0(N,a); bar.loop0(N,b); fun(a,b,N); int k=0; auto as = [&a, &k](float r) { a[k++]= 1.5f*r;}; auto bs = [&b, &k](float r) { b[k++]= r+1.f;}; k=0; bar.loop(N,as); k=0; bar.loop(N,bs); // <=== this fail (all 
others ok) fun(a,b,N); }