http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50680
Bug #: 50680
Summary: -ftree-vectorizer-verbose does not report about
"basic block SLP" (attempt of) vectorization
Classification: Unclassified
Product: gcc
Version: 4.7.0
Status: UNCONFIRMED
Severity: trivial
Priority: P3
Component: tree-optimization
AssignedTo: [email protected]
ReportedBy: [email protected]
in this example compiled with
c++ -Ofast -ftree-vectorizer-verbose=7 -c slp.cc -mtune=corei7 -msse4.2
it reports only about loop, voop, aoop, not about the equivalently
(not-)vectorized code in foo, voo, aoo.
This makes debugging/optimization of "basic block SLP" difficult
(it took me a while to understand why foo is not vectorized!)
// Global restrict-qualified float pointers used by foo() and loop().
// They are not assigned anywhere in the visible snippet; the test case is
// compiled with -c only, per the command line quoted in the report.
float * __restrict__ x;
float * __restrict__ y;
float * __restrict__ z;
// Global fixed-size arrays of four floats used by voo() and voop().
float v1[4],v2[4],v3[4];
// Aggregate wrapping a four-float array, so that aoo() and aoop() access the
// data through a struct member instead of a plain global array.
struct A {
float a[4];
// The 16-byte alignment attribute below is commented out in the original
// report, so A keeps its default alignment.
}; // __attribute__ ((aligned(16)));
// Global instances of A used by aoo() and aoop().
A a1, a2, a3;
// Straight-line (loop-free) copy of four consecutive floats from the memory
// the global pointer x refers to into the memory the global pointer y refers
// to. Per the report, this is one of the functions for which the vectorizer
// verbose output printed nothing.
void foo ()
{
// Local restrict-qualified cursors starting at the first element of x and y.
float * __restrict__ pin = &x[0];
float * __restrict__ pout =&y[0];
// Each statement copies one float and then advances both cursors by one.
*pout++ = *pin++;
*pout++ = *pin++;
*pout++ = *pin++;
*pout++ = *pin++;
}
// Same straight-line four-float copy as foo(), but between the global arrays:
// copies v1[0..3] into v2[0..3]. Per the report, no vectorizer diagnostics
// were printed for this function either.
void voo() {
// Local restrict-qualified cursors starting at the first element of v1 and v2.
float * __restrict__ pin = &v1[0];
float * __restrict__ pout =&v2[0];
// Each statement copies one float and then advances both cursors by one.
*pout++ = *pin++;
*pout++ = *pin++;
*pout++ = *pin++;
*pout++ = *pin++;
}
// Same straight-line four-float copy as foo(), but between struct members:
// copies a1.a[0..3] into a2.a[0..3]. Per the report, no vectorizer
// diagnostics were printed for this function either.
void aoo() {
// Local restrict-qualified cursors starting at the first element of a1.a and a2.a.
float * __restrict__ pin = &a1.a[0];
float * __restrict__ pout =&a2.a[0];
// Each statement copies one float and then advances both cursors by one.
*pout++ = *pin++;
*pout++ = *pin++;
*pout++ = *pin++;
*pout++ = *pin++;
}
// Loop counterpart operating through the global pointers: for i in 0..3,
// stores y[i] + z[i] into x[i]. Per the report, the vectorizer verbose output
// does cover this function.
void loop() {
for (int i=0;i!=4;++i) x[i]=y[i]+z[i];
}
// Loop counterpart operating on the global arrays: for i in 0..3, stores
// v2[i] + v3[i] into v1[i]. Per the report, the vectorizer verbose output
// does cover this function.
void voop() {
for (int i=0;i!=4;++i) v1[i]=v2[i]+v3[i];
}
// Loop counterpart operating on struct members: for i in 0..3, stores
// a2.a[i] + a3.a[i] into a1.a[i]. Per the report, the vectorizer verbose
// output does cover this function.
void aoop() {
for (int i=0;i!=4;++i) a1.a[i]=a2.a[i]+a3.a[i];
}