These kernels from FFmpeg are not vectorized by GCC 4.5 when compiled with:

gcc-4.5 -c diff_pixels.c -O3 -ffast-math -ftree-vectorizer-verbose=7 -msse2
[...]
diff_pixels.c:10: note: not vectorized: data ref analysis failed D.2726_9 = *s1_100;
Note that ICC 11.0 does vectorize these loop kernels. The difficulty seems to be that one of the data references is a pointer incremented by a parameter that is not a constant integer known at compile time.

/* Reproducer 1: diff_pixels_c. Kept verbatim; only comments and layout added. */
typedef short DCTELEM;
typedef unsigned char uint8_t;

/*
 * Store the element-wise difference s1[k] - s2[k] into block for 8 rows of
 * 8 elements each (64 DCTELEM values written in total).
 *
 * block  - output; written contiguously, advanced by 8 elements per row.
 * s1, s2 - inputs; 8 consecutive bytes are read from each per row.
 * stride - distance, in uint8_t elements, added to s1 and s2 after each row.
 */
void diff_pixels_c(DCTELEM *__restrict__ block, const uint8_t *s1, const uint8_t *s2, int stride)
{
    int i;
    /* read the pixels */
    for(i=0;i<8;i++) {
        /* One row, unrolled by hand: 8 independent subtractions. */
        block[0] = s1[0] - s2[0];
        block[1] = s1[1] - s2[1];
        block[2] = s1[2] - s2[2];
        block[3] = s1[3] - s2[3];
        block[4] = s1[4] - s2[4];
        block[5] = s1[5] - s2[5];
        block[6] = s1[6] - s2[6];
        block[7] = s1[7] - s2[7];
        /* The inputs advance by the run-time parameter, not by a constant;
           this is what the report text identifies as the likely obstacle. */
        s1 += stride;
        s2 += stride;
        /* The output advances by a compile-time constant. */
        block += 8;
    }
}

/* Reproducer 2: pix_sum_c. The typedefs are repeated, so this is presumably a
   separate source file -- confirm against the original attachment. */
typedef short DCTELEM;
typedef unsigned char uint8_t;
/* The next three typedefs are not referenced by the code shown here. */
typedef long int x86_reg;
typedef unsigned int uint32_t;
typedef unsigned long int uint64_t;

/*
 * Return the sum of a 16x16 region of bytes starting at pix.
 *
 * pix       - first byte of the region; 16 consecutive bytes are read per row.
 * line_size - distance, in uint8_t elements, between the starts of two rows.
 */
int pix_sum_c (uint8_t * pix, int line_size)
{
    int s, i, j;
    s = 0;
    for (i = 0; i < 16; i++) {
        /* Two passes of 8 hand-unrolled additions cover the 16 bytes of a row. */
        for (j = 0; j < 16; j += 8) {
            s += pix[0];
            s += pix[1];
            s += pix[2];
            s += pix[3];
            s += pix[4];
            s += pix[5];
            s += pix[6];
            s += pix[7];
            pix += 8;
        }
        /* pix already moved 16 bytes in the inner loop, so this makes the net
           step per row equal to line_size (a run-time parameter). */
        pix += line_size - 16;
    }
    return s;
}

--
Summary: Missed vectorization: "not vectorized: data ref analysis": pointer incremented by a parameter
Product: gcc
Version: 4.5.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: spop at gcc dot gnu dot org
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43434