These kernels from FFmpeg are not vectorized with:
gcc-4.5 -c diff_pixels.c -O3 -ffast-math -ftree-vectorizer-verbose=7 -msse2
[...]
diff_pixels.c:10: note: not vectorized: data ref analysis failed D.2726_9 =
*s1_100;

Note that ICC 11.0 does vectorize these loop kernels.
The difficulty seems to be that the data references go through pointers
that are advanced on every iteration by a function parameter (stride in
diff_pixels_c, line_size in pix_sum_c) whose value is not an integer
constant known at compile time.

typedef short DCTELEM;
typedef unsigned char uint8_t;

/* Store the element-wise difference s1 - s2 of an 8x8 region into block.
   Consecutive source rows start stride bytes apart; the 64 results are
   written to block contiguously, 8 per row.  */
void diff_pixels_c(DCTELEM *__restrict__ block, const uint8_t *s1,
                   const uint8_t *s2, int stride)
{
  DCTELEM *out = block;
  const uint8_t *row1 = s1;
  const uint8_t *row2 = s2;
  int rows_left;

  /* The row body is kept unrolled, and both source cursors are advanced
     by the run-time stride after every row.  */
  for (rows_left = 8; rows_left > 0; rows_left--)
    {
      out[0] = row1[0] - row2[0];
      out[1] = row1[1] - row2[1];
      out[2] = row1[2] - row2[2];
      out[3] = row1[3] - row2[3];
      out[4] = row1[4] - row2[4];
      out[5] = row1[5] - row2[5];
      out[6] = row1[6] - row2[6];
      out[7] = row1[7] - row2[7];

      out += 8;
      row1 += stride;
      row2 += stride;
    }
}


typedef short DCTELEM;
typedef unsigned char uint8_t;
typedef long int x86_reg;
typedef unsigned int uint32_t;
typedef unsigned long int uint64_t;

/* Return the sum of the bytes in a 16x16 region starting at pix.
   Consecutive rows start line_size bytes apart.  */
int
pix_sum_c (uint8_t * pix, int line_size)
{
  const uint8_t *cur = pix;
  int total = 0;
  int rows_left, col;

  for (rows_left = 16; rows_left > 0; rows_left--)
    {
      /* Each row is consumed as two unrolled groups of 8 bytes.  */
      for (col = 0; col < 16; col += 8)
        {
          total += cur[0] + cur[1] + cur[2] + cur[3]
                   + cur[4] + cur[5] + cur[6] + cur[7];
          cur += 8;
        }

      /* 16 bytes of the row were already consumed; skip the remainder
         so that cur lands on the start of the next row.  */
      cur += line_size - 16;
    }

  return total;
}


-- 
           Summary: Missed vectorization: "not vectorized: data ref
                    analysis": pointer incremented by a parameter
           Product: gcc
           Version: 4.5.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: spop at gcc dot gnu dot org


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43434

Reply via email to