https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65952

--- Comment #5 from alalaw01 at gcc dot gnu.org ---
The above example tends to get fully unrolled, but even with an example using
32 ptrs rather than 4, the vectorizer does indeed fail because of the
multiplication. However, the multiplication is gone by the final tree stage, as
it is strength-reduced to an add; I believe the following -fdump-tree-optimized
output would be perfectly vectorizable:

loop ()
{
  unsigned long ivtmp.12;
  unsigned long ivtmp.10;
  void * _4;
  struct my_struct * _7;
  struct my_struct * pretmp_11;
  unsigned long _20;

  <bb 2>:
  pretmp_11 = array;
  ivtmp.10_16 = (unsigned long) pretmp_11;
  ivtmp.12_2 = (unsigned long) &ptrs;
  _20 = (unsigned long) &MEM[(void *)&ptrs + 256B];

  <bb 3>:
  # ivtmp.10_10 = PHI <ivtmp.10_1(3), ivtmp.10_16(2)>
  # ivtmp.12_15 = PHI <ivtmp.12_14(3), ivtmp.12_2(2)>
  _7 = (struct my_struct *) ivtmp.10_10;
  _4 = (void *) ivtmp.12_15;
  MEM[base: _4, offset: 0B] = _7;
  ivtmp.10_1 = ivtmp.10_10 + 16;
  ivtmp.12_14 = ivtmp.12_15 + 8;
  if (ivtmp.12_14 != _20)
    goto <bb 3>;
  else
    goto <bb 4>;

  <bb 4>:
  return;

}

Reply via email to