https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110334

--- Comment #3 from Richard Biener <rguenth at gcc dot gnu.org> ---
So it's actually that we require the instantiations, but we are not able to
fully optimize avx::[rect_]memset* before LTO streaming.  In fact, we're not
too far from that:

void skvx::Vec<2, unsigned int>::Vec (struct Vec * const this, unsigned int
D.78460)
{ 
  unsigned int _3(D);

  <bb 2> [local count: 1073741824]:
  MEM[(struct VecStorage *)this_2(D)] ={v} {CLOBBER};
  MEM[(struct Vec *)this_2(D)] ={v} {CLOBBER};
  MEM[(struct Vec *)this_2(D)].val = _3(D);
  MEM[(struct Vec *)this_2(D) + 4B] ={v} {CLOBBER};
  MEM[(struct Vec *)this_2(D) + 4B].val = _3(D);
  return;
}

void skvx::Vec<4, unsigned int>::Vec (struct Vec * const this, unsigned int
D.78672)
{ 
  unsigned int _3(D);
  struct Vec * _4;
  struct Vec * _5;

  <bb 2> [local count: 1073741824]:
  MEM[(struct VecStorage *)this_2(D)] ={v} {CLOBBER};
  _4 = &MEM[(struct VecStorage *)this_2(D)].lo;
  skvx::Vec<2, unsigned int>::Vec (_4, _3(D));
  _5 = &MEM[(struct VecStorage *)this_2(D)].hi;
  skvx::Vec<2, unsigned int>::Vec (_5, _3(D));
  return;

}

void avx::memsetT<unsigned int> (unsigned int * buffer, unsigned int value, int
count)
{
  struct Vec wideValue;
  long unsigned int _17;

  <bb 2> [local count: 118111600]:
  MEM[(struct VecStorage *)&wideValue] ={v} {CLOBBER};
  skvx::Vec<4, unsigned int>::Vec (&MEM[(struct VecStorage *)&wideValue].lo,
value_8(D));
  skvx::Vec<4, unsigned int>::Vec (&MEM[(struct VecStorage *)&wideValue].hi,
value_8(D));


Here we go on to inline skvx::Vec<8, unsigned int>::Vec, which eventually
expands:

__attribute__((always_inline))
void skvx::VecStorage<8, unsigned int>::VecStorage (struct VecStorage * const
this, unsigned int s)
{

but we do not inline into that function, and since we do not iterate early
inlining, we stop here.

Removing all always_inline from the TU fixes the issue.

Reply via email to