https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106470
--- Comment #3 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
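The other fix is to use _mm256_extract_epi16. Its index argument must be a
compile-time constant, so a runtime position has to be dispatched through a
switch.
E.g.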
inline unsigned short extract_epi16(__m256i v, int pos) {
switch(pos){
case 0: return _mm256_extract_epi16(v, 0);
case 1: return _mm256_extract_epi16(v, 1);
case 2: return _mm256_extract_epi16(v, 2);
case 3: return _mm256_extract_epi16(v, 3);
case 4: return _mm256_extract_epi16(v, 4);
case 5: return _mm256_extract_epi16(v, 5);
case 6: return _mm256_extract_epi16(v, 6);
case 7: return _mm256_extract_epi16(v, 7);
case 8: return _mm256_extract_epi16(v, 8);
case 9: return _mm256_extract_epi16(v, 9);
case 10: return _mm256_extract_epi16(v, 10);
case 11: return _mm256_extract_epi16(v, 11);
case 12: return _mm256_extract_epi16(v, 12);
case 13: return _mm256_extract_epi16(v, 13);
case 14: return _mm256_extract_epi16(v, 14);
case 15: return _mm256_extract_epi16(v, 15);
}
return 0;
}
...
for (size_t i = 0; i < 16; i++) {
printf(" %04x", extract_epi16(tmp, i));
}