https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77287

--- Comment #13 from Hongtao.liu <crazylht at gmail dot com> ---
;; Function fn (fn, funcdef_no=5484, decl_uid=32317, cgraph_uid=5485,
symbol_order=5484)

int fn (const int * px, const int * py, const int * pz, const int * pw, const
int * pa, const int * pb, const int * pc, const int * pd)
{
  vector(16) short unsigned int _3;
  vector(16) short unsigned int _5;
  vector(16) short int _7;
  vector(16) short int _9;
  vector(32) char _12;
  vector(32) unsigned char _14;
  vector(16) short unsigned int _16;
  vector(16) short unsigned int _17;
  vector(16) short int _18;
  vector(16) short int _19;
  vector(32) char _20;
  vector(32) unsigned char _21;
  vector(16) short unsigned int _22;
  vector(16) short unsigned int _23;
  vector(16) short int _24;
  vector(16) short int _25;
  vector(32) char _26;
  vector(32) unsigned char _27;
  vector(16) short unsigned int _28;
  vector(16) short unsigned int _29;
  vector(16) short int _30;
  vector(16) short int _31;
  int _32;
  vector(4) int _33;
  vector(8) int _34;
  vector(32) unsigned char _35;
  vector(32) char _36;
  vector(16) short unsigned int _37;
  vector(16) short unsigned int _38;
  vector(16) short unsigned int _39;
  vector(16) short unsigned int _40;
  vector(16) short unsigned int _41;
  vector(16) short unsigned int _42;
  vector(16) short unsigned int _43;
  vector(16) short unsigned int _44;
  vector(16) short unsigned int _45;
  vector(16) short unsigned int _46;
  vector(16) short unsigned int _47;
  vector(16) short unsigned int _48;
  vector(16) short unsigned int _50;
  vector(16) short unsigned int _51;
  vector(16) short unsigned int _53;
  vector(16) short unsigned int _54;
  vector(16) short unsigned int _56;
  vector(16) short unsigned int _57;
  vector(16) short unsigned int _59;
  vector(16) short unsigned int _60;
  vector(16) short int _62;
  vector(16) short int _63;
  vector(16) short unsigned int _64;
  vector(16) short unsigned int _65;
  vector(32) unsigned char _66;
  vector(32) char _67;
  vector(16) short int _68;
  vector(16) short int _69;
  vector(16) short unsigned int _70;
  vector(16) short unsigned int _71;
  vector(32) unsigned char _72;
  vector(32) char _73;
  vector(16) short int _74;
  vector(16) short int _75;
  vector(16) short unsigned int _76;
  vector(16) short unsigned int _77;
  vector(32) unsigned char _78;
  vector(32) char _79;
  vector(16) short int _80;
  vector(16) short int _81;
  vector(16) short unsigned int _82;
  vector(16) short unsigned int _83;
  vector(32) unsigned char _84;
  vector(32) char _85;
  vector(16) short int _86;
  vector(16) short int _87;
  vector(16) short unsigned int _88;
  vector(16) short unsigned int _89;
  vector(32) unsigned char _90;
  vector(32) char _91;
  vector(4) long long int _92;
  vector(4) long long int _93;
  vector(4) long long int _94;
  vector(4) long long int _95;
  vector(4) long long int _96;
  vector(4) long long int _97;
  vector(4) long long int _98;
  vector(4) long long int _99;
  vector(4) long long int _100;
  vector(4) long long int _101;
  vector(16) short unsigned int _107;
  vector(16) short unsigned int _108;
  vector(16) short unsigned int _109;
  vector(16) short unsigned int _110;
  vector(16) short unsigned int _111;

  <bb 2> [local count: 1073741824]:
  _101 = MEM[(const __m256i_u * {ref-all})px_2(D)];
  _100 = MEM[(const __m256i_u * {ref-all})py_4(D)];
  _99 = MEM[(const __m256i_u * {ref-all})pz_6(D)];
  _98 = MEM[(const __m256i_u * {ref-all})pw_8(D)];
  _97 = MEM[(const __m256i_u * {ref-all})pa_10(D)];
  _96 = MEM[(const __m256i_u * {ref-all})pb_11(D)];
  _95 = MEM[(const __m256i_u * {ref-all})pc_13(D)];
  _94 = MEM[(const __m256i_u * {ref-all})pd_15(D)];
  _93 = MEM[(const __m256i_u * {ref-all})pc_13(D) + 32B];
  _92 = MEM[(const __m256i_u * {ref-all})pd_15(D) + 32B];
  _86 = VIEW_CONVERT_EXPR<vector(16) short int>(_96);
  _87 = VIEW_CONVERT_EXPR<vector(16) short int>(_101);
  _88 = (vector(16) short unsigned int) _87;
  _89 = (vector(16) short unsigned int) _86;
  _90 = VEC_PACK_SAT_EXPR <_88, _89>;
  _91 = (vector(32) char) _90;
  _80 = VIEW_CONVERT_EXPR<vector(16) short int>(_95);
  _81 = VIEW_CONVERT_EXPR<vector(16) short int>(_100);
  _82 = (vector(16) short unsigned int) _81;
  _83 = (vector(16) short unsigned int) _80;
  _84 = VEC_PACK_SAT_EXPR <_82, _83>;
  _85 = (vector(32) char) _84;
  _74 = VIEW_CONVERT_EXPR<vector(16) short int>(_94);
  _75 = VIEW_CONVERT_EXPR<vector(16) short int>(_99);
  _76 = (vector(16) short unsigned int) _75;
  _77 = (vector(16) short unsigned int) _74;
  _78 = VEC_PACK_SAT_EXPR <_76, _77>;
  _79 = (vector(32) char) _78;
  _68 = VIEW_CONVERT_EXPR<vector(16) short int>(_93);
  _69 = VIEW_CONVERT_EXPR<vector(16) short int>(_98);
  _70 = (vector(16) short unsigned int) _69;
  _71 = (vector(16) short unsigned int) _68;
  _72 = VEC_PACK_SAT_EXPR <_70, _71>;
  _73 = (vector(32) char) _72;
  _62 = VIEW_CONVERT_EXPR<vector(16) short int>(_92);
  _63 = VIEW_CONVERT_EXPR<vector(16) short int>(_97);
  _64 = (vector(16) short unsigned int) _63;
  _65 = (vector(16) short unsigned int) _62;
  _66 = VEC_PACK_SAT_EXPR <_64, _65>;
  _67 = (vector(32) char) _66;
  _59 = VIEW_CONVERT_EXPR<vector(16) short unsigned int>(_91);
  _60 = VIEW_CONVERT_EXPR<vector(16) short unsigned int>(_101);
  _56 = VIEW_CONVERT_EXPR<vector(16) short unsigned int>(_85);
  _57 = VIEW_CONVERT_EXPR<vector(16) short unsigned int>(_100);
  _53 = VIEW_CONVERT_EXPR<vector(16) short unsigned int>(_79);
  _54 = VIEW_CONVERT_EXPR<vector(16) short unsigned int>(_99);
  _50 = VIEW_CONVERT_EXPR<vector(16) short unsigned int>(_73);
  _51 = VIEW_CONVERT_EXPR<vector(16) short unsigned int>(_98);
  _47 = VIEW_CONVERT_EXPR<vector(16) short unsigned int>(_67);
  _48 = VIEW_CONVERT_EXPR<vector(16) short unsigned int>(_97);
  _45 = VIEW_CONVERT_EXPR<vector(16) short unsigned int>(_96);
  _111 = _60 - _45;
  _46 = _59 + _111;
  _43 = VIEW_CONVERT_EXPR<vector(16) short unsigned int>(_95);
  _110 = _57 - _43;
  _44 = _56 + _110;
  _41 = VIEW_CONVERT_EXPR<vector(16) short unsigned int>(_94);
  _109 = _54 - _41;
  _42 = _53 + _109;
  _39 = VIEW_CONVERT_EXPR<vector(16) short unsigned int>(_93);
  _108 = _51 - _39;
  _40 = _50 + _108;
  _37 = VIEW_CONVERT_EXPR<vector(16) short unsigned int>(_92);
  _107 = _48 - _37;
  _38 = _47 + _107;
  _9 = VIEW_CONVERT_EXPR<vector(16) short int>(_44);
  _7 = VIEW_CONVERT_EXPR<vector(16) short int>(_46);
  _5 = (vector(16) short unsigned int) _7;
  _3 = (vector(16) short unsigned int) _9;
  _35 = VEC_PACK_SAT_EXPR <_5, _3>;
  _36 = (vector(32) char) _35;
  _19 = VIEW_CONVERT_EXPR<vector(16) short int>(_42);
  _18 = VIEW_CONVERT_EXPR<vector(16) short int>(_36);
  _17 = (vector(16) short unsigned int) _18;
  _16 = (vector(16) short unsigned int) _19;
  _14 = VEC_PACK_SAT_EXPR <_17, _16>;
  _12 = (vector(32) char) _14;
  _25 = VIEW_CONVERT_EXPR<vector(16) short int>(_40);
  _24 = VIEW_CONVERT_EXPR<vector(16) short int>(_12);
  _23 = (vector(16) short unsigned int) _24;
  _22 = (vector(16) short unsigned int) _25;
  _21 = VEC_PACK_SAT_EXPR <_23, _22>;
  _20 = (vector(32) char) _21;
  _31 = VIEW_CONVERT_EXPR<vector(16) short int>(_38);
  _30 = VIEW_CONVERT_EXPR<vector(16) short int>(_20);
  _29 = (vector(16) short unsigned int) _30;
  _28 = (vector(16) short unsigned int) _31;
  _27 = VEC_PACK_SAT_EXPR <_29, _28>;
  _26 = (vector(32) char) _27;
  _34 = VIEW_CONVERT_EXPR<vector(8) int>(_26);
  _33 = __builtin_ia32_vextractf128_si256 (_34, 0);
  _32 = __builtin_ia32_vec_ext_v4si (_33, 1); [tail call]
  return _32;

}

After folding _mm256_packus_epi16, GIMPLE still doesn't simplify the sequence
shown in the dump above. I guess GCC only supports VEC_PACK_SAT_EXPR
functionally, but does not optimize it.

Reply via email to