https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98167
--- Comment #16 from Hongtao.liu <crazylht at gmail dot com> ---
typedef int v4si __attribute__ ((vector_size(16)));

v4si f(v4si a, v4si b)
{
  v4si a1 = __builtin_shufflevector (a, a, 2, 3 ,1 ,0);
  v4si b1 = __builtin_shufflevector (b, a, 2, 3 ,1 ,0);
  return a1 * b1;
}

GCC generates:

f:
        vpshufd xmm1, xmm1, 30
        vpshufd xmm0, xmm0, 30
        vpmulld xmm0, xmm0, xmm1
        ret

LLVM generates:

f:                                      # @f
        vpmulld xmm0, xmm1, xmm0
        vpshufd xmm0, xmm0, 30          # xmm0 = xmm0[2,3,1,0]
        ret