# cat regressionorbugfix.cc
// Vector of float occupying 16 bytes (four lanes when float is 4 bytes wide),
// declared with the GCC vector_size extension.
typedef float v4sf_t __attribute__ ((__vector_size__ (16)));
// Vector of int with the same 16-byte size; bar() casts its float operands to
// this type so that the bitwise OR is performed on integer vectors.
typedef int v4si_t __attribute__ ((__vector_size__ (16)));
// Returns a + (b | c), with the bitwise OR applied directly to the two
// float-vector operands (no cast to an integer vector type first).
// NOTE(review): whether `|` should be accepted on float vectors appears to be
// the point under discussion; bar() is the explicitly-cast counterpart.
v4sf_t foo(v4sf_t a, v4sf_t b, v4sf_t c) {
return a + (b | c);
}
// Same computation as foo(), but with the OR spelled out on integer vectors:
// b and c are cast to v4si_t, OR-ed, and the result is cast back to v4sf_t
// before being added to a.
// NOTE(review): per the GCC vector-extension docs, casts between vector types
// of equal size reinterpret the bits rather than convert values - confirm for
// the compiler in use.
v4sf_t bar(v4sf_t a, v4sf_t b, v4sf_t c) {
return a + (v4sf_t) ((v4si_t) b | (v4si_t) c);
}
// Entry point that does nothing and returns 0; foo() and bar() are not called.
int main() { return 0; }
0000000000400a30 <foo(float __vector, float __vector, float __vector)>:
400a30: orps %xmm2,%xmm1
400a33: addps %xmm1,%xmm0
400a36: retq
0000000000400a40 <bar(float __vector, float __vector, float __vector)>:
400a40: por %xmm2,%xmm1
400a44: addps %xmm1,%xmm0
400a47: retq
I'm surely not qualified to argue about typing, but you'd need a rather
strong distortion field to not characterize that as a regression.
I added an XFAILed test for exactly this code five minutes ago. OTOH, I
have also committed a fix that will avoid producing tons of shuffle and
unpacking instructions when function "bar" is compiled with "-msse" but
without "-msse2".
I'm also going to file a missed optimization bug soon.
I'm curious, does ICC support vector arithmetic like this? Do both
functions compile? What code does it produce for bar?
Paolo