SSE4.1 has pmovzx and pmovsx. For code like: [EMAIL PROTECTED] vect]$ cat pmovzxbw.c typedef unsigned char vec_t; typedef unsigned short vecx_t;
extern __attribute__((aligned(16))) vec_t x [64]; extern __attribute__((aligned(16))) vecx_t y [64]; void foo () { int i; for (i = 0; i < 64; i++) y [i] = x [i]; } Icc generates pmovzxbw x(%rip), %xmm0 #13.14 pmovzxbw 8+x(%rip), %xmm1 #13.14 pmovzxbw 16+x(%rip), %xmm2 #13.14 pmovzxbw 24+x(%rip), %xmm3 #13.14 pmovzxbw 32+x(%rip), %xmm4 #13.14 pmovzxbw 40+x(%rip), %xmm5 #13.14 pmovzxbw 48+x(%rip), %xmm6 #13.14 pmovzxbw 56+x(%rip), %xmm7 #13.14 movdqa %xmm0, y(%rip) #13.5 movdqa %xmm1, 16+y(%rip) #13.5 movdqa %xmm2, 32+y(%rip) #13.5 movdqa %xmm3, 48+y(%rip) #13.5 movdqa %xmm4, 64+y(%rip) #13.5 movdqa %xmm5, 80+y(%rip) #13.5 movdqa %xmm6, 96+y(%rip) #13.5 movdqa %xmm7, 112+y(%rip) #13.5 ret #14.1 -- Summary: Integer extensions aren't vectorized Product: gcc Version: 4.3.0 Status: UNCONFIRMED Severity: enhancement Priority: P3 Component: tree-optimization AssignedTo: unassigned at gcc dot gnu dot org ReportedBy: hjl at lucon dot org http://gcc.gnu.org/bugzilla/show_bug.cgi?id=31667