------- Comment #2 from hjl at lucon dot org 2007-04-03 17:37 -------
For this code:

typedef short vec_t;

extern __attribute__((aligned(16))) vec_t x [64];
extern __attribute__((aligned(16))) vec_t y [64];
extern __attribute__((aligned(16))) vec_t m [64];

void
foo ()
{
  int i;

  for (i = 0; i < 64; i++)
    if (x [i] < y [i])
      m [i] = y [i];
    else
      m [i] = x [i];
}

I am expecting:

        .globl foo
        .type foo, @function
foo:
.LFB2:
        movdqa y(%rip), %xmm0
        movl $16, %eax
        pmaxsw x(%rip), %xmm0
        movdqa %xmm0, m(%rip)
        .p2align 4,,7
.L2:
        movdqa y(%rax), %xmm0
        pmaxsw x(%rax), %xmm0
        movdqa %xmm0, m(%rax)
        addq $16, %rax
        cmpq $128, %rax
        jne .L2
        rep ; ret
.LFE2:
        .size foo, .-foo

But I got:

        .globl foo
        .type foo, @function
foo:
.LFB2:
        xorl %ecx, %ecx
        .p2align 4,,7
.L2:
        movzwl x(%rcx,%rcx), %edx
        movzwl y(%rcx,%rcx), %eax
        cmpw %ax, %dx
        cmovge %edx, %eax
        movw %ax, m(%rcx,%rcx)
        addq $1, %rcx
        cmpq $64, %rcx
        jne .L2
        rep ; ret
.LFE2:
        .size foo, .-foo

--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=31460