https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103554
Bug ID: 103554
Summary: -mavx generates worse code for scalar code
Product: gcc
Version: 11.2.1
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: avi at scylladb dot com
Target Milestone: ---
Test case:
struct s1 {
long a, b, c, d, e, f, g, h;
};
s1 move(s1 in) {
s1 ret;
ret.a = in.d;
ret.b = in.e;
ret.c = in.a;
ret.d = in.b;
return ret;
}
-O3 generates:
move(s1):
movq 8(%rsp), %xmm0
movq 32(%rsp), %xmm1
movq %rdi, %rax
movhps 16(%rsp), %xmm0
movhps 40(%rsp), %xmm1
movups %xmm1, (%rdi)
movups %xmm0, 16(%rdi)
ret
-O3 -mavx generates:
move(s1):
pushq %rbp
movq %rdi, %rax
movq %rsp, %rbp
vmovq 16(%rbp), %xmm2
vmovq 40(%rbp), %xmm3
vpinsrq $1, 24(%rbp), %xmm2, %xmm1
vpinsrq $1, 48(%rbp), %xmm3, %xmm0
vinsertf128 $0x1, %xmm1, %ymm0, %ymm0
vmovdqu %ymm0, (%rdi)
vzeroupper
popq %rbp
ret
Clang -O3 generates this simple code, with or without -mavx (with -mavx it uses
VEX-encoded instructions):
move(s1): # @move(s1)
movq %rdi, %rax
movups 32(%rsp), %xmm0
movups %xmm0, (%rdi)
movaps 8(%rsp), %xmm0
movups %xmm0, 16(%rdi)
retq