https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91811
Bug ID: 91811
Summary: 256-bit vector store isn't used
Product: gcc
Version: 10.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: hjl.tools at gmail dot com
Target Milestone: ---
[hjl@gnu-cfl-1 xxx]$ cat y.i
/* Four consecutive long long fields.  On the x86-64 target used in this
   report they occupy 32 contiguous bytes (256 bits): the generated code
   below fills the object with two 16-byte stores at offsets 0 and 16.  */
typedef struct
{
long long width, height;
long long x, y;
} info;
/* Defined elsewhere; receives a pointer to the info object, so the object
   must actually be written to memory before the call.  */
extern void bar (info *);
/* Reproducer: copy the four scalar arguments into the four fields of a
   local info object, in declaration order, then pass its address to bar.
   The four adjacent stores are what the SLP vectorizer is expected to
   combine.  */
void
foo (long long width, long long height,
long long x, long long y)
{
info t;
/* Four adjacent 64-bit field stores covering the whole object.  */
t.width = width;
t.height = height;
t.x = x;
t.y = y;
bar (&t);
}
[hjl@gnu-cfl-1 xxx]$
/export/build/gnu/tools-build/gcc-debug/build-x86_64-linux/gcc/xgcc
-B/export/build/gnu/tools-build/gcc-debug/build-x86_64-linux/gcc/ -O2
-march=skylake -ftree-slp-vectorize -mtune-ctrl=^sse_typeless_stores
-mprefer-vector-width=256 -S y.i
[hjl@gnu-cfl-1 xxx]$ cat y.s
# Compiler output for foo() from y.i (AT&T syntax).
# Incoming arguments, per the C prototype and the integer argument order
# rdi, rsi, rdx, rcx: rdi = width, rsi = height, rdx = x, rcx = y.
# The local `t` lives at (%rsp) .. 31(%rsp) after the stack adjustment.
.file "y.i"
.text
# Align the function entry to 2^4 = 16 bytes.
.p2align 4
.globl foo
.type foo, @function
foo:
.LFB0:
.cfi_startproc
# xmm1[63:0] = width
vmovq %rdi, %xmm1
# Reserve 40 bytes: 32 for `t`; the extra 8 presumably keeps rsp
# 16-byte aligned at the call (rsp % 16 == 8 on entry).
subq $40, %rsp
.cfi_def_cfa_offset 48
# xmm0 = { width, height }: insert height as qword element 1.
vpinsrq $1, %rsi, %xmm1, %xmm0
# xmm2[63:0] = x
vmovq %rdx, %xmm2
# First 128-bit store: t.width and t.height at offset 0.
vmovdqa %xmm0, (%rsp)
# First argument for bar: &t.
movq %rsp, %rdi
# xmm0 = { x, y }: insert y as qword element 1 (xmm0 is reused).
vpinsrq $1, %rcx, %xmm2, %xmm0
# Second 128-bit store: t.x and t.y at offset 16.
# Together with the store above, `t` is written as two XMM halves
# rather than with one 256-bit YMM store.
vmovdqa %xmm0, 16(%rsp)
call bar
# Release the 40-byte frame and return.
addq $40, %rsp
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.LFE0:
.size foo, .-foo
.ident "GCC: (GNU) 10.0.0 20190918 (experimental)"
.section .note.GNU-stack,"",@progbits
[hjl@gnu-cfl-1 xxx]$
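With -mprefer-vector-width=256, the 32-byte struct is still initialized with two 128-bit XMM stores. Is it possible to use a single 256-bit YMM register store instead?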