https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100267
--- Comment #8 from Hongtao.liu <crazylht at gmail dot com> ---
The "less optimizations" part should be fixed in GCC 12; the assembly now generated for test.c is:
#-----------------------------------------------------------------------
# Compiler-generated listing (GAS, AT&T syntax: op src, dst) for test.c.
# Kept verbatim; only comments were added.
#
# dummyf1_avx512x8
#   In:    rdi = address of a record; the code reads a 32-bit value at
#                offset 0 and a 64-bit value at offset 8 from it.
#                NOTE(review): presumably SysV AMD64 (rdi = first pointer
#                argument) and a struct { u32 index; u64 *base; } --
#                the C source is not shown here, confirm against test.c.
#   Out:   ymm0 = element-wise sum of two adjacent 32-byte vectors,
#                each treated as four 64-bit integer lanes.
#   Clobb: rax, rdx, ymm0, ymm1
#   Stack: not touched (no calls, no pushes, no rsp adjustment).
#   Note:  despite the "avx512" in the name, only 256-bit ymm registers
#          appear in this listing.
#-----------------------------------------------------------------------
.file "test.c"
.text
# Align the function entry to a 16-byte boundary (2^4).
.p2align 4
# Export the symbol and mark it as a function in the ELF symbol table.
.globl dummyf1_avx512x8
.type dummyf1_avx512x8, @function
dummyf1_avx512x8:
.LFB5668:
# Begin the call-frame-information region used for unwinding.
.cfi_startproc
# edx = 32-bit value at [rdi]; the 32-bit write zero-extends into rdx,
# so rdx is usable as a 64-bit index below.
movl (%rdi), %edx
# rax = 64-bit value at [rdi + 8], used as the base address below.
movq 8(%rdi), %rax
# ymm0 = 32 bytes at [rax + rdx*8]; vmovdqu has no alignment requirement.
vmovdqu (%rax,%rdx,8), %ymm0
# ymm1 = the next 32 bytes, at [rax + rdx*8 + 32].
vmovdqu 32(%rax,%rdx,8), %ymm1
# ymm0 = ymm0 + ymm1, added as packed 64-bit integers (four lanes).
vpaddq %ymm1, %ymm0, %ymm0
# Return with the sum left in ymm0.
ret
.cfi_endproc
.LFE5668:
# Symbol size = current location minus the function's start label.
.size dummyf1_avx512x8, .-dummyf1_avx512x8
# Compiler identification string recorded in the object file.
.ident "GCC: (GNU) 12.0.0 20210621 (experimental)"
# Empty .note.GNU-stack section: tells the linker this object does not
# need an executable stack.
.section .note.GNU-stack,"",@progbits