https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89226
--- Comment #6 from H.J. Lu <hjl.tools at gmail dot com> ---
Please take a look at usr/hjl/pieces/master branch:

https://gitlab.com/x86-gcc/wip

[hjl@gnu-cfl-1 gcc]$ cat x.cc
#include <x86intrin.h>

// DUMB PAIR
struct dumb_pair {
  alignas(2*sizeof(__m256i)) __m256i x[2];
};

void copy1(const dumb_pair& from, dumb_pair& to) {
  to = from;
}

// SMART PAIR
struct foo512 {
  __m256i a;
  __m256i b;
  auto& operator=(const foo512& f) {
    a = f.a;
    b = f.b;
    return *this;
  }
};

struct smart_pair {
  union {
    foo512 y;
    __m256i x[2];
  };
  smart_pair(const smart_pair& sp) {
    y = sp.y;
  }
  smart_pair& operator=(const smart_pair& sp) {
    y = sp.y;
    return *this;
  }
};

void copy2(const smart_pair& from, smart_pair& to) {
  to = from;
}
[hjl@gnu-cfl-1 gcc]$ ./xgcc -B./ -O3 -march=haswell x.cc -S
[hjl@gnu-cfl-1 gcc]$ cat x.s
        .file "x.cc"
        .text
        .p2align 4
        .globl _Z5copy1RK9dumb_pairRS_
        .type _Z5copy1RK9dumb_pairRS_, @function
_Z5copy1RK9dumb_pairRS_:
.LFB5459:
        .cfi_startproc
        vmovdqa (%rdi), %ymm0
        vmovaps %ymm0, (%rsi)
        vmovdqa 32(%rdi), %ymm1
        vmovaps %ymm1, 32(%rsi)
        vzeroupper
        ret
        .cfi_endproc
.LFE5459:
        .size _Z5copy1RK9dumb_pairRS_, .-_Z5copy1RK9dumb_pairRS_
        .p2align 4
        .globl _Z5copy2RK10smart_pairRS_
        .type _Z5copy2RK10smart_pairRS_, @function
_Z5copy2RK10smart_pairRS_:
.LFB5465:
        .cfi_startproc
        vmovdqa (%rdi), %ymm0
        vmovdqa 32(%rdi), %ymm1
        vmovdqa %ymm0, (%rsi)
        vmovdqa %ymm1, 32(%rsi)
        vzeroupper
        ret
        .cfi_endproc
.LFE5465:
        .size _Z5copy2RK10smart_pairRS_, .-_Z5copy2RK10smart_pairRS_
        .ident "GCC: (GNU) 9.0.1 20190206 (experimental)"
        .section .note.GNU-stack,"",@progbits
[hjl@gnu-cfl-1 gcc]$