https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89226

--- Comment #6 from H.J. Lu <hjl.tools at gmail dot com> ---
Please take a look at the usr/hjl/pieces/master branch:

https://gitlab.com/x86-gcc/wip

[hjl@gnu-cfl-1 gcc]$ cat x.cc
#include <x86intrin.h>

// DUMB PAIR
struct dumb_pair {
    alignas(2*sizeof(__m256i)) __m256i x[2];
};

void copy1(const dumb_pair& from, dumb_pair& to) {
    to = from;
}

// SMART PAIR
struct foo512 {
    __m256i a;
    __m256i b;

    auto& operator=(const foo512& f) {
        a = f.a;
        b = f.b;
        return *this;
    }
};

struct smart_pair {
    union {
        foo512 y;
        __m256i x[2];
    };
    smart_pair(const smart_pair& sp) {
        y = sp.y;
    }

    smart_pair& operator=(const smart_pair& sp) {
        y = sp.y;
        return *this;
    }
};

void copy2(const smart_pair& from, smart_pair& to) {
    to = from;
}
[hjl@gnu-cfl-1 gcc]$ ./xgcc -B./ -O3 -march=haswell x.cc -S
[hjl@gnu-cfl-1 gcc]$ cat x.s
        .file   "x.cc"
        .text
        .p2align 4
        .globl  _Z5copy1RK9dumb_pairRS_
        .type   _Z5copy1RK9dumb_pairRS_, @function
_Z5copy1RK9dumb_pairRS_:
.LFB5459:
        .cfi_startproc
        vmovdqa (%rdi), %ymm0
        vmovaps %ymm0, (%rsi)
        vmovdqa 32(%rdi), %ymm1
        vmovaps %ymm1, 32(%rsi)
        vzeroupper
        ret
        .cfi_endproc
.LFE5459:
        .size   _Z5copy1RK9dumb_pairRS_, .-_Z5copy1RK9dumb_pairRS_
        .p2align 4
        .globl  _Z5copy2RK10smart_pairRS_
        .type   _Z5copy2RK10smart_pairRS_, @function
_Z5copy2RK10smart_pairRS_:
.LFB5465:
        .cfi_startproc
        vmovdqa (%rdi), %ymm0
        vmovdqa 32(%rdi), %ymm1
        vmovdqa %ymm0, (%rsi)
        vmovdqa %ymm1, 32(%rsi)
        vzeroupper
        ret
        .cfi_endproc
.LFE5465:
        .size   _Z5copy2RK10smart_pairRS_, .-_Z5copy2RK10smart_pairRS_
        .ident  "GCC: (GNU) 9.0.1 20190206 (experimental)"
        .section        .note.GNU-stack,"",@progbits
[hjl@gnu-cfl-1 gcc]$

Reply via email to