https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103611

--- Comment #2 from John Platts <john_platts at hotmail dot com> ---
Here is some code for extracting 64-bit integers from an SSE2 vector using GCC
vector extensions:
#include <cstdint>
#include <immintrin.h>

// GCC vector-extension type: a 16-byte vector whose elements are std::int64_t
// (i.e. two 64-bit lanes), which allows lanes to be read with operator[].
using Int64M128Vect [[__gnu__::__vector_size__(16)]] = std::int64_t;

// Returns one 64-bit element of a 128-bit integer vector.
//
// ElemIdx - compile-time index of the element to return; must be 0 or 1.
// vect    - the vector to read from.
//
// The function is noexcept; it only reinterprets the argument and indexes it.
template<int ElemIdx>
std::int64_t SSE2ExtractInt64(__m128i vect) noexcept {
    // (ElemIdx & 1) keeps only the lowest bit, so the comparison holds only
    // for 0 and 1; any other index (including negatives) fails to compile.
    static_assert(ElemIdx == (ElemIdx & 1), "ElemIdx must be between 0 and 1");
    // Convert to the vector-extension type so the element can be selected
    // with a constant subscript instead of an intrinsic.
    return Int64M128Vect(vect)[ElemIdx];
}

// Explicit instantiations for both valid indices, so that the compiler emits
// a standalone definition for each one whose generated code can be inspected.
template std::int64_t SSE2ExtractInt64<0>(__m128i vect) noexcept;
template std::int64_t SSE2ExtractInt64<1>(__m128i vect) noexcept;

Here is the output of the above C++ code when compiled with the -O2 -std=c++17
-march=nocona -mtune=skylake -m32 options:
_Z16SSE2ExtractInt64ILi0EExDv2_x:
        subl    $28, %esp
        movq    %xmm0, 8(%esp)
        movl    8(%esp), %eax
        movl    12(%esp), %edx
        addl    $28, %esp
        ret
_Z16SSE2ExtractInt64ILi1EExDv2_x:
        subl    $28, %esp
        movhps  %xmm0, 8(%esp)
        movl    8(%esp), %eax
        movl    12(%esp), %edx
        addl    $28, %esp
        ret

Here is the output of the above C++ code when compiled with the -O2 -std=c++17
-march=nocona -mtune=skylake -m64 options:
_Z16SSE2ExtractInt64ILi0EElDv2_x:
        movq    %xmm0, %rax
        ret
_Z16SSE2ExtractInt64ILi1EElDv2_x:
        movhlps %xmm0, %xmm1
        movq    %xmm1, %rax
        ret

Here is the output of the above C++ code when compiled with the -O2 -std=c++17
-march=core2 -msse4.1 -mtune=skylake -m32 options:
_Z16SSE2ExtractInt64ILi0EExDv2_x:
        movd    %xmm0, %eax
        pextrd  $1, %xmm0, %edx
        ret
_Z16SSE2ExtractInt64ILi1EExDv2_x:
        subl    $28, %esp
        movhps  %xmm0, 8(%esp)
        movl    8(%esp), %eax
        movl    12(%esp), %edx
        addl    $28, %esp
        ret

Here is the output of the above C++ code when compiled with the -O2 -std=c++17
-march=core2 -msse4.1 -mtune=skylake -m64 options:
_Z16SSE2ExtractInt64ILi0EElDv2_x:
        movq    %xmm0, %rax
        ret
_Z16SSE2ExtractInt64ILi1EElDv2_x:
        pextrq  $1, %xmm0, %rax
        ret

Reply via email to