https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103932

            Bug ID: 103932
           Summary: x86: strange unoptimized code generated (multiple
                    negations of _mm_testz_si128 result)
           Product: gcc
           Version: 12.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c++
          Assignee: unassigned at gcc dot gnu.org
          Reporter: nekotekina at gmail dot com
  Target Milestone: ---

GCC generates a seemingly unoptimized sequence of instructions in certain cases
(I can't tell exactly what triggers it; example code is below):

        xor     eax, eax
        vptest  xmm0, xmm0
        sete    al
        test    eax, eax
        sete    al
        movzx   eax, al

I would expect something like this instead:
xor eax, eax
vptest xmm0, xmm0
setne al


https://godbolt.org/z/sTaG65Ksc
Code (-O3 -std=c++20 -march=skylake):

#include <emmintrin.h>
#include <immintrin.h>
#include <bit>
#include <cstdint>

// Satisfied by any type whose size is exactly 16 bytes (128 bits).
// Only the size is checked; no other property of T is constrained here.
template <typename T>
concept Vector128 = (sizeof(T) == 16);

// Short aliases for the fixed-width unsigned integer types used below.
using u64 = std::uint64_t;
using u32 = std::uint32_t;

// 16-byte-aligned 128-bit value that converts implicitly to and from any
// other 16-byte type (see Vector128) by reinterpreting the bits.
union alignas(16) v128
{
        // The 128 bits viewed as two 64-bit lanes.
        u64 _u64[2];

        // Defaulted: default-initialization leaves the lanes indeterminate.
        v128() = default;

        constexpr v128(const v128&) noexcept = default;

        // Implicit conversion FROM any 16-byte type: the bits of rhs are
        // reinterpreted as a v128 via std::bit_cast, then copy-constructed.
        template <Vector128 T>
        constexpr v128(const T& rhs) noexcept
                : v128(std::bit_cast<v128>(rhs))
        {
        }

        constexpr v128& operator=(const v128&) noexcept = default;

        // Implicit conversion TO any 16-byte type via std::bit_cast. This is
        // what lets a v128 be passed directly where an intrinsic vector type
        // is expected.
        template <Vector128 T>
        constexpr operator T() const noexcept
        {
                return std::bit_cast<T>(*this);
        }
};

// Test if vector is zero: returns true when all 128 bits of arg are zero,
// false otherwise.
inline bool gv_testz(const v128& arg)
{
#if defined(__SSE4_1__)
        // arg is implicitly converted to the intrinsic's vector parameter type
        // through v128's templated conversion operator. Testing arg against
        // itself yields nonzero exactly when (arg & arg) == 0, i.e. arg == 0.
        return _mm_testz_si128(arg, arg);
#else
        // Fallback without SSE4.1: zero iff the OR of both 64-bit lanes is zero.
        return !(arg._u64[0] | arg._u64[1]);
#endif
}

// 16-byte-aligned state used by the test functions below.
struct alignas(16) context_t
{
        // 32 vector slots; test1/test2 write into the slot selected by n.
        v128 vec[32];
        // Vector that test1/test2 check for being nonzero.
        v128 sat;
};

// Overwrites the whole of ctx.vec[n]: lane 0 becomes 0 and lane 1 becomes 1
// if ctx.sat is nonzero, otherwise 0.
// n is used as an index into ctx.vec (32 elements) without a bounds check.
void test1(context_t& ctx, u32 n)
{
        // Negated zero-test: bit is 1 when ctx.sat has any bit set, else 0.
        const u64 bit = !gv_testz(ctx.sat);
        // Build the result in a local, then store it with one v128 assignment.
        v128 r;
        r._u64[0] = 0;
        r._u64[1] = bit;
        ctx.vec[n] = r;
}

// Writes only lane 1 of ctx.vec[n]: 1 if ctx.sat is nonzero, otherwise 0.
// Lane 0 of ctx.vec[n] is left unchanged.
// n is used as an index into ctx.vec (32 elements) without a bounds check.
void test2(context_t& ctx, u32 n)
{
        // The bool result of the negation is converted to u64 on assignment.
        ctx.vec[n]._u64[1] = !gv_testz(ctx.sat);
}

Reply via email to