https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109069

--- Comment #5 from John Platts <john_platts at hotmail dot com> ---
Here is another test program that exhibits the same code generation bug:
gcc 12.2.0 incorrectly optimizes a splat followed by a vec_sld on
powerpc64-linux-gnu and powerpc64le-linux-gnu when compiling with the
-mcpu=power10 -O2 options:
#pragma push_macro("vector")
#pragma push_macro("pixel")
#pragma push_macro("bool")

#undef vector
#undef pixel
#undef bool

#include <altivec.h>

#pragma pop_macro("vector")
#pragma pop_macro("pixel")
#pragma pop_macro("bool")

#include <stdint.h>
#include <type_traits>

// Maps an element type T to a GCC vector type whose lanes are of type T.
template<class T>
struct MakeSimdVectorType {
    // __vector_size__(16) makes the vector 16 bytes wide in total.
    typedef T type __attribute__((__vector_size__(16)));
};

// Shorthand for MakeSimdVectorType<T>::type.
template<class T>
using SimdVectorType = typename MakeSimdVectorType<T>::type;

// Splat overload selected for integral types that are 1 byte wide.
// Converts val to unsigned char, broadcasts it with vec_splats, and
// reinterprets the resulting vector as SimdVectorType<T>.
template<class T, std::enable_if_t<(sizeof(T) == 1 &&
    std::is_integral_v<std::decay_t<T>>)>* = nullptr>
static inline SimdVectorType<T> Splat(T val) {
    using ResultVec = SimdVectorType<T>;
    const unsigned char lane = static_cast<unsigned char>(val);
    return reinterpret_cast<ResultVec>(vec_splats(lane));
}

// Splat overload selected for integral types that are 2 bytes wide.
// Converts val to unsigned short, broadcasts it with vec_splats, and
// reinterprets the resulting vector as SimdVectorType<T>.
template<class T, std::enable_if_t<(sizeof(T) == 2 &&
    std::is_integral_v<std::decay_t<T>>)>* = nullptr>
static inline SimdVectorType<T> Splat(T val) {
    using ResultVec = SimdVectorType<T>;
    const unsigned short lane = static_cast<unsigned short>(val);
    return reinterpret_cast<ResultVec>(vec_splats(lane));
}

// Splat overload selected for integral types that are 4 bytes wide.
// Converts val to unsigned int, broadcasts it with vec_splats, and
// reinterprets the resulting vector as SimdVectorType<T>.
template<class T, std::enable_if_t<(sizeof(T) == 4 &&
    std::is_integral_v<std::decay_t<T>>)>* = nullptr>
static inline SimdVectorType<T> Splat(T val) {
    using ResultVec = SimdVectorType<T>;
    const unsigned int lane = static_cast<unsigned int>(val);
    return reinterpret_cast<ResultVec>(vec_splats(lane));
}

// Splat overload selected for integral types that are 8 bytes wide.
// Converts val to unsigned long long, broadcasts it with vec_splats, and
// reinterprets the resulting vector as SimdVectorType<T>.
template<class T, std::enable_if_t<(sizeof(T) == 8 &&
    std::is_integral_v<std::decay_t<T>>)>* = nullptr>
static inline SimdVectorType<T> Splat(T val) {
    using ResultVec = SimdVectorType<T>;
    const unsigned long long lane = static_cast<unsigned long long>(val);
    return reinterpret_cast<ResultVec>(vec_splats(lane));
}

// Splat overload for float: returns the vector produced by vec_splats(val).
static inline __vector float Splat(float val) {
    const __vector float lanes = vec_splats(val);
    return lanes;
}

// Splat overload for double: returns the vector produced by vec_splats(val).
static inline __vector double Splat(double val) {
    const __vector double lanes = vec_splats(val);
    return lanes;
}

// Byte-vector type that both SplatAndShift variants cast to and return.
using AltivecUCharVectType = __vector unsigned char;

// Splats val with Splat(), then calls vec_sld with that same vector (viewed
// as unsigned char lanes) as both operands and kShiftAmount as the shift.
// kShiftAmount is a template parameter, so vec_sld receives a constant.
// This is the variant whose callers lose the vsldoi in the assembly listings.
template<int kShiftAmount, class T>
AltivecUCharVectType SplatAndShift(T val) {
    // The splat result stays fully visible to the optimizer here.
    const auto splatResult = Splat(val);
    return vec_sld(reinterpret_cast<AltivecUCharVectType>(splatResult),
        reinterpret_cast<AltivecUCharVectType>(splatResult), kShiftAmount);
}

// Same splat + vec_sld sequence as SplatAndShift, except that the splat
// result is passed through an empty asm statement before the vec_sld.
// Callers of this variant keep the vsldoi in the assembly listings.
template<int kShiftAmount, class T>
AltivecUCharVectType SplatAndShift_2(T val) {
    // Non-const because the asm below names it as a read-write operand.
    auto splatResult = Splat(val);
    // Empty asm with a "+wa" (VSX register, read-write) operand: it emits no
    // instructions but the compiler must assume splatResult may have changed.
    __asm__(""
            : "+wa" (splatResult));
    return vec_sld(reinterpret_cast<AltivecUCharVectType>(splatResult),
        reinterpret_cast<AltivecUCharVectType>(splatResult), kShiftAmount);
}

// int16_t case, shift of 5, via SplatAndShift (no asm statement).
// Appears as _Z19SplatAndShift_I16_1v in the assembly listings.
auto SplatAndShift_I16_1() {
    return SplatAndShift<5>(int16_t{-32346});
}

// int16_t case, shift of 5, via SplatAndShift_2 (with the empty asm statement).
// Appears as _Z19SplatAndShift_I16_2v in the assembly listings.
auto SplatAndShift_I16_2() {
    return SplatAndShift_2<5>(int16_t{-32346});
}

// int32_t case, shift of 3, via SplatAndShift (no asm statement).
// Appears as _Z19SplatAndShift_I32_1v in the assembly listings.
auto SplatAndShift_I32_1() {
    return SplatAndShift<3>(int32_t{-1394373889});
}

// int32_t case, shift of 3, via SplatAndShift_2 (with the empty asm statement).
// Appears as _Z19SplatAndShift_I32_2v in the assembly listings.
auto SplatAndShift_I32_2() {
    return SplatAndShift_2<3>(int32_t{-1394373889});
}

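Here is the assembly code that is generated for the above code on
powerpc64le-linux-gnu with the -O2 -mcpu=power10 options. Note that the two
_1 functions consist of only the xxspltiw, with no vsldoi, whereas the two
_2 functions (which pass the splat result through an empty asm statement)
contain both instructions: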
_Z19SplatAndShift_I16_1v:
        xxspltiw 34,2175173030
        blr
_Z19SplatAndShift_I16_2v:
        xxspltiw 34,2175173030
        vsldoi 2,2,2,5
        blr
_Z19SplatAndShift_I32_1v:
        xxspltiw 34,2900593407
        blr
_Z19SplatAndShift_I32_2v:
        xxspltiw 34,2900593407
        vsldoi 2,2,2,3
        blr

Here is the assembly code that is generated for the above code on
powerpc64-linux-gnu with the -O2 -mcpu=power10 options; the vsldoi is again
missing from the two _1 functions:
_Z19SplatAndShift_I16_1v:
        .quad   .L._Z19SplatAndShift_I16_1v,.TOC.@tocbase,0
.L._Z19SplatAndShift_I16_1v:
        xxspltiw 34,2175173030
        blr
_Z19SplatAndShift_I16_2v:
        .quad   .L._Z19SplatAndShift_I16_2v,.TOC.@tocbase,0
.L._Z19SplatAndShift_I16_2v:
        xxspltiw 34,2175173030
        vsldoi 2,2,2,5
        blr
_Z19SplatAndShift_I32_1v:
        .quad   .L._Z19SplatAndShift_I32_1v,.TOC.@tocbase,0
.L._Z19SplatAndShift_I32_1v:
        xxspltiw 34,2900593407
        blr
_Z19SplatAndShift_I32_2v:
        .quad   .L._Z19SplatAndShift_I32_2v,.TOC.@tocbase,0
.L._Z19SplatAndShift_I32_2v:
        xxspltiw 34,2900593407
        vsldoi 2,2,2,3
        blr

Reply via email to