[Bug target/109069] New: Vector truncation test program produces incorrect result on big-endian powerpc64-linux-gnu with -mcpu=power10 -O2

john_platts at hotmail dot com via Gcc-bugs Wed, 08 Mar 2023 13:01:23 -0800

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109069


            Bug ID: 109069
           Summary: Vector truncation test program produces incorrect
                    result on big-endian powerpc64-linux-gnu with
                    -mcpu=power10 -O2
           Product: gcc
           Version: 12.1.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: john_platts at hotmail dot com
  Target Milestone: ---

The following C++ test program generates a test failure when compiled for
big-endian powerpc64-linux-gnu with GCC 12.2.1 with the -mcpu=power10 -O2
options:

#pragma push_macro("vector")
#pragma push_macro("pixel")
#pragma push_macro("bool")

#undef vector
#undef pixel
#undef bool

#include <altivec.h>

#pragma pop_macro("vector")
#pragma pop_macro("pixel")
#pragma pop_macro("bool")

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <iostream>
#include <string_view>
#include <type_traits>

// Maps a lane size in bytes to the matching scalar lane types and AltiVec
// vector types. The primary template has no members, so a lane size without
// an explicit specialization exposes no type aliases at all.
template<size_t LaneSize>
struct AltivecTypes {
};

// Type aliases for 1-byte lanes: char-sized scalar lanes and the
// corresponding unsigned, signed and bool vector types. No floating-point
// aliases are declared for this lane size.
template<>
struct AltivecTypes<1> {
  using UnsignedLaneT = unsigned char;
  using SignedLaneT = signed char;
  using UnsignedVectT = __vector unsigned char;
  using SignedVectT = __vector signed char;
  using BoolVectT = __vector __bool char;
};

// Type aliases for 2-byte lanes: short-sized scalar lanes and the
// corresponding unsigned, signed and bool vector types. No floating-point
// aliases are declared for this lane size.
template<>
struct AltivecTypes<2> {
  using UnsignedLaneT = unsigned short;
  using SignedLaneT = signed short;
  using UnsignedVectT = __vector unsigned short;
  using SignedVectT = __vector signed short;
  using BoolVectT = __vector __bool short;
};

// Type aliases for 4-byte lanes: int-sized integer lanes, float, and the
// corresponding unsigned, signed, bool and float vector types.
template<>
struct AltivecTypes<4> {
  using UnsignedLaneT = unsigned int;
  using SignedLaneT = signed int;
  using FloatLaneT = float;
  using UnsignedVectT = __vector unsigned int;
  using SignedVectT = __vector signed int;
  using BoolVectT = __vector __bool int;
  using FloatVectT = __vector float;
};

// Type aliases for 8-byte lanes: long-long-sized integer lanes, double, and
// the corresponding unsigned, signed, bool and double vector types.
template<>
struct AltivecTypes<8> {
  using UnsignedLaneT = unsigned long long;
  using SignedLaneT = signed long long;
  using FloatLaneT = double;
  using UnsignedVectT = __vector unsigned long long;
  using SignedVectT = __vector signed long long;
  using BoolVectT = __vector __bool long long;
  using FloatVectT = __vector double;
};

// Selects the AltiVec vector type whose lanes have scalar type T.
// The three defaulted bool parameters record, in order, whether T is signed,
// integral, and floating-point; the trailing class parameter is the void_t
// slot that the partial specializations use to require that the needed
// AltivecTypes alias exists. The primary template has no `type` member.
template<class T, bool = std::is_signed_v<T>, bool = std::is_integral_v<T>,
                  bool = std::is_floating_point_v<T>, class = void>
struct MakeAltivecVectorType {
};

// Signed integral T: `type` is the signed vector type for lanes of
// sizeof(T) bytes. Only matches when AltivecTypes<sizeof(T)>::SignedVectT
// names a type.
template<class T>
struct MakeAltivecVectorType<T, true, true, false,
  std::void_t<typename AltivecTypes<sizeof(T)>::SignedVectT>> {
  using type = typename AltivecTypes<sizeof(T)>::SignedVectT;
};

// Unsigned integral T: `type` is the unsigned vector type for lanes of
// sizeof(T) bytes. Only matches when AltivecTypes<sizeof(T)>::UnsignedVectT
// names a type.
template<class T>
struct MakeAltivecVectorType<T, false, true, false,
  std::void_t<typename AltivecTypes<sizeof(T)>::UnsignedVectT>> {
  using type = typename AltivecTypes<sizeof(T)>::UnsignedVectT;
};

// Floating-point T (signed, non-integral): `type` is the floating-point
// vector type for lanes of sizeof(T) bytes. Only matches when
// AltivecTypes<sizeof(T)>::FloatVectT names a type, which in this file is
// the case for 4-byte and 8-byte lanes.
template<class T>
struct MakeAltivecVectorType<T, true, false, true,
  std::void_t<typename AltivecTypes<sizeof(T)>::FloatVectT>> {
  using type = typename AltivecTypes<sizeof(T)>::FloatVectT;
};

// Shorthand for the vector type that MakeAltivecVectorType selects for lane
// type T. Ill-formed for any T that matches none of the specializations.
template<class T>
using AltivecVectorType = typename MakeAltivecVectorType<T>::type;

// Partial-vector load: enabled when N lanes of T occupy at most 8 bytes.
// The sizeof(T) * N source bytes are copied into one unsigned integer of
// exactly that width (so the total must be a size AltivecTypes provides),
// and that integer is splatted across the whole vector before being
// reinterpreted as a vector of T. As a result the loaded lanes are repeated
// in every sizeof(T) * N byte group of the returned vector.
template<size_t N, class T, std::enable_if_t<(sizeof(T) * N <= 8)>* = nullptr>
AltivecVectorType<T> LoadVector(const T* __restrict__ src) {
  using Bits = typename AltivecTypes<(sizeof(T) * N)>::UnsignedLaneT;
  Bits bits;
  // memcpy rather than a pointer cast, so no aliasing or alignment
  // requirement is placed on src.
  __builtin_memcpy(&bits, src, sizeof(T) * N);
  return reinterpret_cast<AltivecVectorType<T>>(vec_splats(bits));
}

// Full-vector load: enabled when N lanes of T occupy exactly 16 bytes.
// Reads the vector through a 16-byte vector typedef marked __may_alias__ so
// the access through the cast pointer is permitted.
// The typedef is also declared __aligned__(16); the only caller in this file
// passes an alignas(16) array.
template<size_t N, class T, std::enable_if_t<(sizeof(T) * N == 16)>* = nullptr>
AltivecVectorType<T> LoadVector(const T* __restrict__ src) {
  // Element type of the vector, obtained by indexing a value of the vector
  // type.
  using LaneT =
std::decay_t<decltype(std::declval<AltivecVectorType<T>>()[0])>;
  typedef LaneT LoadRawT
    __attribute__((__vector_size__(16), __aligned__(16), __may_alias__));
  const LoadRawT* __restrict__ p = reinterpret_cast<const LoadRawT*>(src);
  return reinterpret_cast<AltivecVectorType<T>>(*p);
}

// Partial-vector store: enabled when N lanes of T occupy at most 8 bytes.
// Reinterprets vect as a vector of unsigned integers that are each
// sizeof(T) * N bytes wide, takes element 0, and copies its bytes to dest.
// Only the first sizeof(T) * N bytes of dest are written.
template<size_t N, class T, std::enable_if_t<(sizeof(T) * N <= 8)>* = nullptr>
void StoreVector(T* __restrict__ dest, AltivecVectorType<T> vect) {
  using Bits = typename AltivecTypes<(sizeof(T) * N)>::UnsignedLaneT;
  typedef Bits BitsVectT __attribute__((__vector_size__(16)));
  const Bits bits = reinterpret_cast<BitsVectT>(vect)[0];
  // memcpy rather than a pointer cast, so no aliasing or alignment
  // requirement is placed on dest.
  __builtin_memcpy(dest, &bits, sizeof(T) * N);
}

// Full-vector store: enabled when N lanes of T occupy exactly 16 bytes.
// Writes the vector through a 16-byte vector typedef marked __may_alias__ so
// the access through the cast pointer is permitted.
// The typedef is also declared __aligned__(16); the only caller in this file
// passes an alignas(16) array.
template<size_t N, class T, std::enable_if_t<(sizeof(T) * N == 16)>* = nullptr>
void StoreVector(T* __restrict__ dest, AltivecVectorType<T> vect) {
  // Element type of the vector, obtained by indexing a value of the vector
  // type.
  using LaneT =
std::decay_t<decltype(std::declval<AltivecVectorType<T>>()[0])>;
  typedef LaneT StoreRawT
    __attribute__((__vector_size__(16), __aligned__(16), __may_alias__));
  StoreRawT* __restrict__ p = reinterpret_cast<StoreRawT*>(dest);
  *p = reinterpret_cast<StoreRawT>(vect);
}

// Truncates the first N lanes of vect to their low 8 bits and returns them
// in the leading bytes of a vector of unsigned char.
//
// N     - number of leading lanes whose truncated value is wanted (>= 1).
// FromV - source vector type; its lanes must be at least 2 bytes wide.
//
// Bytes of the result beyond the first N are not given a defined meaning by
// this function; callers in this file only store the first N bytes.
template<size_t N, class FromV>
AltivecVectorType<uint8_t> AltivecTruncateToU8(FromV vect) {
  static_assert(N >= 1, "N >= 1 must be true");

  using FromLaneT = std::decay_t<decltype(std::declval<FromV>()[0])>;
  constexpr size_t sizeOfFromLane = sizeof(FromLaneT);
  static_assert(sizeOfFromLane >= 2, "sizeOfFromLane >= 2 must be true");

  if constexpr(N == 1) {
    // Single lane: only result byte 0 matters, so no packing is needed.
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    // Little-endian: the least significant byte of lane 0 is already byte 0.
    return reinterpret_cast<__vector unsigned char>(vect);
#else
    // Big-endian: the least significant byte of lane 0 sits at byte offset
    // sizeof(FromLaneT) - 1. Passing the same vector as both vec_sld operands
    // rotates the bytes left by that amount, moving it to byte 0.
    // NOTE: both failing cases shown in the report ("with 1 lanes", on a
    // big-endian target) go through this branch.
    return reinterpret_cast<__vector unsigned char>(
      vec_sld(vect, vect, sizeof(FromLaneT) - sizeof(unsigned char)));
#endif
  } else {
    if constexpr(sizeOfFromLane >= 4) {
      // Lanes of 4 or 8 bytes: vec_pack halves the lane width, then recurse
      // with the same N until the lanes are 2 bytes wide.
      return AltivecTruncateToU8<N>(vec_pack(vect, vect));
    } else {
      // 2-byte lanes: view them as unsigned short so that vec_pack yields an
      // unsigned char vector, then pack down to bytes.
      const __vector unsigned short u16Vect =
        reinterpret_cast<__vector unsigned short>(vect);
      return vec_pack(u16Vect, u16Vect);
    }
  }
}

// Number of test configurations that produced a mismatch; incremented by
// DoTruncateToU8Test and reported by main.
static int TestsFailedCount = 0;

// Converts character-typed values to a wider integer type so that streaming
// them with operator<< prints a number instead of a character.
//
//   signed char            -> short
//   unsigned char          -> unsigned short
//   char                   -> short or unsigned short, following char's
//                             signedness on the target
//   char16_t (and char8_t, when available) -> uint_least16_t
//   char32_t               -> uint_least32_t
//   wchar_t                -> the signed or unsigned integer type matching
//                             wchar_t's signedness on the target
//
// Any other type is forwarded unchanged, preserving its value category.
template<class T>
static constexpr decltype(auto) CharToNumber(T&& val) {
    using Plain = std::decay_t<T>;

    if constexpr(std::is_same_v<Plain, signed char>) {
        return static_cast<short>(val);
    } else if constexpr(std::is_same_v<Plain, unsigned char>) {
        return static_cast<unsigned short>(val);
    } else if constexpr(std::is_same_v<Plain, char>) {
        // Plain char is widened to the 16-bit type of matching signedness.
        return static_cast<
            std::conditional_t<std::is_signed_v<char>,
                               short, unsigned short>>(val);
    } else if constexpr(std::is_same_v<Plain, wchar_t>) {
        // wchar_t keeps its width but becomes an ordinary integer type.
        return static_cast<
            std::conditional_t<std::is_signed_v<wchar_t>,
                               std::make_signed_t<wchar_t>,
                               std::make_unsigned_t<wchar_t>>>(val);
    } else if constexpr(std::is_same_v<Plain, char32_t>) {
        return static_cast<uint_least32_t>(val);
    } else if constexpr(std::is_same_v<Plain, char16_t>
#if defined(__cpp_char8_t) && __cpp_char8_t >= 201811L
                        || std::is_same_v<Plain, char8_t>
#endif
        ) {
        return static_cast<uint_least16_t>(val);
    } else {
        // Not a character type: hand the argument back exactly as received.
        return static_cast<T&&>(val);
    }
}

// Writes the elements of a fixed-size array to std::cout as a single
// ", "-separated list with no trailing separator or newline. Each element is
// passed through CharToNumber first, so character-typed elements are printed
// as numbers.
template<class T, size_t N>
inline void PrintValuesToCout(T (&vals)[N]) {
   using namespace std::string_view_literals;
   bool isFirst = true;
   for(auto& element : vals) {
       // A separator goes before every element except the first one.
       if(!isFirst)
           std::cout << ", "sv;
       isFirst = false;

       std::cout << CharToNumber(element);
   }
}

// Checks AltivecTruncateToU8 for N lanes of FromLaneT, then recurses with
// N / 2 lanes until N == 1 has been tested.
//
// For each N the test loads N source lanes, truncates them to bytes, stores
// N result bytes, and compares them with the expected bytes. On the first
// mismatch it prints both arrays to std::cout and increments
// TestsFailedCount once for that configuration.
template<class FromLaneT, size_t N>
void DoTruncateToU8Test() {
  using namespace std::string_view_literals;
  {
    // The low byte of base is 0x00, so the expected truncated value of lane
    // i is simply i. The higher bytes are non-zero, so picking the wrong
    // byte of a lane yields a visibly different value.
    constexpr uint32_t base = 0xFA578D00;
    // 16-byte alignment is needed by the full-vector LoadVector/StoreVector
    // overloads, which access memory through __aligned__(16) vector types.
    alignas(16) FromLaneT srcValues[N];
    alignas(16) uint8_t expectedValues[N];
    alignas(16) uint8_t actualValues[N];
    for(size_t i = 0; i < N; i++) {
      srcValues[i] =
        static_cast<FromLaneT>(base + static_cast<FromLaneT>(i));
      expectedValues[i] =
        static_cast<uint8_t>(base + static_cast<uint8_t>(i));
    }

    auto srcVect =
      LoadVector<N>(srcValues);
    auto resultVect =
      AltivecTruncateToU8<N>(srcVect);

    StoreVector<N>(actualValues, resultVect);

    for(size_t i = 0; i < N; i++) {
      if(expectedValues[i] != actualValues[i]) {
        // sizeof(FromLaneT) << 3 is the lane width in bits.
        std::cout << "Test failed for uint"sv <<
          (sizeof(FromLaneT) << 3) <<
          "_t lane type with "sv << N << " lanes\n"sv;
        std::cout << "Expected values:\n "sv;
        PrintValuesToCout(expectedValues);
        std::cout << "\nActual values:\n "sv;
        PrintValuesToCout(actualValues);
        std::cout << '\n';

        // Count each failing configuration once, not once per bad lane.
        ++TestsFailedCount;
        break;
      }
    }
  }

  // Repeat with half as many lanes: N, N / 2, ..., 1.
  if constexpr(N >= 2) {
    DoTruncateToU8Test<FromLaneT, N / 2>();
  }
}

// Runs the truncation test for 64-, 32- and 16-bit unsigned lanes, each
// starting from a full 16-byte vector (2, 4 and 8 lanes respectively) and
// halving the lane count down to 1. Prints the number of failed
// configurations and returns 1 if any failed, 0 otherwise.
// argc and argv are not used.
int main(int argc, char** argv) {
  using namespace std::string_view_literals;
  DoTruncateToU8Test<uint64_t, 2>();
  DoTruncateToU8Test<uint32_t, 4>();
  DoTruncateToU8Test<uint16_t, 8>();

  const auto failCnt = TestsFailedCount;
  std::cout << failCnt << " tests failed\n"sv;

  return static_cast<int>(failCnt != 0);
}

Here is the expected output of the above program:
0 tests failed

Here is the actual output when the above program is compiled with
GCC 12.2.1 using the -mcpu=power10 -O2 options and then run:
Test failed for uint32_t lane type with 1 lanes
Expected values:
 0
Actual values:
 250
Test failed for uint16_t lane type with 1 lanes
Expected values:
 0
Actual values:
 141
2 tests failed

The program above does generate the correct result when compiled with the
-mcpu=power9 -O2 options on the powerpc64-linux-gnu target.

The C++ test program above does generate the correct result if compiled with
clang 15 with the -mcpu=power10 -std=c++17 -O2 options.

[Bug target/109069] New: Vector truncation test program produces incorrect result on big-endian powerpc64-linux-gnu with -mcpu=power10 -O2

Reply via email to