https://gcc.gnu.org/g:8615c855fa5647db11c84b882f0d88aadfc28863
commit r16-5455-g8615c855fa5647db11c84b882f0d88aadfc28863 Author: Pan Li <[email protected]> Date: Thu Nov 20 08:16:10 2025 -0700 [PATCH v1] RISC-V: Fix missed zero extend for unsigned scalar SAT_TRUNC [PR122692] When the input of the scalar unsigned SAT_TRUNC is not in Xmode, the rtx needs to be zero-extended to Xmode before the underlying code generation. Most of the other SAT_* code generation already leverages the riscv_extend_to_xmode_reg API, but ustrunc was missed, which results in the failures mentioned in the PR. The following test suites passed for this patch series. * The rv64gcv full regression test. PR target/122692 gcc/ChangeLog: * config/riscv/riscv.cc (riscv_expand_ustrunc): Leverage riscv_extend_to_xmode_reg to take care of src rtx. gcc/testsuite/ChangeLog: * g++.target/riscv/pr122692-run-1.C: New test. * g++.target/riscv/pr122692-run-2.C: New test. Signed-off-by: Pan Li <[email protected]> Diff: --- gcc/config/riscv/riscv.cc | 2 +- gcc/testsuite/g++.target/riscv/pr122692-run-1.C | 116 +++++++++++++++ gcc/testsuite/g++.target/riscv/pr122692-run-2.C | 178 ++++++++++++++++++++++++ 3 files changed, 295 insertions(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 844a008880bb..2d14b3c92f57 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -14547,7 +14547,7 @@ riscv_expand_ustrunc (rtx dest, rtx src) gcc_assert (precision < 64); uint64_t max = ((uint64_t)1u << precision) - 1u; - rtx xmode_src = gen_lowpart (Xmode, src); + rtx xmode_src = riscv_extend_to_xmode_reg (src, GET_MODE (src), ZERO_EXTEND); rtx xmode_dest = gen_reg_rtx (Xmode); rtx xmode_lt = gen_reg_rtx (Xmode); diff --git a/gcc/testsuite/g++.target/riscv/pr122692-run-1.C b/gcc/testsuite/g++.target/riscv/pr122692-run-1.C new file mode 100644 index 000000000000..52def7fea2a3 --- /dev/null +++ b/gcc/testsuite/g++.target/riscv/pr122692-run-1.C @@ -0,0 +1,116 @@ +/* { dg-do run { target { rv32 || rv64 } } } */ +/* { dg-options "-O2" } */ + +#include 
<stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +#include <array> +#include <limits> +#include <memory> +#include <new> +#include <type_traits> + +#define HWY_INLINE inline __attribute__((__always_inline__)) +#define HWY_MIN(a, b) ((a) < (b) ? (a) : (b)) +#define HWY_MAX(a, b) ((a) > (b) ? (a) : (b)) + +#if defined(__GNUC__) && !defined(__clang__) +#define NOIPA_ATTR __attribute__((__noipa__)) +#else +#define NOIPA_ATTR +#endif + +namespace test { + +static __attribute__((__noinline__)) NOIPA_ATTR int Unpredictable1() { + int result = 1; + __asm__("" : "+r"(result)::); + return result; +} + +class RandomState { + public: + explicit RandomState( + const uint64_t seed = uint64_t{0x123456789} * + static_cast<uint64_t>(test::Unpredictable1())) { + s0_ = SplitMix64(seed + 0x9E3779B97F4A7C15ull); + s1_ = SplitMix64(s0_); + } + + HWY_INLINE uint64_t operator()() { + uint64_t s1 = s0_; + const uint64_t s0 = s1_; + const uint64_t bits = s1 + s0; + s0_ = s0; + s1 ^= s1 << 23; + s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5); + s1_ = s1; + return bits; + } + + private: + static uint64_t SplitMix64(uint64_t z) { + z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ull; + z = (z ^ (z >> 27)) * 0x94D049BB133111EBull; + return z ^ (z >> 31); + } + + uint64_t s0_; + uint64_t s1_; +}; + +static __attribute__((__noinline__)) NOIPA_ATTR void GenerateRandomU16x16Vals( + RandomState& rng, uint16_t* const from, uint8_t* const expected) { + using T = uint16_t; + using TN = uint8_t; + + const T min = 0; + const T max = static_cast<T>(std::numeric_limits<TN>::max()); + + for (size_t i = 0; i < 16; ++i) { + const uint64_t bits = rng(); + __builtin_memcpy(&from[i], &bits, sizeof(T)); // not same size + expected[i] = static_cast<TN>(HWY_MIN(HWY_MAX(min, from[i]), max)); + } +} + +static __attribute__((__noinline__)) NOIPA_ATTR void DoVerifyU16x16Demote( + const uint16_t* const from, const uint8_t* const expected) { + for (int i = 0; i < 16; ++i) { + const uint8_t actual = + 
static_cast<uint8_t>((from[i] < 0xFF) ? from[i] : 0xFF); + if (expected[i] != actual) { + fprintf(stderr, + "Mismatch between expected result and actual result\nfrom=%u, " + "expected=%u, actual=%u\n", + static_cast<unsigned>(from[i]), + static_cast<unsigned>(expected[i]), + static_cast<unsigned>(actual)); + __builtin_abort(); + } + } +} + +static void DoDemoteU16x16ToU8x16Test() { + using T = uint16_t; + using TN = uint8_t; + std::array<T, 16> from; + std::array<TN, 16> expected; + + RandomState rng; + for (size_t rep = 0; rep < 1000; ++rep) { + GenerateRandomU16x16Vals(rng, from.data(), expected.data()); + DoVerifyU16x16Demote(from.data(), expected.data()); + } +} + +} // namespace test + +int main(int /*argc*/, char** /*argv*/) { + printf("Doing DoDemoteU16x16ToU8x16Test\n"); + test::DoDemoteU16x16ToU8x16Test(); + printf("Test completed successfully\n"); + return 0; +} diff --git a/gcc/testsuite/g++.target/riscv/pr122692-run-2.C b/gcc/testsuite/g++.target/riscv/pr122692-run-2.C new file mode 100644 index 000000000000..fd50e3238bf7 --- /dev/null +++ b/gcc/testsuite/g++.target/riscv/pr122692-run-2.C @@ -0,0 +1,178 @@ +/* { dg-do run { target { rv32 || rv64 } } } */ +/* { dg-options "-O2" } */ + +#include <inttypes.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +#include <array> +#include <limits> +#include <memory> +#include <new> +#include <type_traits> + +#define HWY_INLINE inline __attribute__((__always_inline__)) +#define HWY_MIN(a, b) ((a) < (b) ? (a) : (b)) +#define HWY_MAX(a, b) ((a) > (b) ? 
(a) : (b)) + +#if defined(__GNUC__) && !defined(__clang__) +#define NOIPA_ATTR __attribute__((__noipa__)) +#else +#define NOIPA_ATTR +#endif + +namespace test { + +static __attribute__((__noinline__)) NOIPA_ATTR int Unpredictable1() { + int result = 1; + __asm__("" : "+r"(result)::); + return result; +} + +class RandomState { + public: + explicit RandomState( + const uint64_t seed = uint64_t{0x123456789} * + static_cast<uint64_t>(test::Unpredictable1())) { + s0_ = SplitMix64(seed + 0x9E3779B97F4A7C15ull); + s1_ = SplitMix64(s0_); + } + + HWY_INLINE uint64_t operator()() { + uint64_t s1 = s0_; + const uint64_t s0 = s1_; + const uint64_t bits = s1 + s0; + s0_ = s0; + s1 ^= s1 << 23; + s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5); + s1_ = s1; + return bits; + } + + private: + static uint64_t SplitMix64(uint64_t z) { + z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ull; + z = (z ^ (z >> 27)) * 0x94D049BB133111EBull; + return z ^ (z >> 31); + } + + uint64_t s0_; + uint64_t s1_; +}; + +template <class T> +static __attribute__((noinline)) NOIPA_ATTR T* MallocArray( + size_t num_to_alloc) { + static_assert(sizeof(T) > 0, "sizeof(T) > 0 must be true"); + constexpr size_t kMaxNumToAlloc = + std::numeric_limits<size_t>::max() / sizeof(T); + if (num_to_alloc > kMaxNumToAlloc) { + return nullptr; + } + + return reinterpret_cast<T*>(::malloc(num_to_alloc * sizeof(T))); +} + +struct CFreeDeleter { + HWY_INLINE void operator()(const volatile void* ptr) const noexcept { + if (ptr) { + ::free(const_cast<void*>(ptr)); + } + } +}; + +#define HWY_ASSERT(cond) \ + do { \ + if (__builtin_expect(!(cond), false)) { \ + fprintf(stderr, "Assertion failed at line %d of file %s: %s\n", \ + static_cast<int>(__LINE__), __FILE__, "" #cond); \ + fflush(stderr); \ + __builtin_abort(); \ + } \ + } while (false) + +static __attribute__((__noinline__)) NOIPA_ATTR void AssertU8x16ArrayEquals( + std::array<uint8_t, 16> expected, std::array<uint8_t, 16> actual, + const int line, const char* filename) { + for (size_t i = 
0; i < 16; i++) { + if (expected[i] != actual[i]) { + fprintf(stderr, "Array mismatch at line %d of file %s:\n", line, + filename); + fprintf(stderr, + "Expected: {%" PRIu8 ", %" PRIu8 ", %" PRIu8 ", %" PRIu8 + ", %" PRIu8 ", %" PRIu8 ", %" PRIu8 ", %" PRIu8 ", %" PRIu8 + ", %" PRIu8 ", %" PRIu8 ", %" PRIu8 ", %" PRIu8 ", %" PRIu8 + ", %" PRIu8 ", %" PRIu8 "}\n", + expected[0], expected[1], expected[2], expected[3], expected[4], + expected[5], expected[6], expected[7], expected[8], expected[9], + expected[10], expected[11], expected[12], expected[13], + expected[14], expected[15]); + fprintf(stderr, + "Actual: {%" PRIu8 ", %" PRIu8 ", %" PRIu8 ", %" PRIu8 + ", %" PRIu8 ", %" PRIu8 ", %" PRIu8 ", %" PRIu8 ", %" PRIu8 + ", %" PRIu8 ", %" PRIu8 ", %" PRIu8 ", %" PRIu8 ", %" PRIu8 + ", %" PRIu8 ", %" PRIu8 "}\n", + actual[0], actual[1], actual[2], actual[3], actual[4], actual[5], + actual[6], actual[7], actual[8], actual[9], actual[10], + actual[11], actual[12], actual[13], actual[14], actual[15]); + __builtin_abort(); + } + } +} + +#define ASSERT_U8X16_ARR_EQUALS(expected, actual) \ + AssertU8x16ArrayEquals(expected, actual, __LINE__, __FILE__) + +static std::array<uint8_t, 16> LoadU8x16Vec(const uint8_t* __restrict ptr) { + std::array<uint8_t, 16> result; + __builtin_memcpy(&result, ptr, 16 * sizeof(uint8_t)); + return result; +} + +static std::array<uint16_t, 8> LoadU16x8Vec(const uint16_t* __restrict ptr) { + std::array<uint16_t, 8> result; + __builtin_memcpy(&result, ptr, 8 * sizeof(uint16_t)); + return result; +} + +static void DoOrderedDemote2U16x8ToU8x16Test() { + using T = uint16_t; + using TN = uint8_t; + std::unique_ptr<T[], CFreeDeleter> from(MallocArray<T>(16)); + std::unique_ptr<TN[], CFreeDeleter> expected(MallocArray<TN>(16)); + HWY_ASSERT(from && expected); + + constexpr size_t N = 8; + constexpr size_t twiceN = 16; + + // Narrower range in the wider type, for clamping before we cast + const T min = static_cast<T>( + std::is_signed_v<T> ? 
std::numeric_limits<TN>::lowest() : TN{0}); + const T max = std::numeric_limits<TN>::max(); + + RandomState rng; + for (size_t rep = 0; rep < 1000; ++rep) { + for (size_t i = 0; i < twiceN; ++i) { + const uint64_t bits = rng(); + __builtin_memcpy(&from[i], &bits, sizeof(T)); // not same size + expected[i] = static_cast<TN>(HWY_MIN(HWY_MAX(min, from[i]), max)); + } + + std::array<uint8_t, 16> actual; + for (size_t i = 0; i < 16; i++) { + actual[i] = static_cast<uint8_t>(HWY_MIN(from[i], 0xFF)); + } + ASSERT_U8X16_ARR_EQUALS(LoadU8x16Vec(expected.get()), actual); + } +} + +} // namespace test + +int main(int /*argc*/, char** /*argv*/) { + printf("Doing DoOrderedDemote2U16x8ToU8x16Test\n"); + test::DoOrderedDemote2U16x8ToU8x16Test(); + printf("Test completed successfully\n"); + return 0; +}
