https://gcc.gnu.org/g:6b368b8abdc3b92900777ca407945c3d1125d089

commit r15-10549-g6b368b8abdc3b92900777ca407945c3d1125d089
Author: Pan Li <[email protected]>
Date:   Thu Nov 20 08:16:10 2025 -0700

    [PATCH v1] RISC-V: Fix missed zero extend for unsigned scalar SAT_TRUNC [PR122692]
    
    When the input of the scalar unsigned SAT_TRUNC is not in Xmode,
    the rtx needs to be zero-extended to Xmode before the underlying
    code generation.  Most of the other SAT_* code generation already
    leverages the API riscv_extend_to_xmode_reg, but ustrunc was
    missed, which results in the failures mentioned in the PR.
    
    The following test suites passed for this patch series.
    * The rv64gcv full regression test.
    
            PR target/122692
    
    gcc/ChangeLog:
    
            * config/riscv/riscv.cc (riscv_expand_ustrunc): Leverage
            riscv_extend_to_xmode_reg to take care of src rtx.
    
    gcc/testsuite/ChangeLog:
    
            * g++.target/riscv/pr122692-run-1.C: New test.
            * g++.target/riscv/pr122692-run-2.C: New test.
    
    Signed-off-by: Pan Li <[email protected]>
    (cherry picked from commit 8615c855fa5647db11c84b882f0d88aadfc28863)

Diff:
---
 gcc/config/riscv/riscv.cc                       |   2 +-
 gcc/testsuite/g++.target/riscv/pr122692-run-1.C | 116 +++++++++++++++
 gcc/testsuite/g++.target/riscv/pr122692-run-2.C | 178 ++++++++++++++++++++++++
 3 files changed, 295 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index dc5c72097e82..d3520af1cddc 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -13017,7 +13017,7 @@ riscv_expand_ustrunc (rtx dest, rtx src)
   gcc_assert (precision < 64);
 
   uint64_t max = ((uint64_t)1u << precision) - 1u;
-  rtx xmode_src = gen_lowpart (Xmode, src);
+  rtx xmode_src = riscv_extend_to_xmode_reg (src, GET_MODE (src), ZERO_EXTEND);
   rtx xmode_dest = gen_reg_rtx (Xmode);
   rtx xmode_lt = gen_reg_rtx (Xmode);
 
diff --git a/gcc/testsuite/g++.target/riscv/pr122692-run-1.C b/gcc/testsuite/g++.target/riscv/pr122692-run-1.C
new file mode 100644
index 000000000000..52def7fea2a3
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/pr122692-run-1.C
@@ -0,0 +1,116 @@
+/* { dg-do run { target { rv32 || rv64 } } } */
+/* { dg-options "-O2" } */
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <array>
+#include <limits>
+#include <memory>
+#include <new>
+#include <type_traits>
+
+#define HWY_INLINE inline __attribute__((__always_inline__))
+#define HWY_MIN(a, b) ((a) < (b) ? (a) : (b))
+#define HWY_MAX(a, b) ((a) > (b) ? (a) : (b))
+
+#if defined(__GNUC__) && !defined(__clang__)
+#define NOIPA_ATTR __attribute__((__noipa__))
+#else
+#define NOIPA_ATTR
+#endif
+
+namespace test {
+
+static __attribute__((__noinline__)) NOIPA_ATTR int Unpredictable1() {
+  int result = 1;
+  __asm__("" : "+r"(result)::);
+  return result;
+}
+
+class RandomState {
+ public:
+  explicit RandomState(
+      const uint64_t seed = uint64_t{0x123456789} *
+                            static_cast<uint64_t>(test::Unpredictable1())) {
+    s0_ = SplitMix64(seed + 0x9E3779B97F4A7C15ull);
+    s1_ = SplitMix64(s0_);
+  }
+
+  HWY_INLINE uint64_t operator()() {
+    uint64_t s1 = s0_;
+    const uint64_t s0 = s1_;
+    const uint64_t bits = s1 + s0;
+    s0_ = s0;
+    s1 ^= s1 << 23;
+    s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5);
+    s1_ = s1;
+    return bits;
+  }
+
+ private:
+  static uint64_t SplitMix64(uint64_t z) {
+    z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ull;
+    z = (z ^ (z >> 27)) * 0x94D049BB133111EBull;
+    return z ^ (z >> 31);
+  }
+
+  uint64_t s0_;
+  uint64_t s1_;
+};
+
+static __attribute__((__noinline__)) NOIPA_ATTR void GenerateRandomU16x16Vals(
+    RandomState& rng, uint16_t* const from, uint8_t* const expected) {
+  using T = uint16_t;
+  using TN = uint8_t;
+
+  const T min = 0;
+  const T max = static_cast<T>(std::numeric_limits<TN>::max());
+
+  for (size_t i = 0; i < 16; ++i) {
+    const uint64_t bits = rng();
+    __builtin_memcpy(&from[i], &bits, sizeof(T));  // not same size
+    expected[i] = static_cast<TN>(HWY_MIN(HWY_MAX(min, from[i]), max));
+  }
+}
+
+static __attribute__((__noinline__)) NOIPA_ATTR void DoVerifyU16x16Demote(
+    const uint16_t* const from, const uint8_t* const expected) {
+  for (int i = 0; i < 16; ++i) {
+    const uint8_t actual =
+        static_cast<uint8_t>((from[i] < 0xFF) ? from[i] : 0xFF);
+    if (expected[i] != actual) {
+      fprintf(stderr,
+              "Mismatch between expected result and actual result\nfrom=%u, "
+              "expected=%u, actual=%u\n",
+              static_cast<unsigned>(from[i]),
+              static_cast<unsigned>(expected[i]),
+              static_cast<unsigned>(actual));
+      __builtin_abort();
+    }
+  }
+}
+
+static void DoDemoteU16x16ToU8x16Test() {
+  using T = uint16_t;
+  using TN = uint8_t;
+  std::array<T, 16> from;
+  std::array<TN, 16> expected;
+
+  RandomState rng;
+  for (size_t rep = 0; rep < 1000; ++rep) {
+    GenerateRandomU16x16Vals(rng, from.data(), expected.data());
+    DoVerifyU16x16Demote(from.data(), expected.data());
+  }
+}
+
+}  // namespace test
+
+int main(int /*argc*/, char** /*argv*/) {
+  printf("Doing DoDemoteU16x16ToU8x16Test\n");
+  test::DoDemoteU16x16ToU8x16Test();
+  printf("Test completed successfully\n");
+  return 0;
+}
diff --git a/gcc/testsuite/g++.target/riscv/pr122692-run-2.C b/gcc/testsuite/g++.target/riscv/pr122692-run-2.C
new file mode 100644
index 000000000000..fd50e3238bf7
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/pr122692-run-2.C
@@ -0,0 +1,178 @@
+/* { dg-do run { target { rv32 || rv64 } } } */
+/* { dg-options "-O2" } */
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <array>
+#include <limits>
+#include <memory>
+#include <new>
+#include <type_traits>
+
+#define HWY_INLINE inline __attribute__((__always_inline__))
+#define HWY_MIN(a, b) ((a) < (b) ? (a) : (b))
+#define HWY_MAX(a, b) ((a) > (b) ? (a) : (b))
+
+#if defined(__GNUC__) && !defined(__clang__)
+#define NOIPA_ATTR __attribute__((__noipa__))
+#else
+#define NOIPA_ATTR
+#endif
+
+namespace test {
+
+static __attribute__((__noinline__)) NOIPA_ATTR int Unpredictable1() {
+  int result = 1;
+  __asm__("" : "+r"(result)::);
+  return result;
+}
+
+class RandomState {
+ public:
+  explicit RandomState(
+      const uint64_t seed = uint64_t{0x123456789} *
+                            static_cast<uint64_t>(test::Unpredictable1())) {
+    s0_ = SplitMix64(seed + 0x9E3779B97F4A7C15ull);
+    s1_ = SplitMix64(s0_);
+  }
+
+  HWY_INLINE uint64_t operator()() {
+    uint64_t s1 = s0_;
+    const uint64_t s0 = s1_;
+    const uint64_t bits = s1 + s0;
+    s0_ = s0;
+    s1 ^= s1 << 23;
+    s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5);
+    s1_ = s1;
+    return bits;
+  }
+
+ private:
+  static uint64_t SplitMix64(uint64_t z) {
+    z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ull;
+    z = (z ^ (z >> 27)) * 0x94D049BB133111EBull;
+    return z ^ (z >> 31);
+  }
+
+  uint64_t s0_;
+  uint64_t s1_;
+};
+
+template <class T>
+static __attribute__((noinline)) NOIPA_ATTR T* MallocArray(
+    size_t num_to_alloc) {
+  static_assert(sizeof(T) > 0, "sizeof(T) > 0 must be true");
+  constexpr size_t kMaxNumToAlloc =
+      std::numeric_limits<size_t>::max() / sizeof(T);
+  if (num_to_alloc > kMaxNumToAlloc) {
+    return nullptr;
+  }
+
+  return reinterpret_cast<T*>(::malloc(num_to_alloc * sizeof(T)));
+}
+
+struct CFreeDeleter {
+  HWY_INLINE void operator()(const volatile void* ptr) const noexcept {
+    if (ptr) {
+      ::free(const_cast<void*>(ptr));
+    }
+  }
+};
+
+#define HWY_ASSERT(cond)                                              \
+  do {                                                                \
+    if (__builtin_expect(!(cond), false)) {                           \
+      fprintf(stderr, "Assertion failed at line %d of file %s: %s\n", \
+              static_cast<int>(__LINE__), __FILE__, "" #cond);        \
+      fflush(stderr);                                                 \
+      __builtin_abort();                                              \
+    }                                                                 \
+  } while (false)
+
+static __attribute__((__noinline__)) NOIPA_ATTR void AssertU8x16ArrayEquals(
+    std::array<uint8_t, 16> expected, std::array<uint8_t, 16> actual,
+    const int line, const char* filename) {
+  for (size_t i = 0; i < 16; i++) {
+    if (expected[i] != actual[i]) {
+      fprintf(stderr, "Array mismatch at line %d of file %s:\n", line,
+              filename);
+      fprintf(stderr,
+              "Expected: {%" PRIu8 ", %" PRIu8 ", %" PRIu8 ", %" PRIu8
+              ", %" PRIu8 ", %" PRIu8 ", %" PRIu8 ", %" PRIu8 ", %" PRIu8
+              ", %" PRIu8 ", %" PRIu8 ", %" PRIu8 ", %" PRIu8 ", %" PRIu8
+              ", %" PRIu8 ", %" PRIu8 "}\n",
+              expected[0], expected[1], expected[2], expected[3], expected[4],
+              expected[5], expected[6], expected[7], expected[8], expected[9],
+              expected[10], expected[11], expected[12], expected[13],
+              expected[14], expected[15]);
+      fprintf(stderr,
+              "Actual:   {%" PRIu8 ", %" PRIu8 ", %" PRIu8 ", %" PRIu8
+              ", %" PRIu8 ", %" PRIu8 ", %" PRIu8 ", %" PRIu8 ", %" PRIu8
+              ", %" PRIu8 ", %" PRIu8 ", %" PRIu8 ", %" PRIu8 ", %" PRIu8
+              ", %" PRIu8 ", %" PRIu8 "}\n",
+              actual[0], actual[1], actual[2], actual[3], actual[4], actual[5],
+              actual[6], actual[7], actual[8], actual[9], actual[10],
+              actual[11], actual[12], actual[13], actual[14], actual[15]);
+      __builtin_abort();
+    }
+  }
+}
+
+#define ASSERT_U8X16_ARR_EQUALS(expected, actual) \
+  AssertU8x16ArrayEquals(expected, actual, __LINE__, __FILE__)
+
+static std::array<uint8_t, 16> LoadU8x16Vec(const uint8_t* __restrict ptr) {
+  std::array<uint8_t, 16> result;
+  __builtin_memcpy(&result, ptr, 16 * sizeof(uint8_t));
+  return result;
+}
+
+static std::array<uint16_t, 8> LoadU16x8Vec(const uint16_t* __restrict ptr) {
+  std::array<uint16_t, 8> result;
+  __builtin_memcpy(&result, ptr, 8 * sizeof(uint16_t));
+  return result;
+}
+
+static void DoOrderedDemote2U16x8ToU8x16Test() {
+  using T = uint16_t;
+  using TN = uint8_t;
+  std::unique_ptr<T[], CFreeDeleter> from(MallocArray<T>(16));
+  std::unique_ptr<TN[], CFreeDeleter> expected(MallocArray<TN>(16));
+  HWY_ASSERT(from && expected);
+
+  constexpr size_t N = 8;
+  constexpr size_t twiceN = 16;
+
+  // Narrower range in the wider type, for clamping before we cast
+  const T min = static_cast<T>(
+      std::is_signed_v<T> ? std::numeric_limits<TN>::lowest() : TN{0});
+  const T max = std::numeric_limits<TN>::max();
+
+  RandomState rng;
+  for (size_t rep = 0; rep < 1000; ++rep) {
+    for (size_t i = 0; i < twiceN; ++i) {
+      const uint64_t bits = rng();
+      __builtin_memcpy(&from[i], &bits, sizeof(T));  // not same size
+      expected[i] = static_cast<TN>(HWY_MIN(HWY_MAX(min, from[i]), max));
+    }
+
+    std::array<uint8_t, 16> actual;
+    for (size_t i = 0; i < 16; i++) {
+      actual[i] = static_cast<uint8_t>(HWY_MIN(from[i], 0xFF));
+    }
+    ASSERT_U8X16_ARR_EQUALS(LoadU8x16Vec(expected.get()), actual);
+  }
+}
+
+}  // namespace test
+
+int main(int /*argc*/, char** /*argv*/) {
+  printf("Doing DoOrderedDemote2U16x8ToU8x16Test\n");
+  test::DoOrderedDemote2U16x8ToU8x16Test();
+  printf("Test completed successfully\n");
+  return 0;
+}

Reply via email to