When generating a SUBREG from V16QI to V2HF, validate_subreg fails because the V2HF size (4 bytes) is smaller than its natural size (word size). Update remove_redundant_vector_load to skip instructions whose destination mode size is smaller than its natural size.
gcc/

	PR target/120036
	* config/i386/i386-features.cc (remove_redundant_vector_load): Also
	skip if the mode size is smaller than its natural size.

gcc/testsuite/

	PR target/120036
	* g++.target/i386/pr120036.C: New test.

--
H.J.
From 6bfacf6014965d3ec498620dd9951efca9ad6015 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <hjl.to...@gmail.com> Date: Thu, 1 May 2025 06:30:41 +0800 Subject: [PATCH] x86: Skip if the mode size is smaller than its natural size When generating a SUBREG from V16QI to V2HF, validate_subreg fails since the V2HF size (4 bytes) is smaller than its natural size (word size). Update remove_redundant_vector_load to skip if the mode size is smaller than its natural size. gcc/ PR target/120036 * config/i386/i386-features.cc (remove_redundant_vector_load): Also skip if the mode size is smaller than its natural size. gcc/testsuite/ PR target/120036 * g++.target/i386/pr120036.C: New test. Signed-off-by: H.J. Lu <hjl.to...@gmail.com> --- gcc/config/i386/i386-features.cc | 7 +- gcc/testsuite/g++.target/i386/pr120036.C | 113 +++++++++++++++++++++++ 2 files changed, 118 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/g++.target/i386/pr120036.C diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index 31f3ee2ef17..8e12ca88f7a 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -3395,8 +3395,11 @@ remove_redundant_vector_load (void) rtx dest = SET_DEST (set); machine_mode mode = GET_MODE (dest); - /* Skip non-vector instruction. */ - if (!VECTOR_MODE_P (mode)) + /* Skip non-vector instruction. Also skip if the mode size is + smaller than its natural size to avoid validate_subreg + failure. 
*/ + if (!VECTOR_MODE_P (mode) + || GET_MODE_SIZE (mode) < ix86_regmode_natural_size (mode)) continue; rtx src = SET_SRC (set); diff --git a/gcc/testsuite/g++.target/i386/pr120036.C b/gcc/testsuite/g++.target/i386/pr120036.C new file mode 100644 index 00000000000..a2fc24f1286 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr120036.C @@ -0,0 +1,113 @@ +/* { dg-do compile { target fpic } } */ +/* { dg-options "-O2 -std=c++11 -march=sapphirerapids -fPIC" } */ + +typedef _Float16 Native; +struct float16_t +{ + Native native; + float16_t (); + float16_t (Native arg) : native (arg) {} + operator Native (); + float16_t + operator+ (float16_t rhs) + { + return native + rhs.native; + } + float16_t + operator* (float16_t) + { + return native * native; + } +}; +template <int N> struct Simd +{ + static constexpr int kPrivateLanes = N; +}; +template <int N> struct ClampNAndPow2 +{ + using type = Simd<N>; +}; +template <int kLimit> struct CappedTagChecker +{ + static constexpr int N = sizeof (int) ? 
kLimit : 0; + using type = typename ClampNAndPow2<N>::type; +}; +template <typename, int kLimit, int> +using CappedTag = typename CappedTagChecker<kLimit>::type; +template <class D> +int +Lanes (D) +{ + return D::kPrivateLanes; +} +template <class D> int Zero (D); +template <class D> using VFromD = decltype (Zero (D ())); +struct Vec512 +{ + __attribute__ ((__vector_size__ (16))) _Float16 raw; +}; +Vec512 Zero (Simd<2>); +template <class D> void ReduceSum (D, VFromD<D>); +struct Dot +{ + template <int, class D, typename T> + static T + Compute (D d, T *pa, int num_elements) + { + T *pb; + int N = Lanes (d), i = 0; + if (__builtin_expect (num_elements < N, 0)) + { + T sum0 = 0, sum1 = 0; + for (; i + 2 <= num_elements; i += 2) + { + float16_t __trans_tmp_6 = pa[i] * pb[i], + __trans_tmp_5 = sum0 + __trans_tmp_6, + __trans_tmp_8 = pa[i + 1] * pb[1], + __trans_tmp_7 = sum1 + __trans_tmp_8; + sum0 = __trans_tmp_5; + sum1 = __trans_tmp_7; + } + float16_t __trans_tmp_9 = sum0 + sum1; + return __trans_tmp_9; + } + decltype (Zero (d)) sum0; + ReduceSum (d, sum0); + __builtin_trap (); + } +}; +template <int kMul, class Test, int kPow2> struct ForeachCappedR +{ + static void + Do (int min_lanes, int max_lanes) + { + CappedTag<int, kMul, kPow2> d; + Test () (int (), d); + ForeachCappedR<kMul / 2, Test, kPow2>::Do (min_lanes, max_lanes); + } +}; +template <class Test, int kPow2> struct ForeachCappedR<0, Test, kPow2> +{ + static void Do (int, int); +}; +struct TestDot +{ + template <class T, class D> + void + operator() (T, D d) + { + int counts[]{ 1, 3 }; + for (int num : counts) + { + float16_t a; + T __trans_tmp_4 = Dot::Compute<0> (d, &a, num); + } + } +}; +int DotTest_TestAllDot_TestTestBody_max_lanes; +void +DotTest_TestAllDot_TestTestBody () +{ + ForeachCappedR<64, TestDot, 0>::Do ( + 1, DotTest_TestAllDot_TestTestBody_max_lanes); +} -- 2.49.0