https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120036

            Bug ID: 120036
           Summary: [16 Regression] ICE on highway-1.2.0: during RTL pass:
                    rrvl: in gen_rtx_SUBREG, at emit-rtl.cc:1032
           Product: gcc
           Version: 15.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: slyfox at gcc dot gnu.org
  Target Milestone: ---

Observed the error today on gcc-master at r16-303-g1a13684dfc7286 (not bisected
yet).

Mildly reduced example:

// $ cat dot_test.cc.cc
#pragma GCC target "sse2,ssse3" \
                   ",avx512f,avx512cd,avx512vl,avx512dq,avx512bw" \
                   ",avx512fp16"
typedef _Float16 Native;
    struct float16_t {
     Native native;
     float16_t();
     float16_t(Native arg) : native(arg) {
 }
     operator Native();
     float16_t operator+(float16_t rhs) {
  return native + rhs.native;
  }
     float16_t operator*(float16_t) {
  return native * native;
  }
   };
    template < int N > struct Simd {
     static constexpr int kPrivateLanes = N;
   };
    template < int N > struct ClampNAndPow2 {
     using type = Simd< N >;
   };
    template < int kLimit > struct CappedTagChecker {
     static constexpr int N = sizeof(int) ? kLimit : 0;
     using type = typename ClampNAndPow2< N >::type;
   };
    template < typename, int kLimit, int > using CappedTag = typename
CappedTagChecker< kLimit >::type;
    template < class D > int Lanes(D) {
   return D::kPrivateLanes;
   }
    template < class D > int Zero(D);
    template < class D > using VFromD = decltype(Zero(D()));
    struct Vec512 {
     __attribute__((__vector_size__(16))) _Float16 raw;
   };
    Vec512 Zero(Simd< 2 >);
    template < class D > void ReduceSum(D, VFromD< D >);
    struct Dot {
     template < int, class D, typename T >   static T Compute(D d, T *pa, int
num_elements) {
      T *pb;
      int N = Lanes(d), i = 0;
      if (__builtin_expect(num_elements < N, 0)) {
       T sum0 = 0, sum1 = 0;
       for (;
 i + 2 <= num_elements;
 i += 2) {         float16_t __trans_tmp_6 = pa[i] * pb[i],                  
__trans_tmp_5 = sum0 + __trans_tmp_6,                   __trans_tmp_8 = pa[i +
1] * pb[1],                   __trans_tmp_7 = sum1 + __trans_tmp_8;        
sum0 = __trans_tmp_5;         sum1 = __trans_tmp_7;       }
       float16_t __trans_tmp_9 = sum0 + sum1;
       return __trans_tmp_9;
     }
      decltype(Zero(d)) sum0;
      ReduceSum(d, sum0);
      __builtin_trap();
    }
   };
    template < int kMul, class Test, int kPow2 > struct ForeachCappedR {
     static void Do(int min_lanes, int max_lanes) {
      CappedTag< int, kMul, kPow2 > d;
      Test()(int(), d);
      ForeachCappedR< kMul / 2, Test, kPow2 >::Do(min_lanes, max_lanes);
    }
   };
    template < class Test, int kPow2 > struct ForeachCappedR< 0, Test, kPow2 >
{
     static void Do(int, int);
   };
    struct TestDot {
     template < class T, class D > void operator()(T, D d) {
      int counts[]{
1, 3};
      for (int num : counts) {
       float16_t a;
       T __trans_tmp_4 = Dot::Compute< 0 >(d, &a, num);
     }
    }
   };
    int DotTest_TestAllDot_TestTestBody_max_lanes;
    void DotTest_TestAllDot_TestTestBody() {
        ForeachCappedR< 64, TestDot, 0 >::Do(1,
DotTest_TestAllDot_TestTestBody_max_lanes);
   }

Command line and resulting crash:

$ gcc/xg++ -Bgcc -c dot_test.cc.cc -o bug.o -fPIC -O2
-fzero-call-used-regs=used-gpr
during RTL pass: rrvl
dot_test.cc.cc: In function ‘void DotTest_TestAllDot_TestTestBody()’:
dot_test.cc.cc:79:4: internal compiler error: in gen_rtx_SUBREG, at
emit-rtl.cc:1032
   79 |    }
      |    ^
0x2949cb5 diagnostic_context::diagnostic_impl(rich_location*,
diagnostic_metadata const*, diagnostic_option_id, char const*, __va_list_tag
(*) [1], diagnostic_t)
        ???:0
0x295cf75 internal_error(char const*, ...)
        ???:0
0x294591c fancy_abort(char const*, int, char const*)
        ???:0
0xdfe093 gen_rtx_SUBREG(machine_mode, rtx_def*, poly_int<1u, unsigned long>)
        ???:0
0x18de430 replace_vector_const(machine_mode, rtx_def*, auto_bitmap&)
        ???:0
0x18e0c22 (anonymous
namespace)::pass_remove_redundant_vector_load::execute(function*)
        ???:0
Please submit a full bug report, with preprocessed source (by using
-freport-bug).
Please include the complete backtrace with any bug report.
See <https://gcc.gnu.org/bugs/> for instructions.

gcc was configured and built as follows:

$ gcc/xg++ -Bgcc  -v
Reading specs from gcc/specs
COLLECT_GCC=gcc/xg++
COLLECT_LTO_WRAPPER=gcc/lto-wrapper
Target: x86_64-pc-linux-gnu
Configured with: /home/slyfox/dev/git/gcc/configure --disable-multilib
--disable-bootstrap --disable-lto --disable-libsanitizer
--disable-libstdcxx-pch --enable-languages=c,c++ --disable-libgomp
--disable-libquadmath --disable-libvtv CFLAGS='-O1 -g0' CXXFLAGS='-O1 -g0'
LDFLAGS='-O1 -g0'
Thread model: posix
Supported LTO compression algorithms: zlib
gcc version 16.0.0 20250430 (experimental) (GCC)

Reply via email to