https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112824

--- Comment #3 from Chris Elrod <elrodc at gmail dot com> ---
> I thought I hit the important cases, but my non-minimal example still gets 
> unnecessary register splits and stack spills, so maybe I missed places, or 
> perhaps there's another issue.

Adding the unroll pragma to `Vector`'s `operator+` and `operator*` overloads:

// Element-wise addition of two Vector<T,N> values.
// Returns a Vector<T,N> z with z.data[n] = x.data[n] + y.data[n] for every
// n in [0, Vector<T,N>::L). z is default-initialized and only those entries
// are assigned here.
// [[gnu::always_inline]] requests that GCC inline this function at its call
// sites.
template<typename T, ptrdiff_t N>
[[gnu::always_inline]] constexpr auto operator+(Vector<T,N> x, Vector<T,N> y)
-> Vector<T,N> {
    Vector<T,N> z;
    // Unroll request (factor 16) for the loop that follows; this pragma is
    // the change being tested in this comment.
#pragma GCC unroll 16
    for (ptrdiff_t n = 0; n < Vector<T,N>::L; ++n) z.data[n] = x.data[n] +
y.data[n];
    return z;
}
// Element-wise multiplication of two Vector<T,N> values.
// Returns a Vector<T,N> z with z.data[n] = x.data[n] * y.data[n] for every
// n in [0, Vector<T,N>::L). z is default-initialized and only those entries
// are assigned here.
// [[gnu::always_inline]] requests that GCC inline this function at its call
// sites.
template<typename T, ptrdiff_t N>
[[gnu::always_inline]] constexpr auto operator*(Vector<T,N> x, Vector<T,N> y)
-> Vector<T,N> {
    Vector<T,N> z;
    // Unroll request (factor 16) for the loop that follows; this pragma is
    // the change being tested in this comment.
#pragma GCC unroll 16
    for (ptrdiff_t n = 0; n < Vector<T,N>::L; ++n) z.data[n] = x.data[n] *
y.data[n];
    return z;
}
// Scalar-plus-vector addition: adds the scalar x to each element of y.
// Returns a Vector<T,N> z with z.data[n] = x + y.data[n] for every
// n in [0, Vector<T,N>::L). z is default-initialized and only those entries
// are assigned here.
// [[gnu::always_inline]] requests that GCC inline this function at its call
// sites.
template<typename T, ptrdiff_t N>
[[gnu::always_inline]] constexpr auto operator+(T x, Vector<T,N> y) ->
Vector<T,N> {
    Vector<T,N> z;
    // Unroll request (factor 16) for the loop that follows; this pragma is
    // the change being tested in this comment.
#pragma GCC unroll 16
    for (ptrdiff_t n = 0; n < Vector<T,N>::L; ++n) z.data[n] = x + y.data[n];
    return z;
}
// Scalar-times-vector multiplication: scales each element of y by x.
// Returns a Vector<T,N> z with z.data[n] = x * y.data[n] for every
// n in [0, Vector<T,N>::L). z is default-initialized and only those entries
// are assigned here.
// [[gnu::always_inline]] requests that GCC inline this function at its call
// sites.
template<typename T, ptrdiff_t N>
[[gnu::always_inline]] constexpr auto operator*(T x, Vector<T,N> y) ->
Vector<T,N> {
    Vector<T,N> z;
    // Unroll request (factor 16) for the loop that follows; this pragma is
    // the change being tested in this comment.
#pragma GCC unroll 16
    for (ptrdiff_t n = 0; n < Vector<T,N>::L; ++n) z.data[n] = x * y.data[n];
    return z;
}


does not improve code generation (I still get the same problem), so that's a
reproducer for this issue.

Reply via email to