https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102059
--- Comment #4 from Martin Liška <marxin at gcc dot gnu.org> ---
Reduced test-case:
$ cat /tmp/basic_op.ii
// Reduced reproducer: keep every token as-is. The declarations below form a
// single call chain that starts at Data_<Sp>::MatrixOp (requested for Sp = int
// by the explicit instantiation on the last line):
//
//   Data_<Sp>::MatrixOp
//     -> NoAlias::operator=
//     -> call_assignment_no_alias
//     -> Assignment<..., Product<...>, assign_op<...>>::run
//     -> generic_product_impl::evalTo -> scaleAndAddTo
//     -> parallelize_gemm
//     -> gemm_functor::operator()
//     -> general_matrix_matrix_product<..., ColMajor, ...>::run
//     -> gebp_kernel<float, ...>::operator()
//     -> takes the address of gemmMMA<...>   (inside the power10 pragma region)
//     -> gemmMMA calls bmask<Packet>()       (always_inline, outside the region)
//
// NOTE(review): the names with a `__trans_tmp_` prefix look machine-generated
// (presumably by the test-case reducer) -- do not read meaning into them.

// Unnamed enums used purely as integral constants / template arguments.
enum { Unaligned, Aligned };                 // Unaligned == 0, Aligned == 1
enum { ColMajor };                           // ColMajor == 0
enum { ReadOnlyAccessors, DefaultProduct };  // ReadOnlyAccessors == 0, DefaultProduct == 1

// Primary template is only declared; specialised below for Matrix and Map.
template <typename> struct traits;

struct accessors_level {
  enum { has_direct_access, has_write_access };  // 0 and 1 respectively
};

// Forward declarations. Matrix is never defined in this test case; it is only
// used as a template argument.
template <typename, int _Rows, int _Cols, int, int = _Rows, int = _Cols>
class Matrix;
template <typename> class MatrixBase;
template <typename> class NoAlias;
template <typename, typename, int = DefaultProduct> class Product;
template <typename, int = accessors_level::has_write_access> class MapBase;
template <typename, int, typename = int> class Map;

// Maps an expression type to the MatrixBase instantiation it derives from.
template <typename Derived> struct dense_xpr_base {
  typedef MatrixBase<Derived> type;
};

// Empty tag type passed as the assignment functor.
template <typename> struct assign_op {};

// Supplies the Scalar typedef and the Flags constant (== 0) that are looked up
// through derived classes further down.
struct DenseBase {
  typedef int Scalar;
  enum { Flags };
};

template <typename Derived> struct MatrixBase : DenseBase {
  // Declared only; no definition appears in this test case.
  NoAlias<Derived> noalias();
};

template <int> class ProductImpl;

// Product expression; lhs()/rhs() are declared only.
template <typename _Lhs, typename _Rhs, int Option>
struct Product : ProductImpl<Option> {
  _Lhs lhs();
  _Rhs rhs();
};

// Every ProductImpl derives from MatrixBase<int> (and therefore DenseBase).
template <int> struct ProductImpl : dense_xpr_base<int>::type {};

template <typename, typename, typename> struct Assignment;

// Forwards to the Assignment specialisation selected by (Dst, Src, Func).
template <typename Dst, typename Src, typename Func>
void call_assignment_no_alias(Dst dst, Src src, Func func) {
  Assignment<Dst, Src, Func>::run(dst, src, func);
}

template <typename, typename, bool, typename, bool, int, int>
struct general_matrix_matrix_product;

template <typename, typename, int> class blas_data_mapper;

// Only the specialisation for third argument == 1 exists; its constructor is
// declared but not defined.
template <typename Scalar, typename Index>
struct blas_data_mapper<Scalar, Index, 1> {
  blas_data_mapper(Scalar *, Index, Index);
};

template <typename ExpressionType> struct NoAlias {
  // Ignores its argument's value: it default-constructs a Product of two Maps
  // and assigns that to m_expression instead. OtherDerived is only used for
  // its nested Scalar type.
  template <typename OtherDerived> void operator=(OtherDerived) {
    Product<Map<Matrix<float, -1, -1, 2>, 1>, Map<Matrix<float, -1, -1, 2>, 1>>
        __trans_tmp_4;
    call_assignment_no_alias(m_expression, __trans_tmp_4,
                             assign_op<typename OtherDerived::Scalar>());
  }
  ExpressionType m_expression;
};

// traits for Matrix: exposes the element type as Scalar.
template <typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows,
          int _MaxCols>
struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>> {
  typedef _Scalar Scalar;
};

// Read-only level: derives from MatrixBase<Derived> and defines PointerType as
// a pointer to traits<Derived>::Scalar.
template <typename Derived>
struct MapBase<Derived, ReadOnlyAccessors>
    : dense_xpr_base<Derived>::type {
  typedef typename traits<Derived>::Scalar *PointerType;
};

// Default (has_write_access) level simply inherits the read-only level.
template <typename Derived>
struct MapBase<Derived> : MapBase<Derived, ReadOnlyAccessors> {};

// traits for Map forward to the traits of the mapped plain object type.
template <typename PlainObjectType, int MapOptions, typename StrideType>
struct traits<Map<PlainObjectType, MapOptions, StrideType>>
    : traits<PlainObjectType> {};

// Note the base class is always MapBase<Map<Matrix<float, -1, -1, 2>, ...>>,
// regardless of Map's own first template argument.
template <typename, int MapOptions, typename>
struct Map : MapBase<Map<Matrix<float, -1, -1, 2>, MapOptions>> {
  typedef Map Base;
  // Declared only; no definition appears in this test case.
  Map(typename Base::PointerType, long, long);
};

struct gebp_traits {
  enum { nr, mr };  // nr == 0, mr == 1
};

template <typename, typename, typename, int, int, bool, bool>
struct gebp_kernel;

// Zero-initialised global passed as the second argument to the functor below.
long parallelize_gemm_cols;

// Invokes the functor once; the Index and bool arguments are unused.
template <int, typename Functor, typename Index>
void parallelize_gemm(Functor func, Index, bool) {
  func(0, parallelize_gemm_cols);
}

template <typename, typename> struct generic_product_impl;

// Assignment of a Product expression with assign_op: evaluates via
// generic_product_impl<Lhs, Rhs>::evalTo.
template <typename DstXprType, typename Lhs, typename Rhs, int Options,
          typename Scalar>
struct Assignment<DstXprType, Product<Lhs, Rhs, Options>, assign_op<Scalar>> {
  static void run(DstXprType dst, Product<Lhs, Rhs> src, assign_op<Scalar>) {
    // `Map` is written without template arguments, so this relies on class
    // template argument deduction from src.rhs().
    Map __trans_tmp_5 = src.rhs();
    generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), __trans_tmp_5);
  }
};

// Specialisation for ColMajor in the sixth position.
template <typename Index, typename LhsScalar, bool ConjugateLhs,
          typename RhsScalar, bool ConjugateRhs, int ResInnerStride>
struct general_matrix_matrix_product<Index, LhsScalar, ConjugateLhs, RhsScalar,
                                     ConjugateRhs, ColMajor, ResInnerStride> {
  typedef LhsScalar ResScalar;
  static void run(ResScalar *_res, ResScalar alpha) {
    // None of these locals is initialised before being passed on; left as in
    // the reduced test case.
    Index resStride, resIncr, actual_mc, actual_kc, actual_nc;
    typedef blas_data_mapper<ResScalar, Index, ResInnerStride> ResMapper;
    ResMapper res(_res, resStride, resIncr);
    gebp_kernel<RhsScalar, Index, ResMapper, gebp_traits::mr, gebp_traits::nr,
                ConjugateLhs, ConjugateRhs>
        gebp;
    LhsScalar blockA, blockB;  // also uninitialised; only their addresses are passed
    gebp(res, &blockA, &blockB, actual_mc, actual_kc, actual_nc, alpha);
  }
};

struct gemm_blocking_space;

struct gemm_functor {
  // Declared only; no definition appears in this test case.
  gemm_functor(Map<Matrix<float, -1, -1, 2>, 1>, Map<Matrix<float, -1, -1, 2>, 1>,
               Map<Matrix<float, -1, -1, 2>, 1>, float, gemm_blocking_space);
  // Ignores both arguments and runs the <long, float, ..., 0, 1> product with
  // a null result pointer.
  void operator()(int, int) {
    general_matrix_matrix_product<long, float, false, float, false, 0, 1>::run(
        0, m_actualAlpha);
  }
  float m_actualAlpha;
};

struct gemm_blocking_space {
  // Declared only; no definition appears in this test case.
  gemm_blocking_space(long, long, long, long, bool);
};

// Zero-initialised global passed as the (unused) Index argument of
// parallelize_gemm.
int scaleAndAddTo___trans_tmp_3;

template <typename Lhs, typename Rhs> struct generic_product_impl {
  template <typename Dst> static void evalTo(Dst dst, Lhs lhs, Rhs rhs) {
    scaleAndAddTo(dst, lhs, rhs);
  }
  template <typename Dest>
  static void scaleAndAddTo(Dest dst, Lhs a_lhs, Rhs a_rhs) {
    // Both `Map` declarations rely on class template argument deduction.
    Map lhs(a_lhs);
    Map rhs(a_rhs);
    typedef gemm_functor GemmFunctor;
    gemm_blocking_space blocking(0, 0, 0, 1, true);
    parallelize_gemm<0>(GemmFunctor(lhs, rhs, dst, 0, blocking),
                        scaleAndAddTo___trans_tmp_3, Dest::Flags);
  }
};

// bmask is declared here, before the target pragma; its definition (carrying
// always_inline) comes after `#pragma GCC reset_options` below.
template <typename Packet> Packet bmask();

// Only gemmMMA sits between this pragma and the matching reset_options.
#pragma GCC target "cpu=power10"
// Ignores all eleven parameters; the body is a single call to bmask<Packet>().
template <typename, typename, typename Packet, typename, typename DataMapper,
          int, int>
void gemmMMA(const DataMapper &, const float *, const float *, long, long, long,
             float, long, long, long, long) {
  bmask<Packet>();
}
#pragma GCC reset_options

struct quad_traits {
  enum { size, rows };  // size == 0, rows == 1
};

// Defined outside the power10 pragma region although its only caller in this
// file (gemmMMA) is inside it.
// NOTE(review): presumably this combination -- an always_inline callee and a
// caller with a different target setting -- is what the report exercises;
// confirm against the bug text.
// There is no return statement despite the non-void return type; kept as in
// the reduced test case.
template <typename Packet> __attribute__((always_inline)) Packet bmask() {}

// gebp_kernel specialised for float as the first argument; operator() is
// declared here and defined out of line just below.
template <typename Index, typename DataMapper, int mr, int nr,
          bool ConjugateLhs, bool ConjugateRhs>
struct gebp_kernel<float, Index, DataMapper, mr, nr, ConjugateLhs,
                   ConjugateRhs> {
  void operator()(const DataMapper &, const float *, const float *, Index,
                  Index, Index, float, Index = 1, Index = 1, Index = 0,
                  Index = 0);
};

// The body only initialises a local function pointer with the address of a
// gemmMMA instantiation (Packet = int); the pointer is never called, and all
// parameters are unused.
template <typename Index, typename DataMapper, int mr, int nr,
          bool ConjugateLhs, bool ConjugateRhs>
void gebp_kernel<float, Index, DataMapper, mr, nr, ConjugateLhs,
                 ConjugateRhs>::operator()(const DataMapper &, const float *,
                                           const float *, Index, Index, Index,
                                           float, Index, Index, Index, Index) {
  void (*gemm_function)(const DataMapper &, const float *, const float *, Index,
                        Index, Index, float, Index, Index, Index, Index) =
      gemmMMA<float, Index, int, int, DataMapper, quad_traits::rows,
              quad_traits::size>;
}

// Entry point of the test case. operator[] is declared only.
template <class> struct Data_ {
  float &operator[](long);
  Data_ *MatrixOp();
};

// Source operand of the assignment in MatrixOp; NoAlias::operator= only uses
// its type.
Product<int, int> __trans_tmp_7;

// Builds a Map over a local Data_ and assigns the global Product to it through
// noalias(), which starts the call chain described at the top.
// NbCol0 and NbRow1 are read uninitialised and there is no return statement
// despite the Data_* return type; both kept as in the reduced test case.
template <class Sp> Data_<Sp> *Data_<Sp>::MatrixOp() {
  long NbCol0, NbRow1;
  Data_ res;
  Map<int, Aligned> m2(&res[0], NbCol0, NbRow1);
  m2.noalias() = __trans_tmp_7;
}

// Explicit instantiation: requests Data_<int>, including MatrixOp.
template class Data_<int>;