https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111366

--- Comment #15 from Mathieu Malaterre <malat at debian dot org> ---
For some reason the no-htm flag does not seem to work in my case:

% /usr/bin/c++  -O1 -mcpu=power8 -flto=auto -c skeleton_test.cc
skeleton_test.cc: In member function 'TestFloorLog2::operator()<int,
detail::CappedTagChecker<int, 1u, 0> >(int, detail::CappedTagChecker<int, 1u,
0>)void':
skeleton_test.cc:5:44: error: inlining failed in call to 'always_inline'
'hwy::PreventElision(int)': target specific option mismatch
    5 | inline __attribute__((always_inline)) void PreventElision(int output) {
      |                                            ^~~~~~~~~~~~~~
skeleton_test.cc:36:26: note: called from here
   36 |       hwy::PreventElision(sum);
      |       ~~~~~~~~~~~~~~~~~~~^~~~~

with:

% cat skeleton_test.cc
#include <stddef.h>
#define HWY_PRAGMA(tokens) _Pragma(#tokens)  // Stringizes `tokens` and hands the result to _Pragma.
namespace hwy {
#define HWY_PUSH_ATTRIBUTES(targets_str) HWY_PRAGMA(GCC target targets_str)  // Expands to `#pragma GCC target <targets_str>`.
inline __attribute__((always_inline)) void PreventElision(int output) {  // The always_inline callee named in the quoted diagnostic; defined before the target pragma is emitted.
  asm("" : "+r"(output) : : "memory");  // Empty asm taking `output` as a read-write register operand, with a "memory" clobber.
}
} // namespace hwy
#if HWY_TARGET == HWY_PPC10  // Neither macro is defined in this test case or on the quoted command line, so both evaluate to 0 and the branch is taken.
#define HWY_BEFORE_NAMESPACE() HWY_PUSH_ATTRIBUTES(",cpu=power10,no-htm")  // Target string (note the leading comma) selecting cpu=power10 together with no-htm.
#endif
HWY_BEFORE_NAMESPACE() namespace detail {  // Emits the `GCC target` pragma here; presumably it applies to the definitions that follow (see GCC docs).
  template <typename, size_t, int> struct CappedTagChecker {};  // Empty tag type; its template parameters are unnamed and unused.
}
template <typename T, size_t kLimit, int kPow2 = 0>
using CappedTag = detail::CappedTagChecker<T, kLimit, kPow2>;  // Alias forwarding all three parameters to detail::CappedTagChecker.
template <class D> size_t Lanes(D);  // Declaration only; no definition appears in this test case (it is compiled with -c).
template <typename, size_t, size_t kMinArg, class Test> struct ForeachCappedR {  // Only kMinArg and Test are used; the first two parameters are unnamed.
  static void Do(size_t min_lanes, size_t max_lanes) {
    CappedTag<int, kMinArg> d;  // Tag object of type detail::CappedTagChecker<int, kMinArg, 0>.
    Test()(int(), d);  // Calls Test::operator() with a value-initialized int and the tag.
    Do(min_lanes, max_lanes);  // Unconditional self-call with unchanged arguments; presumably a leftover of test-case reduction.
  }
};
template <class Test> struct ForPartialVectors {  // Functor that forwards to ForeachCappedR for the given Test.
  template <typename T> void operator()(T t) {
    (void)t;  // The argument value is deliberately unused; only its type T matters.
    ForeachCappedR<T, 1, 1, Test>::Do(1, 1);
  }
};
struct TestFloorLog2 {
  template <class T, class DF> void operator()(T, DF df) {  // The member function the quoted diagnostic reports as the caller.
    size_t count = Lanes(df);
    int sum;  // Left uninitialized in the reduced test case.
    for (size_t i; count; ++i)  // `i` is uninitialized and the condition tests `count`, which the loop never modifies.
      hwy::PreventElision(sum);  // Call site reported at skeleton_test.cc:36:26 ("inlining failed ... target specific option mismatch").
  }
};
void TestAllFloorLog2() { ForPartialVectors<TestFloorLog2>()(float()); }  // Entry point: instantiates ForPartialVectors -> ForeachCappedR::Do -> TestFloorLog2::operator()<int, CappedTagChecker<int, 1, 0>>.

Reply via email to