https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77776
--- Comment #2 from emsr at gcc dot gnu.org ---
I have this in another tree which solves the inf issue:
namespace __detail
{
// Avoid including all of <algorithm>
template<typename _Tp>
constexpr _Tp
__fmax3(_Tp __x, _Tp __y, _Tp __z)
{ return std::fmax(std::fmax(__x, __y), std::fmax(__y, __z)); }
template<typename _Tp>
constexpr _Tp
__hypot3(_Tp __x, _Tp __y, _Tp __z)
{
if (std::isnan(__x)
|| std::isnan(__y)
|| std::isnan(__z))
return std::numeric_limits<_Tp>::quiet_NaN();
else
{
__x = std::abs(__x);
__y = std::abs(__y);
__z = std::abs(__z);
const auto __amax = __fmax3(__x, __y, __z);
if (__amax == _Tp{0})
return _Tp{0};
else if (std::__detail::__isinf(__amax))
return std::numeric_limits<_Tp>::infinity();
else
{
__x /= __amax;
__y /= __amax;
__z /= __amax;
return __amax * std::sqrt(__x * __x + __y * __y + __z * __z);
}
}
}
}
I get your point about scaling and adding smallest first. I have access to
std::fma().
For me I think this means:
/*
const auto __amax = max(max(__x, __y), __z);
const auto __l0 = min(__z, max(__x, __y));
const auto __l1 = min(__y, __x);
const auto __scale = 1 / __amax;
__l0 *= __scale;
__l1 *= __scale;
// Add the two smaller values first.
const auto __lo = __l0 * __l0 + __l1 * __l1;
const auto __r = __amax * sqrt(fma(__h1, __h1, __lo));
*/