https://github.com/frasercrmck created https://github.com/llvm/llvm-project/pull/132956
These three functions are all related in that they share tables and helper functions. Furthermore, the acosh builtins call log1p. As with other work in this area, these builtins are now vectorized. To enable this, there are new table accessor functions which return a vector of table values using a vector of indices. These are internally scalarized, in the absence of gather operations. Some tables which were tables of multiple entries (e.g., double2) are split into two separate "low" and "high" tables. This might affect the performance of memory operations, but any such cost is hopefully mitigated by better codegen overall. >From 490f34c8fa49666cc4e7af228f71908f6102d76a Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Tue, 25 Mar 2025 13:06:05 +0000 Subject: [PATCH] [libclc] Move log1p/asinh/acosh to the CLC library These three functions are all related in that they share tables and helper functions. Furthermore, the acosh builtins call log1p. As with other work in this area, these builtins are now vectorized. To enable this, there are new table accessor functions which return a vector of table values using a vector of indices. These are internally scalarized, in the absence of gather operations. Some tables which were tables of multiple entries (e.g., double2) are split into two separate "low" and "high" tables. This might affect the performance of memory operations, but any such cost is hopefully mitigated by better codegen overall. 
--- libclc/clc/include/clc/math/clc_acosh.h | 20 ++ libclc/clc/include/clc/math/clc_asinh.h | 20 ++ libclc/clc/include/clc/math/clc_ep_log.h | 10 + libclc/clc/include/clc/math/clc_ep_log.inc | 16 + libclc/clc/include/clc/math/clc_log1p.h | 20 ++ libclc/clc/include/clc/math/tables.h | 43 ++- libclc/clc/lib/generic/SOURCES | 5 + libclc/clc/lib/generic/math/clc_acosh.cl | 24 ++ libclc/clc/lib/generic/math/clc_acosh.inc | 111 ++++++ libclc/clc/lib/generic/math/clc_asinh.cl | 24 ++ libclc/clc/lib/generic/math/clc_asinh.inc | 310 ++++++++++++++++ .../lib/generic/math/clc_ep_log.cl} | 13 +- libclc/clc/lib/generic/math/clc_ep_log.inc | 81 +++++ libclc/clc/lib/generic/math/clc_log1p.cl | 19 + libclc/clc/lib/generic/math/clc_log1p.inc | 170 +++++++++ libclc/clc/lib/generic/math/clc_tables.cl | 183 ++++++++++ libclc/generic/lib/SOURCES | 1 - libclc/generic/lib/math/acosh.cl | 114 +----- libclc/generic/lib/math/asinh.cl | 280 +-------------- libclc/generic/lib/math/clc_pow.cl | 3 +- libclc/generic/lib/math/clc_pown.cl | 3 +- libclc/generic/lib/math/clc_powr.cl | 3 +- libclc/generic/lib/math/clc_rootn.cl | 3 +- libclc/generic/lib/math/ep_log.cl | 80 ----- libclc/generic/lib/math/log1p.cl | 164 +-------- libclc/generic/lib/math/tables.cl | 335 ------------------ libclc/spirv/lib/SOURCES | 1 - 27 files changed, 1083 insertions(+), 973 deletions(-) create mode 100644 libclc/clc/include/clc/math/clc_acosh.h create mode 100644 libclc/clc/include/clc/math/clc_asinh.h create mode 100644 libclc/clc/include/clc/math/clc_ep_log.h create mode 100644 libclc/clc/include/clc/math/clc_ep_log.inc create mode 100644 libclc/clc/include/clc/math/clc_log1p.h create mode 100644 libclc/clc/lib/generic/math/clc_acosh.cl create mode 100644 libclc/clc/lib/generic/math/clc_acosh.inc create mode 100644 libclc/clc/lib/generic/math/clc_asinh.cl create mode 100644 libclc/clc/lib/generic/math/clc_asinh.inc rename libclc/{generic/lib/math/ep_log.h => clc/lib/generic/math/clc_ep_log.cl} (64%) create mode 
100644 libclc/clc/lib/generic/math/clc_ep_log.inc create mode 100644 libclc/clc/lib/generic/math/clc_log1p.cl create mode 100644 libclc/clc/lib/generic/math/clc_log1p.inc create mode 100644 libclc/clc/lib/generic/math/clc_tables.cl delete mode 100644 libclc/generic/lib/math/ep_log.cl diff --git a/libclc/clc/include/clc/math/clc_acosh.h b/libclc/clc/include/clc/math/clc_acosh.h new file mode 100644 index 0000000000000..915f4b3febaa9 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_acosh.h @@ -0,0 +1,20 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_MATH_CLC_ACOSH_H__ +#define __CLC_MATH_CLC_ACOSH_H__ + +#define __CLC_BODY <clc/math/unary_decl.inc> +#define __CLC_FUNCTION __clc_acosh + +#include <clc/math/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_MATH_CLC_ACOSH_H__ diff --git a/libclc/clc/include/clc/math/clc_asinh.h b/libclc/clc/include/clc/math/clc_asinh.h new file mode 100644 index 0000000000000..b91ae4f0ebd23 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_asinh.h @@ -0,0 +1,20 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_MATH_CLC_ASINH_H__ +#define __CLC_MATH_CLC_ASINH_H__ + +#define __CLC_BODY <clc/math/unary_decl.inc> +#define __CLC_FUNCTION __clc_asinh + +#include <clc/math/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_MATH_CLC_ASINH_H__ diff --git a/libclc/clc/include/clc/math/clc_ep_log.h b/libclc/clc/include/clc/math/clc_ep_log.h new file mode 100644 index 0000000000000..687756f90c620 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_ep_log.h @@ -0,0 +1,10 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define __CLC_BODY <clc/math/clc_ep_log.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/clc/include/clc/math/clc_ep_log.inc b/libclc/clc/include/clc/math/clc_ep_log.inc new file mode 100644 index 0000000000000..a255a6a453074 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_ep_log.inc @@ -0,0 +1,16 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if __CLC_FPSIZE == 64 + +_CLC_DECL _CLC_OVERLOAD void __clc_ep_log(__CLC_GENTYPE x, + private __CLC_INTN *xexp, + private __CLC_GENTYPE *r1, + private __CLC_GENTYPE *r2); + +#endif diff --git a/libclc/clc/include/clc/math/clc_log1p.h b/libclc/clc/include/clc/math/clc_log1p.h new file mode 100644 index 0000000000000..f08b44b69cb6f --- /dev/null +++ b/libclc/clc/include/clc/math/clc_log1p.h @@ -0,0 +1,20 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_MATH_CLC_LOG1P_H__ +#define __CLC_MATH_CLC_LOG1P_H__ + +#define __CLC_BODY <clc/math/unary_decl.inc> +#define __CLC_FUNCTION __clc_log1p + +#include <clc/math/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_MATH_CLC_LOG1P_H__ diff --git a/libclc/clc/include/clc/math/tables.h b/libclc/clc/include/clc/math/tables.h index ad4e4f568d1f1..b801ba65bb945 100644 --- a/libclc/clc/include/clc/math/tables.h +++ b/libclc/clc/include/clc/math/tables.h @@ -20,12 +20,45 @@ #define TABLE_FUNCTION(TYPE, TABLE, NAME) \ TYPE TABLE_MANGLE(NAME)(size_t idx) { return TABLE[idx]; } +#define CLC_TABLE_FUNCTION(TYPE, TABLE, NAME) \ + _CLC_DEF _CLC_OVERLOAD TYPE TABLE_MANGLE(NAME)(int idx) { \ + return TABLE[idx]; \ + } \ + _CLC_DEF _CLC_OVERLOAD TYPE##2 TABLE_MANGLE(NAME)(int##2 idx) { \ + return (TYPE##2){TABLE[idx.s0], TABLE[idx.s1]}; \ + } \ + _CLC_DEF _CLC_OVERLOAD TYPE##3 TABLE_MANGLE(NAME)(int##3 idx) { \ + return (TYPE##3){TABLE[idx.s0], TABLE[idx.s1], TABLE[idx.s2]}; \ + } \ + _CLC_DEF _CLC_OVERLOAD TYPE##4 
TABLE_MANGLE(NAME)(int##4 idx) { \ + return (TYPE##4){TABLE[idx.s0], TABLE[idx.s1], TABLE[idx.s2], \ + TABLE[idx.s3]}; \ + } \ + _CLC_DEF _CLC_OVERLOAD TYPE##8 TABLE_MANGLE(NAME)(int##8 idx) { \ + return (TYPE##8){TABLE[idx.s0], TABLE[idx.s1], TABLE[idx.s2], \ + TABLE[idx.s3], TABLE[idx.s4], TABLE[idx.s5], \ + TABLE[idx.s6], TABLE[idx.s7]}; \ + } \ + _CLC_DEF _CLC_OVERLOAD TYPE##16 TABLE_MANGLE(NAME)(int##16 idx) { \ + return (TYPE##16){ \ + TABLE[idx.s0], TABLE[idx.s1], TABLE[idx.s2], TABLE[idx.s3], \ + TABLE[idx.s4], TABLE[idx.s5], TABLE[idx.s6], TABLE[idx.s7], \ + TABLE[idx.s8], TABLE[idx.s9], TABLE[idx.sA], TABLE[idx.sB], \ + TABLE[idx.sC], TABLE[idx.sD], TABLE[idx.sE], TABLE[idx.sF]}; \ + } + #define TABLE_FUNCTION_DECL(TYPE, NAME) TYPE TABLE_MANGLE(NAME)(size_t idx); +#define CLC_TABLE_FUNCTION_DECL(TYPE, NAME) \ + _CLC_DECL _CLC_OVERLOAD TYPE TABLE_MANGLE(NAME)(int idx); \ + _CLC_DECL _CLC_OVERLOAD TYPE##2 TABLE_MANGLE(NAME)(int##2 idx); \ + _CLC_DECL _CLC_OVERLOAD TYPE##3 TABLE_MANGLE(NAME)(int##3 idx); \ + _CLC_DECL _CLC_OVERLOAD TYPE##4 TABLE_MANGLE(NAME)(int##4 idx); \ + _CLC_DECL _CLC_OVERLOAD TYPE##8 TABLE_MANGLE(NAME)(int##8 idx); \ + _CLC_DECL _CLC_OVERLOAD TYPE##16 TABLE_MANGLE(NAME)(int##16 idx); + #define USE_TABLE(NAME, IDX) TABLE_MANGLE(NAME)(IDX) -TABLE_FUNCTION_DECL(float2, loge_tbl); -TABLE_FUNCTION_DECL(float, log_inv_tbl); TABLE_FUNCTION_DECL(float2, log_inv_tbl_ep); TABLE_FUNCTION_DECL(float2, log2_tbl); TABLE_FUNCTION_DECL(float2, log10_tbl); @@ -35,11 +68,17 @@ TABLE_FUNCTION_DECL(float2, cbrt_tbl); TABLE_FUNCTION_DECL(float, exp_tbl); TABLE_FUNCTION_DECL(float2, exp_tbl_ep); +CLC_TABLE_FUNCTION_DECL(float, loge_tbl_lo); +CLC_TABLE_FUNCTION_DECL(float, loge_tbl_hi); +CLC_TABLE_FUNCTION_DECL(float, log_inv_tbl); + #ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64 : enable TABLE_FUNCTION_DECL(double2, ln_tbl); +CLC_TABLE_FUNCTION_DECL(double, ln_tbl_lo); +CLC_TABLE_FUNCTION_DECL(double, ln_tbl_hi); TABLE_FUNCTION_DECL(double2, 
atan_jby256_tbl); TABLE_FUNCTION_DECL(double2, two_to_jby64_ep_tbl); TABLE_FUNCTION_DECL(double2, sinh_tbl); diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES index 11069e9b0b952..59eb3875b755b 100644 --- a/libclc/clc/lib/generic/SOURCES +++ b/libclc/clc/lib/generic/SOURCES @@ -18,13 +18,16 @@ integer/clc_rotate.cl integer/clc_sub_sat.cl integer/clc_upsample.cl math/clc_acos.cl +math/clc_acosh.cl math/clc_acospi.cl math/clc_asin.cl +math/clc_asinh.cl math/clc_asinpi.cl math/clc_atan.cl math/clc_atanpi.cl math/clc_ceil.cl math/clc_copysign.cl +math/clc_ep_log.cl math/clc_fabs.cl math/clc_fma.cl math/clc_floor.cl @@ -33,6 +36,7 @@ math/clc_hypot.cl math/clc_ldexp.cl math/clc_log.cl math/clc_log10.cl +math/clc_log1p.cl math/clc_log2.cl math/clc_mad.cl math/clc_modf.cl @@ -44,6 +48,7 @@ math/clc_rsqrt.cl math/clc_sincos_helpers.cl math/clc_sqrt.cl math/clc_sw_fma.cl +math/clc_tables.cl math/clc_trunc.cl relational/clc_all.cl relational/clc_any.cl diff --git a/libclc/clc/lib/generic/math/clc_acosh.cl b/libclc/clc/lib/generic/math/clc_acosh.cl new file mode 100644 index 0000000000000..a46606be59333 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_acosh.cl @@ -0,0 +1,24 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/clc_convert.h> +#include <clc/float/definitions.h> +#include <clc/internal/clc.h> +#include <clc/math/clc_ep_log.h> +#include <clc/math/clc_fabs.h> +#include <clc/math/clc_fma.h> +#include <clc/math/clc_log1p.h> +#include <clc/math/clc_mad.h> +#include <clc/math/clc_sqrt.h> +#include <clc/math/math.h> +#include <clc/relational/clc_isinf.h> +#include <clc/relational/clc_isnan.h> +#include <clc/relational/clc_select.h> + +#define __CLC_BODY <clc_acosh.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/math/clc_acosh.inc b/libclc/clc/lib/generic/math/clc_acosh.inc new file mode 100644 index 0000000000000..2f4a58742b8e3 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_acosh.inc @@ -0,0 +1,111 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if __CLC_FPSIZE == 32 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_acosh(__CLC_GENTYPE x) { + __CLC_UINTN ux = __CLC_AS_UINTN(x); + + // Arguments greater than 1/sqrt(epsilon) in magnitude are approximated by + // acosh(x) = ln(2) + ln(x) + // For 2.0 <= x <= 1/sqrt(epsilon) the approximation is: + // acosh(x) = ln(x + sqrt(x * x - 1)) + __CLC_INTN high = ux > 0x46000000U; + __CLC_INTN med = ux > 0x40000000U; + + __CLC_GENTYPE w = x - 1.0f; + __CLC_GENTYPE s = w * w + 2.0f * w; + __CLC_GENTYPE t = x * x - 1.0f; + __CLC_GENTYPE r = __clc_sqrt(med ? t : s) + (med ? x : w); + __CLC_GENTYPE v = (high ? x : r) - (med ? 1.0f : 0.0f); + __CLC_GENTYPE z = __clc_log1p(v) + (high ? 
0x1.62e430p-1f : 0.0f); + + z = __clc_select(z, x, __clc_isnan(x) || __clc_isinf(x)); + z = x < 1.0f ? __CLC_GENTYPE_NAN : z; + + return z; +} + +#elif __CLC_FPSIZE == 64 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_acosh(__CLC_GENTYPE x) { + // 1/sqrt(eps) = 9.49062656242515593767e+07 + const __CLC_GENTYPE recrteps = 0x1.6a09e667f3bcdp+26; + // log2_lead and log2_tail sum to an extra-precise version of log(2) + const __CLC_GENTYPE log2_lead = 0x1.62e42ep-1; + const __CLC_GENTYPE log2_tail = 0x1.efa39ef35793cp-25; + + // Handle x >= 128 here + __CLC_LONGN xlarge = x > recrteps; + __CLC_GENTYPE r = x + __clc_sqrt(__clc_fma(x, x, -1.0)); + r = xlarge ? x : r; + + __CLC_INTN xexp; + __CLC_GENTYPE r1, r2; + __clc_ep_log(r, &xexp, &r1, &r2); + + __CLC_GENTYPE dxexp = __CLC_CONVERT_GENTYPE( + __CLC_CONVERT_LONGN(xexp) + (xlarge ? (__CLC_LONGN)1 : (__CLC_LONGN)0)); + r1 = __clc_fma(dxexp, log2_lead, r1); + r2 = __clc_fma(dxexp, log2_tail, r2); + + __CLC_GENTYPE ret1 = r1 + r2; + + // Handle 1 < x < 128 here + // We compute the value + // t = x - 1.0 + sqrt(2.0*(x - 1.0) + (x - 1.0)*(x - 1.0)) + // using simulated quad precision. 
+ __CLC_GENTYPE t = x - 1.0; + __CLC_GENTYPE u1 = t * 2.0; + + // (t,0) * (t,0) -> (v1, v2) + __CLC_GENTYPE v1 = t * t; + __CLC_GENTYPE v2 = __clc_fma(t, t, -v1); + + // (u1,0) + (v1,v2) -> (w1,w2) + r = u1 + v1; + __CLC_GENTYPE s = (((u1 - r) + v1) + v2); + __CLC_GENTYPE w1 = r + s; + __CLC_GENTYPE w2 = (r - w1) + s; + + // sqrt(w1,w2) -> (u1,u2) + __CLC_GENTYPE p1 = __clc_sqrt(w1); + __CLC_GENTYPE a1 = p1 * p1; + __CLC_GENTYPE a2 = __clc_fma(p1, p1, -a1); + __CLC_GENTYPE temp = (((w1 - a1) - a2) + w2); + __CLC_GENTYPE p2 = MATH_DIVIDE(temp * 0.5, p1); + u1 = p1 + p2; + __CLC_GENTYPE u2 = (p1 - u1) + p2; + + // (u1,u2) + (t,0) -> (r1,r2) + r = u1 + t; + s = ((u1 - r) + t) + u2; + // r1 = r + s; + // r2 = (r - r1) + s; + // t = r1 + r2; + t = r + s; + + // For arguments 1.13 <= x <= 1.5 the log1p function is good enough + __CLC_GENTYPE ret2 = __clc_log1p(t); + + __CLC_GENTYPE ret = x >= 128.0 ? ret1 : ret2; + + ret = (__clc_isinf(x) || __clc_isnan(x)) ? x : ret; + ret = x == 1.0 ? 0.0 : ret; + ret = x < 1.0 ? __CLC_GENTYPE_NAN : ret; + + return ret; +} + +#elif __CLC_FPSIZE == 16 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_acosh(__CLC_GENTYPE x) { + return __CLC_CONVERT_GENTYPE(__clc_acosh(__CLC_CONVERT_FLOATN(x))); +} + +#endif diff --git a/libclc/clc/lib/generic/math/clc_asinh.cl b/libclc/clc/lib/generic/math/clc_asinh.cl new file mode 100644 index 0000000000000..763efb4e76aca --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_asinh.cl @@ -0,0 +1,24 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/clc_convert.h> +#include <clc/float/definitions.h> +#include <clc/internal/clc.h> +#include <clc/math/clc_ep_log.h> +#include <clc/math/clc_fabs.h> +#include <clc/math/clc_fma.h> +#include <clc/math/clc_log.h> +#include <clc/math/clc_mad.h> +#include <clc/math/clc_sqrt.h> +#include <clc/math/math.h> +#include <clc/relational/clc_isinf.h> +#include <clc/relational/clc_isnan.h> +#include <clc/relational/clc_select.h> + +#define __CLC_BODY <clc_asinh.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/math/clc_asinh.inc b/libclc/clc/lib/generic/math/clc_asinh.inc new file mode 100644 index 0000000000000..5ad07dde7e403 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_asinh.inc @@ -0,0 +1,310 @@ +//===----------------------------------------------------------------------===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if __CLC_FPSIZE == 32 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_asinh(__CLC_GENTYPE x) { + __CLC_UINTN ux = __CLC_AS_UINTN(x); + __CLC_UINTN ax = ux & EXSIGNBIT_SP32; + __CLC_UINTN xsgn = ax ^ ux; + + // |x| <= 2 + __CLC_GENTYPE t = x * x; + __CLC_GENTYPE a = + __clc_mad(t, + __clc_mad(t, + __clc_mad(t, + __clc_mad(t, -1.177198915954942694e-4f, + -4.162727710583425360e-2f), + -5.063201055468483248e-1f), + -1.480204186473758321f), + -1.152965835871758072f); + __CLC_GENTYPE b = + __clc_mad(t, + __clc_mad(t, + __clc_mad(t, + __clc_mad(t, 6.284381367285534560e-2f, + 1.260024978680227945f), + 6.582362487198468066f), + 11.99423176003939087f), + 6.917795026025976739f); + + __CLC_GENTYPE q = MATH_DIVIDE(a, b); + __CLC_GENTYPE z1 = __clc_mad(x * t, q, x); + + // |x| > 2 + + // Arguments greater than 1/sqrt(epsilon) in magnitude are + // approximated by asinh(x) = ln(2) + ln(abs(x)), with sign of x + // Arguments such that 4.0 <= abs(x) <= 1/sqrt(epsilon) are + // approximated by asinhf(x) = ln(abs(x) + sqrt(x*x+1)) + // with the sign of x (see Abramowitz and Stegun 4.6.20) + + __CLC_GENTYPE absx = __clc_fabs(x); + __CLC_INTN hi = ax > 0x46000000U; + __CLC_GENTYPE y = __clc_sqrt(absx * absx + 1.0f) + absx; + y = hi ? absx : y; + __CLC_GENTYPE r = __clc_log(y) + (hi ? 0x1.62e430p-1f : 0.0f); + __CLC_GENTYPE z2 = __CLC_AS_GENTYPE(xsgn | __CLC_AS_UINTN(r)); + + __CLC_GENTYPE z = ax <= 0x40000000 ? 
z1 : z2; + z = __clc_select(z, x, ax < 0x39800000U || __clc_isinf(x) || __clc_isnan(x)); + + return z; +} + +#elif __CLC_FPSIZE == 64 + +#define NA0 -0.12845379283524906084997e0 +#define NA1 -0.21060688498409799700819e0 +#define NA2 -0.10188951822578188309186e0 +#define NA3 -0.13891765817243625541799e-1 +#define NA4 -0.10324604871728082428024e-3 + +#define DA0 0.77072275701149440164511e0 +#define DA1 0.16104665505597338100747e1 +#define DA2 0.11296034614816689554875e1 +#define DA3 0.30079351943799465092429e0 +#define DA4 0.235224464765951442265117e-1 + +#define NB0 -0.12186605129448852495563e0 +#define NB1 -0.19777978436593069928318e0 +#define NB2 -0.94379072395062374824320e-1 +#define NB3 -0.12620141363821680162036e-1 +#define NB4 -0.903396794842691998748349e-4 + +#define DB0 0.73119630776696495279434e0 +#define DB1 0.15157170446881616648338e1 +#define DB2 0.10524909506981282725413e1 +#define DB3 0.27663713103600182193817e0 +#define DB4 0.21263492900663656707646e-1 + +#define NC0 -0.81210026327726247622500e-1 +#define NC1 -0.12327355080668808750232e0 +#define NC2 -0.53704925162784720405664e-1 +#define NC3 -0.63106739048128554465450e-2 +#define NC4 -0.35326896180771371053534e-4 + +#define DC0 0.48726015805581794231182e0 +#define DC1 0.95890837357081041150936e0 +#define DC2 0.62322223426940387752480e0 +#define DC3 0.15028684818508081155141e0 +#define DC4 0.10302171620320141529445e-1 + +#define ND0 -0.4638179204422665073e-1 +#define ND1 -0.7162729496035415183e-1 +#define ND2 -0.3247795155696775148e-1 +#define ND3 -0.4225785421291932164e-2 +#define ND4 -0.3808984717603160127e-4 +#define ND5 0.8023464184964125826e-6 + +#define DD0 0.2782907534642231184e0 +#define DD1 0.5549945896829343308e0 +#define DD2 0.3700732511330698879e0 +#define DD3 0.9395783438240780722e-1 +#define DD4 0.7200057974217143034e-2 + +#define NE0 -0.121224194072430701e-4 +#define NE1 -0.273145455834305218e-3 +#define NE2 -0.152866982560895737e-2 +#define NE3 -0.292231744584913045e-2 +#define NE4 
-0.174670900236060220e-2 +#define NE5 -0.891754209521081538e-12 + +#define DE0 0.499426632161317606e-4 +#define DE1 0.139591210395547054e-2 +#define DE2 0.107665231109108629e-1 +#define DE3 0.325809818749873406e-1 +#define DE4 0.415222526655158363e-1 +#define DE5 0.186315628774716763e-1 + +#define NF0 -0.195436610112717345e-4 +#define NF1 -0.233315515113382977e-3 +#define NF2 -0.645380957611087587e-3 +#define NF3 -0.478948863920281252e-3 +#define NF4 -0.805234112224091742e-12 +#define NF5 0.246428598194879283e-13 + +#define DF0 0.822166621698664729e-4 +#define DF1 0.135346265620413852e-2 +#define DF2 0.602739242861830658e-2 +#define DF3 0.972227795510722956e-2 +#define DF4 0.510878800983771167e-2 + +#define NG0 -0.209689451648100728e-6 +#define NG1 -0.219252358028695992e-5 +#define NG2 -0.551641756327550939e-5 +#define NG3 -0.382300259826830258e-5 +#define NG4 -0.421182121910667329e-17 +#define NG5 0.492236019998237684e-19 + +#define DG0 0.889178444424237735e-6 +#define DG1 0.131152171690011152e-4 +#define DG2 0.537955850185616847e-4 +#define DG3 0.814966175170941864e-4 +#define DG4 0.407786943832260752e-4 + +#define NH0 -0.178284193496441400e-6 +#define NH1 -0.928734186616614974e-6 +#define NH2 -0.923318925566302615e-6 +#define NH3 -0.776417026702577552e-19 +#define NH4 0.290845644810826014e-21 + +#define DH0 0.786694697277890964e-6 +#define DH1 0.685435665630965488e-5 +#define DH2 0.153780175436788329e-4 +#define DH3 0.984873520613417917e-5 + +#define NI0 -0.538003743384069117e-10 +#define NI1 -0.273698654196756169e-9 +#define NI2 -0.268129826956403568e-9 +#define NI3 -0.804163374628432850e-29 + +#define DI0 0.238083376363471960e-9 +#define DI1 0.203579344621125934e-8 +#define DI2 0.450836980450693209e-8 +#define DI3 0.286005148753497156e-8 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_asinh(__CLC_GENTYPE x) { + const __CLC_GENTYPE rteps = 0x1.6a09e667f3bcdp-27; + const __CLC_GENTYPE recrteps = 0x1.6a09e667f3bcdp+26; + + // log2_lead and log2_tail sum to an 
extra-precise version of log(2) + const __CLC_GENTYPE log2_lead = 0x1.62e42ep-1; + const __CLC_GENTYPE log2_tail = 0x1.efa39ef35793cp-25; + + __CLC_GENTYPE absx = __clc_fabs(x); + + __CLC_GENTYPE t = x * x; + __CLC_GENTYPE pn, tn, pd, td; + + // XXX we are betting here that we can evaluate 8 pairs of + // polys faster than we can grab 12 coefficients from a table + // This also uses fewer registers + + // |x| >= 8 + pn = __clc_fma(t, __clc_fma(t, __clc_fma(t, NI3, NI2), NI1), NI0); + pd = __clc_fma(t, __clc_fma(t, __clc_fma(t, DI3, DI2), DI1), DI0); + + tn = __clc_fma( + t, __clc_fma(t, __clc_fma(t, __clc_fma(t, NH4, NH3), NH2), NH1), NH0); + td = __clc_fma(t, __clc_fma(t, __clc_fma(t, DH3, DH2), DH1), DH0); + pn = absx < 8.0 ? tn : pn; + pd = absx < 8.0 ? td : pd; + + tn = __clc_fma( + t, + __clc_fma(t, __clc_fma(t, __clc_fma(t, __clc_fma(t, NG5, NG4), NG3), NG2), + NG1), + NG0); + td = __clc_fma( + t, __clc_fma(t, __clc_fma(t, __clc_fma(t, DG4, DG3), DG2), DG1), DG0); + pn = absx < 4.0 ? tn : pn; + pd = absx < 4.0 ? td : pd; + + tn = __clc_fma( + t, + __clc_fma(t, __clc_fma(t, __clc_fma(t, __clc_fma(t, NF5, NF4), NF3), NF2), + NF1), + NF0); + td = __clc_fma( + t, __clc_fma(t, __clc_fma(t, __clc_fma(t, DF4, DF3), DF2), DF1), DF0); + pn = absx < 2.0 ? tn : pn; + pd = absx < 2.0 ? td : pd; + + tn = __clc_fma( + t, + __clc_fma(t, __clc_fma(t, __clc_fma(t, __clc_fma(t, NE5, NE4), NE3), NE2), + NE1), + NE0); + td = __clc_fma( + t, + __clc_fma(t, __clc_fma(t, __clc_fma(t, __clc_fma(t, DE5, DE4), DE3), DE2), + DE1), + DE0); + pn = absx < 1.5 ? tn : pn; + pd = absx < 1.5 ? td : pd; + + tn = __clc_fma( + t, + __clc_fma(t, __clc_fma(t, __clc_fma(t, __clc_fma(t, ND5, ND4), ND3), ND2), + ND1), + ND0); + td = __clc_fma( + t, __clc_fma(t, __clc_fma(t, __clc_fma(t, DD4, DD3), DD2), DD1), DD0); + pn = absx <= 1.0 ? tn : pn; + pd = absx <= 1.0 ? 
td : pd; + + tn = __clc_fma( + t, __clc_fma(t, __clc_fma(t, __clc_fma(t, NC4, NC3), NC2), NC1), NC0); + td = __clc_fma( + t, __clc_fma(t, __clc_fma(t, __clc_fma(t, DC4, DC3), DC2), DC1), DC0); + pn = absx < 0.75 ? tn : pn; + pd = absx < 0.75 ? td : pd; + + tn = __clc_fma( + t, __clc_fma(t, __clc_fma(t, __clc_fma(t, NB4, NB3), NB2), NB1), NB0); + td = __clc_fma( + t, __clc_fma(t, __clc_fma(t, __clc_fma(t, DB4, DB3), DB2), DB1), DB0); + pn = absx < 0.5 ? tn : pn; + pd = absx < 0.5 ? td : pd; + + tn = __clc_fma( + t, __clc_fma(t, __clc_fma(t, __clc_fma(t, NA4, NA3), NA2), NA1), NA0); + td = __clc_fma( + t, __clc_fma(t, __clc_fma(t, __clc_fma(t, DA4, DA3), DA2), DA1), DA0); + pn = absx < 0.25 ? tn : pn; + pd = absx < 0.25 ? td : pd; + + __CLC_GENTYPE pq = MATH_DIVIDE(pn, pd); + + // |x| <= 1 + __CLC_GENTYPE result1 = __clc_fma(absx * t, pq, absx); + + // Other ranges + __CLC_LONGN xout = absx <= 32.0 || absx > recrteps; + __CLC_GENTYPE y = absx + __clc_sqrt(__clc_fma(absx, absx, 1.0)); + y = xout ? absx : y; + + __CLC_GENTYPE r1, r2; + __CLC_INTN xexp; + __clc_ep_log(y, &xexp, &r1, &r2); + + __CLC_GENTYPE dxexp = __CLC_CONVERT_GENTYPE( + __CLC_CONVERT_LONGN(xexp) + (xout ? (__CLC_LONGN)1 : (__CLC_LONGN)0)); + r1 = __clc_fma(dxexp, log2_lead, r1); + r2 = __clc_fma(dxexp, log2_tail, r2); + + // 1 < x <= 32 + __CLC_GENTYPE v2 = (pq + 0.25) / t; + __CLC_GENTYPE r = v2 + r1; + __CLC_GENTYPE s = ((r1 - r) + v2) + r2; + __CLC_GENTYPE v1 = r + s; + v2 = (r - v1) + s; + __CLC_GENTYPE result2 = v1 + v2; + + // x > 32 + __CLC_GENTYPE result3 = r1 + r2; + + __CLC_GENTYPE ret = absx > 1.0 ? result2 : result1; + ret = absx > 32.0 ? result3 : ret; + ret = x < 0.0 ? 
-ret : ret; + + // NaN, +-Inf, or x small enough that asinh(x) = x + ret = __clc_select( + ret, x, + __CLC_CONVERT_LONGN(__clc_isnan(x) || __clc_isinf(x) || absx < rteps)); + return ret; +} + +#elif __CLC_FPSIZE == 16 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_asinh(__CLC_GENTYPE x) { + return __CLC_CONVERT_GENTYPE(__clc_asinh(__CLC_CONVERT_FLOATN(x))); +} + +#endif diff --git a/libclc/generic/lib/math/ep_log.h b/libclc/clc/lib/generic/math/clc_ep_log.cl similarity index 64% rename from libclc/generic/lib/math/ep_log.h rename to libclc/clc/lib/generic/math/clc_ep_log.cl index 3f98764f577a4..c6782f77a16d9 100644 --- a/libclc/generic/lib/math/ep_log.h +++ b/libclc/clc/lib/generic/math/clc_ep_log.cl @@ -6,13 +6,18 @@ // //===----------------------------------------------------------------------===// -#include <clc/clcfunc.h> - #ifdef cl_khr_fp64 +#include <clc/clc_convert.h> +#include <clc/internal/clc.h> +#include <clc/math/clc_ep_log.h> +#include <clc/math/clc_fma.h> +#include <clc/math/math.h> +#include <clc/math/tables.h> + #pragma OPENCL EXTENSION cl_khr_fp64 : enable -_CLC_DECL void __clc_ep_log(double x, private int *xexp, private double *r1, - private double *r2); +#define __CLC_BODY <clc_ep_log.inc> +#include <clc/math/gentype.inc> #endif diff --git a/libclc/clc/lib/generic/math/clc_ep_log.inc b/libclc/clc/lib/generic/math/clc_ep_log.inc new file mode 100644 index 0000000000000..f51e487bedcaa --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_ep_log.inc @@ -0,0 +1,81 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Computes natural log(x). 
Algorithm based on: +// Ping-Tak Peter Tang +// "Table-driven implementation of the logarithm function in IEEE floating-point +// arithmetic" +// ACM Transactions on Mathematical Software (TOMS) Volume 16, Issue 4 (December +// 1990) +// +//===----------------------------------------------------------------------===// + +#if __CLC_FPSIZE == 64 + +#define LN0 8.33333333333317923934e-02 +#define LN1 1.25000000037717509602e-02 +#define LN2 2.23213998791944806202e-03 +#define LN3 4.34887777707614552256e-04 + +#define LF0 8.33333333333333593622e-02 +#define LF1 1.24999999978138668903e-02 +#define LF2 2.23219810758559851206e-03 + +_CLC_DEF _CLC_OVERLOAD void __clc_ep_log(__CLC_GENTYPE x, + private __CLC_INTN *xexp, + private __CLC_GENTYPE *r1, + private __CLC_GENTYPE *r2) { + __CLC_LONGN near_one = x >= 0x1.e0faap-1 && x <= 0x1.1082cp+0; + + __CLC_ULONGN ux = __CLC_AS_ULONGN(x); + __CLC_ULONGN uxs = + __CLC_AS_ULONGN(__CLC_AS_GENTYPE(0x03d0000000000000UL | ux) - 0x1.0p-962); + __CLC_LONGN c = ux < IMPBIT_DP64; + ux = c ? uxs : ux; + __CLC_INTN expadjust = + __CLC_CONVERT_INTN(c ? (__CLC_LONGN)60 : (__CLC_LONGN)0); + + // Store the exponent of x in xexp and put f into the range [0.5,1) + __CLC_INTN xexp1 = __CLC_CONVERT_INTN((ux >> EXPSHIFTBITS_DP64) & 0x7ff) - + EXPBIAS_DP64 - expadjust; + __CLC_GENTYPE f = __CLC_AS_GENTYPE(HALFEXPBITS_DP64 | (ux & MANTBITS_DP64)); + *xexp = __CLC_CONVERT_INTN(near_one) ? 0 : xexp1; + + __CLC_GENTYPE r = x - 1.0; + __CLC_GENTYPE u1 = MATH_DIVIDE(r, 2.0 + r); + __CLC_GENTYPE ru1 = -r * u1; + u1 = u1 + u1; + + __CLC_INTN index = __CLC_CONVERT_INTN(ux >> 45); + index = ((0x80 | (index & 0x7e)) >> 1) + (index & 0x1); + + __CLC_GENTYPE f1 = __CLC_CONVERT_GENTYPE(index) * 0x1.0p-7; + __CLC_GENTYPE f2 = f - f1; + __CLC_GENTYPE u2 = MATH_DIVIDE(f2, __clc_fma(0.5, f2, f1)); + + __CLC_GENTYPE z1 = USE_TABLE(ln_tbl_lo, (index - 64)); + __CLC_GENTYPE q = USE_TABLE(ln_tbl_hi, (index - 64)); + + z1 = near_one ? r : z1; + q = near_one ? 
0.0 : q; + __CLC_GENTYPE u = near_one ? u1 : u2; + __CLC_GENTYPE v = u * u; + + __CLC_GENTYPE cc = near_one ? ru1 : u2; + + __CLC_GENTYPE z21 = + __clc_fma(v, __clc_fma(v, __clc_fma(v, LN3, LN2), LN1), LN0); + __CLC_GENTYPE z22 = __clc_fma(v, __clc_fma(v, LF2, LF1), LF0); + __CLC_GENTYPE z2 = near_one ? z21 : z22; + z2 = __clc_fma(u * v, z2, cc) + q; + + *r1 = z1; + *r2 = z2; +} + +#endif diff --git a/libclc/clc/lib/generic/math/clc_log1p.cl b/libclc/clc/lib/generic/math/clc_log1p.cl new file mode 100644 index 0000000000000..71ccba79a7289 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_log1p.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/clc_convert.h> +#include <clc/float/definitions.h> +#include <clc/internal/clc.h> +#include <clc/math/clc_fma.h> +#include <clc/math/clc_mad.h> +#include <clc/math/math.h> +#include <clc/math/tables.h> +#include <clc/relational/clc_isinf.h> + +#define __CLC_BODY <clc_log1p.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/math/clc_log1p.inc b/libclc/clc/lib/generic/math/clc_log1p.inc new file mode 100644 index 0000000000000..8c7dcfc48c18e --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_log1p.inc @@ -0,0 +1,170 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Computes natural log(1+x). 
Algorithm based on:
+// Ping-Tak Peter Tang
+// "Table-driven implementation of the logarithm function in IEEE floating-point
+// arithmetic" ACM Transactions on Mathematical Software (TOMS) Volume 16, Issue
+// 4 (December 1990)
+//
+// Note that we use a lookup table of size 64 rather than 128, and compensate by
+// having extra terms in the minimax polynomial for the kernel approximation.
+// (That note came from the double-precision code; the float path below indexes 129-entry tables.)
+//===----------------------------------------------------------------------===//
+
+#if __CLC_FPSIZE == 32
+// Single precision: a series result and a table-driven result are both computed, then selected per lane.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_log1p(__CLC_GENTYPE x) {
+  __CLC_GENTYPE w = x; // copy of the argument used by the edge-case selects
+  __CLC_UINTN ux = __CLC_AS_UINTN(x);
+  __CLC_UINTN ax = ux & EXSIGNBIT_SP32; // masked bits of x, compared against magnitude thresholds below
+
+  // |x| < 2^-4: series in u = 2x / (2 + x); selected below when ax < 0x3d800000
+  __CLC_GENTYPE u2 = MATH_DIVIDE(x, 2.0f + x);
+  __CLC_GENTYPE u = u2 + u2;
+  __CLC_GENTYPE v = u * u;
+  // 2/(5 * 2^5), 2/(3 * 2^3)
+  __CLC_GENTYPE zsmall =
+      __clc_mad(-u2, x, __clc_mad(v, 0x1.99999ap-7f, 0x1.555556p-4f) * v * u) +
+      x;
+
+  // |x| >= 2^-4: work on the bit pattern of 1 + x
+  ux = __CLC_AS_UINTN(x + 1.0f);
+
+  __CLC_INTN m = __CLC_AS_INTN((ux >> EXPSHIFTBITS_SP32) & 0xff) - EXPBIAS_SP32;
+  __CLC_GENTYPE mf = __CLC_CONVERT_GENTYPE(m);
+  __CLC_UINTN indx = (ux & 0x007f0000) + ((ux & 0x00008000) << 1); // top seven mantissa bits, rounded up by the next bit
+  __CLC_GENTYPE F = __CLC_AS_GENTYPE(indx | 0x3f000000);
+
+  // x > 2^24
+  __CLC_GENTYPE fg24 = F - __CLC_AS_GENTYPE(0x3f000000 | (ux & MANTBITS_SP32));
+
+  // x <= 2^24: xt is (1 - xh) + x scaled by the power of two built in xnm and by 0.5
+  __CLC_UINTN xhi = ux & 0xffff8000;
+  __CLC_GENTYPE xh = __CLC_AS_GENTYPE(xhi);
+  __CLC_GENTYPE xt = (1.0f - xh) + w;
+  __CLC_UINTN xnm = ((~(xhi & 0x7f800000)) - 0x00800000) & 0x7f800000;
+  xt = xt * __CLC_AS_GENTYPE(xnm) * 0.5f;
+  __CLC_GENTYPE fl24 =
+      F - __CLC_AS_GENTYPE(0x3f000000 | (xhi & MANTBITS_SP32)) - xt;
+
+  __CLC_GENTYPE f = mf > 24.0f ? fg24 : fl24;
+  // indx becomes the table index (0..128)
+  indx = indx >> 16;
+  __CLC_GENTYPE r = f * USE_TABLE(log_inv_tbl, __CLC_CONVERT_INTN(indx));
+
+  // 1/3, 1/2
+  __CLC_GENTYPE poly =
+      __clc_mad(__clc_mad(r, 0x1.555556p-2f, 0x1.0p-1f), r * r, r);
+
+  const __CLC_GENTYPE LOG2_HEAD = 0x1.62e000p-1f;  // 0.693115234
+  const __CLC_GENTYPE LOG2_TAIL = 0x1.0bfbe8p-15f; // 0.0000319461833
+
+  __CLC_GENTYPE tv0 = USE_TABLE(loge_tbl_lo, __CLC_AS_INTN(indx));
+  __CLC_GENTYPE tv1 = USE_TABLE(loge_tbl_hi, __CLC_AS_INTN(indx));
+  __CLC_GENTYPE z1 = __clc_mad(mf, LOG2_HEAD, tv0);
+  __CLC_GENTYPE z2 = __clc_mad(mf, LOG2_TAIL, -poly) + tv1;
+  __CLC_GENTYPE z = z1 + z2;
+
+  z = ax < 0x3d800000U ? zsmall : z;
+
+  // Edge cases: ax >= PINFBITPATT_SP32 returns x, x < -1 gives NaN, x == -1 gives the NINFBITPATT_SP32 pattern
+  z = ax >= PINFBITPATT_SP32 ? w : z;
+  z = w < -1.0f ? __CLC_GENTYPE_NAN : z;
+  z = w == -1.0f ? __CLC_AS_GENTYPE((__CLC_UINTN)NINFBITPATT_SP32) : z;
+  // Fix subnormals (applies to every ax below 0x33800000, i.e. |x| < 2^-24: x is returned)
+  z = ax < 0x33800000 ? x : z;
+
+  return z;
+}
+
+#elif __CLC_FPSIZE == 64
+// Double precision: a table-driven result (result1) and a series result (result2) are computed, then selected per lane.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_log1p(__CLC_GENTYPE x) {
+  // Table-driven path (result1): selected at the end when x < log1p_thresh1 or x > log1p_thresh2
+  __CLC_ULONGN ux = __CLC_AS_ULONGN((__CLC_GENTYPE)1.0 + x);
+  __CLC_INTN xexp =
+      __CLC_CONVERT_INTN((ux >> EXPSHIFTBITS_DP64) & 0x7ff) - EXPBIAS_DP64;
+  __CLC_GENTYPE f =
+      __CLC_AS_GENTYPE((__CLC_ULONGN)ONEEXPBITS_DP64 | (ux & MANTBITS_DP64));
+  // j = 64 + top six mantissa bits of 1 + x, rounded up by the next bit; f1 = j / 64; j then becomes the table index 0..64
+  __CLC_INTN j = __CLC_CONVERT_INTN(ux >> 45);
+  j = ((0x80 | (j & 0x7e)) >> 1) + (j & 0x1);
+  __CLC_GENTYPE f1 = __CLC_CONVERT_GENTYPE(j) * 0x1.0p-6;
+  j -= 64;
+  // f2 candidates: f - f1 directly, or rebuilt from x with m2 = 2^-xexp in two different summation orders
+  __CLC_GENTYPE f2temp = f - f1;
+  __CLC_GENTYPE m2 =
+      __CLC_AS_GENTYPE(__CLC_CONVERT_ULONGN(0x3ff - xexp) << EXPSHIFTBITS_DP64);
+  __CLC_GENTYPE f2l = __clc_fma(m2, x, m2 - f1);
+  __CLC_GENTYPE f2g = __clc_fma(m2, x, -f1) + m2;
+  __CLC_GENTYPE f2 =
+      __CLC_CONVERT_LONGN(xexp <= MANTLENGTH_DP64 - 1) ? f2l : f2g;
+  f2 = __CLC_CONVERT_LONGN(xexp <= -2 || (xexp >= MANTLENGTH_DP64 + 8)) ? f2temp
+                                                                        : f2;
+
+  __CLC_GENTYPE z1 = USE_TABLE(ln_tbl_lo, j);
+  __CLC_GENTYPE q = USE_TABLE(ln_tbl_hi, j);
+
+  __CLC_GENTYPE u = MATH_DIVIDE(f2, __clc_fma(0.5, f2, f1));
+  __CLC_GENTYPE v = u * u;
+
+  __CLC_GENTYPE poly = v * __clc_fma(v,
+                                     __clc_fma(v, 2.23219810758559851206e-03,
+                                               1.24999999978138668903e-02),
+                                     8.33333333333333593622e-02);
+
+  // log2_lead and log2_tail sum to an extra-precise version of log(2)
+  // 0x3fe62e42e0000000
+  const __CLC_GENTYPE log2_lead = 6.93147122859954833984e-01;
+  // 0x3e6efa39ef35793c
+  const __CLC_GENTYPE log2_tail = 5.76999904754328540596e-08;
+
+  __CLC_GENTYPE z2 = q + __clc_fma(u, poly, u);
+  __CLC_GENTYPE dxexp = __CLC_CONVERT_GENTYPE(xexp);
+  __CLC_GENTYPE r1 = __clc_fma(dxexp, log2_lead, z1);
+  __CLC_GENTYPE r2 = __clc_fma(dxexp, log2_tail, z2);
+  __CLC_GENTYPE result1 = r1 + r2;
+
+  // Series path (result2): kept at the end when neither x < log1p_thresh1 nor x > log1p_thresh2
+  __CLC_GENTYPE r = x;
+  u = r / (2.0 + r);
+  __CLC_GENTYPE correction = r * u;
+  u = u + u;
+  v = u * u;
+  r1 = r;
+
+  poly = __clc_fma(v,
+                   __clc_fma(v,
+                             __clc_fma(v, 4.34887777707614552256e-04,
+                                       2.23213998791944806202e-03),
+                             1.25000000037717509602e-02),
+                   8.33333333333317923934e-02);
+
+  r2 = __clc_fma(u * v, poly, -correction);
+
+  // The values exp(-1/16)-1 and exp(1/16)-1
+  const __CLC_GENTYPE log1p_thresh1 = -0x1.f0540438fd5c3p-5;
+  const __CLC_GENTYPE log1p_thresh2 = 0x1.082b577d34ed8p-4;
+  __CLC_GENTYPE result2 = r1 + r2;
+  result2 = x < log1p_thresh1 || x > log1p_thresh2 ? result1 : result2;
+  // Special cases, applied in this order: infinite x returns x, x < -1 gives NaN, x == -1 gives the NINFBITPATT_DP64 pattern
+  result2 = __clc_isinf(x) ? x : result2;
+  result2 = x < -1.0 ? __CLC_GENTYPE_NAN : result2;
+  result2 =
+      x == -1.0 ? __CLC_AS_GENTYPE((__CLC_ULONGN)NINFBITPATT_DP64) : result2;
+  return result2;
+}
+
+#elif __CLC_FPSIZE == 16
+// Half precision: convert to float, call the float overload, convert the result back.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_log1p(__CLC_GENTYPE x) {
+  return __CLC_CONVERT_GENTYPE(__clc_log1p(__CLC_CONVERT_FLOATN(x)));
+}
+
+#endif
diff --git a/libclc/clc/lib/generic/math/clc_tables.cl b/libclc/clc/lib/generic/math/clc_tables.cl
new file mode 100644
index 0000000000000..7ad005f91b575
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_tables.cl
@@ -0,0 +1,183 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/internal/clc.h>
+#include <clc/math/tables.h>
+
+DECLARE_TABLE(float, LOGE_TBL_LO, 129) = {
+    0x0.000000p+0f, 0x1.fe0000p-8f, 0x1.fc0000p-7f, 0x1.7b8000p-6f,
+    0x1.f82000p-6f, 0x1.39e000p-5f, 0x1.774000p-5f, 0x1.b42000p-5f,
+    0x1.f0a000p-5f, 0x1.164000p-4f, 0x1.340000p-4f, 0x1.51a000p-4f,
+    0x1.6f0000p-4f, 0x1.8c2000p-4f, 0x1.a92000p-4f, 0x1.c5e000p-4f,
+    0x1.e26000p-4f, 0x1.fec000p-4f, 0x1.0d6000p-3f, 0x1.1b6000p-3f,
+    0x1.294000p-3f, 0x1.370000p-3f, 0x1.44c000p-3f, 0x1.526000p-3f,
+    0x1.5fe000p-3f, 0x1.6d6000p-3f, 0x1.7aa000p-3f, 0x1.87e000p-3f,
+    0x1.952000p-3f, 0x1.a22000p-3f, 0x1.af2000p-3f, 0x1.bc2000p-3f,
+    0x1.c8e000p-3f, 0x1.d5c000p-3f, 0x1.e26000p-3f, 0x1.ef0000p-3f,
+    0x1.fb8000p-3f, 0x1.040000p-2f, 0x1.0a2000p-2f, 0x1.104000p-2f,
+    0x1.166000p-2f, 0x1.1c8000p-2f, 0x1.228000p-2f, 0x1.288000p-2f,
+    0x1.2e8000p-2f, 0x1.346000p-2f, 0x1.3a6000p-2f, 0x1.404000p-2f,
+    0x1.460000p-2f, 0x1.4be000p-2f, 0x1.51a000p-2f, 0x1.576000p-2f,
+    0x1.5d0000p-2f, 0x1.62c000p-2f, 0x1.686000p-2f, 0x1.6e0000p-2f,
+    0x1.738000p-2f, 0x1.792000p-2f, 0x1.7ea000p-2f, 0x1.842000p-2f,
+    0x1.89a000p-2f, 
0x1.8f0000p-2f, 0x1.946000p-2f, 0x1.99c000p-2f, + 0x1.9f2000p-2f, 0x1.a48000p-2f, 0x1.a9c000p-2f, 0x1.af0000p-2f, + 0x1.b44000p-2f, 0x1.b98000p-2f, 0x1.bea000p-2f, 0x1.c3c000p-2f, + 0x1.c8e000p-2f, 0x1.ce0000p-2f, 0x1.d32000p-2f, 0x1.d82000p-2f, + 0x1.dd4000p-2f, 0x1.e24000p-2f, 0x1.e74000p-2f, 0x1.ec2000p-2f, + 0x1.f12000p-2f, 0x1.f60000p-2f, 0x1.fae000p-2f, 0x1.ffc000p-2f, + 0x1.024000p-1f, 0x1.04a000p-1f, 0x1.072000p-1f, 0x1.098000p-1f, + 0x1.0be000p-1f, 0x1.0e4000p-1f, 0x1.108000p-1f, 0x1.12e000p-1f, + 0x1.154000p-1f, 0x1.178000p-1f, 0x1.19e000p-1f, 0x1.1c2000p-1f, + 0x1.1e8000p-1f, 0x1.20c000p-1f, 0x1.230000p-1f, 0x1.254000p-1f, + 0x1.278000p-1f, 0x1.29c000p-1f, 0x1.2c0000p-1f, 0x1.2e4000p-1f, + 0x1.306000p-1f, 0x1.32a000p-1f, 0x1.34e000p-1f, 0x1.370000p-1f, + 0x1.392000p-1f, 0x1.3b6000p-1f, 0x1.3d8000p-1f, 0x1.3fa000p-1f, + 0x1.41c000p-1f, 0x1.43e000p-1f, 0x1.460000p-1f, 0x1.482000p-1f, + 0x1.4a4000p-1f, 0x1.4c6000p-1f, 0x1.4e6000p-1f, 0x1.508000p-1f, + 0x1.52a000p-1f, 0x1.54a000p-1f, 0x1.56a000p-1f, 0x1.58c000p-1f, + 0x1.5ac000p-1f, 0x1.5cc000p-1f, 0x1.5ee000p-1f, 0x1.60e000p-1f, + 0x1.62e000p-1f, +}; + +DECLARE_TABLE(float, LOGE_TBL_HI, 129) = { + 0x0.000000p+0f, 0x1.535882p-23f, 0x1.5161f8p-20f, 0x1.1b07d4p-18f, + 0x1.361cf0p-19f, 0x1.0f73fcp-18f, 0x1.63d8cap-19f, 0x1.bae232p-18f, + 0x1.86008ap-20f, 0x1.36eea2p-16f, 0x1.d7961ap-16f, 0x1.073f06p-16f, + 0x1.a515cap-17f, 0x1.45d630p-16f, 0x1.b4e92ap-18f, 0x1.523d6ep-18f, + 0x1.076e2ap-16f, 0x1.2263b6p-17f, 0x1.7e7cd0p-15f, 0x1.2ad52ep-15f, + 0x1.52f81ep-15f, 0x1.fc201ep-15f, 0x1.2b6ccap-15f, 0x1.cbc742p-16f, + 0x1.3070a6p-15f, 0x1.fce33ap-20f, 0x1.890210p-15f, 0x1.a06520p-15f, + 0x1.6a73d0p-17f, 0x1.bc1fe2p-15f, 0x1.c94e80p-15f, 0x1.0ce85ap-16f, + 0x1.f7c79ap-15f, 0x1.0b5a7cp-18f, 0x1.076e2ap-15f, 0x1.5b97b8p-16f, + 0x1.186d5ep-15f, 0x1.2ca5a6p-17f, 0x1.24e272p-14f, 0x1.8bf9aep-14f, + 0x1.5cabaap-14f, 0x1.3182d2p-15f, 0x1.41fbcep-14f, 0x1.5a13dep-14f, + 0x1.c575c2p-15f, 0x1.dd9a98p-14f, 0x1.3155a4p-16f, 
0x1.843434p-17f, + 0x1.8bc21cp-14f, 0x1.7e55dcp-16f, 0x1.5b0e5ap-15f, 0x1.dc5d14p-16f, + 0x1.bdbf58p-14f, 0x1.05e572p-15f, 0x1.903d36p-15f, 0x1.1d5456p-15f, + 0x1.d7f6bap-14f, 0x1.4abfbap-15f, 0x1.f07704p-15f, 0x1.a3b43cp-15f, + 0x1.9c360ap-17f, 0x1.1e8736p-14f, 0x1.941c20p-14f, 0x1.958116p-14f, + 0x1.23ecbep-14f, 0x1.024396p-16f, 0x1.d93534p-15f, 0x1.293246p-14f, + 0x1.eef798p-15f, 0x1.625a4cp-16f, 0x1.4d9da6p-14f, 0x1.d7a7ccp-14f, + 0x1.f7c79ap-14f, 0x1.af0b84p-14f, 0x1.fcfc00p-15f, 0x1.e7258ap-14f, + 0x1.a81306p-16f, 0x1.1034f8p-15f, 0x1.09875ap-16f, 0x1.99d246p-14f, + 0x1.1ebf5ep-15f, 0x1.23fa70p-14f, 0x1.588f78p-14f, 0x1.2e0856p-14f, + 0x1.52a5a4p-13f, 0x1.df9da8p-13f, 0x1.f2e0e6p-16f, 0x1.bd3d5cp-15f, + 0x1.cb9094p-15f, 0x1.261746p-15f, 0x1.f39e2cp-13f, 0x1.719592p-13f, + 0x1.87a5e8p-14f, 0x1.eabbd8p-13f, 0x1.cd68cep-14f, 0x1.b81f70p-13f, + 0x1.7d79c0p-15f, 0x1.b9a324p-14f, 0x1.30d7bep-13f, 0x1.5bce98p-13f, + 0x1.5e1288p-13f, 0x1.37fec2p-13f, 0x1.d3da88p-14f, 0x1.d0db90p-15f, + 0x1.d7334ep-13f, 0x1.133912p-13f, 0x1.44ece6p-16f, 0x1.17b546p-13f, + 0x1.e0d356p-13f, 0x1.0893fep-14f, 0x1.026a70p-13f, 0x1.5b84d0p-13f, + 0x1.8fe846p-13f, 0x1.9fe2f8p-13f, 0x1.8bc21cp-13f, 0x1.53d1eap-13f, + 0x1.f0bb60p-14f, 0x1.e6bf32p-15f, 0x1.d811b6p-13f, 0x1.13cc00p-13f, + 0x1.6932dep-16f, 0x1.246798p-13f, 0x1.f9d5b2p-13f, 0x1.5b6b9ap-14f, + 0x1.404c34p-13f, 0x1.b1dc6cp-13f, 0x1.54920ap-20f, 0x1.97a23cp-16f, + 0x1.0bfbe8p-15f, +}; + +CLC_TABLE_FUNCTION(float, LOGE_TBL_LO, loge_tbl_lo); +CLC_TABLE_FUNCTION(float, LOGE_TBL_HI, loge_tbl_hi); + +DECLARE_TABLE(float, LOG_INV_TBL, 129) = { + 0x1.000000p+1f, 0x1.fc07f0p+0f, 0x1.f81f82p+0f, 0x1.f4465ap+0f, + 0x1.f07c20p+0f, 0x1.ecc07cp+0f, 0x1.e9131ap+0f, 0x1.e573acp+0f, + 0x1.e1e1e2p+0f, 0x1.de5d6ep+0f, 0x1.dae608p+0f, 0x1.d77b66p+0f, + 0x1.d41d42p+0f, 0x1.d0cb58p+0f, 0x1.cd8568p+0f, 0x1.ca4b30p+0f, + 0x1.c71c72p+0f, 0x1.c3f8f0p+0f, 0x1.c0e070p+0f, 0x1.bdd2b8p+0f, + 0x1.bacf92p+0f, 0x1.b7d6c4p+0f, 0x1.b4e81cp+0f, 0x1.b20364p+0f, + 
0x1.af286cp+0f, 0x1.ac5702p+0f, 0x1.a98ef6p+0f, 0x1.a6d01ap+0f, + 0x1.a41a42p+0f, 0x1.a16d40p+0f, 0x1.9ec8eap+0f, 0x1.9c2d14p+0f, + 0x1.99999ap+0f, 0x1.970e50p+0f, 0x1.948b10p+0f, 0x1.920fb4p+0f, + 0x1.8f9c18p+0f, 0x1.8d3018p+0f, 0x1.8acb90p+0f, 0x1.886e60p+0f, + 0x1.861862p+0f, 0x1.83c978p+0f, 0x1.818182p+0f, 0x1.7f4060p+0f, + 0x1.7d05f4p+0f, 0x1.7ad220p+0f, 0x1.78a4c8p+0f, 0x1.767dcep+0f, + 0x1.745d18p+0f, 0x1.724288p+0f, 0x1.702e06p+0f, 0x1.6e1f76p+0f, + 0x1.6c16c2p+0f, 0x1.6a13cep+0f, 0x1.681682p+0f, 0x1.661ec6p+0f, + 0x1.642c86p+0f, 0x1.623fa8p+0f, 0x1.605816p+0f, 0x1.5e75bcp+0f, + 0x1.5c9882p+0f, 0x1.5ac056p+0f, 0x1.58ed24p+0f, 0x1.571ed4p+0f, + 0x1.555556p+0f, 0x1.539094p+0f, 0x1.51d07ep+0f, 0x1.501502p+0f, + 0x1.4e5e0ap+0f, 0x1.4cab88p+0f, 0x1.4afd6ap+0f, 0x1.49539ep+0f, + 0x1.47ae14p+0f, 0x1.460cbcp+0f, 0x1.446f86p+0f, 0x1.42d662p+0f, + 0x1.414142p+0f, 0x1.3fb014p+0f, 0x1.3e22ccp+0f, 0x1.3c995ap+0f, + 0x1.3b13b2p+0f, 0x1.3991c2p+0f, 0x1.381382p+0f, 0x1.3698e0p+0f, + 0x1.3521d0p+0f, 0x1.33ae46p+0f, 0x1.323e34p+0f, 0x1.30d190p+0f, + 0x1.2f684cp+0f, 0x1.2e025cp+0f, 0x1.2c9fb4p+0f, 0x1.2b404ap+0f, + 0x1.29e412p+0f, 0x1.288b02p+0f, 0x1.27350cp+0f, 0x1.25e228p+0f, + 0x1.24924ap+0f, 0x1.234568p+0f, 0x1.21fb78p+0f, 0x1.20b470p+0f, + 0x1.1f7048p+0f, 0x1.1e2ef4p+0f, 0x1.1cf06ap+0f, 0x1.1bb4a4p+0f, + 0x1.1a7b96p+0f, 0x1.194538p+0f, 0x1.181182p+0f, 0x1.16e068p+0f, + 0x1.15b1e6p+0f, 0x1.1485f0p+0f, 0x1.135c82p+0f, 0x1.12358ep+0f, + 0x1.111112p+0f, 0x1.0fef02p+0f, 0x1.0ecf56p+0f, 0x1.0db20ap+0f, + 0x1.0c9714p+0f, 0x1.0b7e6ep+0f, 0x1.0a6810p+0f, 0x1.0953f4p+0f, + 0x1.084210p+0f, 0x1.073260p+0f, 0x1.0624dep+0f, 0x1.051980p+0f, + 0x1.041042p+0f, 0x1.03091cp+0f, 0x1.020408p+0f, 0x1.010102p+0f, + 0x1.000000p+0f, +}; + +CLC_TABLE_FUNCTION(float, LOG_INV_TBL, log_inv_tbl); + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +DECLARE_TABLE(double, LN_TBL_LO, 65) = { + 0x0.0000000000000p+0, 0x1.fc0a800000000p-7, 0x1.f829800000000p-6, + 0x1.7745800000000p-5, 
0x1.f0a3000000000p-5, 0x1.341d700000000p-4, + 0x1.6f0d200000000p-4, 0x1.a926d00000000p-4, 0x1.e270700000000p-4, + 0x1.0d77e00000000p-3, 0x1.2955280000000p-3, 0x1.44d2b00000000p-3, + 0x1.5ff3000000000p-3, 0x1.7ab8900000000p-3, 0x1.9525a80000000p-3, + 0x1.af3c900000000p-3, 0x1.c8ff780000000p-3, 0x1.e270700000000p-3, + 0x1.fb91800000000p-3, 0x1.0a324c0000000p-2, 0x1.1675c80000000p-2, + 0x1.22941c0000000p-2, 0x1.2e8e280000000p-2, 0x1.3a64c40000000p-2, + 0x1.4618bc0000000p-2, 0x1.51aad80000000p-2, 0x1.5d1bd80000000p-2, + 0x1.686c800000000p-2, 0x1.739d7c0000000p-2, 0x1.7eaf800000000p-2, + 0x1.89a3380000000p-2, 0x1.9479400000000p-2, 0x1.9f323c0000000p-2, + 0x1.a9cec80000000p-2, 0x1.b44f740000000p-2, 0x1.beb4d80000000p-2, + 0x1.c8ff7c0000000p-2, 0x1.d32fe40000000p-2, 0x1.dd46a00000000p-2, + 0x1.e744240000000p-2, 0x1.f128f40000000p-2, 0x1.faf5880000000p-2, + 0x1.02552a0000000p-1, 0x1.0723e40000000p-1, 0x1.0be72e0000000p-1, + 0x1.109f380000000p-1, 0x1.154c3c0000000p-1, 0x1.19ee6a0000000p-1, + 0x1.1e85f40000000p-1, 0x1.23130c0000000p-1, 0x1.2795e00000000p-1, + 0x1.2c0e9e0000000p-1, 0x1.307d720000000p-1, 0x1.34e2880000000p-1, + 0x1.393e0c0000000p-1, 0x1.3d90260000000p-1, 0x1.41d8fe0000000p-1, + 0x1.4618bc0000000p-1, 0x1.4a4f840000000p-1, 0x1.4e7d800000000p-1, + 0x1.52a2d20000000p-1, 0x1.56bf9c0000000p-1, 0x1.5ad4040000000p-1, + 0x1.5ee02a0000000p-1, 0x1.62e42e0000000p-1, +}; + +CLC_TABLE_FUNCTION(double, LN_TBL_LO, ln_tbl_lo); + +DECLARE_TABLE(double, LN_TBL_HI, 65) = { + 0x0.0000000000000p+0, 0x1.61f807c79f3dbp-28, 0x1.873c1980267c8p-25, + 0x1.ec65b9f88c69ep-26, 0x1.8022c54cc2f99p-26, 0x1.2c37a3a125330p-25, + 0x1.15cad69737c93p-25, 0x1.d256ab1b285e9p-27, 0x1.b8abcb97a7aa2p-26, + 0x1.f34239659a5dcp-25, 0x1.e07fd48d30177p-25, 0x1.b32df4799f4f6p-25, + 0x1.c29e4f4f21cf8p-25, 0x1.086c848df1b59p-30, 0x1.cf456b4764130p-27, + 0x1.3a02ffcb63398p-25, 0x1.1e6a6886b0976p-25, 0x1.b8abcb97a7aa2p-25, + 0x1.b578f8aa35552p-25, 0x1.139c871afb9fcp-25, 0x1.5d5d30701ce64p-25, + 
0x1.de7bcb2d12142p-25, 0x1.d708e984e1664p-25, 0x1.56945e9c72f36p-26, + 0x1.0e2f613e85bdap-29, 0x1.cb7e0b42724f6p-28, 0x1.fac04e52846c7p-25, + 0x1.e9b14aec442bep-26, 0x1.b5de8034e7126p-25, 0x1.dc157e1b259d3p-25, + 0x1.b05096ad69c62p-28, 0x1.c2116faba4cddp-26, 0x1.65fcc25f95b47p-25, + 0x1.a9a08498d4850p-26, 0x1.de647b1465f77p-25, 0x1.da71b7bf7861dp-26, + 0x1.e6a6886b09760p-28, 0x1.f0075eab0ef64p-25, 0x1.3071282fb989bp-28, + 0x1.0eb43c3f1bed2p-25, 0x1.faf06ecb35c84p-26, 0x1.ef1e63db35f68p-27, + 0x1.69743fb1a71a5p-27, 0x1.c1cdf404e5796p-25, 0x1.094aa0ada625ep-27, + 0x1.e2d4c96fde3ecp-25, 0x1.2f4d5e9a98f34p-25, 0x1.467c96ecc5cbep-25, + 0x1.e7040d03dec5ap-25, 0x1.7bebf4282de36p-25, 0x1.289b11aeb783fp-25, + 0x1.a891d1772f538p-26, 0x1.34f10be1fb591p-25, 0x1.d9ce1d316eb93p-25, + 0x1.3562a19a9c442p-25, 0x1.4e2adf548084cp-26, 0x1.08ce55cc8c97ap-26, + 0x1.0e2f613e85bdap-28, 0x1.db03ebb0227bfp-25, 0x1.1b75bb09cb098p-25, + 0x1.96f16abb9df22p-27, 0x1.5b3f399411c62p-25, 0x1.86b3e59f65355p-26, + 0x1.2482ceae1ac12p-26, 0x1.efa39ef35793cp-25, +}; + +CLC_TABLE_FUNCTION(double, LN_TBL_HI, ln_tbl_hi); + +#endif // cl_khr_fp64 diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES index 15aafee79dfec..b17ac97af0c47 100644 --- a/libclc/generic/lib/SOURCES +++ b/libclc/generic/lib/SOURCES @@ -94,7 +94,6 @@ math/copysign.cl math/cos.cl math/cosh.cl math/cospi.cl -math/ep_log.cl math/erf.cl math/erfc.cl math/exp.cl diff --git a/libclc/generic/lib/math/acosh.cl b/libclc/generic/lib/math/acosh.cl index a855592601f66..2accecd59de7f 100644 --- a/libclc/generic/lib/math/acosh.cl +++ b/libclc/generic/lib/math/acosh.cl @@ -6,115 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "ep_log.h" #include <clc/clc.h> -#include <clc/clcmacro.h> -#include <clc/math/math.h> +#include <clc/math/clc_acosh.h> -_CLC_OVERLOAD _CLC_DEF float acosh(float x) { - uint ux = as_uint(x); - - // Arguments greater than 1/sqrt(epsilon) in magnitude are - // 
approximated by acosh(x) = ln(2) + ln(x) - // For 2.0 <= x <= 1/sqrt(epsilon) the approximation is - // acosh(x) = ln(x + sqrt(x*x-1)) */ - int high = ux > 0x46000000U; - int med = ux > 0x40000000U; - - float w = x - 1.0f; - float s = w*w + 2.0f*w; - float t = x*x - 1.0f; - float r = sqrt(med ? t : s) + (med ? x : w); - float v = (high ? x : r) - (med ? 1.0f : 0.0f); - float z = log1p(v) + (high ? 0x1.62e430p-1f : 0.0f); - - z = ux >= PINFBITPATT_SP32 ? x : z; - z = x < 1.0f ? as_float(QNANBITPATT_SP32) : z; - - return z; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, acosh, float) - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double acosh(double x) { - const double recrteps = 0x1.6a09e667f3bcdp+26; // 1/sqrt(eps) = 9.49062656242515593767e+07 - //log2_lead and log2_tail sum to an extra-precise version of log(2) - const double log2_lead = 0x1.62e42ep-1; - const double log2_tail = 0x1.efa39ef35793cp-25; - - // Handle x >= 128 here - int xlarge = x > recrteps; - double r = x + sqrt(fma(x, x, -1.0)); - r = xlarge ? x : r; - - int xexp; - double r1, r2; - __clc_ep_log(r, &xexp, &r1, &r2); - - double dxexp = xexp + xlarge; - r1 = fma(dxexp, log2_lead, r1); - r2 = fma(dxexp, log2_tail, r2); - - double ret1 = r1 + r2; - - // Handle 1 < x < 128 here - // We compute the value - // t = x - 1.0 + sqrt(2.0*(x - 1.0) + (x - 1.0)*(x - 1.0)) - // using simulated quad precision. 
- double t = x - 1.0; - double u1 = t * 2.0; - - // (t,0) * (t,0) -> (v1, v2) - double v1 = t * t; - double v2 = fma(t, t, -v1); - - // (u1,0) + (v1,v2) -> (w1,w2) - r = u1 + v1; - double s = (((u1 - r) + v1) + v2); - double w1 = r + s; - double w2 = (r - w1) + s; - - // sqrt(w1,w2) -> (u1,u2) - double p1 = sqrt(w1); - double a1 = p1*p1; - double a2 = fma(p1, p1, -a1); - double temp = (((w1 - a1) - a2) + w2); - double p2 = MATH_DIVIDE(temp * 0.5, p1); - u1 = p1 + p2; - double u2 = (p1 - u1) + p2; - - // (u1,u2) + (t,0) -> (r1,r2) - r = u1 + t; - s = ((u1 - r) + t) + u2; - // r1 = r + s; - // r2 = (r - r1) + s; - // t = r1 + r2; - t = r + s; - - // For arguments 1.13 <= x <= 1.5 the log1p function is good enough - double ret2 = log1p(t); - - ulong ux = as_ulong(x); - double ret = x >= 128.0 ? ret1 : ret2; - - ret = ux >= 0x7FF0000000000000 ? x : ret; - ret = x == 1.0 ? 0.0 : ret; - ret = ((ux & SIGNBIT_DP64) != 0UL | x < 1.0) ? as_double(QNANBITPATT_DP64) : ret; - - return ret; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, acosh, double) - -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_UNARY_BUILTIN_FP16(acosh) - -#endif +#undef __CLC_FUNCTION +#define __CLC_FUNCTION acosh +#include <clc/math/unary_builtin.inc> diff --git a/libclc/generic/lib/math/asinh.cl b/libclc/generic/lib/math/asinh.cl index 9890b6e3535f3..2e2eb57d148ab 100644 --- a/libclc/generic/lib/math/asinh.cl +++ b/libclc/generic/lib/math/asinh.cl @@ -6,281 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "ep_log.h" #include <clc/clc.h> -#include <clc/clcmacro.h> -#include <clc/math/math.h> +#include <clc/math/clc_asinh.h> -_CLC_OVERLOAD _CLC_DEF float asinh(float x) { - uint ux = as_uint(x); - uint ax = ux & EXSIGNBIT_SP32; - uint xsgn = ax ^ ux; - - // |x| <= 2 - float t = x * x; - float a = mad(t, - mad(t, - mad(t, - mad(t, -1.177198915954942694e-4f, -4.162727710583425360e-2f), - 
-5.063201055468483248e-1f), - -1.480204186473758321f), - -1.152965835871758072f); - float b = mad(t, - mad(t, - mad(t, - mad(t, 6.284381367285534560e-2f, 1.260024978680227945f), - 6.582362487198468066f), - 11.99423176003939087f), - 6.917795026025976739f); - - float q = MATH_DIVIDE(a, b); - float z1 = mad(x*t, q, x); - - // |x| > 2 - - // Arguments greater than 1/sqrt(epsilon) in magnitude are - // approximated by asinh(x) = ln(2) + ln(abs(x)), with sign of x - // Arguments such that 4.0 <= abs(x) <= 1/sqrt(epsilon) are - // approximated by asinhf(x) = ln(abs(x) + sqrt(x*x+1)) - // with the sign of x (see Abramowitz and Stegun 4.6.20) - - float absx = as_float(ax); - int hi = ax > 0x46000000U; - float y = MATH_SQRT(absx * absx + 1.0f) + absx; - y = hi ? absx : y; - float r = log(y) + (hi ? 0x1.62e430p-1f : 0.0f); - float z2 = as_float(xsgn | as_uint(r)); - - float z = ax <= 0x40000000 ? z1 : z2; - z = ax < 0x39800000U | ax >= PINFBITPATT_SP32 ? x : z; - - return z; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, asinh, float) - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -#define NA0 -0.12845379283524906084997e0 -#define NA1 -0.21060688498409799700819e0 -#define NA2 -0.10188951822578188309186e0 -#define NA3 -0.13891765817243625541799e-1 -#define NA4 -0.10324604871728082428024e-3 - -#define DA0 0.77072275701149440164511e0 -#define DA1 0.16104665505597338100747e1 -#define DA2 0.11296034614816689554875e1 -#define DA3 0.30079351943799465092429e0 -#define DA4 0.235224464765951442265117e-1 - -#define NB0 -0.12186605129448852495563e0 -#define NB1 -0.19777978436593069928318e0 -#define NB2 -0.94379072395062374824320e-1 -#define NB3 -0.12620141363821680162036e-1 -#define NB4 -0.903396794842691998748349e-4 - -#define DB0 0.73119630776696495279434e0 -#define DB1 0.15157170446881616648338e1 -#define DB2 0.10524909506981282725413e1 -#define DB3 0.27663713103600182193817e0 -#define DB4 0.21263492900663656707646e-1 - -#define NC0 
-0.81210026327726247622500e-1 -#define NC1 -0.12327355080668808750232e0 -#define NC2 -0.53704925162784720405664e-1 -#define NC3 -0.63106739048128554465450e-2 -#define NC4 -0.35326896180771371053534e-4 - -#define DC0 0.48726015805581794231182e0 -#define DC1 0.95890837357081041150936e0 -#define DC2 0.62322223426940387752480e0 -#define DC3 0.15028684818508081155141e0 -#define DC4 0.10302171620320141529445e-1 - -#define ND0 -0.4638179204422665073e-1 -#define ND1 -0.7162729496035415183e-1 -#define ND2 -0.3247795155696775148e-1 -#define ND3 -0.4225785421291932164e-2 -#define ND4 -0.3808984717603160127e-4 -#define ND5 0.8023464184964125826e-6 - -#define DD0 0.2782907534642231184e0 -#define DD1 0.5549945896829343308e0 -#define DD2 0.3700732511330698879e0 -#define DD3 0.9395783438240780722e-1 -#define DD4 0.7200057974217143034e-2 - -#define NE0 -0.121224194072430701e-4 -#define NE1 -0.273145455834305218e-3 -#define NE2 -0.152866982560895737e-2 -#define NE3 -0.292231744584913045e-2 -#define NE4 -0.174670900236060220e-2 -#define NE5 -0.891754209521081538e-12 - -#define DE0 0.499426632161317606e-4 -#define DE1 0.139591210395547054e-2 -#define DE2 0.107665231109108629e-1 -#define DE3 0.325809818749873406e-1 -#define DE4 0.415222526655158363e-1 -#define DE5 0.186315628774716763e-1 - -#define NF0 -0.195436610112717345e-4 -#define NF1 -0.233315515113382977e-3 -#define NF2 -0.645380957611087587e-3 -#define NF3 -0.478948863920281252e-3 -#define NF4 -0.805234112224091742e-12 -#define NF5 0.246428598194879283e-13 - -#define DF0 0.822166621698664729e-4 -#define DF1 0.135346265620413852e-2 -#define DF2 0.602739242861830658e-2 -#define DF3 0.972227795510722956e-2 -#define DF4 0.510878800983771167e-2 - -#define NG0 -0.209689451648100728e-6 -#define NG1 -0.219252358028695992e-5 -#define NG2 -0.551641756327550939e-5 -#define NG3 -0.382300259826830258e-5 -#define NG4 -0.421182121910667329e-17 -#define NG5 0.492236019998237684e-19 - -#define DG0 0.889178444424237735e-6 -#define DG1 
0.131152171690011152e-4 -#define DG2 0.537955850185616847e-4 -#define DG3 0.814966175170941864e-4 -#define DG4 0.407786943832260752e-4 - -#define NH0 -0.178284193496441400e-6 -#define NH1 -0.928734186616614974e-6 -#define NH2 -0.923318925566302615e-6 -#define NH3 -0.776417026702577552e-19 -#define NH4 0.290845644810826014e-21 - -#define DH0 0.786694697277890964e-6 -#define DH1 0.685435665630965488e-5 -#define DH2 0.153780175436788329e-4 -#define DH3 0.984873520613417917e-5 - -#define NI0 -0.538003743384069117e-10 -#define NI1 -0.273698654196756169e-9 -#define NI2 -0.268129826956403568e-9 -#define NI3 -0.804163374628432850e-29 - -#define DI0 0.238083376363471960e-9 -#define DI1 0.203579344621125934e-8 -#define DI2 0.450836980450693209e-8 -#define DI3 0.286005148753497156e-8 - -_CLC_OVERLOAD _CLC_DEF double asinh(double x) { - const double rteps = 0x1.6a09e667f3bcdp-27; - const double recrteps = 0x1.6a09e667f3bcdp+26; - - // log2_lead and log2_tail sum to an extra-precise version of log(2) - const double log2_lead = 0x1.62e42ep-1; - const double log2_tail = 0x1.efa39ef35793cp-25; - - ulong ux = as_ulong(x); - ulong ax = ux & ~SIGNBIT_DP64; - double absx = as_double(ax); - - double t = x * x; - double pn, tn, pd, td; - - // XXX we are betting here that we can evaluate 8 pairs of - // polys faster than we can grab 12 coefficients from a table - // This also uses fewer registers - - // |x| >= 8 - pn = fma(t, fma(t, fma(t, NI3, NI2), NI1), NI0); - pd = fma(t, fma(t, fma(t, DI3, DI2), DI1), DI0); - - tn = fma(t, fma(t, fma(t, fma(t, NH4, NH3), NH2), NH1), NH0); - td = fma(t, fma(t, fma(t, DH3, DH2), DH1), DH0); - pn = absx < 8.0 ? tn : pn; - pd = absx < 8.0 ? td : pd; - - tn = fma(t, fma(t, fma(t, fma(t, fma(t, NG5, NG4), NG3), NG2), NG1), NG0); - td = fma(t, fma(t, fma(t, fma(t, DG4, DG3), DG2), DG1), DG0); - pn = absx < 4.0 ? tn : pn; - pd = absx < 4.0 ? 
td : pd; - - tn = fma(t, fma(t, fma(t, fma(t, fma(t, NF5, NF4), NF3), NF2), NF1), NF0); - td = fma(t, fma(t, fma(t, fma(t, DF4, DF3), DF2), DF1), DF0); - pn = absx < 2.0 ? tn : pn; - pd = absx < 2.0 ? td : pd; - - tn = fma(t, fma(t, fma(t, fma(t, fma(t, NE5, NE4), NE3), NE2), NE1), NE0); - td = fma(t, fma(t, fma(t, fma(t, fma(t, DE5, DE4), DE3), DE2), DE1), DE0); - pn = absx < 1.5 ? tn : pn; - pd = absx < 1.5 ? td : pd; - - tn = fma(t, fma(t, fma(t, fma(t, fma(t, ND5, ND4), ND3), ND2), ND1), ND0); - td = fma(t, fma(t, fma(t, fma(t, DD4, DD3), DD2), DD1), DD0); - pn = absx <= 1.0 ? tn : pn; - pd = absx <= 1.0 ? td : pd; - - tn = fma(t, fma(t, fma(t, fma(t, NC4, NC3), NC2), NC1), NC0); - td = fma(t, fma(t, fma(t, fma(t, DC4, DC3), DC2), DC1), DC0); - pn = absx < 0.75 ? tn : pn; - pd = absx < 0.75 ? td : pd; - - tn = fma(t, fma(t, fma(t, fma(t, NB4, NB3), NB2), NB1), NB0); - td = fma(t, fma(t, fma(t, fma(t, DB4, DB3), DB2), DB1), DB0); - pn = absx < 0.5 ? tn : pn; - pd = absx < 0.5 ? td : pd; - - tn = fma(t, fma(t, fma(t, fma(t, NA4, NA3), NA2), NA1), NA0); - td = fma(t, fma(t, fma(t, fma(t, DA4, DA3), DA2), DA1), DA0); - pn = absx < 0.25 ? tn : pn; - pd = absx < 0.25 ? td : pd; - - double pq = MATH_DIVIDE(pn, pd); - - // |x| <= 1 - double result1 = fma(absx*t, pq, absx); - - // Other ranges - int xout = absx <= 32.0 | absx > recrteps; - double y = absx + sqrt(fma(absx, absx, 1.0)); - y = xout ? absx : y; - - double r1, r2; - int xexp; - __clc_ep_log(y, &xexp, &r1, &r2); - - double dxexp = (double)(xexp + xout); - r1 = fma(dxexp, log2_lead, r1); - r2 = fma(dxexp, log2_tail, r2); - - // 1 < x <= 32 - double v2 = (pq + 0.25) / t; - double r = v2 + r1; - double s = ((r1 - r) + v2) + r2; - double v1 = r + s; - v2 = (r - v1) + s; - double result2 = v1 + v2; - - // x > 32 - double result3 = r1 + r2; - - double ret = absx > 1.0 ? result2 : result1; - ret = absx > 32.0 ? result3 : ret; - ret = x < 0.0 ? 
-ret : ret; - - // NaN, +-Inf, or x small enough that asinh(x) = x - ret = ax >= PINFBITPATT_DP64 | absx < rteps ? x : ret; - return ret; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, asinh, double) - -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_UNARY_BUILTIN_FP16(asinh) - -#endif +#undef __CLC_FUNCTION +#define __CLC_FUNCTION asinh +#include <clc/math/unary_builtin.inc> diff --git a/libclc/generic/lib/math/clc_pow.cl b/libclc/generic/lib/math/clc_pow.cl index afa598f5538d0..9205133bd7a24 100644 --- a/libclc/generic/lib/math/clc_pow.cl +++ b/libclc/generic/lib/math/clc_pow.cl @@ -115,7 +115,8 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pow(float x, float y) { const float LOG2_HEAD = 0x1.62e000p-1f; /* 0.693115234 */ const float LOG2_TAIL = 0x1.0bfbe8p-15f; /* 0.0000319461833 */ - tv = USE_TABLE(loge_tbl, indx); + tv.s0 = USE_TABLE(loge_tbl_lo, indx); + tv.s1 = USE_TABLE(loge_tbl_hi, indx); float lth = -r; float ltt = __clc_mad(mfn, LOG2_TAIL, -poly) + tv.s1; float lt = lth + ltt; diff --git a/libclc/generic/lib/math/clc_pown.cl b/libclc/generic/lib/math/clc_pown.cl index d2a9001193fd8..e6ea645121f87 100644 --- a/libclc/generic/lib/math/clc_pown.cl +++ b/libclc/generic/lib/math/clc_pown.cl @@ -113,7 +113,8 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pown(float x, int ny) { const float LOG2_HEAD = 0x1.62e000p-1f; // 0.693115234 const float LOG2_TAIL = 0x1.0bfbe8p-15f; // 0.0000319461833 - tv = USE_TABLE(loge_tbl, indx); + tv.s0 = USE_TABLE(loge_tbl_lo, indx); + tv.s1 = USE_TABLE(loge_tbl_hi, indx); float lth = -r; float ltt = __clc_mad(mfn, LOG2_TAIL, -poly) + tv.s1; float lt = lth + ltt; diff --git a/libclc/generic/lib/math/clc_powr.cl b/libclc/generic/lib/math/clc_powr.cl index a75279b45196d..428101cf78bc5 100644 --- a/libclc/generic/lib/math/clc_powr.cl +++ b/libclc/generic/lib/math/clc_powr.cl @@ -111,7 +111,8 @@ _CLC_DEF _CLC_OVERLOAD float __clc_powr(float x, float y) { const float LOG2_HEAD = 0x1.62e000p-1f; // 
0.693115234 const float LOG2_TAIL = 0x1.0bfbe8p-15f; // 0.0000319461833 - tv = USE_TABLE(loge_tbl, indx); + tv.s0 = USE_TABLE(loge_tbl_lo, indx); + tv.s1 = USE_TABLE(loge_tbl_hi, indx); float lth = -r; float ltt = __clc_mad(mfn, LOG2_TAIL, -poly) + tv.s1; float lt = lth + ltt; diff --git a/libclc/generic/lib/math/clc_rootn.cl b/libclc/generic/lib/math/clc_rootn.cl index eaf934ec4f1b5..99fe539dad26d 100644 --- a/libclc/generic/lib/math/clc_rootn.cl +++ b/libclc/generic/lib/math/clc_rootn.cl @@ -113,7 +113,8 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rootn(float x, int ny) { const float LOG2_HEAD = 0x1.62e000p-1f; // 0.693115234 const float LOG2_TAIL = 0x1.0bfbe8p-15f; // 0.0000319461833 - tv = USE_TABLE(loge_tbl, indx); + tv.s0 = USE_TABLE(loge_tbl_lo, indx); + tv.s1 = USE_TABLE(loge_tbl_hi, indx); float lth = -r; float ltt = __clc_mad(mfn, LOG2_TAIL, -poly) + tv.s1; float lt = lth + ltt; diff --git a/libclc/generic/lib/math/ep_log.cl b/libclc/generic/lib/math/ep_log.cl deleted file mode 100644 index 592421e7b1893..0000000000000 --- a/libclc/generic/lib/math/ep_log.cl +++ /dev/null @@ -1,80 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifdef cl_khr_fp64 - -#include "ep_log.h" -#include <clc/clc.h> -#include <clc/math/math.h> -#include <clc/math/tables.h> - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -#define LN0 8.33333333333317923934e-02 -#define LN1 1.25000000037717509602e-02 -#define LN2 2.23213998791944806202e-03 -#define LN3 4.34887777707614552256e-04 - -#define LF0 8.33333333333333593622e-02 -#define LF1 1.24999999978138668903e-02 -#define LF2 2.23219810758559851206e-03 - -_CLC_DEF void __clc_ep_log(double x, private int *xexp, private double *r1, - private double *r2) { - // Computes natural log(x). Algorithm based on: - // Ping-Tak Peter Tang - // "Table-driven implementation of the logarithm function in IEEE - // floating-point arithmetic" - // ACM Transactions on Mathematical Software (TOMS) - // Volume 16, Issue 4 (December 1990) - int near_one = x >= 0x1.e0faap-1 & x <= 0x1.1082cp+0; - - ulong ux = as_ulong(x); - ulong uxs = as_ulong(as_double(0x03d0000000000000UL | ux) - 0x1.0p-962); - int c = ux < IMPBIT_DP64; - ux = c ? uxs : ux; - int expadjust = c ? 60 : 0; - - // Store the exponent of x in xexp and put f into the range [0.5,1) - int xexp1 = ((as_int2(ux).hi >> 20) & 0x7ff) - EXPBIAS_DP64 - expadjust; - double f = as_double(HALFEXPBITS_DP64 | (ux & MANTBITS_DP64)); - *xexp = near_one ? 0 : xexp1; - - double r = x - 1.0; - double u1 = MATH_DIVIDE(r, 2.0 + r); - double ru1 = -r * u1; - u1 = u1 + u1; - - int index = as_int2(ux).hi >> 13; - index = ((0x80 | (index & 0x7e)) >> 1) + (index & 0x1); - - double f1 = index * 0x1.0p-7; - double f2 = f - f1; - double u2 = MATH_DIVIDE(f2, fma(0.5, f2, f1)); - - double2 tv = USE_TABLE(ln_tbl, (index - 64)); - double z1 = tv.s0; - double q = tv.s1; - - z1 = near_one ? r : z1; - q = near_one ? 0.0 : q; - double u = near_one ? u1 : u2; - double v = u * u; - - double cc = near_one ? 
ru1 : u2; - - double z21 = fma(v, fma(v, fma(v, LN3, LN2), LN1), LN0); - double z22 = fma(v, fma(v, LF2, LF1), LF0); - double z2 = near_one ? z21 : z22; - z2 = fma(u * v, z2, cc) + q; - - *r1 = z1; - *r2 = z2; -} - -#endif diff --git a/libclc/generic/lib/math/log1p.cl b/libclc/generic/lib/math/log1p.cl index abd785dd6e902..8db8c3c1950c5 100644 --- a/libclc/generic/lib/math/log1p.cl +++ b/libclc/generic/lib/math/log1p.cl @@ -7,164 +7,8 @@ //===----------------------------------------------------------------------===// #include <clc/clc.h> -#include <clc/clcmacro.h> -#include <clc/math/math.h> -#include <clc/math/tables.h> +#include <clc/math/clc_log1p.h> -_CLC_OVERLOAD _CLC_DEF float log1p(float x) -{ - float w = x; - uint ux = as_uint(x); - uint ax = ux & EXSIGNBIT_SP32; - - // |x| < 2^-4 - float u2 = MATH_DIVIDE(x, 2.0f + x); - float u = u2 + u2; - float v = u * u; - // 2/(5 * 2^5), 2/(3 * 2^3) - float zsmall = mad(-u2, x, mad(v, 0x1.99999ap-7f, 0x1.555556p-4f) * v * u) + x; - - // |x| >= 2^-4 - ux = as_uint(x + 1.0f); - - int m = (int)((ux >> EXPSHIFTBITS_SP32) & 0xff) - EXPBIAS_SP32; - float mf = (float)m; - uint indx = (ux & 0x007f0000) + ((ux & 0x00008000) << 1); - float F = as_float(indx | 0x3f000000); - - // x > 2^24 - float fg24 = F - as_float(0x3f000000 | (ux & MANTBITS_SP32)); - - // x <= 2^24 - uint xhi = ux & 0xffff8000; - float xh = as_float(xhi); - float xt = (1.0f - xh) + w; - uint xnm = ((~(xhi & 0x7f800000)) - 0x00800000) & 0x7f800000; - xt = xt * as_float(xnm) * 0.5f; - float fl24 = F - as_float(0x3f000000 | (xhi & MANTBITS_SP32)) - xt; - - float f = mf > 24.0f ? 
fg24 : fl24; - - indx = indx >> 16; - float r = f * USE_TABLE(log_inv_tbl, indx); - - // 1/3, 1/2 - float poly = mad(mad(r, 0x1.555556p-2f, 0x1.0p-1f), r*r, r); - - const float LOG2_HEAD = 0x1.62e000p-1f; // 0.693115234 - const float LOG2_TAIL = 0x1.0bfbe8p-15f; // 0.0000319461833 - - float2 tv = USE_TABLE(loge_tbl, indx); - float z1 = mad(mf, LOG2_HEAD, tv.s0); - float z2 = mad(mf, LOG2_TAIL, -poly) + tv.s1; - float z = z1 + z2; - - z = ax < 0x3d800000U ? zsmall : z; - - - - // Edge cases - z = ax >= PINFBITPATT_SP32 ? w : z; - z = w < -1.0f ? as_float(QNANBITPATT_SP32) : z; - z = w == -1.0f ? as_float(NINFBITPATT_SP32) : z; - //fix subnormals - z = ax < 0x33800000 ? x : z; - - return z; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, log1p, float); - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double log1p(double x) -{ - // Computes natural log(1+x). Algorithm based on: - // Ping-Tak Peter Tang - // "Table-driven implementation of the logarithm function in IEEE - // floating-point arithmetic" - // ACM Transactions on Mathematical Software (TOMS) - // Volume 16, Issue 4 (December 1990) - // Note that we use a lookup table of size 64 rather than 128, - // and compensate by having extra terms in the minimax polynomial - // for the kernel approximation. - - // Process Inside the threshold now - ulong ux = as_ulong(1.0 + x); - int xexp = ((as_int2(ux).hi >> 20) & 0x7ff) - EXPBIAS_DP64; - double f = as_double(ONEEXPBITS_DP64 | (ux & MANTBITS_DP64)); - - int j = as_int2(ux).hi >> 13; - j = ((0x80 | (j & 0x7e)) >> 1) + (j & 0x1); - double f1 = (double)j * 0x1.0p-6; - j -= 64; - - double f2temp = f - f1; - double m2 = as_double(convert_ulong(0x3ff - xexp) << EXPSHIFTBITS_DP64); - double f2l = fma(m2, x, m2 - f1); - double f2g = fma(m2, x, -f1) + m2; - double f2 = xexp <= MANTLENGTH_DP64-1 ? f2l : f2g; - f2 = (xexp <= -2) | (xexp >= MANTLENGTH_DP64+8) ? 
f2temp : f2; - - double2 tv = USE_TABLE(ln_tbl, j); - double z1 = tv.s0; - double q = tv.s1; - - double u = MATH_DIVIDE(f2, fma(0.5, f2, f1)); - double v = u * u; - - double poly = v * fma(v, - fma(v, 2.23219810758559851206e-03, 1.24999999978138668903e-02), - 8.33333333333333593622e-02); - - // log2_lead and log2_tail sum to an extra-precise version of log(2) - const double log2_lead = 6.93147122859954833984e-01; /* 0x3fe62e42e0000000 */ - const double log2_tail = 5.76999904754328540596e-08; /* 0x3e6efa39ef35793c */ - - double z2 = q + fma(u, poly, u); - double dxexp = (double)xexp; - double r1 = fma(dxexp, log2_lead, z1); - double r2 = fma(dxexp, log2_tail, z2); - double result1 = r1 + r2; - - // Process Outside the threshold now - double r = x; - u = r / (2.0 + r); - double correction = r * u; - u = u + u; - v = u * u; - r1 = r; - - poly = fma(v, - fma(v, - fma(v, 4.34887777707614552256e-04, 2.23213998791944806202e-03), - 1.25000000037717509602e-02), - 8.33333333333317923934e-02); - - r2 = fma(u*v, poly, -correction); - - // The values exp(-1/16)-1 and exp(1/16)-1 - const double log1p_thresh1 = -0x1.f0540438fd5c3p-5; - const double log1p_thresh2 = 0x1.082b577d34ed8p-4; - double result2 = r1 + r2; - result2 = x < log1p_thresh1 | x > log1p_thresh2 ? result1 : result2; - - result2 = isinf(x) ? x : result2; - result2 = x < -1.0 ? as_double(QNANBITPATT_DP64) : result2; - result2 = x == -1.0 ? 
as_double(NINFBITPATT_DP64) : result2; - return result2; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log1p, double); - -#endif // cl_khr_fp64 - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_UNARY_BUILTIN_FP16(log1p) - -#endif +#undef __CLC_FUNCTION +#define __CLC_FUNCTION log1p +#include <clc/math/unary_builtin.inc> diff --git a/libclc/generic/lib/math/tables.cl b/libclc/generic/lib/math/tables.cl index 869497386afbb..d7d3ba4aafec9 100644 --- a/libclc/generic/lib/math/tables.cl +++ b/libclc/generic/lib/math/tables.cl @@ -10,270 +10,6 @@ #include <clc/math/tables.h> -DECLARE_TABLE(float2, LOGE_TBL, 129) = { - (float2)(0x0.000000p+0f, 0x0.000000p+0f), - (float2)(0x1.fe0000p-8f, 0x1.535882p-23f), - (float2)(0x1.fc0000p-7f, 0x1.5161f8p-20f), - (float2)(0x1.7b8000p-6f, 0x1.1b07d4p-18f), - (float2)(0x1.f82000p-6f, 0x1.361cf0p-19f), - (float2)(0x1.39e000p-5f, 0x1.0f73fcp-18f), - (float2)(0x1.774000p-5f, 0x1.63d8cap-19f), - (float2)(0x1.b42000p-5f, 0x1.bae232p-18f), - (float2)(0x1.f0a000p-5f, 0x1.86008ap-20f), - (float2)(0x1.164000p-4f, 0x1.36eea2p-16f), - (float2)(0x1.340000p-4f, 0x1.d7961ap-16f), - (float2)(0x1.51a000p-4f, 0x1.073f06p-16f), - (float2)(0x1.6f0000p-4f, 0x1.a515cap-17f), - (float2)(0x1.8c2000p-4f, 0x1.45d630p-16f), - (float2)(0x1.a92000p-4f, 0x1.b4e92ap-18f), - (float2)(0x1.c5e000p-4f, 0x1.523d6ep-18f), - (float2)(0x1.e26000p-4f, 0x1.076e2ap-16f), - (float2)(0x1.fec000p-4f, 0x1.2263b6p-17f), - (float2)(0x1.0d6000p-3f, 0x1.7e7cd0p-15f), - (float2)(0x1.1b6000p-3f, 0x1.2ad52ep-15f), - (float2)(0x1.294000p-3f, 0x1.52f81ep-15f), - (float2)(0x1.370000p-3f, 0x1.fc201ep-15f), - (float2)(0x1.44c000p-3f, 0x1.2b6ccap-15f), - (float2)(0x1.526000p-3f, 0x1.cbc742p-16f), - (float2)(0x1.5fe000p-3f, 0x1.3070a6p-15f), - (float2)(0x1.6d6000p-3f, 0x1.fce33ap-20f), - (float2)(0x1.7aa000p-3f, 0x1.890210p-15f), - (float2)(0x1.87e000p-3f, 0x1.a06520p-15f), - (float2)(0x1.952000p-3f, 0x1.6a73d0p-17f), - (float2)(0x1.a22000p-3f, 
0x1.bc1fe2p-15f), - (float2)(0x1.af2000p-3f, 0x1.c94e80p-15f), - (float2)(0x1.bc2000p-3f, 0x1.0ce85ap-16f), - (float2)(0x1.c8e000p-3f, 0x1.f7c79ap-15f), - (float2)(0x1.d5c000p-3f, 0x1.0b5a7cp-18f), - (float2)(0x1.e26000p-3f, 0x1.076e2ap-15f), - (float2)(0x1.ef0000p-3f, 0x1.5b97b8p-16f), - (float2)(0x1.fb8000p-3f, 0x1.186d5ep-15f), - (float2)(0x1.040000p-2f, 0x1.2ca5a6p-17f), - (float2)(0x1.0a2000p-2f, 0x1.24e272p-14f), - (float2)(0x1.104000p-2f, 0x1.8bf9aep-14f), - (float2)(0x1.166000p-2f, 0x1.5cabaap-14f), - (float2)(0x1.1c8000p-2f, 0x1.3182d2p-15f), - (float2)(0x1.228000p-2f, 0x1.41fbcep-14f), - (float2)(0x1.288000p-2f, 0x1.5a13dep-14f), - (float2)(0x1.2e8000p-2f, 0x1.c575c2p-15f), - (float2)(0x1.346000p-2f, 0x1.dd9a98p-14f), - (float2)(0x1.3a6000p-2f, 0x1.3155a4p-16f), - (float2)(0x1.404000p-2f, 0x1.843434p-17f), - (float2)(0x1.460000p-2f, 0x1.8bc21cp-14f), - (float2)(0x1.4be000p-2f, 0x1.7e55dcp-16f), - (float2)(0x1.51a000p-2f, 0x1.5b0e5ap-15f), - (float2)(0x1.576000p-2f, 0x1.dc5d14p-16f), - (float2)(0x1.5d0000p-2f, 0x1.bdbf58p-14f), - (float2)(0x1.62c000p-2f, 0x1.05e572p-15f), - (float2)(0x1.686000p-2f, 0x1.903d36p-15f), - (float2)(0x1.6e0000p-2f, 0x1.1d5456p-15f), - (float2)(0x1.738000p-2f, 0x1.d7f6bap-14f), - (float2)(0x1.792000p-2f, 0x1.4abfbap-15f), - (float2)(0x1.7ea000p-2f, 0x1.f07704p-15f), - (float2)(0x1.842000p-2f, 0x1.a3b43cp-15f), - (float2)(0x1.89a000p-2f, 0x1.9c360ap-17f), - (float2)(0x1.8f0000p-2f, 0x1.1e8736p-14f), - (float2)(0x1.946000p-2f, 0x1.941c20p-14f), - (float2)(0x1.99c000p-2f, 0x1.958116p-14f), - (float2)(0x1.9f2000p-2f, 0x1.23ecbep-14f), - (float2)(0x1.a48000p-2f, 0x1.024396p-16f), - (float2)(0x1.a9c000p-2f, 0x1.d93534p-15f), - (float2)(0x1.af0000p-2f, 0x1.293246p-14f), - (float2)(0x1.b44000p-2f, 0x1.eef798p-15f), - (float2)(0x1.b98000p-2f, 0x1.625a4cp-16f), - (float2)(0x1.bea000p-2f, 0x1.4d9da6p-14f), - (float2)(0x1.c3c000p-2f, 0x1.d7a7ccp-14f), - (float2)(0x1.c8e000p-2f, 0x1.f7c79ap-14f), - (float2)(0x1.ce0000p-2f, 0x1.af0b84p-14f), - 
(float2)(0x1.d32000p-2f, 0x1.fcfc00p-15f), - (float2)(0x1.d82000p-2f, 0x1.e7258ap-14f), - (float2)(0x1.dd4000p-2f, 0x1.a81306p-16f), - (float2)(0x1.e24000p-2f, 0x1.1034f8p-15f), - (float2)(0x1.e74000p-2f, 0x1.09875ap-16f), - (float2)(0x1.ec2000p-2f, 0x1.99d246p-14f), - (float2)(0x1.f12000p-2f, 0x1.1ebf5ep-15f), - (float2)(0x1.f60000p-2f, 0x1.23fa70p-14f), - (float2)(0x1.fae000p-2f, 0x1.588f78p-14f), - (float2)(0x1.ffc000p-2f, 0x1.2e0856p-14f), - (float2)(0x1.024000p-1f, 0x1.52a5a4p-13f), - (float2)(0x1.04a000p-1f, 0x1.df9da8p-13f), - (float2)(0x1.072000p-1f, 0x1.f2e0e6p-16f), - (float2)(0x1.098000p-1f, 0x1.bd3d5cp-15f), - (float2)(0x1.0be000p-1f, 0x1.cb9094p-15f), - (float2)(0x1.0e4000p-1f, 0x1.261746p-15f), - (float2)(0x1.108000p-1f, 0x1.f39e2cp-13f), - (float2)(0x1.12e000p-1f, 0x1.719592p-13f), - (float2)(0x1.154000p-1f, 0x1.87a5e8p-14f), - (float2)(0x1.178000p-1f, 0x1.eabbd8p-13f), - (float2)(0x1.19e000p-1f, 0x1.cd68cep-14f), - (float2)(0x1.1c2000p-1f, 0x1.b81f70p-13f), - (float2)(0x1.1e8000p-1f, 0x1.7d79c0p-15f), - (float2)(0x1.20c000p-1f, 0x1.b9a324p-14f), - (float2)(0x1.230000p-1f, 0x1.30d7bep-13f), - (float2)(0x1.254000p-1f, 0x1.5bce98p-13f), - (float2)(0x1.278000p-1f, 0x1.5e1288p-13f), - (float2)(0x1.29c000p-1f, 0x1.37fec2p-13f), - (float2)(0x1.2c0000p-1f, 0x1.d3da88p-14f), - (float2)(0x1.2e4000p-1f, 0x1.d0db90p-15f), - (float2)(0x1.306000p-1f, 0x1.d7334ep-13f), - (float2)(0x1.32a000p-1f, 0x1.133912p-13f), - (float2)(0x1.34e000p-1f, 0x1.44ece6p-16f), - (float2)(0x1.370000p-1f, 0x1.17b546p-13f), - (float2)(0x1.392000p-1f, 0x1.e0d356p-13f), - (float2)(0x1.3b6000p-1f, 0x1.0893fep-14f), - (float2)(0x1.3d8000p-1f, 0x1.026a70p-13f), - (float2)(0x1.3fa000p-1f, 0x1.5b84d0p-13f), - (float2)(0x1.41c000p-1f, 0x1.8fe846p-13f), - (float2)(0x1.43e000p-1f, 0x1.9fe2f8p-13f), - (float2)(0x1.460000p-1f, 0x1.8bc21cp-13f), - (float2)(0x1.482000p-1f, 0x1.53d1eap-13f), - (float2)(0x1.4a4000p-1f, 0x1.f0bb60p-14f), - (float2)(0x1.4c6000p-1f, 0x1.e6bf32p-15f), - 
(float2)(0x1.4e6000p-1f, 0x1.d811b6p-13f), - (float2)(0x1.508000p-1f, 0x1.13cc00p-13f), - (float2)(0x1.52a000p-1f, 0x1.6932dep-16f), - (float2)(0x1.54a000p-1f, 0x1.246798p-13f), - (float2)(0x1.56a000p-1f, 0x1.f9d5b2p-13f), - (float2)(0x1.58c000p-1f, 0x1.5b6b9ap-14f), - (float2)(0x1.5ac000p-1f, 0x1.404c34p-13f), - (float2)(0x1.5cc000p-1f, 0x1.b1dc6cp-13f), - (float2)(0x1.5ee000p-1f, 0x1.54920ap-20f), - (float2)(0x1.60e000p-1f, 0x1.97a23cp-16f), - (float2)(0x1.62e000p-1f, 0x1.0bfbe8p-15f), -}; - -DECLARE_TABLE(float, LOG_INV_TBL, 129) = { - 0x1.000000p+1f, - 0x1.fc07f0p+0f, - 0x1.f81f82p+0f, - 0x1.f4465ap+0f, - 0x1.f07c20p+0f, - 0x1.ecc07cp+0f, - 0x1.e9131ap+0f, - 0x1.e573acp+0f, - 0x1.e1e1e2p+0f, - 0x1.de5d6ep+0f, - 0x1.dae608p+0f, - 0x1.d77b66p+0f, - 0x1.d41d42p+0f, - 0x1.d0cb58p+0f, - 0x1.cd8568p+0f, - 0x1.ca4b30p+0f, - 0x1.c71c72p+0f, - 0x1.c3f8f0p+0f, - 0x1.c0e070p+0f, - 0x1.bdd2b8p+0f, - 0x1.bacf92p+0f, - 0x1.b7d6c4p+0f, - 0x1.b4e81cp+0f, - 0x1.b20364p+0f, - 0x1.af286cp+0f, - 0x1.ac5702p+0f, - 0x1.a98ef6p+0f, - 0x1.a6d01ap+0f, - 0x1.a41a42p+0f, - 0x1.a16d40p+0f, - 0x1.9ec8eap+0f, - 0x1.9c2d14p+0f, - 0x1.99999ap+0f, - 0x1.970e50p+0f, - 0x1.948b10p+0f, - 0x1.920fb4p+0f, - 0x1.8f9c18p+0f, - 0x1.8d3018p+0f, - 0x1.8acb90p+0f, - 0x1.886e60p+0f, - 0x1.861862p+0f, - 0x1.83c978p+0f, - 0x1.818182p+0f, - 0x1.7f4060p+0f, - 0x1.7d05f4p+0f, - 0x1.7ad220p+0f, - 0x1.78a4c8p+0f, - 0x1.767dcep+0f, - 0x1.745d18p+0f, - 0x1.724288p+0f, - 0x1.702e06p+0f, - 0x1.6e1f76p+0f, - 0x1.6c16c2p+0f, - 0x1.6a13cep+0f, - 0x1.681682p+0f, - 0x1.661ec6p+0f, - 0x1.642c86p+0f, - 0x1.623fa8p+0f, - 0x1.605816p+0f, - 0x1.5e75bcp+0f, - 0x1.5c9882p+0f, - 0x1.5ac056p+0f, - 0x1.58ed24p+0f, - 0x1.571ed4p+0f, - 0x1.555556p+0f, - 0x1.539094p+0f, - 0x1.51d07ep+0f, - 0x1.501502p+0f, - 0x1.4e5e0ap+0f, - 0x1.4cab88p+0f, - 0x1.4afd6ap+0f, - 0x1.49539ep+0f, - 0x1.47ae14p+0f, - 0x1.460cbcp+0f, - 0x1.446f86p+0f, - 0x1.42d662p+0f, - 0x1.414142p+0f, - 0x1.3fb014p+0f, - 0x1.3e22ccp+0f, - 0x1.3c995ap+0f, - 
0x1.3b13b2p+0f, - 0x1.3991c2p+0f, - 0x1.381382p+0f, - 0x1.3698e0p+0f, - 0x1.3521d0p+0f, - 0x1.33ae46p+0f, - 0x1.323e34p+0f, - 0x1.30d190p+0f, - 0x1.2f684cp+0f, - 0x1.2e025cp+0f, - 0x1.2c9fb4p+0f, - 0x1.2b404ap+0f, - 0x1.29e412p+0f, - 0x1.288b02p+0f, - 0x1.27350cp+0f, - 0x1.25e228p+0f, - 0x1.24924ap+0f, - 0x1.234568p+0f, - 0x1.21fb78p+0f, - 0x1.20b470p+0f, - 0x1.1f7048p+0f, - 0x1.1e2ef4p+0f, - 0x1.1cf06ap+0f, - 0x1.1bb4a4p+0f, - 0x1.1a7b96p+0f, - 0x1.194538p+0f, - 0x1.181182p+0f, - 0x1.16e068p+0f, - 0x1.15b1e6p+0f, - 0x1.1485f0p+0f, - 0x1.135c82p+0f, - 0x1.12358ep+0f, - 0x1.111112p+0f, - 0x1.0fef02p+0f, - 0x1.0ecf56p+0f, - 0x1.0db20ap+0f, - 0x1.0c9714p+0f, - 0x1.0b7e6ep+0f, - 0x1.0a6810p+0f, - 0x1.0953f4p+0f, - 0x1.084210p+0f, - 0x1.073260p+0f, - 0x1.0624dep+0f, - 0x1.051980p+0f, - 0x1.041042p+0f, - 0x1.03091cp+0f, - 0x1.020408p+0f, - 0x1.010102p+0f, - 0x1.000000p+0f, -}; - DECLARE_TABLE(float2, LOG_INV_TBL_EP, 129) = { (float2)(0x1.000000p+1f, 0x0.000000p+0f), (float2)(0x1.fc0000p+0f, 0x1.fc07f0p-14f), @@ -994,8 +730,6 @@ DECLARE_TABLE(float2, EXP_TBL_EP, 65) = { (float2) (0x1.000000p+1f, 0x0.000000p+0f), }; -TABLE_FUNCTION(float2, LOGE_TBL, loge_tbl); -TABLE_FUNCTION(float, LOG_INV_TBL, log_inv_tbl); TABLE_FUNCTION(float2, LOG_INV_TBL_EP, log_inv_tbl_ep); TABLE_FUNCTION(float2, LOG2_TBL, log2_tbl); TABLE_FUNCTION(float2, LOG10_TBL, log10_tbl); @@ -1011,75 +745,6 @@ TABLE_FUNCTION(float2, EXP_TBL_EP, exp_tbl_ep); #ifdef cl_khr_fp64 -DECLARE_TABLE(double2, LN_TBL, 65) = { - (double2)(0x0.0000000000000p+0, 0x0.0000000000000p+0), - (double2)(0x1.fc0a800000000p-7, 0x1.61f807c79f3dbp-28), - (double2)(0x1.f829800000000p-6, 0x1.873c1980267c8p-25), - (double2)(0x1.7745800000000p-5, 0x1.ec65b9f88c69ep-26), - (double2)(0x1.f0a3000000000p-5, 0x1.8022c54cc2f99p-26), - (double2)(0x1.341d700000000p-4, 0x1.2c37a3a125330p-25), - (double2)(0x1.6f0d200000000p-4, 0x1.15cad69737c93p-25), - (double2)(0x1.a926d00000000p-4, 0x1.d256ab1b285e9p-27), - (double2)(0x1.e270700000000p-4, 
0x1.b8abcb97a7aa2p-26), - (double2)(0x1.0d77e00000000p-3, 0x1.f34239659a5dcp-25), - (double2)(0x1.2955280000000p-3, 0x1.e07fd48d30177p-25), - (double2)(0x1.44d2b00000000p-3, 0x1.b32df4799f4f6p-25), - (double2)(0x1.5ff3000000000p-3, 0x1.c29e4f4f21cf8p-25), - (double2)(0x1.7ab8900000000p-3, 0x1.086c848df1b59p-30), - (double2)(0x1.9525a80000000p-3, 0x1.cf456b4764130p-27), - (double2)(0x1.af3c900000000p-3, 0x1.3a02ffcb63398p-25), - (double2)(0x1.c8ff780000000p-3, 0x1.1e6a6886b0976p-25), - (double2)(0x1.e270700000000p-3, 0x1.b8abcb97a7aa2p-25), - (double2)(0x1.fb91800000000p-3, 0x1.b578f8aa35552p-25), - (double2)(0x1.0a324c0000000p-2, 0x1.139c871afb9fcp-25), - (double2)(0x1.1675c80000000p-2, 0x1.5d5d30701ce64p-25), - (double2)(0x1.22941c0000000p-2, 0x1.de7bcb2d12142p-25), - (double2)(0x1.2e8e280000000p-2, 0x1.d708e984e1664p-25), - (double2)(0x1.3a64c40000000p-2, 0x1.56945e9c72f36p-26), - (double2)(0x1.4618bc0000000p-2, 0x1.0e2f613e85bdap-29), - (double2)(0x1.51aad80000000p-2, 0x1.cb7e0b42724f6p-28), - (double2)(0x1.5d1bd80000000p-2, 0x1.fac04e52846c7p-25), - (double2)(0x1.686c800000000p-2, 0x1.e9b14aec442bep-26), - (double2)(0x1.739d7c0000000p-2, 0x1.b5de8034e7126p-25), - (double2)(0x1.7eaf800000000p-2, 0x1.dc157e1b259d3p-25), - (double2)(0x1.89a3380000000p-2, 0x1.b05096ad69c62p-28), - (double2)(0x1.9479400000000p-2, 0x1.c2116faba4cddp-26), - (double2)(0x1.9f323c0000000p-2, 0x1.65fcc25f95b47p-25), - (double2)(0x1.a9cec80000000p-2, 0x1.a9a08498d4850p-26), - (double2)(0x1.b44f740000000p-2, 0x1.de647b1465f77p-25), - (double2)(0x1.beb4d80000000p-2, 0x1.da71b7bf7861dp-26), - (double2)(0x1.c8ff7c0000000p-2, 0x1.e6a6886b09760p-28), - (double2)(0x1.d32fe40000000p-2, 0x1.f0075eab0ef64p-25), - (double2)(0x1.dd46a00000000p-2, 0x1.3071282fb989bp-28), - (double2)(0x1.e744240000000p-2, 0x1.0eb43c3f1bed2p-25), - (double2)(0x1.f128f40000000p-2, 0x1.faf06ecb35c84p-26), - (double2)(0x1.faf5880000000p-2, 0x1.ef1e63db35f68p-27), - (double2)(0x1.02552a0000000p-1, 0x1.69743fb1a71a5p-27), - 
(double2)(0x1.0723e40000000p-1, 0x1.c1cdf404e5796p-25), - (double2)(0x1.0be72e0000000p-1, 0x1.094aa0ada625ep-27), - (double2)(0x1.109f380000000p-1, 0x1.e2d4c96fde3ecp-25), - (double2)(0x1.154c3c0000000p-1, 0x1.2f4d5e9a98f34p-25), - (double2)(0x1.19ee6a0000000p-1, 0x1.467c96ecc5cbep-25), - (double2)(0x1.1e85f40000000p-1, 0x1.e7040d03dec5ap-25), - (double2)(0x1.23130c0000000p-1, 0x1.7bebf4282de36p-25), - (double2)(0x1.2795e00000000p-1, 0x1.289b11aeb783fp-25), - (double2)(0x1.2c0e9e0000000p-1, 0x1.a891d1772f538p-26), - (double2)(0x1.307d720000000p-1, 0x1.34f10be1fb591p-25), - (double2)(0x1.34e2880000000p-1, 0x1.d9ce1d316eb93p-25), - (double2)(0x1.393e0c0000000p-1, 0x1.3562a19a9c442p-25), - (double2)(0x1.3d90260000000p-1, 0x1.4e2adf548084cp-26), - (double2)(0x1.41d8fe0000000p-1, 0x1.08ce55cc8c97ap-26), - (double2)(0x1.4618bc0000000p-1, 0x1.0e2f613e85bdap-28), - (double2)(0x1.4a4f840000000p-1, 0x1.db03ebb0227bfp-25), - (double2)(0x1.4e7d800000000p-1, 0x1.1b75bb09cb098p-25), - (double2)(0x1.52a2d20000000p-1, 0x1.96f16abb9df22p-27), - (double2)(0x1.56bf9c0000000p-1, 0x1.5b3f399411c62p-25), - (double2)(0x1.5ad4040000000p-1, 0x1.86b3e59f65355p-26), - (double2)(0x1.5ee02a0000000p-1, 0x1.2482ceae1ac12p-26), - (double2)(0x1.62e42e0000000p-1, 0x1.efa39ef35793cp-25), -}; - -TABLE_FUNCTION(double2, LN_TBL, ln_tbl); // Arrays atan_jby256_lead and atan_jby256_tail contain diff --git a/libclc/spirv/lib/SOURCES b/libclc/spirv/lib/SOURCES index 8378587d52bf3..e39c95a985fff 100644 --- a/libclc/spirv/lib/SOURCES +++ b/libclc/spirv/lib/SOURCES @@ -32,7 +32,6 @@ subnormal_config.cl ../../generic/lib/math/cos.cl ../../generic/lib/math/cosh.cl ../../generic/lib/math/cospi.cl -../../generic/lib/math/ep_log.cl ../../generic/lib/math/erf.cl ../../generic/lib/math/erfc.cl ../../generic/lib/math/exp.cl _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits