Author: Fraser Cormack Date: 2025-04-01T12:03:21+01:00 New Revision: 13a313fe582a3c41fb5c50ca2325c0987c0af6d7
URL: https://github.com/llvm/llvm-project/commit/13a313fe582a3c41fb5c50ca2325c0987c0af6d7 DIFF: https://github.com/llvm/llvm-project/commit/13a313fe582a3c41fb5c50ca2325c0987c0af6d7.diff LOG: [libclc] Move sinpi/cospi/tanpi to the CLC library (#133889) Additionally, these builtins are now vectorized. This also moves the native_recip and native_divide builtins as they are used by the tanpi builtin. Added: libclc/clc/include/clc/math/clc_cospi.h libclc/clc/include/clc/math/clc_native_divide.h libclc/clc/include/clc/math/clc_native_recip.h libclc/clc/include/clc/math/clc_sincos_piby4.h libclc/clc/include/clc/math/clc_sincos_piby4.inc libclc/clc/include/clc/math/clc_sinpi.h libclc/clc/include/clc/math/clc_tanpi.h libclc/clc/lib/generic/math/clc_cospi.cl libclc/clc/lib/generic/math/clc_cospi.inc libclc/clc/lib/generic/math/clc_native_divide.cl libclc/clc/lib/generic/math/clc_native_divide.inc libclc/clc/lib/generic/math/clc_native_recip.cl libclc/clc/lib/generic/math/clc_native_recip.inc libclc/clc/lib/generic/math/clc_sinpi.cl libclc/clc/lib/generic/math/clc_sinpi.inc libclc/clc/lib/generic/math/clc_tanpi.cl libclc/clc/lib/generic/math/clc_tanpi.inc Modified: libclc/CMakeLists.txt libclc/clc/include/clc/math/clc_sincos_helpers.inc libclc/clc/lib/generic/SOURCES libclc/clc/lib/generic/math/clc_sincos_helpers.cl libclc/clc/lib/generic/math/clc_sincos_helpers.inc libclc/clspv/lib/SOURCES libclc/generic/lib/SOURCES libclc/generic/lib/math/clc_tan.cl libclc/generic/lib/math/cospi.cl libclc/generic/lib/math/native_divide.cl libclc/generic/lib/math/native_recip.cl libclc/generic/lib/math/sincos_helpers.cl libclc/generic/lib/math/sincos_helpers.h libclc/generic/lib/math/sinpi.cl libclc/generic/lib/math/tanpi.cl libclc/spirv/lib/SOURCES Removed: libclc/generic/lib/math/clc_tanpi.cl libclc/generic/lib/math/native_divide.inc libclc/generic/lib/math/native_recip.inc libclc/generic/lib/math/sincosD_piby4.h libclc/generic/lib/math/sincospiF_piby4.h 
################################################################################ diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index efe7f5804e8fb..d4753b22ed01c 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -265,11 +265,13 @@ endif() set_source_files_properties( # CLC builtins ${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_cos.cl + ${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_divide.cl ${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_exp2.cl ${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_exp.cl ${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_log10.cl ${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_log2.cl ${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_log.cl + ${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_recip.cl ${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_rsqrt.cl ${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_sin.cl ${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_sqrt.cl diff --git a/libclc/clc/include/clc/math/clc_cospi.h b/libclc/clc/include/clc/math/clc_cospi.h new file mode 100644 index 0000000000000..07565c23a2f07 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_cospi.h @@ -0,0 +1,20 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_MATH_CLC_COSPI_H__ +#define __CLC_MATH_CLC_COSPI_H__ + +#define __CLC_BODY <clc/math/unary_decl.inc> +#define __CLC_FUNCTION __clc_cospi + +#include <clc/math/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_MATH_CLC_COSPI_H__ diff --git a/libclc/clc/include/clc/math/clc_native_divide.h b/libclc/clc/include/clc/math/clc_native_divide.h new file mode 100644 index 0000000000000..b48c3e5d03b36 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_native_divide.h @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_MATH_CLC_NATIVE_DIVIDE_H__ +#define __CLC_MATH_CLC_NATIVE_DIVIDE_H__ + +#define __FLOAT_ONLY +#define __CLC_FUNCTION __clc_native_divide +#define __CLC_BODY <clc/shared/binary_decl.inc> + +#include <clc/math/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION +#undef __FLOAT_ONLY + +#endif // __CLC_MATH_CLC_NATIVE_DIVIDE_H__ diff --git a/libclc/clc/include/clc/math/clc_native_recip.h b/libclc/clc/include/clc/math/clc_native_recip.h new file mode 100644 index 0000000000000..9af36b0c7ce85 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_native_recip.h @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_MATH_CLC_NATIVE_RECIP_H__ +#define __CLC_MATH_CLC_NATIVE_RECIP_H__ + +#define __FLOAT_ONLY +#define __CLC_FUNCTION __clc_native_recip +#define __CLC_BODY <clc/shared/unary_decl.inc> + +#include <clc/math/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION +#undef __FLOAT_ONLY + +#endif // __CLC_MATH_CLC_NATIVE_RECIP_H__ diff --git a/libclc/clc/include/clc/math/clc_sincos_helpers.inc b/libclc/clc/include/clc/math/clc_sincos_helpers.inc index c891dd91dfd2b..4daff92955cd7 100644 --- a/libclc/clc/include/clc/math/clc_sincos_helpers.inc +++ b/libclc/clc/include/clc/math/clc_sincos_helpers.inc @@ -10,6 +10,8 @@ _CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_sinf_piby4(__CLC_FLOATN x, __CLC_FLOATN y); _CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_cosf_piby4(__CLC_FLOATN x, __CLC_FLOATN y); +_CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_tanf_piby4(__CLC_FLOATN x, + __CLC_INTN regn); _CLC_DECL _CLC_OVERLOAD __CLC_INTN __clc_argReductionS(private __CLC_FLOATN *r, private __CLC_FLOATN *rr, diff --git a/libclc/clc/include/clc/math/clc_sincos_piby4.h b/libclc/clc/include/clc/math/clc_sincos_piby4.h new file mode 100644 index 0000000000000..50608ae24e947 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_sincos_piby4.h @@ -0,0 +1,14 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/math/clc_fma.h> +#include <clc/math/clc_mad.h> +#include <clc/math/math.h> + +#define __CLC_BODY <clc/math/clc_sincos_piby4.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/clc/include/clc/math/clc_sincos_piby4.inc b/libclc/clc/include/clc/math/clc_sincos_piby4.inc new file mode 100644 index 0000000000000..91ec518b70e97 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_sincos_piby4.inc @@ -0,0 +1,174 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if __CLC_FPSIZE == 32 + +// Evaluate single precision sin and cos of value in interval [-pi/4, pi/4] +_CLC_INLINE _CLC_OVERLOAD void +__clc_sincos_piby4(__CLC_GENTYPE x, private __CLC_GENTYPE *sinval, + private __CLC_GENTYPE *cosval) { + // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ... + // = x * (1 - x^2/3! + x^4/5! - x^6/7! ... + // = x * f(w) + // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ... + // We use a minimax approximation of (f(w) - 1) / w + // because this produces an expansion in even powers of x. + + // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ... + // = f(w) + // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ... + // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w) + // because this produces an expansion in even powers of x. 
+ + const __CLC_GENTYPE sc1 = -0.166666666638608441788607926e0F; + const __CLC_GENTYPE sc2 = 0.833333187633086262120839299e-2F; + const __CLC_GENTYPE sc3 = -0.198400874359527693921333720e-3F; + const __CLC_GENTYPE sc4 = 0.272500015145584081596826911e-5F; + + const __CLC_GENTYPE cc1 = 0.41666666664325175238031e-1F; + const __CLC_GENTYPE cc2 = -0.13888887673175665567647e-2F; + const __CLC_GENTYPE cc3 = 0.24800600878112441958053e-4F; + const __CLC_GENTYPE cc4 = -0.27301013343179832472841e-6F; + + __CLC_GENTYPE x2 = x * x; + + *sinval = __clc_mad( + x * x2, __clc_mad(x2, __clc_mad(x2, __clc_mad(x2, sc4, sc3), sc2), sc1), + x); + *cosval = __clc_mad( + x2 * x2, __clc_mad(x2, __clc_mad(x2, __clc_mad(x2, cc4, cc3), cc2), cc1), + __clc_mad(x2, -0.5f, 1.0f)); +} + +#elif __CLC_FPSIZE == 64 + +_CLC_INLINE _CLC_OVERLOAD void +__clc_sincos_piby4(__CLC_GENTYPE x, __CLC_GENTYPE xx, + private __CLC_GENTYPE *sinval, + private __CLC_GENTYPE *cosval) { + // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ... + // = x * (1 - x^2/3! + x^4/5! - x^6/7! ... + // = x * f(w) + // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ... + // We use a minimax approximation of (f(w) - 1) / w + // because this produces an expansion in even powers of x. + // If xx (the tail of x) is non-zero, we add a correction + // term g(x,xx) = (1-x*x/2)*xx to the result, where g(x,xx) + // is an approximation to cos(x)*sin(xx) valid because + // xx is tiny relative to x. + + // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ... + // = f(w) + // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ... + // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w) + // because this produces an expansion in even powers of x. + // If xx (the tail of x) is non-zero, we subtract a correction + // term g(x,xx) = x*xx to the result, where g(x,xx) + // is an approximation to sin(x)*sin(xx) valid because + // xx is tiny relative to x. 
+ + const __CLC_GENTYPE sc1 = -0.166666666666666646259241729; + const __CLC_GENTYPE sc2 = 0.833333333333095043065222816e-2; + const __CLC_GENTYPE sc3 = -0.19841269836761125688538679e-3; + const __CLC_GENTYPE sc4 = 0.275573161037288022676895908448e-5; + const __CLC_GENTYPE sc5 = -0.25051132068021699772257377197e-7; + const __CLC_GENTYPE sc6 = 0.159181443044859136852668200e-9; + + const __CLC_GENTYPE cc1 = 0.41666666666666665390037e-1; + const __CLC_GENTYPE cc2 = -0.13888888888887398280412e-2; + const __CLC_GENTYPE cc3 = 0.248015872987670414957399e-4; + const __CLC_GENTYPE cc4 = -0.275573172723441909470836e-6; + const __CLC_GENTYPE cc5 = 0.208761463822329611076335e-8; + const __CLC_GENTYPE cc6 = -0.113826398067944859590880e-10; + + __CLC_GENTYPE x2 = x * x; + __CLC_GENTYPE x3 = x2 * x; + __CLC_GENTYPE r = (__CLC_GENTYPE)0.5 * x2; + __CLC_GENTYPE t = (__CLC_GENTYPE)1.0 - r; + + __CLC_GENTYPE sp = __clc_fma( + __clc_fma(__clc_fma(__clc_fma(sc6, x2, sc5), x2, sc4), x2, sc3), x2, sc2); + + __CLC_GENTYPE cp = + t + + __clc_fma(__clc_fma(__clc_fma(__clc_fma(__clc_fma(__clc_fma(cc6, x2, cc5), + x2, cc4), + x2, cc3), + x2, cc2), + x2, cc1), + x2 * x2, __clc_fma(x, xx, (1.0 - t) - r)); + + *sinval = + x - __clc_fma(-x3, sc1, __clc_fma(__clc_fma(-x3, sp, 0.5 * xx), x2, -xx)); + *cosval = cp; +} + +_CLC_INLINE _CLC_OVERLOAD void __clc_tan_piby4(__CLC_GENTYPE x, + __CLC_GENTYPE xx, + private __CLC_GENTYPE *leadval, + private __CLC_GENTYPE *tailval) { + // 0x3fe921fb54442d18 + const __CLC_GENTYPE piby4_lead = 7.85398163397448278999e-01; + // 0x3c81a62633145c06 + const __CLC_GENTYPE piby4_tail = 3.06161699786838240164e-17; + + // In order to maintain relative precision transform using the identity: + // tan(pi/4-x) = (1-tan(x))/(1+tan(x)) for arguments close to pi/4. + // Similarly use tan(x-pi/4) = (tan(x)-1)/(tan(x)+1) close to -pi/4. + + __CLC_LONGN ca = x > 0.68; + __CLC_LONGN cb = x < -0.68; + __CLC_GENTYPE transform = ca ? 1.0 : 0.0; + transform = cb ? 
-1.0 : transform; + + __CLC_GENTYPE tx = __clc_fma(-transform, x, piby4_lead) + + __clc_fma(-transform, xx, piby4_tail); + __CLC_LONGN c = ca | cb; + x = c ? tx : x; + xx = c ? 0.0 : xx; + + // Core Remez [2,3] approximation to tan(x+xx) on the interval [0,0.68]. + __CLC_GENTYPE t1 = x; + __CLC_GENTYPE r = __clc_fma(2.0, x * xx, x * x); + + __CLC_GENTYPE a = __clc_fma(r, + __clc_fma(r, 0.224044448537022097264602535574e-3, + -0.229345080057565662883358588111e-1), + 0.372379159759792203640806338901e0); + + __CLC_GENTYPE b = + __clc_fma(r, + __clc_fma(r, + __clc_fma(r, -0.232371494088563558304549252913e-3, + 0.260656620398645407524064091208e-1), + -0.515658515729031149329237816945e0), + 0.111713747927937668539901657944e1); + + __CLC_GENTYPE t2 = __clc_fma(MATH_DIVIDE(a, b), x * r, xx); + + __CLC_GENTYPE tp = t1 + t2; + + // Compute -1.0/(t1 + t2) accurately + __CLC_GENTYPE z1 = + __CLC_AS_GENTYPE(__CLC_AS_ULONGN(tp) & 0xffffffff00000000L); + __CLC_GENTYPE z2 = t2 - (z1 - t1); + __CLC_GENTYPE trec = -MATH_RECIP(tp); + __CLC_GENTYPE trec_top = + __CLC_AS_GENTYPE(__CLC_AS_ULONGN(trec) & 0xffffffff00000000L); + + __CLC_GENTYPE tpr = __clc_fma( + __clc_fma(trec_top, z2, __clc_fma(trec_top, z1, 1.0)), trec, trec_top); + + __CLC_GENTYPE tpt = transform * (1.0 - MATH_DIVIDE(2.0 * tp, 1.0 + tp)); + __CLC_GENTYPE tptr = transform * (MATH_DIVIDE(2.0 * tp, tp - 1.0) - 1.0); + + *leadval = c ? tpt : tp; + *tailval = c ? tptr : tpr; +} + +#endif diff --git a/libclc/clc/include/clc/math/clc_sinpi.h b/libclc/clc/include/clc/math/clc_sinpi.h new file mode 100644 index 0000000000000..46fec465ceb03 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_sinpi.h @@ -0,0 +1,20 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_MATH_CLC_SINPI_H__ +#define __CLC_MATH_CLC_SINPI_H__ + +#define __CLC_BODY <clc/math/unary_decl.inc> +#define __CLC_FUNCTION __clc_sinpi + +#include <clc/math/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_MATH_CLC_SINPI_H__ diff --git a/libclc/clc/include/clc/math/clc_tanpi.h b/libclc/clc/include/clc/math/clc_tanpi.h new file mode 100644 index 0000000000000..0b8efce27dee8 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_tanpi.h @@ -0,0 +1,20 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_MATH_CLC_TANPI_H__ +#define __CLC_MATH_CLC_TANPI_H__ + +#define __CLC_BODY <clc/math/unary_decl.inc> +#define __CLC_FUNCTION __clc_tanpi + +#include <clc/math/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_MATH_CLC_TANPI_H__ diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES index c31963c59e950..474b11d745a44 100644 --- a/libclc/clc/lib/generic/SOURCES +++ b/libclc/clc/lib/generic/SOURCES @@ -30,6 +30,7 @@ math/clc_atanh.cl math/clc_atanpi.cl math/clc_ceil.cl math/clc_copysign.cl +math/clc_cospi.cl math/clc_ep_log.cl math/clc_fabs.cl math/clc_fma.cl @@ -46,12 +47,14 @@ math/clc_mad.cl math/clc_modf.cl math/clc_nan.cl math/clc_native_cos.cl +math/clc_native_divide.cl math/clc_native_exp.cl math/clc_native_exp2.cl math/clc_native_log.cl math/clc_native_log10.cl math/clc_native_log2.cl math/clc_native_rsqrt.cl +math/clc_native_recip.cl math/clc_native_sin.cl math/clc_native_sqrt.cl 
math/clc_nextafter.cl @@ -65,9 +68,11 @@ math/clc_rootn.cl math/clc_round.cl math/clc_rsqrt.cl math/clc_sincos_helpers.cl +math/clc_sinpi.cl math/clc_sqrt.cl math/clc_sw_fma.cl math/clc_tables.cl +math/clc_tanpi.cl math/clc_trunc.cl relational/clc_all.cl relational/clc_any.cl diff --git a/libclc/clc/lib/generic/math/clc_cospi.cl b/libclc/clc/lib/generic/math/clc_cospi.cl new file mode 100644 index 0000000000000..07e1b49cc9e02 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_cospi.cl @@ -0,0 +1,18 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/clc_convert.h> +#include <clc/float/definitions.h> +#include <clc/internal/clc.h> +#include <clc/math/clc_fabs.h> +#include <clc/math/clc_sincos_helpers.h> +#include <clc/math/clc_sincos_piby4.h> +#include <clc/math/math.h> + +#define __CLC_BODY <clc_cospi.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/math/clc_cospi.inc b/libclc/clc/lib/generic/math/clc_cospi.inc new file mode 100644 index 0000000000000..b037f82872dde --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_cospi.inc @@ -0,0 +1,116 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if __CLC_FPSIZE == 32 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cospi(__CLC_GENTYPE x) { + __CLC_GENTYPE absx = __clc_fabs(x); + __CLC_INTN ix = __CLC_AS_INTN(absx); + __CLC_INTN iax = __CLC_CONVERT_INTN(absx); + __CLC_GENTYPE r = absx - __CLC_CONVERT_GENTYPE(iax); + __CLC_INTN xodd = (iax & 0x1) != 0 ? (__CLC_INTN)0x80000000 : (__CLC_INTN)0; + + // Initialize with return for +-Inf and NaN + __CLC_INTN ir = QNANBITPATT_SP32; + + // 2^24 <= |x| < Inf, the result is always even integer + ir = ix < PINFBITPATT_SP32 ? 0x3f800000 : ir; + + // 2^23 <= |x| < 2^24, the result is always integer + ir = ix < 0x4b800000 ? xodd | 0x3f800000 : ir; + + // 0x1.0p-7 <= |x| < 2^23, result depends on which 0.25 interval + + // r < 1.0 + __CLC_GENTYPE a = 1.0f - r; + __CLC_INTN e = 1; + __CLC_INTN s = xodd ^ (__CLC_INTN)0x80000000; + + // r <= 0.75 + __CLC_INTN c = r <= 0.75f; + a = c ? r - 0.5f : a; + e = c ? 0 : e; + + // r < 0.5 + c = r < 0.5f; + a = c ? 0.5f - r : a; + s = c ? xodd : s; + + // r <= 0.25 + c = r <= 0.25f; + a = c ? r : a; + e = c ? 1 : e; + + __CLC_GENTYPE sinval, cosval; + __clc_sincos_piby4(a * M_PI_F, &sinval, &cosval); + __CLC_INTN jr = s ^ __CLC_AS_INTN(e != 0 ? cosval : sinval); + + ir = ix < 0x4b000000 ? jr : ir; + + return __CLC_AS_GENTYPE(ir); +} + +#elif __CLC_FPSIZE == 64 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cospi(__CLC_GENTYPE x) { + __CLC_GENTYPE absx = __clc_fabs(x); + __CLC_LONGN ix = __CLC_AS_LONGN(absx); + __CLC_LONGN iax = __CLC_CONVERT_LONGN(absx); + __CLC_GENTYPE r = absx - __CLC_CONVERT_GENTYPE(iax); + __CLC_LONGN xodd = + (iax & 0x1L) != 0 ? (__CLC_LONGN)0x8000000000000000L : (__CLC_LONGN)0L; + + // Initialize with return for +-Inf and NaN + __CLC_LONGN ir = QNANBITPATT_DP64; + + // 2^53 <= |x| < Inf, the result is always even integer + ir = ix < PINFBITPATT_DP64 ? 
0x3ff0000000000000L : ir; + + // 2^52 <= |x| < 2^53, the result is always integer + ir = absx < 0x1.0p+53 ? xodd | 0x3ff0000000000000L : ir; + + // 0x1.0p-7 <= |x| < 2^52, result depends on which 0.25 interval + + // r < 1.0 + __CLC_GENTYPE a = 1.0 - r; + __CLC_LONGN e = 1; + __CLC_LONGN s = xodd ^ (__CLC_LONGN)0x8000000000000000L; + + // r <= 0.75 + __CLC_LONGN c = r <= 0.75; + __CLC_GENTYPE t = r - 0.5; + a = c ? t : a; + e = c ? 0 : e; + + // r < 0.5 + c = r < 0.5; + t = 0.5 - r; + a = c ? t : a; + s = c ? xodd : s; + + // r <= 0.25 + c = r <= 0.25; + a = c ? r : a; + e = c ? 1 : e; + + __CLC_GENTYPE sinval, cosval; + __clc_sincos_piby4(a * M_PI, 0.0, &sinval, &cosval); + __CLC_LONGN jr = s ^ __CLC_AS_LONGN(e != 0 ? cosval : sinval); + + ir = absx < 0x1.0p+52 ? jr : ir; + + return __CLC_AS_GENTYPE(ir); +} + +#elif __CLC_FPSIZE == 16 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cospi(__CLC_GENTYPE x) { + return __CLC_CONVERT_GENTYPE(__clc_cospi(__CLC_CONVERT_FLOATN(x))); +} + +#endif diff --git a/libclc/clc/lib/generic/math/clc_native_divide.cl b/libclc/clc/lib/generic/math/clc_native_divide.cl new file mode 100644 index 0000000000000..005089b1ba15d --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_native_divide.cl @@ -0,0 +1,14 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/internal/clc.h> + +#define __FLOAT_ONLY +#define __CLC_BODY <clc_native_divide.inc> + +#include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/math/clc_native_divide.inc b/libclc/clc/lib/generic/math/clc_native_divide.inc new file mode 100644 index 0000000000000..fdf1794812c5a --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_native_divide.inc @@ -0,0 +1,12 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_native_divide(__CLC_GENTYPE x, + __CLC_GENTYPE y) { + return x / y; +} diff --git a/libclc/generic/lib/math/native_divide.inc b/libclc/clc/lib/generic/math/clc_native_recip.cl similarity index 74% rename from libclc/generic/lib/math/native_divide.inc rename to libclc/clc/lib/generic/math/clc_native_recip.cl index b0c83d503b965..4377f10b1543f 100644 --- a/libclc/generic/lib/math/native_divide.inc +++ b/libclc/clc/lib/generic/math/clc_native_recip.cl @@ -6,6 +6,9 @@ // //===----------------------------------------------------------------------===// -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_divide(__CLC_GENTYPE x, __CLC_GENTYPE y) { - return x / y; -} +#include <clc/internal/clc.h> + +#define __FLOAT_ONLY +#define __CLC_BODY <clc_native_recip.inc> + +#include <clc/math/gentype.inc> diff --git a/libclc/generic/lib/math/native_recip.inc b/libclc/clc/lib/generic/math/clc_native_recip.inc similarity index 83% rename from libclc/generic/lib/math/native_recip.inc rename to libclc/clc/lib/generic/math/clc_native_recip.inc index 
d6652fc2d2c69..57eb35a9522f8 100644 --- a/libclc/generic/lib/math/native_recip.inc +++ b/libclc/clc/lib/generic/math/clc_native_recip.inc @@ -6,6 +6,6 @@ // //===----------------------------------------------------------------------===// -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_recip(__CLC_GENTYPE val) { +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_native_recip(__CLC_GENTYPE val) { return 1.0f / val; } diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers.cl b/libclc/clc/lib/generic/math/clc_sincos_helpers.cl index d8a7b10d8e868..24676d3c7711c 100644 --- a/libclc/clc/lib/generic/math/clc_sincos_helpers.cl +++ b/libclc/clc/lib/generic/math/clc_sincos_helpers.cl @@ -12,6 +12,7 @@ #include <clc/internal/clc.h> #include <clc/math/clc_fma.h> #include <clc/math/clc_mad.h> +#include <clc/math/clc_native_divide.h> #include <clc/math/clc_trunc.h> #include <clc/math/math.h> diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc index d9d2b81226b72..516a40c4672a9 100644 --- a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc +++ b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc @@ -74,6 +74,25 @@ _CLC_DEF _CLC_OVERLOAD __CLC_FLOATN __clc_cosf_piby4(__CLC_FLOATN x, return ret; } +_CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_tanf_piby4(__CLC_FLOATN x, + __CLC_INTN regn) { + // Core Remez [1,2] approximation to tan(x) on the interval [0,pi/4]. + __CLC_FLOATN r = x * x; + + __CLC_FLOATN a = + __clc_mad(r, -0.0172032480471481694693109f, 0.385296071263995406715129f); + + __CLC_FLOATN b = __clc_mad( + r, + __clc_mad(r, 0.01844239256901656082986661f, -0.51396505478854532132342f), + 1.15588821434688393452299f); + + __CLC_FLOATN t = __clc_mad(x * r, __clc_native_divide(a, b), x); + __CLC_FLOATN tr = -MATH_RECIP(t); + + return regn & 1 ? 
tr : t; +} + _CLC_DEF _CLC_OVERLOAD void __clc_fullMulS(private __CLC_FLOATN *hi, private __CLC_FLOATN *lo, __CLC_FLOATN a, __CLC_FLOATN b, diff --git a/libclc/clc/lib/generic/math/clc_sinpi.cl b/libclc/clc/lib/generic/math/clc_sinpi.cl new file mode 100644 index 0000000000000..6cff247707845 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_sinpi.cl @@ -0,0 +1,18 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/clc_convert.h> +#include <clc/float/definitions.h> +#include <clc/internal/clc.h> +#include <clc/math/clc_fabs.h> +#include <clc/math/clc_sincos_helpers.h> +#include <clc/math/clc_sincos_piby4.h> +#include <clc/math/math.h> + +#define __CLC_BODY <clc_sinpi.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/math/clc_sinpi.inc b/libclc/clc/lib/generic/math/clc_sinpi.inc new file mode 100644 index 0000000000000..264609aeaca45 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_sinpi.inc @@ -0,0 +1,114 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if __CLC_FPSIZE == 32 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sinpi(__CLC_GENTYPE x) { + __CLC_INTN ix = __CLC_AS_INTN(x); + __CLC_INTN xsgn = ix & (__CLC_INTN)0x80000000; + ix ^= xsgn; + __CLC_GENTYPE absx = __clc_fabs(x); + __CLC_INTN iax = __CLC_CONVERT_INTN(absx); + __CLC_GENTYPE r = absx - __CLC_CONVERT_GENTYPE(iax); + __CLC_INTN xodd = + xsgn ^ ((iax & 0x1) != 0 ? (__CLC_INTN)0x80000000 : (__CLC_INTN)0); + + // Initialize with return for +-Inf and NaN + __CLC_INTN ir = QNANBITPATT_SP32; + + // 2^23 <= |x| < Inf, the result is always integer + ir = ix < PINFBITPATT_SP32 ? xsgn : ir; + + // 0x1.0p-7 <= |x| < 2^23, result depends on which 0.25 interval + + // r < 1.0 + __CLC_GENTYPE a = 1.0f - r; + __CLC_INTN e = 0; + + // r <= 0.75 + __CLC_INTN c = r <= 0.75f; + a = c ? r - 0.5f : a; + e = c ? 1 : e; + + // r < 0.5 + c = r < 0.5f; + a = c ? 0.5f - r : a; + + // 0 < r <= 0.25 + c = r <= 0.25f; + a = c ? r : a; + e = c ? 0 : e; + + __CLC_GENTYPE sinval, cosval; + __clc_sincos_piby4(a * M_PI_F, &sinval, &cosval); + __CLC_INTN jr = xodd ^ __CLC_AS_INTN(e != 0 ? cosval : sinval); + + ir = ix < 0x4b000000 ? jr : ir; + + return __CLC_AS_GENTYPE(ir); +} + +#elif __CLC_FPSIZE == 64 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sinpi(__CLC_GENTYPE x) { + __CLC_LONGN ix = __CLC_AS_LONGN(x); + __CLC_LONGN xsgn = ix & (__CLC_LONGN)0x8000000000000000L; + ix ^= xsgn; + __CLC_GENTYPE absx = __clc_fabs(x); + __CLC_LONGN iax = __CLC_CONVERT_LONGN(absx); + __CLC_GENTYPE r = absx - __CLC_CONVERT_GENTYPE(iax); + __CLC_LONGN xodd = + xsgn ^ + ((iax & 0x1L) != 0 ? (__CLC_LONGN)0x8000000000000000L : (__CLC_LONGN)0L); + + // Initialize with return for +-Inf and NaN + __CLC_LONGN ir = QNANBITPATT_DP64; + + // 2^52 <= |x| < Inf, the result is always integer + ir = ix < PINFBITPATT_DP64 ? 
xsgn : ir; + + // 0x1.0p-7 <= |x| < 2^52, result depends on which 0.25 interval + + // r < 1.0 + __CLC_GENTYPE a = 1.0 - r; + __CLC_LONGN e = 0; + + // r <= 0.75 + __CLC_LONGN c = r <= 0.75; + __CLC_GENTYPE t = r - 0.5; + a = c ? t : a; + e = c ? 1 : e; + + // r < 0.5 + c = r < 0.5; + t = 0.5 - r; + a = c ? t : a; + + // r <= 0.25 + c = r <= 0.25; + a = c ? r : a; + e = c ? 0 : e; + + __CLC_GENTYPE api = a * M_PI; + + __CLC_GENTYPE sinval, cosval; + __clc_sincos_piby4(api, 0.0, &sinval, &cosval); + __CLC_LONGN jr = xodd ^ __CLC_AS_LONGN(e != 0 ? cosval : sinval); + + ir = absx < 0x1.0p+52 ? jr : ir; + + return __CLC_AS_GENTYPE(ir); +} + +#elif __CLC_FPSIZE == 16 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sinpi(__CLC_GENTYPE x) { + return __CLC_CONVERT_GENTYPE(__clc_sinpi(__CLC_CONVERT_FLOATN(x))); +} + +#endif diff --git a/libclc/clc/lib/generic/math/clc_tanpi.cl b/libclc/clc/lib/generic/math/clc_tanpi.cl new file mode 100644 index 0000000000000..f1265892d107b --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_tanpi.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/clc_convert.h> +#include <clc/float/definitions.h> +#include <clc/internal/clc.h> +#include <clc/math/clc_fabs.h> +#include <clc/math/clc_native_recip.h> +#include <clc/math/clc_sincos_helpers.h> +#include <clc/math/clc_sincos_piby4.h> +#include <clc/math/math.h> + +#define __CLC_BODY <clc_tanpi.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/math/clc_tanpi.inc b/libclc/clc/lib/generic/math/clc_tanpi.inc new file mode 100644 index 0000000000000..3a2f5dcf7b1ee --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_tanpi.inc @@ -0,0 +1,132 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if __CLC_FPSIZE == 32 + +_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_tanpi(__CLC_GENTYPE x) { + __CLC_INTN ix = __CLC_AS_INTN(x); + __CLC_INTN xsgn = ix & (__CLC_INTN)SIGNBIT_SP32; + __CLC_INTN xnsgn = xsgn ^ (__CLC_INTN)SIGNBIT_SP32; + ix ^= xsgn; + __CLC_GENTYPE absx = __clc_fabs(x); + __CLC_INTN iax = __CLC_CONVERT_INTN(absx); + __CLC_GENTYPE r = absx - __CLC_CONVERT_GENTYPE(iax); + __CLC_INTN xodd = xsgn ^ __CLC_AS_INTN((iax & 0x1) != 0 ? SIGNBIT_SP32 : 0); + + // Initialize with return for +-Inf and NaN + __CLC_INTN ir = QNANBITPATT_SP32; + + // 2^24 <= |x| < Inf, the result is always even integer + ir = ix < PINFBITPATT_SP32 ? xsgn : ir; + + // 2^23 <= |x| < 2^24, the result is always integer + ir = ix < 0x4b800000 ? 
xodd : ir; + + // 0x1.0p-7 <= |x| < 2^23, result depends on which 0.25 interval + + // r < 1.0 + __CLC_GENTYPE a = 1.0f - r; + __CLC_INTN e = 0; + __CLC_INTN s = xnsgn; + + // r <= 0.75 + __CLC_INTN c = r <= 0.75f; + a = c ? r - 0.5f : a; + e = c ? 1 : e; + s = c ? xsgn : s; + + // r < 0.5 + c = r < 0.5f; + a = c ? 0.5f - r : a; + s = c ? xnsgn : s; + + // 0 < r <= 0.25 + c = r <= 0.25f; + a = c ? r : a; + e = c ? 0 : e; + s = c ? xsgn : s; + + __CLC_GENTYPE t = __clc_tanf_piby4(a * M_PI_F, 0); + __CLC_GENTYPE tr = -__clc_native_recip(t); + __CLC_INTN jr = s ^ __CLC_AS_INTN(e != 0 ? tr : t); + + jr = r == 0.5f ? xodd | 0x7f800000 : jr; + + ir = ix < 0x4b000000 ? jr : ir; + + return __CLC_AS_GENTYPE(ir); +} + +#elif __CLC_FPSIZE == 64 + +_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_tanpi(__CLC_GENTYPE x) { + __CLC_LONGN ix = __CLC_AS_LONGN(x); + __CLC_LONGN xsgn = ix & (__CLC_LONGN)0x8000000000000000L; + __CLC_LONGN xnsgn = xsgn ^ (__CLC_LONGN)0x8000000000000000L; + ix ^= xsgn; + __CLC_GENTYPE absx = __clc_fabs(x); + __CLC_LONGN iax = __CLC_CONVERT_LONGN(absx); + __CLC_GENTYPE r = absx - __CLC_CONVERT_GENTYPE(iax); + __CLC_LONGN xodd = + xsgn ^ __CLC_AS_LONGN((iax & 0x1) != 0 ? 0x8000000000000000L : 0L); + + // Initialize with return for +-Inf and NaN + __CLC_LONGN ir = QNANBITPATT_DP64; + + // 2^53 <= |x| < Inf, the result is always even integer + ir = ix < PINFBITPATT_DP64 ? xsgn : ir; + + // 2^52 <= |x| < 2^53, the result is always integer + ir = ix < 0x4340000000000000L ? xodd : ir; + + // 0x1.0p-14 <= |x| < 2^53, result depends on which 0.25 interval + + // r < 1.0 + __CLC_GENTYPE a = 1.0 - r; + __CLC_LONGN e = 0; + __CLC_LONGN s = xnsgn; + + // r <= 0.75 + __CLC_LONGN c = r <= 0.75; + __CLC_GENTYPE t = r - 0.5; + a = c ? t : a; + e = c ? 1 : e; + s = c ? xsgn : s; + + // r < 0.5 + c = r < 0.5; + t = 0.5 - r; + a = c ? t : a; + s = c ? xnsgn : s; + + // r <= 0.25 + c = r <= 0.25; + a = c ? r : a; + e = c ? 0 : e; + s = c ? 
xsgn : s; + + __CLC_GENTYPE api = a * M_PI; + __CLC_GENTYPE lo, hi; + __clc_tan_piby4(api, 0.0, &lo, &hi); + __CLC_LONGN jr = s ^ __CLC_AS_LONGN(e != 0 ? hi : lo); + + __CLC_LONGN si = xodd | 0x7ff0000000000000L; + jr = r == 0.5 ? si : jr; + + ir = ix < 0x4330000000000000L ? jr : ir; + + return __CLC_AS_GENTYPE(ir); +} + +#elif __CLC_FPSIZE == 16 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_tanpi(__CLC_GENTYPE x) { + return __CLC_CONVERT_GENTYPE(__clc_tanpi(__CLC_CONVERT_FLOATN(x))); +} + +#endif diff --git a/libclc/clspv/lib/SOURCES b/libclc/clspv/lib/SOURCES index 1c86fb0fbc8e8..0ef09d275243b 100644 --- a/libclc/clspv/lib/SOURCES +++ b/libclc/clspv/lib/SOURCES @@ -18,7 +18,6 @@ subnormal_config.cl ../../generic/lib/math/cbrt.cl ../../generic/lib/math/clc_exp10.cl ../../generic/lib/math/clc_tan.cl -../../generic/lib/math/clc_tanpi.cl ../../generic/lib/math/cos.cl ../../generic/lib/math/cosh.cl ../../generic/lib/math/cospi.cl diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES index a93444af0c954..9b5bbc5d9b53c 100644 --- a/libclc/generic/lib/SOURCES +++ b/libclc/generic/lib/SOURCES @@ -174,7 +174,6 @@ math/sqrt.cl math/clc_tan.cl math/tan.cl math/tanh.cl -math/clc_tanpi.cl math/tanpi.cl math/tgamma.cl math/trunc.cl diff --git a/libclc/generic/lib/math/clc_tan.cl b/libclc/generic/lib/math/clc_tan.cl index eb02879339307..7e28e9ffed3b6 100644 --- a/libclc/generic/lib/math/clc_tan.cl +++ b/libclc/generic/lib/math/clc_tan.cl @@ -35,7 +35,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_tan(float x) { _CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_tan, float); #ifdef cl_khr_fp64 -#include "sincosD_piby4.h" +#include <clc/math/clc_sincos_piby4.h> _CLC_DEF _CLC_OVERLOAD double __clc_tan(double x) { double y = __clc_fabs(x); @@ -48,9 +48,10 @@ _CLC_DEF _CLC_OVERLOAD double __clc_tan(double x) { else __clc_remainder_piby2_large(y, &r, &rr, &regn); - double2 tt = __clc_tan_piby4(r, rr); + double lead, tail; + __clc_tan_piby4(r, rr, &lead, &tail); - int2 t = 
as_int2(regn & 1 ? tt.y : tt.x); + int2 t = as_int2(regn & 1 ? tail : lead); t.hi ^= (x < 0.0) << 31; return __clc_isnan(x) || __clc_isinf(x) ? as_double(QNANBITPATT_DP64) diff --git a/libclc/generic/lib/math/clc_tanpi.cl b/libclc/generic/lib/math/clc_tanpi.cl deleted file mode 100644 index 533db5e4d1877..0000000000000 --- a/libclc/generic/lib/math/clc_tanpi.cl +++ /dev/null @@ -1,132 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "sincos_helpers.h" -#include <clc/clc.h> -#include <clc/clcmacro.h> -#include <clc/math/math.h> -#include <clc/math/tables.h> - -_CLC_DEF _CLC_OVERLOAD float __clc_tanpi(float x) -{ - int ix = as_int(x); - int xsgn = ix & 0x80000000; - int xnsgn = xsgn ^ 0x80000000; - ix ^= xsgn; - float ax = as_float(ix); - int iax = (int)ax; - float r = ax - iax; - int xodd = xsgn ^ (iax & 0x1 ? 0x80000000 : 0); - - // Initialize with return for +-Inf and NaN - int ir = 0x7fc00000; - - // 2^24 <= |x| < Inf, the result is always even integer - ir = ix < 0x7f800000 ? xsgn : ir; - - // 2^23 <= |x| < 2^24, the result is always integer - ir = ix < 0x4b800000 ? xodd : ir; - - // 0x1.0p-7 <= |x| < 2^23, result depends on which 0.25 interval - - // r < 1.0 - float a = 1.0f - r; - int e = 0; - int s = xnsgn; - - // r <= 0.75 - int c = r <= 0.75f; - a = c ? r - 0.5f : a; - e = c ? 1 : e; - s = c ? xsgn : s; - - // r < 0.5 - c = r < 0.5f; - a = c ? 0.5f - r : a; - s = c ? xnsgn : s; - - // 0 < r <= 0.25 - c = r <= 0.25f; - a = c ? r : a; - e = c ? 0 : e; - s = c ? xsgn : s; - - float t = __clc_tanf_piby4(a * M_PI_F, 0); - float tr = -native_recip(t); - int jr = s ^ as_int(e ? tr : t); - - jr = r == 0.5f ? 
xodd | 0x7f800000 : jr; - - ir = ix < 0x4b000000 ? jr : ir; - - return as_float(ir); -} -_CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_tanpi, float); - -#ifdef cl_khr_fp64 -#include "sincosD_piby4.h" - -_CLC_DEF _CLC_OVERLOAD double __clc_tanpi(double x) -{ - long ix = as_long(x); - long xsgn = ix & 0x8000000000000000L; - long xnsgn = xsgn ^ 0x8000000000000000L; - ix ^= xsgn; - double ax = as_double(ix); - long iax = (long)ax; - double r = ax - iax; - long xodd = xsgn ^ (iax & 0x1 ? 0x8000000000000000L : 0L); - - // Initialize with return for +-Inf and NaN - long ir = 0x7ff8000000000000L; - - // 2^53 <= |x| < Inf, the result is always even integer - ir = ix < 0x7ff0000000000000L ? xsgn : ir; - - // 2^52 <= |x| < 2^53, the result is always integer - ir = ix < 0x4340000000000000L ? xodd : ir; - - // 0x1.0p-14 <= |x| < 2^53, result depends on which 0.25 interval - - // r < 1.0 - double a = 1.0 - r; - int e = 0; - long s = xnsgn; - - // r <= 0.75 - int c = r <= 0.75; - double t = r - 0.5; - a = c ? t : a; - e = c ? 1 : e; - s = c ? xsgn : s; - - // r < 0.5 - c = r < 0.5; - t = 0.5 - r; - a = c ? t : a; - s = c ? xnsgn : s; - - // r <= 0.25 - c = r <= 0.25; - a = c ? r : a; - e = c ? 0 : e; - s = c ? xsgn : s; - - double api = a * M_PI; - double2 tt = __clc_tan_piby4(api, 0.0); - long jr = s ^ as_long(e ? tt.hi : tt.lo); - - long si = xodd | 0x7ff0000000000000L; - jr = r == 0.5 ? si : jr; - - ir = ix < 0x4330000000000000L ? 
jr : ir; - - return as_double(ir); -} -_CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_tanpi, double); -#endif diff --git a/libclc/generic/lib/math/cospi.cl b/libclc/generic/lib/math/cospi.cl index c8d4148de700f..f78935664c65b 100644 --- a/libclc/generic/lib/math/cospi.cl +++ b/libclc/generic/lib/math/cospi.cl @@ -7,124 +7,9 @@ //===----------------------------------------------------------------------===// #include <clc/clc.h> -#include <clc/clcmacro.h> -#include <clc/math/math.h> +#include <clc/math/clc_cospi.h> -#include "sincos_helpers.h" -#include "sincospiF_piby4.h" -#ifdef cl_khr_fp64 -#include "sincosD_piby4.h" -#endif +#define FUNCTION cospi +#define __CLC_BODY <clc/shared/unary_def.inc> -_CLC_OVERLOAD _CLC_DEF float cospi(float x) -{ - int ix = as_int(x) & 0x7fffffff; - float ax = as_float(ix); - int iax = (int)ax; - float r = ax - iax; - int xodd = iax & 0x1 ? 0x80000000 : 0; - - // Initialize with return for +-Inf and NaN - int ir = 0x7fc00000; - - // 2^24 <= |x| < Inf, the result is always even integer - ir = ix < 0x7f800000 ? 0x3f800000 : ir; - - // 2^23 <= |x| < 2^24, the result is always integer - ir = ix < 0x4b800000 ? xodd | 0x3f800000 : ir; - - // 0x1.0p-7 <= |x| < 2^23, result depends on which 0.25 interval - - // r < 1.0 - float a = 1.0f - r; - int e = 1; - int s = xodd ^ 0x80000000; - - // r <= 0.75 - int c = r <= 0.75f; - a = c ? r - 0.5f : a; - e = c ? 0 : e; - - // r < 0.5 - c = r < 0.5f; - a = c ? 0.5f - r : a; - s = c ? xodd : s; - - // r <= 0.25 - c = r <= 0.25f; - a = c ? r : a; - e = c ? 1 : e; - - float2 t = __libclc__sincosf_piby4(a * M_PI_F); - int jr = s ^ as_int(e ? t.hi : t.lo); - - ir = ix < 0x4b000000 ? 
jr : ir; - - return as_float(ir); -} - - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, cospi, float); - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double cospi(double x) { - - long ix = as_long(x) & 0x7fffffffffffffffL; - double ax = as_double(ix); - long iax = (long)ax; - double r = ax - (double)iax; - long xodd = iax & 0x1L ? 0x8000000000000000L : 0L; - - // Initialize with return for +-Inf and NaN - long ir = 0x7ff8000000000000L; - - // 2^53 <= |x| < Inf, the result is always even integer - ir = ix < 0x7ff0000000000000 ? 0x3ff0000000000000L : ir; - - // 2^52 <= |x| < 2^53, the result is always integer - ir = ax < 0x1.0p+53 ? xodd | 0x3ff0000000000000L : ir; - - // 0x1.0p-7 <= |x| < 2^52, result depends on which 0.25 interval - - // r < 1.0 - double a = 1.0 - r; - int e = 1; - long s = xodd ^ 0x8000000000000000L; - - // r <= 0.75 - int c = r <= 0.75; - double t = r - 0.5; - a = c ? t : a; - e = c ? 0 : e; - - // r < 0.5 - c = r < 0.5; - t = 0.5 - r; - a = c ? t : a; - s = c ? xodd : s; - - // r <= 0.25 - c = r <= 0.25; - a = c ? r : a; - e = c ? 1 : e; - - double2 sc = __libclc__sincos_piby4(a * M_PI, 0.0); - long jr = s ^ as_long(e ? sc.hi : sc.lo); - - ir = ax < 0x1.0p+52 ? 
jr : ir; - - return as_double(ir); -} -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cospi, double); -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_UNARY_BUILTIN_FP16(cospi) - -#endif +#include <clc/math/gentype.inc> diff --git a/libclc/generic/lib/math/native_divide.cl b/libclc/generic/lib/math/native_divide.cl index a1e9008a90c94..a4d9b830b5519 100644 --- a/libclc/generic/lib/math/native_divide.cl +++ b/libclc/generic/lib/math/native_divide.cl @@ -7,7 +7,10 @@ //===----------------------------------------------------------------------===// #include <clc/clc.h> +#include <clc/math/clc_native_divide.h> -#define __CLC_BODY <native_divide.inc> #define __FLOAT_ONLY +#define FUNCTION native_divide +#define __CLC_BODY <clc/shared/binary_def.inc> + #include <clc/math/gentype.inc> diff --git a/libclc/generic/lib/math/native_recip.cl b/libclc/generic/lib/math/native_recip.cl index b43248e6aeae6..3c844495046f1 100644 --- a/libclc/generic/lib/math/native_recip.cl +++ b/libclc/generic/lib/math/native_recip.cl @@ -7,7 +7,10 @@ //===----------------------------------------------------------------------===// #include <clc/clc.h> +#include <clc/math/clc_native_recip.h> -#define __CLC_BODY <native_recip.inc> #define __FLOAT_ONLY +#define FUNCTION native_recip +#define __CLC_BODY <clc/shared/unary_def.inc> + #include <clc/math/gentype.inc> diff --git a/libclc/generic/lib/math/sincosD_piby4.h b/libclc/generic/lib/math/sincosD_piby4.h deleted file mode 100644 index cce3d1554583f..0000000000000 --- a/libclc/generic/lib/math/sincosD_piby4.h +++ /dev/null @@ -1,119 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_INLINE double2 -__libclc__sincos_piby4(double x, double xx) -{ - // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ... - // = x * (1 - x^2/3! + x^4/5! - x^6/7! ... - // = x * f(w) - // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ... - // We use a minimax approximation of (f(w) - 1) / w - // because this produces an expansion in even powers of x. - // If xx (the tail of x) is non-zero, we add a correction - // term g(x,xx) = (1-x*x/2)*xx to the result, where g(x,xx) - // is an approximation to cos(x)*sin(xx) valid because - // xx is tiny relative to x. - - // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ... - // = f(w) - // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ... - // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w) - // because this produces an expansion in even powers of x. - // If xx (the tail of x) is non-zero, we subtract a correction - // term g(x,xx) = x*xx to the result, where g(x,xx) - // is an approximation to sin(x)*sin(xx) valid because - // xx is tiny relative to x. 
- - const double sc1 = -0.166666666666666646259241729; - const double sc2 = 0.833333333333095043065222816e-2; - const double sc3 = -0.19841269836761125688538679e-3; - const double sc4 = 0.275573161037288022676895908448e-5; - const double sc5 = -0.25051132068021699772257377197e-7; - const double sc6 = 0.159181443044859136852668200e-9; - - const double cc1 = 0.41666666666666665390037e-1; - const double cc2 = -0.13888888888887398280412e-2; - const double cc3 = 0.248015872987670414957399e-4; - const double cc4 = -0.275573172723441909470836e-6; - const double cc5 = 0.208761463822329611076335e-8; - const double cc6 = -0.113826398067944859590880e-10; - - double x2 = x * x; - double x3 = x2 * x; - double r = 0.5 * x2; - double t = 1.0 - r; - - double sp = fma(fma(fma(fma(sc6, x2, sc5), x2, sc4), x2, sc3), x2, sc2); - - double cp = t + fma(fma(fma(fma(fma(fma(cc6, x2, cc5), x2, cc4), x2, cc3), x2, cc2), x2, cc1), - x2*x2, fma(x, xx, (1.0 - t) - r)); - - double2 ret; - ret.lo = x - fma(-x3, sc1, fma(fma(-x3, sp, 0.5*xx), x2, -xx)); - ret.hi = cp; - - return ret; -} - -_CLC_INLINE double2 -__clc_tan_piby4(double x, double xx) -{ - const double piby4_lead = 7.85398163397448278999e-01; // 0x3fe921fb54442d18 - const double piby4_tail = 3.06161699786838240164e-17; // 0x3c81a62633145c06 - - // In order to maintain relative precision transform using the identity: - // tan(pi/4-x) = (1-tan(x))/(1+tan(x)) for arguments close to pi/4. - // Similarly use tan(x-pi/4) = (tan(x)-1)/(tan(x)+1) close to -pi/4. - - int ca = x > 0.68; - int cb = x < -0.68; - double transform = ca ? 1.0 : 0.0; - transform = cb ? -1.0 : transform; - - double tx = fma(-transform, x, piby4_lead) + fma(-transform, xx, piby4_tail); - int c = ca | cb; - x = c ? tx : x; - xx = c ? 0.0 : xx; - - // Core Remez [2,3] approximation to tan(x+xx) on the interval [0,0.68]. 
- double t1 = x; - double r = fma(2.0, x*xx, x*x); - - double a = fma(r, - fma(r, 0.224044448537022097264602535574e-3, -0.229345080057565662883358588111e-1), - 0.372379159759792203640806338901e0); - - double b = fma(r, - fma(r, - fma(r, -0.232371494088563558304549252913e-3, 0.260656620398645407524064091208e-1), - -0.515658515729031149329237816945e0), - 0.111713747927937668539901657944e1); - - double t2 = fma(MATH_DIVIDE(a, b), x*r, xx); - - double tp = t1 + t2; - - // Compute -1.0/(t1 + t2) accurately - double z1 = as_double(as_long(tp) & 0xffffffff00000000L); - double z2 = t2 - (z1 - t1); - double trec = -MATH_RECIP(tp); - double trec_top = as_double(as_long(trec) & 0xffffffff00000000L); - - double tpr = fma(fma(trec_top, z2, fma(trec_top, z1, 1.0)), trec, trec_top); - - double tpt = transform * (1.0 - MATH_DIVIDE(2.0*tp, 1.0 + tp)); - double tptr = transform * (MATH_DIVIDE(2.0*tp, tp - 1.0) - 1.0); - - double2 ret; - ret.lo = c ? tpt : tp; - ret.hi = c ? tptr : tpr; - return ret; -} diff --git a/libclc/generic/lib/math/sincos_helpers.cl b/libclc/generic/lib/math/sincos_helpers.cl index 32ab5af4ca90c..651cd11ccf016 100644 --- a/libclc/generic/lib/math/sincos_helpers.cl +++ b/libclc/generic/lib/math/sincos_helpers.cl @@ -17,31 +17,13 @@ #include <clc/math/tables.h> #include <clc/shared/clc_max.h> -#define bytealign(src0, src1, src2) \ - ((uint)(((((long)(src0)) << 32) | (long)(src1)) >> (((src2) & 3) * 8))) - -_CLC_DEF float __clc_tanf_piby4(float x, int regn) { - // Core Remez [1,2] approximation to tan(x) on the interval [0,pi/4]. - float r = x * x; - - float a = - __clc_mad(r, -0.0172032480471481694693109f, 0.385296071263995406715129f); - - float b = __clc_mad( - r, - __clc_mad(r, 0.01844239256901656082986661f, -0.51396505478854532132342f), - 1.15588821434688393452299f); - - float t = __clc_mad(x * r, native_divide(a, b), x); - float tr = -MATH_RECIP(t); - - return regn & 1 ? 
tr : t; -} - #ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64 : enable +#define bytealign(src0, src1, src2) \ + ((uint)(((((long)(src0)) << 32) | (long)(src1)) >> (((src2) & 3) * 8))) + // Reduction for medium sized arguments _CLC_DEF void __clc_remainder_piby2_medium(double x, private double *r, private double *rr, diff --git a/libclc/generic/lib/math/sincos_helpers.h b/libclc/generic/lib/math/sincos_helpers.h index c94784081cd64..11cb93f34850d 100644 --- a/libclc/generic/lib/math/sincos_helpers.h +++ b/libclc/generic/lib/math/sincos_helpers.h @@ -9,8 +9,6 @@ #include <clc/clcfunc.h> #include <clc/clctypes.h> -_CLC_DECL float __clc_tanf_piby4(float x, int y); - #ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64 : enable diff --git a/libclc/generic/lib/math/sincospiF_piby4.h b/libclc/generic/lib/math/sincospiF_piby4.h deleted file mode 100644 index 66596395fdd1e..0000000000000 --- a/libclc/generic/lib/math/sincospiF_piby4.h +++ /dev/null @@ -1,46 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include <clc/math/clc_mad.h> - -// Evaluate single precisions in and cos of value in interval [-pi/4, pi/4] -_CLC_INLINE float2 __libclc__sincosf_piby4(float x) { - // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ... - // = x * (1 - x^2/3! + x^4/5! - x^6/7! ... - // = x * f(w) - // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ... - // We use a minimax approximation of (f(w) - 1) / w - // because this produces an expansion in even powers of x. - - // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ... - // = f(w) - // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ... 
- // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w) - // because this produces an expansion in even powers of x. - - const float sc1 = -0.166666666638608441788607926e0F; - const float sc2 = 0.833333187633086262120839299e-2F; - const float sc3 = -0.198400874359527693921333720e-3F; - const float sc4 = 0.272500015145584081596826911e-5F; - - const float cc1 = 0.41666666664325175238031e-1F; - const float cc2 = -0.13888887673175665567647e-2F; - const float cc3 = 0.24800600878112441958053e-4F; - const float cc4 = -0.27301013343179832472841e-6F; - - float x2 = x * x; - - float2 ret; - ret.x = __clc_mad( - x * x2, __clc_mad(x2, __clc_mad(x2, __clc_mad(x2, sc4, sc3), sc2), sc1), - x); - ret.y = __clc_mad( - x2 * x2, __clc_mad(x2, __clc_mad(x2, __clc_mad(x2, cc4, cc3), cc2), cc1), - __clc_mad(x2, -0.5f, 1.0f)); - return ret; -} diff --git a/libclc/generic/lib/math/sinpi.cl b/libclc/generic/lib/math/sinpi.cl index 1261b7f9c5277..223e7db142117 100644 --- a/libclc/generic/lib/math/sinpi.cl +++ b/libclc/generic/lib/math/sinpi.cl @@ -7,119 +7,9 @@ //===----------------------------------------------------------------------===// #include <clc/clc.h> -#include <clc/clcmacro.h> -#include <clc/math/math.h> +#include <clc/math/clc_sinpi.h> -#include "sincospiF_piby4.h" -#ifdef cl_khr_fp64 -#include "sincosD_piby4.h" -#endif +#define FUNCTION sinpi +#define __CLC_BODY <clc/shared/unary_def.inc> -_CLC_OVERLOAD _CLC_DEF float sinpi(float x) -{ - int ix = as_int(x); - int xsgn = ix & 0x80000000; - ix ^= xsgn; - float ax = as_float(ix); - int iax = (int)ax; - float r = ax - iax; - int xodd = xsgn ^ (iax & 0x1 ? 0x80000000 : 0); - - // Initialize with return for +-Inf and NaN - int ir = 0x7fc00000; - - // 2^23 <= |x| < Inf, the result is always integer - ir = ix < 0x7f800000 ? xsgn : ir; - - // 0x1.0p-7 <= |x| < 2^23, result depends on which 0.25 interval - - // r < 1.0 - float a = 1.0f - r; - int e = 0; - - // r <= 0.75 - int c = r <= 0.75f; - a = c ? r - 0.5f : a; - e = c ? 
1 : e; - - // r < 0.5 - c = r < 0.5f; - a = c ? 0.5f - r : a; - - // 0 < r <= 0.25 - c = r <= 0.25f; - a = c ? r : a; - e = c ? 0 : e; - - float2 t = __libclc__sincosf_piby4(a * M_PI_F); - int jr = xodd ^ as_int(e ? t.hi : t.lo); - - ir = ix < 0x4b000000 ? jr : ir; - - return as_float(ir); -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, sinpi, float); - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double sinpi(double x) -{ - long ix = as_long(x); - long xsgn = ix & 0x8000000000000000L; - ix ^= xsgn; - double ax = as_double(ix); - long iax = (long)ax; - double r = ax - (double)iax; - long xodd = xsgn ^ (iax & 0x1L ? 0x8000000000000000L : 0L); - - // Initialize with return for +-Inf and NaN - long ir = 0x7ff8000000000000L; - - // 2^23 <= |x| < Inf, the result is always integer - ir = ix < 0x7ff0000000000000 ? xsgn : ir; - - // 0x1.0p-7 <= |x| < 2^23, result depends on which 0.25 interval - - // r < 1.0 - double a = 1.0 - r; - int e = 0; - - // r <= 0.75 - int c = r <= 0.75; - double t = r - 0.5; - a = c ? t : a; - e = c ? 1 : e; - - // r < 0.5 - c = r < 0.5; - t = 0.5 - r; - a = c ? t : a; - - // r <= 0.25 - c = r <= 0.25; - a = c ? r : a; - e = c ? 0 : e; - - double api = a * M_PI; - double2 sc = __libclc__sincos_piby4(api, 0.0); - long jr = xodd ^ as_long(e ? sc.hi : sc.lo); - - ir = ax < 0x1.0p+52 ? 
jr : ir; - - return as_double(ir); -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sinpi, double) - -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_UNARY_BUILTIN_FP16(sinpi) - -#endif +#include <clc/math/gentype.inc> diff --git a/libclc/generic/lib/math/tanpi.cl b/libclc/generic/lib/math/tanpi.cl index 0f0461b1742c1..8015d32adb38d 100644 --- a/libclc/generic/lib/math/tanpi.cl +++ b/libclc/generic/lib/math/tanpi.cl @@ -7,9 +7,9 @@ //===----------------------------------------------------------------------===// #include <clc/clc.h> +#include <clc/math/clc_tanpi.h> -#include <math/clc_tanpi.h> +#define FUNCTION tanpi +#define __CLC_BODY <clc/shared/unary_def.inc> -#define __CLC_FUNC tanpi -#define __CLC_BODY <clc_sw_unary.inc> #include <clc/math/gentype.inc> diff --git a/libclc/spirv/lib/SOURCES b/libclc/spirv/lib/SOURCES index ad9f44a6149ae..5358577ea1805 100644 --- a/libclc/spirv/lib/SOURCES +++ b/libclc/spirv/lib/SOURCES @@ -72,7 +72,6 @@ math/fma.cl ../../generic/lib/math/clc_tan.cl ../../generic/lib/math/tan.cl ../../generic/lib/math/tanh.cl -../../generic/lib/math/clc_tanpi.cl ../../generic/lib/math/tanpi.cl ../../generic/lib/math/tgamma.cl ../../generic/lib/shared/vload.cl _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits