https://github.com/frasercrmck updated https://github.com/llvm/llvm-project/pull/116786
>From 00f8980411e74a3071e6efbf553f78363ac92bb2 Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Tue, 17 Dec 2024 16:52:51 +0000 Subject: [PATCH] [libclc] Move several integer functions to CLC library This commit moves over the OpenCL clz, hadd, mad24, mad_hi, mul24, mul_hi, popcount, rhadd, and upsample builtins to the CLC library. There are no changes to any target's CLC libraries. The OpenCL mad_hi builtin wasn't previously publicly available from the CLC libraries, as it was hash-defined to mul_hi in the header files. That issue has been fixed, and mad_hi is now exposed. The custom AMD implementation/workaround for popcount has been removed as it was only valid for clang < 7. There are still two integer functions which haven't been moved over. The OpenCL add_sat, sub_sat, and mad_sat builtins require saturating conversion builtins which haven't yet been ported. --- libclc/amdgcn/lib/SOURCES | 1 - libclc/amdgcn/lib/integer/popcount.cl | 6 - libclc/amdgcn/lib/integer/popcount.inc | 17 --- libclc/clc/include/clc/integer/clc_clz.h | 12 ++ libclc/clc/include/clc/integer/clc_hadd.h | 12 ++ libclc/clc/include/clc/integer/clc_mad24.h | 12 ++ libclc/clc/include/clc/integer/clc_mad_hi.h | 8 + libclc/clc/include/clc/integer/clc_mul24.h | 12 ++ libclc/clc/include/clc/integer/clc_mul_hi.h | 12 ++ libclc/clc/include/clc/integer/clc_popcount.h | 12 ++ libclc/clc/include/clc/integer/clc_rhadd.h | 12 ++ libclc/clc/include/clc/integer/clc_upsample.h | 32 ++++ .../include/clc/integer/definitions.h | 7 +- libclc/clc/include/clc/integer/gentype24.inc | 137 ++++++++++++++++++ libclc/clc/include/clc/math/clc_mad.h | 2 +- .../clc/{math => shared}/ternary_decl.inc | 0 libclc/clc/include/clc/shared/ternary_def.inc | 10 ++ libclc/clc/include/clc/shared/unary_decl.inc | 1 + libclc/clc/include/clc/shared/unary_def.inc | 9 ++ libclc/clc/lib/clspv/SOURCES | 8 + libclc/clc/lib/generic/SOURCES | 8 + libclc/clc/lib/generic/integer/clc_clz.cl | 44 ++++++ 
libclc/clc/lib/generic/integer/clc_hadd.cl | 4 + libclc/clc/lib/generic/integer/clc_hadd.inc | 8 + libclc/clc/lib/generic/integer/clc_mad24.cl | 5 + libclc/clc/lib/generic/integer/clc_mad24.inc | 5 + libclc/clc/lib/generic/integer/clc_mul24.cl | 4 + .../lib/generic/integer/clc_mul24.inc} | 4 +- libclc/clc/lib/generic/integer/clc_mul_hi.cl | 132 +++++++++++++++++ .../clc/lib/generic/integer/clc_popcount.cl | 7 + libclc/clc/lib/generic/integer/clc_rhadd.cl | 4 + libclc/clc/lib/generic/integer/clc_rhadd.inc | 8 + .../clc/lib/generic/integer/clc_upsample.cl | 35 +++++ libclc/clc/lib/spirv/SOURCES | 8 + libclc/clc/lib/spirv64/SOURCES | 8 + libclc/generic/include/clc/integer/clz.h | 7 +- libclc/generic/include/clc/integer/clz.inc | 1 - libclc/generic/include/clc/integer/hadd.h | 7 +- libclc/generic/include/clc/integer/hadd.inc | 1 - libclc/generic/include/clc/integer/mad24.h | 9 +- libclc/generic/include/clc/integer/mad24.inc | 1 - libclc/generic/include/clc/integer/mad_hi.h | 8 +- libclc/generic/include/clc/integer/mul24.h | 8 +- libclc/generic/include/clc/integer/mul24.inc | 1 - libclc/generic/include/clc/integer/mul_hi.h | 7 +- libclc/generic/include/clc/integer/mul_hi.inc | 1 - libclc/generic/include/clc/integer/popcount.h | 6 +- libclc/generic/include/clc/integer/rhadd.h | 7 +- libclc/generic/include/clc/integer/rhadd.inc | 1 - libclc/generic/include/clc/integer/upsample.h | 33 ++--- libclc/generic/include/clc/math/fma.h | 2 +- libclc/generic/include/clc/math/mad.h | 2 +- libclc/generic/include/integer/popcount.h | 3 - .../generic/include/integer/unary_intrin.inc | 20 --- libclc/generic/include/math/clc_fma.h | 2 +- libclc/generic/lib/SOURCES | 1 + libclc/generic/lib/integer/clz.cl | 44 +----- libclc/generic/lib/integer/hadd.cl | 5 +- libclc/generic/lib/integer/hadd.inc | 6 - libclc/generic/lib/integer/mad24.cl | 7 +- libclc/generic/lib/integer/mad24.inc | 3 - libclc/generic/lib/integer/mad_hi.cl | 7 + libclc/generic/lib/integer/mul24.cl | 7 +- 
libclc/generic/lib/integer/mul_hi.cl | 110 +------------- libclc/generic/lib/integer/popcount.cl | 7 +- libclc/generic/lib/integer/rhadd.cl | 5 +- libclc/generic/lib/integer/rhadd.inc | 6 - libclc/generic/lib/integer/upsample.cl | 54 +++---- libclc/generic/lib/math/clc_fma.cl | 3 +- libclc/generic/lib/math/clc_fmod.cl | 5 +- libclc/generic/lib/math/clc_remainder.cl | 5 +- libclc/generic/lib/math/clc_remquo.cl | 5 +- libclc/generic/lib/math/sincos_helpers.cl | 10 +- 73 files changed, 726 insertions(+), 297 deletions(-) delete mode 100644 libclc/amdgcn/lib/integer/popcount.cl delete mode 100644 libclc/amdgcn/lib/integer/popcount.inc create mode 100644 libclc/clc/include/clc/integer/clc_clz.h create mode 100644 libclc/clc/include/clc/integer/clc_hadd.h create mode 100644 libclc/clc/include/clc/integer/clc_mad24.h create mode 100644 libclc/clc/include/clc/integer/clc_mad_hi.h create mode 100644 libclc/clc/include/clc/integer/clc_mul24.h create mode 100644 libclc/clc/include/clc/integer/clc_mul_hi.h create mode 100644 libclc/clc/include/clc/integer/clc_popcount.h create mode 100644 libclc/clc/include/clc/integer/clc_rhadd.h create mode 100644 libclc/clc/include/clc/integer/clc_upsample.h rename libclc/{generic => clc}/include/clc/integer/definitions.h (71%) create mode 100644 libclc/clc/include/clc/integer/gentype24.inc rename libclc/clc/include/clc/{math => shared}/ternary_decl.inc (100%) create mode 100644 libclc/clc/include/clc/shared/ternary_def.inc create mode 100644 libclc/clc/include/clc/shared/unary_decl.inc create mode 100644 libclc/clc/include/clc/shared/unary_def.inc create mode 100644 libclc/clc/lib/generic/integer/clc_clz.cl create mode 100644 libclc/clc/lib/generic/integer/clc_hadd.cl create mode 100644 libclc/clc/lib/generic/integer/clc_hadd.inc create mode 100644 libclc/clc/lib/generic/integer/clc_mad24.cl create mode 100644 libclc/clc/lib/generic/integer/clc_mad24.inc create mode 100644 libclc/clc/lib/generic/integer/clc_mul24.cl rename 
libclc/{generic/lib/integer/mul24.inc => clc/lib/generic/integer/clc_mul24.inc} (68%) create mode 100644 libclc/clc/lib/generic/integer/clc_mul_hi.cl create mode 100644 libclc/clc/lib/generic/integer/clc_popcount.cl create mode 100644 libclc/clc/lib/generic/integer/clc_rhadd.cl create mode 100644 libclc/clc/lib/generic/integer/clc_rhadd.inc create mode 100644 libclc/clc/lib/generic/integer/clc_upsample.cl delete mode 100644 libclc/generic/include/clc/integer/clz.inc delete mode 100644 libclc/generic/include/clc/integer/hadd.inc delete mode 100644 libclc/generic/include/clc/integer/mad24.inc delete mode 100644 libclc/generic/include/clc/integer/mul24.inc delete mode 100644 libclc/generic/include/clc/integer/mul_hi.inc delete mode 100644 libclc/generic/include/clc/integer/rhadd.inc delete mode 100644 libclc/generic/include/integer/popcount.h delete mode 100644 libclc/generic/include/integer/unary_intrin.inc delete mode 100644 libclc/generic/lib/integer/hadd.inc delete mode 100644 libclc/generic/lib/integer/mad24.inc create mode 100644 libclc/generic/lib/integer/mad_hi.cl delete mode 100644 libclc/generic/lib/integer/rhadd.inc diff --git a/libclc/amdgcn/lib/SOURCES b/libclc/amdgcn/lib/SOURCES index b235457f9ab7c3..4ea66385fe50ee 100644 --- a/libclc/amdgcn/lib/SOURCES +++ b/libclc/amdgcn/lib/SOURCES @@ -1,5 +1,4 @@ cl_khr_int64_extended_atomics/minmax_helpers.ll -integer/popcount.cl math/fmax.cl math/fmin.cl math/ldexp.cl diff --git a/libclc/amdgcn/lib/integer/popcount.cl b/libclc/amdgcn/lib/integer/popcount.cl deleted file mode 100644 index 3b493fbd146f01..00000000000000 --- a/libclc/amdgcn/lib/integer/popcount.cl +++ /dev/null @@ -1,6 +0,0 @@ -#include <clc/clc.h> -#include <clc/utils.h> -#include <integer/popcount.h> - -#define __CLC_BODY "popcount.inc" -#include <clc/integer/gentype.inc> diff --git a/libclc/amdgcn/lib/integer/popcount.inc b/libclc/amdgcn/lib/integer/popcount.inc deleted file mode 100644 index 402ddb768c6a6f..00000000000000 --- 
a/libclc/amdgcn/lib/integer/popcount.inc +++ /dev/null @@ -1,17 +0,0 @@ -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE popcount(__CLC_GENTYPE x) { -/* LLVM-4+ implements i16 ops for VI+ ASICs. However, ctpop implementation - * is missing until r326535. Therefore we have to convert sub i32 types to uint - * as a workaround. */ -#if __clang_major__ < 7 && __clang_major__ > 3 && __CLC_GENSIZE < 32 - /* Prevent sign extension on uint conversion */ - const __CLC_U_GENTYPE y = __CLC_XCONCAT(as_, __CLC_U_GENTYPE)(x); - /* Convert to uintX */ - const __CLC_XCONCAT(uint, __CLC_VECSIZE) z = __CLC_XCONCAT(convert_uint, __CLC_VECSIZE)(y); - /* Call popcount on uintX type */ - const __CLC_XCONCAT(uint, __CLC_VECSIZE) res = __clc_native_popcount(z); - /* Convert the result back to gentype. */ - return __CLC_XCONCAT(convert_, __CLC_GENTYPE)(res); -#else - return __clc_native_popcount(x); -#endif -} diff --git a/libclc/clc/include/clc/integer/clc_clz.h b/libclc/clc/include/clc/integer/clc_clz.h new file mode 100644 index 00000000000000..a642391e46960c --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_clz.h @@ -0,0 +1,12 @@ +#ifndef __CLC_INTEGER_CLC_CLZ_H__ +#define __CLC_INTEGER_CLC_CLZ_H__ + +#define __CLC_FUNCTION __clc_clz +#define __CLC_BODY <clc/shared/unary_decl.inc> + +#include <clc/integer/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_INTEGER_CLC_CLZ_H__ diff --git a/libclc/clc/include/clc/integer/clc_hadd.h b/libclc/clc/include/clc/integer/clc_hadd.h new file mode 100644 index 00000000000000..6693afbb8b8037 --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_hadd.h @@ -0,0 +1,12 @@ +#ifndef __CLC_INTEGER_CLC_HADD_H__ +#define __CLC_INTEGER_CLC_HADD_H__ + +#define __CLC_FUNCTION __clc_hadd +#define __CLC_BODY <clc/shared/binary_decl.inc> + +#include <clc/integer/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_INTEGER_CLC_HADD_H__ diff --git a/libclc/clc/include/clc/integer/clc_mad24.h 
b/libclc/clc/include/clc/integer/clc_mad24.h new file mode 100644 index 00000000000000..13d39d04d1a2a6 --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_mad24.h @@ -0,0 +1,12 @@ +#ifndef __CLC_INTEGER_CLC_MAD24_H__ +#define __CLC_INTEGER_CLC_MAD24_H__ + +#define __CLC_FUNCTION __clc_mad24 +#define __CLC_BODY <clc/shared/ternary_decl.inc> + +#include <clc/integer/gentype24.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_INTEGER_CLC_MAD24_H__ diff --git a/libclc/clc/include/clc/integer/clc_mad_hi.h b/libclc/clc/include/clc/integer/clc_mad_hi.h new file mode 100644 index 00000000000000..24a590df6027a8 --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_mad_hi.h @@ -0,0 +1,8 @@ +#ifndef __CLC_INTEGER_CLC_MAD_HI_H__ +#define __CLC_INTEGER_CLC_MAD_HI_H__ + +#include <clc/integer/clc_mul_hi.h> + +#define __clc_mad_hi(a, b, c) (__clc_mul_hi((a), (b)) + (c)) + +#endif // __CLC_INTEGER_CLC_MAD_HI_H__ diff --git a/libclc/clc/include/clc/integer/clc_mul24.h b/libclc/clc/include/clc/integer/clc_mul24.h new file mode 100644 index 00000000000000..acab4e9da9eaa7 --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_mul24.h @@ -0,0 +1,12 @@ +#ifndef __CLC_INTEGER_CLC_MUL24_H__ +#define __CLC_INTEGER_CLC_MUL24_H__ + +#define __CLC_FUNCTION __clc_mul24 +#define __CLC_BODY <clc/shared/binary_decl.inc> + +#include <clc/integer/gentype24.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_INTEGER_CLC_MUL24_H__ diff --git a/libclc/clc/include/clc/integer/clc_mul_hi.h b/libclc/clc/include/clc/integer/clc_mul_hi.h new file mode 100644 index 00000000000000..e395794066f96a --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_mul_hi.h @@ -0,0 +1,12 @@ +#ifndef __CLC_INTEGER_CLC_MUL_HI_H__ +#define __CLC_INTEGER_CLC_MUL_HI_H__ + +#define __CLC_FUNCTION __clc_mul_hi +#define __CLC_BODY <clc/shared/binary_decl.inc> + +#include <clc/integer/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_INTEGER_CLC_MUL_HI_H__ diff --git 
a/libclc/clc/include/clc/integer/clc_popcount.h b/libclc/clc/include/clc/integer/clc_popcount.h new file mode 100644 index 00000000000000..50f6d173c70b42 --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_popcount.h @@ -0,0 +1,12 @@ +#ifndef __CLC_INTEGER_CLC_POPCOUNT_H__ +#define __CLC_INTEGER_CLC_POPCOUNT_H__ + +#define __CLC_FUNCTION __clc_popcount +#define __CLC_BODY <clc/shared/unary_decl.inc> + +#include <clc/integer/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_INTEGER_CLC_POPCOUNT_H__ diff --git a/libclc/clc/include/clc/integer/clc_rhadd.h b/libclc/clc/include/clc/integer/clc_rhadd.h new file mode 100644 index 00000000000000..9f8d1f367ac053 --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_rhadd.h @@ -0,0 +1,12 @@ +#ifndef __CLC_INTEGER_CLC_RHADD_H__ +#define __CLC_INTEGER_CLC_RHADD_H__ + +#define __CLC_FUNCTION __clc_rhadd +#define __CLC_BODY <clc/shared/binary_decl.inc> + +#include <clc/integer/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_INTEGER_CLC_RHADD_H__ diff --git a/libclc/clc/include/clc/integer/clc_upsample.h b/libclc/clc/include/clc/integer/clc_upsample.h new file mode 100644 index 00000000000000..262e33af954889 --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_upsample.h @@ -0,0 +1,32 @@ +#ifndef __CLC_INTEGER_CLC_UPSAMPLE_H__ +#define __CLC_INTEGER_CLC_UPSAMPLE_H__ + +#include <clc/clcfunc.h> +#include <clc/clctypes.h> + +#define __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) \ + _CLC_OVERLOAD _CLC_DECL BGENTYPE __clc_upsample(GENTYPE hi, UGENTYPE lo); + +#define __CLC_UPSAMPLE_VEC(BGENTYPE, GENTYPE, UGENTYPE) \ + __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) \ + __CLC_UPSAMPLE_DECL(BGENTYPE##2, GENTYPE##2, UGENTYPE##2) \ + __CLC_UPSAMPLE_DECL(BGENTYPE##3, GENTYPE##3, UGENTYPE##3) \ + __CLC_UPSAMPLE_DECL(BGENTYPE##4, GENTYPE##4, UGENTYPE##4) \ + __CLC_UPSAMPLE_DECL(BGENTYPE##8, GENTYPE##8, UGENTYPE##8) \ + __CLC_UPSAMPLE_DECL(BGENTYPE##16, GENTYPE##16, 
UGENTYPE##16) + +#define __CLC_UPSAMPLE_TYPES() \ + __CLC_UPSAMPLE_VEC(short, char, uchar) \ + __CLC_UPSAMPLE_VEC(ushort, uchar, uchar) \ + __CLC_UPSAMPLE_VEC(int, short, ushort) \ + __CLC_UPSAMPLE_VEC(uint, ushort, ushort) \ + __CLC_UPSAMPLE_VEC(long, int, uint) \ + __CLC_UPSAMPLE_VEC(ulong, uint, uint) + +__CLC_UPSAMPLE_TYPES() + +#undef __CLC_UPSAMPLE_TYPES +#undef __CLC_UPSAMPLE_DECL +#undef __CLC_UPSAMPLE_VEC + +#endif // __CLC_INTEGER_CLC_UPSAMPLE_H__ diff --git a/libclc/generic/include/clc/integer/definitions.h b/libclc/clc/include/clc/integer/definitions.h similarity index 71% rename from libclc/generic/include/clc/integer/definitions.h rename to libclc/clc/include/clc/integer/definitions.h index 0079c30123db80..18a9e54dec75c6 100644 --- a/libclc/generic/include/clc/integer/definitions.h +++ b/libclc/clc/include/clc/integer/definitions.h @@ -1,7 +1,10 @@ +#ifndef __CLC_INTEGER_DEFINITIONS_H__ +#define __CLC_INTEGER_DEFINITIONS_H__ + #define CHAR_BIT 8 #define INT_MAX 2147483647 #define INT_MIN (-2147483647 - 1) -#define LONG_MAX 0x7fffffffffffffffL +#define LONG_MAX 0x7fffffffffffffffL #define LONG_MIN (-0x7fffffffffffffffL - 1) #define CHAR_MAX SCHAR_MAX #define CHAR_MIN SCHAR_MIN @@ -13,3 +16,5 @@ #define USHRT_MAX 65535 #define UINT_MAX 0xffffffff #define ULONG_MAX 0xffffffffffffffffUL + +#endif // __CLC_INTEGER_DEFINITIONS_H__ diff --git a/libclc/clc/include/clc/integer/gentype24.inc b/libclc/clc/include/clc/integer/gentype24.inc new file mode 100644 index 00000000000000..ad34c3818b1d60 --- /dev/null +++ b/libclc/clc/include/clc/integer/gentype24.inc @@ -0,0 +1,137 @@ +#include <clc/clcfunc.h> +#include <clc/clctypes.h> + +#define __CLC_GENSIZE 32 +#undef __CLC_SCALAR_GENTYPE +#define __CLC_SCALAR_GENTYPE int + +#define __CLC_GENTYPE int +#define __CLC_U_GENTYPE uint +#define __CLC_S_GENTYPE int +#define __CLC_SCALAR 1 +#define __CLC_VECSIZE +#include __CLC_BODY +#undef __CLC_VECSIZE +#undef __CLC_SCALAR +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE 
+#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE int2 +#define __CLC_U_GENTYPE uint2 +#define __CLC_S_GENTYPE int2 +#define __CLC_VECSIZE 2 +#include __CLC_BODY +#undef __CLC_VECSIZE +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE int3 +#define __CLC_U_GENTYPE uint3 +#define __CLC_S_GENTYPE int3 +#define __CLC_VECSIZE 3 +#include __CLC_BODY +#undef __CLC_VECSIZE +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE int4 +#define __CLC_U_GENTYPE uint4 +#define __CLC_S_GENTYPE int4 +#define __CLC_VECSIZE 4 +#include __CLC_BODY +#undef __CLC_VECSIZE +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE int8 +#define __CLC_U_GENTYPE uint8 +#define __CLC_S_GENTYPE int8 +#define __CLC_VECSIZE 8 +#include __CLC_BODY +#undef __CLC_VECSIZE +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE int16 +#define __CLC_U_GENTYPE uint16 +#define __CLC_S_GENTYPE int16 +#define __CLC_VECSIZE 16 +#include __CLC_BODY +#undef __CLC_VECSIZE +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#undef __CLC_SCALAR_GENTYPE +#define __CLC_SCALAR_GENTYPE uint + +#define __CLC_GENTYPE uint +#define __CLC_U_GENTYPE uint +#define __CLC_S_GENTYPE int +#define __CLC_SCALAR 1 +#define __CLC_VECSIZE +#include __CLC_BODY +#undef __CLC_VECSIZE +#undef __CLC_SCALAR +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE uint2 +#define __CLC_U_GENTYPE uint2 +#define __CLC_S_GENTYPE int2 +#define __CLC_VECSIZE 2 +#include __CLC_BODY +#undef __CLC_VECSIZE +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE uint3 +#define __CLC_U_GENTYPE uint3 +#define __CLC_S_GENTYPE int3 +#define __CLC_VECSIZE 3 +#include __CLC_BODY +#undef __CLC_VECSIZE +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE uint4 
+#define __CLC_U_GENTYPE uint4 +#define __CLC_S_GENTYPE int4 +#define __CLC_VECSIZE 4 +#include __CLC_BODY +#undef __CLC_VECSIZE +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE uint8 +#define __CLC_U_GENTYPE uint8 +#define __CLC_S_GENTYPE int8 +#define __CLC_VECSIZE 8 +#include __CLC_BODY +#undef __CLC_VECSIZE +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE uint16 +#define __CLC_U_GENTYPE uint16 +#define __CLC_S_GENTYPE int16 +#define __CLC_VECSIZE 16 +#include __CLC_BODY +#undef __CLC_VECSIZE +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#undef __CLC_GENSIZE +#undef __CLC_SCALAR_GENTYPE +#undef __CLC_BODY diff --git a/libclc/clc/include/clc/math/clc_mad.h b/libclc/clc/include/clc/math/clc_mad.h index 3eb718e87f3705..5427c0215688e8 100644 --- a/libclc/clc/include/clc/math/clc_mad.h +++ b/libclc/clc/include/clc/math/clc_mad.h @@ -1,7 +1,7 @@ #ifndef __CLC_MATH_CLC_MAD_H__ #define __CLC_MATH_CLC_MAD_H__ -#define __CLC_BODY <clc/math/ternary_decl.inc> +#define __CLC_BODY <clc/shared/ternary_decl.inc> #define __CLC_FUNCTION __clc_mad #include <clc/math/gentype.inc> diff --git a/libclc/clc/include/clc/math/ternary_decl.inc b/libclc/clc/include/clc/shared/ternary_decl.inc similarity index 100% rename from libclc/clc/include/clc/math/ternary_decl.inc rename to libclc/clc/include/clc/shared/ternary_decl.inc diff --git a/libclc/clc/include/clc/shared/ternary_def.inc b/libclc/clc/include/clc/shared/ternary_def.inc new file mode 100644 index 00000000000000..59528d825a1715 --- /dev/null +++ b/libclc/clc/include/clc/shared/ternary_def.inc @@ -0,0 +1,10 @@ +#include <clc/utils.h> + +#ifndef __CLC_FUNCTION +#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x) +#endif + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE a, __CLC_GENTYPE b, + __CLC_GENTYPE c) { + return __CLC_FUNCTION(FUNCTION)(a, b, c); +} diff --git 
a/libclc/clc/include/clc/shared/unary_decl.inc b/libclc/clc/include/clc/shared/unary_decl.inc new file mode 100644 index 00000000000000..9858d908da09fe --- /dev/null +++ b/libclc/clc/include/clc/shared/unary_decl.inc @@ -0,0 +1 @@ +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x); diff --git a/libclc/clc/include/clc/shared/unary_def.inc b/libclc/clc/include/clc/shared/unary_def.inc new file mode 100644 index 00000000000000..e0f34ad817e8b0 --- /dev/null +++ b/libclc/clc/include/clc/shared/unary_def.inc @@ -0,0 +1,9 @@ +#include <clc/utils.h> + +#ifndef __CLC_FUNCTION +#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x) +#endif + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE a) { + return __CLC_FUNCTION(FUNCTION)(a); +} diff --git a/libclc/clc/lib/clspv/SOURCES b/libclc/clc/lib/clspv/SOURCES index 6efa3c59b53e70..2581abe64f1443 100644 --- a/libclc/clc/lib/clspv/SOURCES +++ b/libclc/clc/lib/clspv/SOURCES @@ -1,5 +1,13 @@ ../generic/integer/clc_add_sat.cl +../generic/integer/clc_clz.cl +../generic/integer/clc_hadd.cl +../generic/integer/clc_mad24.cl +../generic/integer/clc_mul24.cl +../generic/integer/clc_mul_hi.cl +../generic/integer/clc_popcount.cl +../generic/integer/clc_rhadd.cl ../generic/integer/clc_sub_sat.cl +../generic/integer/clc_upsample.cl ../generic/math/clc_ceil.cl ../generic/math/clc_copysign.cl ../generic/math/clc_fabs.cl diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES index 1ef6636be90b62..2f4df168f70745 100644 --- a/libclc/clc/lib/generic/SOURCES +++ b/libclc/clc/lib/generic/SOURCES @@ -5,7 +5,15 @@ geometric/clc_dot.cl integer/clc_abs.cl integer/clc_abs_diff.cl integer/clc_add_sat.cl +integer/clc_clz.cl +integer/clc_hadd.cl +integer/clc_mad24.cl +integer/clc_mul24.cl +integer/clc_mul_hi.cl +integer/clc_popcount.cl +integer/clc_rhadd.cl integer/clc_sub_sat.cl +integer/clc_upsample.cl math/clc_ceil.cl math/clc_copysign.cl math/clc_fabs.cl diff --git 
a/libclc/clc/lib/generic/integer/clc_clz.cl b/libclc/clc/lib/generic/integer/clc_clz.cl new file mode 100644 index 00000000000000..592b65f262bd6b --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_clz.cl @@ -0,0 +1,44 @@ +#include <clc/clcmacro.h> +#include <clc/integer/clc_clz.h> +#include <clc/internal/clc.h> + +_CLC_OVERLOAD _CLC_DEF char __clc_clz(char x) { + return __clc_clz((ushort)(uchar)x) - 8; +} + +_CLC_OVERLOAD _CLC_DEF uchar __clc_clz(uchar x) { + return __clc_clz((ushort)x) - 8; +} + +_CLC_OVERLOAD _CLC_DEF short __clc_clz(short x) { + return x ? __builtin_clzs(x) : 16; +} + +_CLC_OVERLOAD _CLC_DEF ushort __clc_clz(ushort x) { + return x ? __builtin_clzs(x) : 16; +} + +_CLC_OVERLOAD _CLC_DEF int __clc_clz(int x) { + return x ? __builtin_clz(x) : 32; +} + +_CLC_OVERLOAD _CLC_DEF uint __clc_clz(uint x) { + return x ? __builtin_clz(x) : 32; +} + +_CLC_OVERLOAD _CLC_DEF long __clc_clz(long x) { + return x ? __builtin_clzl(x) : 64; +} + +_CLC_OVERLOAD _CLC_DEF ulong __clc_clz(ulong x) { + return x ? 
__builtin_clzl(x) : 64; +} + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, __clc_clz, char) +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, __clc_clz, uchar) +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, __clc_clz, short) +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, __clc_clz, ushort) +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, __clc_clz, int) +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, __clc_clz, uint) +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, __clc_clz, long) +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, __clc_clz, ulong) diff --git a/libclc/clc/lib/generic/integer/clc_hadd.cl b/libclc/clc/lib/generic/integer/clc_hadd.cl new file mode 100644 index 00000000000000..8e91d41a843aaa --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_hadd.cl @@ -0,0 +1,4 @@ +#include <clc/internal/clc.h> + +#define __CLC_BODY <clc_hadd.inc> +#include <clc/integer/gentype.inc> diff --git a/libclc/clc/lib/generic/integer/clc_hadd.inc b/libclc/clc/lib/generic/integer/clc_hadd.inc new file mode 100644 index 00000000000000..14d921599446b3 --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_hadd.inc @@ -0,0 +1,8 @@ +// hadd = (x+y)>>1 +// This can be simplified to x>>1 + y>>1 + (1 if both x and y have the 1s bit +// set) This saves us having to do any checks for overflow in the addition sum +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_hadd(__CLC_GENTYPE x, + __CLC_GENTYPE y) { + return (x >> (__CLC_GENTYPE)1) + (y >> (__CLC_GENTYPE)1) + + (x & y & (__CLC_GENTYPE)1); +} diff --git a/libclc/clc/lib/generic/integer/clc_mad24.cl b/libclc/clc/lib/generic/integer/clc_mad24.cl new file mode 100644 index 00000000000000..86c319cff6d245 --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_mad24.cl @@ -0,0 +1,5 @@ +#include <clc/internal/clc.h> +#include <clc/integer/clc_mul24.h> + +#define __CLC_BODY <clc_mad24.inc> +#include <clc/integer/gentype24.inc> diff --git a/libclc/clc/lib/generic/integer/clc_mad24.inc 
b/libclc/clc/lib/generic/integer/clc_mad24.inc new file mode 100644 index 00000000000000..61c8587d4f86fc --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_mad24.inc @@ -0,0 +1,5 @@ +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_mad24(__CLC_GENTYPE x, + __CLC_GENTYPE y, + __CLC_GENTYPE z) { + return __clc_mul24(x, y) + z; +} diff --git a/libclc/clc/lib/generic/integer/clc_mul24.cl b/libclc/clc/lib/generic/integer/clc_mul24.cl new file mode 100644 index 00000000000000..6513a896a8b1d2 --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_mul24.cl @@ -0,0 +1,4 @@ +#include <clc/internal/clc.h> + +#define __CLC_BODY <clc_mul24.inc> +#include <clc/integer/gentype24.inc> diff --git a/libclc/generic/lib/integer/mul24.inc b/libclc/clc/lib/generic/integer/clc_mul24.inc similarity index 68% rename from libclc/generic/lib/integer/mul24.inc rename to libclc/clc/lib/generic/integer/clc_mul24.inc index 95a2f1d6f31bab..d7e8091c98a314 100644 --- a/libclc/generic/lib/integer/mul24.inc +++ b/libclc/clc/lib/generic/integer/clc_mul24.inc @@ -1,10 +1,10 @@ - // We need to use shifts here in order to mantain the sign bit for signed // integers. The compiler should optimize this to (x & 0x00FFFFFF) for // unsigned integers. 
#define CONVERT_TO_24BIT(x) (((x) << 8) >> 8) -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mul24(__CLC_GENTYPE x, __CLC_GENTYPE y){ +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_mul24(__CLC_GENTYPE x, + __CLC_GENTYPE y) { return CONVERT_TO_24BIT(x) * CONVERT_TO_24BIT(y); } diff --git a/libclc/clc/lib/generic/integer/clc_mul_hi.cl b/libclc/clc/lib/generic/integer/clc_mul_hi.cl new file mode 100644 index 00000000000000..cf4acc5429cb45 --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_mul_hi.cl @@ -0,0 +1,132 @@ +#include <clc/integer/clc_hadd.h> +#include <clc/integer/definitions.h> +#include <clc/internal/clc.h> + +// TODO: Replace with __clc_convert_<type> when available +#define __CLC_CONVERT_TY(X, TY) __builtin_convertvector(X, TY) + +#define __CLC_MUL_HI_VEC_IMPL(BGENTYPE, GENTYPE, GENSIZE) \ + _CLC_OVERLOAD _CLC_DEF GENTYPE __clc_mul_hi(GENTYPE x, GENTYPE y) { \ + BGENTYPE large_x = __CLC_CONVERT_TY(x, BGENTYPE); \ + BGENTYPE large_y = __CLC_CONVERT_TY(y, BGENTYPE); \ + BGENTYPE large_mul_hi = (large_x * large_y) >> (BGENTYPE)GENSIZE; \ + return __CLC_CONVERT_TY(large_mul_hi, GENTYPE); \ + } + +// For all types EXCEPT long, which is implemented separately +#define __CLC_MUL_HI_IMPL(BGENTYPE, GENTYPE, GENSIZE) \ + _CLC_OVERLOAD _CLC_DEF GENTYPE __clc_mul_hi(GENTYPE x, GENTYPE y) { \ + return (GENTYPE)(((BGENTYPE)x * (BGENTYPE)y) >> GENSIZE); \ + } + +#define __CLC_MUL_HI_DEC_IMPL(BTYPE, TYPE, BITS) \ + __CLC_MUL_HI_IMPL(BTYPE, TYPE, BITS) \ + __CLC_MUL_HI_VEC_IMPL(BTYPE##2, TYPE##2, BITS) \ + __CLC_MUL_HI_VEC_IMPL(BTYPE##3, TYPE##3, BITS) \ + __CLC_MUL_HI_VEC_IMPL(BTYPE##4, TYPE##4, BITS) \ + __CLC_MUL_HI_VEC_IMPL(BTYPE##8, TYPE##8, BITS) \ + __CLC_MUL_HI_VEC_IMPL(BTYPE##16, TYPE##16, BITS) + +_CLC_OVERLOAD _CLC_DEF long __clc_mul_hi(long x, long y) { + long f, o, i; + ulong l; + + // Move the high/low halves of x/y into the lower 32-bits of variables so + // that we can multiply them without worrying about overflow. 
+ long x_hi = x >> 32; + long x_lo = x & UINT_MAX; + long y_hi = y >> 32; + long y_lo = y & UINT_MAX; + + // Multiply all of the components according to FOIL method + f = x_hi * y_hi; + o = x_hi * y_lo; + i = x_lo * y_hi; + l = x_lo * y_lo; + + // Now add the components back together in the following steps: + // F: doesn't need to be modified + // O/I: Need to be added together. + // L: Shift right by 32-bits, then add into the sum of O and I + // Once O/I/L are summed up, then shift the sum by 32-bits and add to F. + // + // We use hadd to give us a bit of extra precision for the intermediate sums + // but as a result, we shift by 31 bits instead of 32 + return (long)(f + (__clc_hadd(o, (i + (long)((ulong)l >> 32))) >> 31)); +} + +_CLC_OVERLOAD _CLC_DEF ulong __clc_mul_hi(ulong x, ulong y) { + ulong f, o, i; + ulong l; + + // Move the high/low halves of x/y into the lower 32-bits of variables so + // that we can multiply them without worrying about overflow. + ulong x_hi = x >> 32; + ulong x_lo = x & UINT_MAX; + ulong y_hi = y >> 32; + ulong y_lo = y & UINT_MAX; + + // Multiply all of the components according to FOIL method + f = x_hi * y_hi; + o = x_hi * y_lo; + i = x_lo * y_hi; + l = x_lo * y_lo; + + // Now add the components back together, taking care to respect the fact that: + // F: doesn't need to be modified + // O/I: Need to be added together. + // L: Shift right by 32-bits, then add into the sum of O and I + // Once O/I/L are summed up, then shift the sum by 32-bits and add to F. + // + // We use hadd to give us a bit of extra precision for the intermediate sums + // but as a result, we shift by 31 bits instead of 32 + return (f + (__clc_hadd(o, (i + (l >> 32))) >> 31)); +} + +// Vector-based mul_hi implementation for long/ulong. See comments in the scalar +// versions for more detail. 
+#define __CLC_MUL_HI_LONG_VEC_IMPL(TY, UTY) \ + _CLC_OVERLOAD _CLC_DEF TY __clc_mul_hi(TY x, TY y) { \ + TY f, o, i; \ + UTY l; \ + \ + TY x_hi = x >> 32; \ + TY x_lo = x & UINT_MAX; \ + TY y_hi = y >> 32; \ + TY y_lo = y & UINT_MAX; \ + \ + f = x_hi * y_hi; \ + o = x_hi * y_lo; \ + i = x_lo * y_hi; \ + l = __CLC_CONVERT_TY(x_lo * y_lo, UTY); \ + i += __CLC_CONVERT_TY(l >> (UTY)32, TY); \ + \ + return f + (__clc_hadd(o, i) >> (TY)31); \ + } + +#define __CLC_MUL_HI_LONG_IMPL(BTYPE, UBTYPE) \ + __CLC_MUL_HI_LONG_VEC_IMPL(BTYPE##2, UBTYPE##2) \ + __CLC_MUL_HI_LONG_VEC_IMPL(BTYPE##3, UBTYPE##3) \ + __CLC_MUL_HI_LONG_VEC_IMPL(BTYPE##4, UBTYPE##4) \ + __CLC_MUL_HI_LONG_VEC_IMPL(BTYPE##8, UBTYPE##8) \ + __CLC_MUL_HI_LONG_VEC_IMPL(BTYPE##16, UBTYPE##16) + +#define __CLC_MUL_HI_TYPES() \ + __CLC_MUL_HI_DEC_IMPL(short, char, 8) \ + __CLC_MUL_HI_DEC_IMPL(ushort, uchar, 8) \ + __CLC_MUL_HI_DEC_IMPL(int, short, 16) \ + __CLC_MUL_HI_DEC_IMPL(uint, ushort, 16) \ + __CLC_MUL_HI_DEC_IMPL(long, int, 32) \ + __CLC_MUL_HI_DEC_IMPL(ulong, uint, 32) \ + __CLC_MUL_HI_LONG_IMPL(long, ulong) \ + __CLC_MUL_HI_LONG_IMPL(ulong, ulong) + +__CLC_MUL_HI_TYPES() + +#undef __CLC_MUL_HI_TYPES +#undef __CLC_MUL_HI_LONG_IMPL +#undef __CLC_MUL_HI_LONG_VEC_IMPL +#undef __CLC_MUL_HI_DEC_IMPL +#undef __CLC_MUL_HI_IMPL +#undef __CLC_MUL_HI_VEC_IMPL +#undef __CLC_CONVERT_TY diff --git a/libclc/clc/lib/generic/integer/clc_popcount.cl b/libclc/clc/lib/generic/integer/clc_popcount.cl new file mode 100644 index 00000000000000..12e851c15d7959 --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_popcount.cl @@ -0,0 +1,7 @@ +#include <clc/internal/clc.h> + +#define FUNCTION __clc_popcount +#define __CLC_FUNCTION(x) __builtin_elementwise_popcount +#define __CLC_BODY <clc/shared/unary_def.inc> + +#include <clc/integer/gentype.inc> diff --git a/libclc/clc/lib/generic/integer/clc_rhadd.cl b/libclc/clc/lib/generic/integer/clc_rhadd.cl new file mode 100644 index 00000000000000..00bd2f0ac8058a --- /dev/null +++ 
b/libclc/clc/lib/generic/integer/clc_rhadd.cl @@ -0,0 +1,4 @@ +#include <clc/internal/clc.h> + +#define __CLC_BODY <clc_rhadd.inc> +#include <clc/integer/gentype.inc> diff --git a/libclc/clc/lib/generic/integer/clc_rhadd.inc b/libclc/clc/lib/generic/integer/clc_rhadd.inc new file mode 100644 index 00000000000000..d363c42061ffe1 --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_rhadd.inc @@ -0,0 +1,8 @@ +// rhadd = (x+y+1)>>1 +// This can be simplified to x>>1 + y>>1 + (1 if either x or y have the 1s bit +// set) This saves us having to do any checks for overflow in the addition sums +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_rhadd(__CLC_GENTYPE x, + __CLC_GENTYPE y) { + return (x >> (__CLC_GENTYPE)1) + (y >> (__CLC_GENTYPE)1) + + ((x & (__CLC_GENTYPE)1) | (y & (__CLC_GENTYPE)1)); +} diff --git a/libclc/clc/lib/generic/integer/clc_upsample.cl b/libclc/clc/lib/generic/integer/clc_upsample.cl new file mode 100644 index 00000000000000..d53ef7240bfc22 --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_upsample.cl @@ -0,0 +1,35 @@ +#include <clc/internal/clc.h> + +// TODO: Replace with __clc_convert_<type> when available +#define __CLC_CONVERT_TY(X, TY) __builtin_convertvector(X, TY) + +#define __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE, GENTYPE, UGENTYPE, GENSIZE) \ + _CLC_OVERLOAD _CLC_DEF BGENTYPE __clc_upsample(GENTYPE hi, UGENTYPE lo) { \ + BGENTYPE large_hi = __CLC_CONVERT_TY(hi, BGENTYPE); \ + BGENTYPE large_lo = __CLC_CONVERT_TY(lo, BGENTYPE); \ + return (large_hi << (BGENTYPE)GENSIZE) | large_lo; \ + } + +#define __CLC_UPSAMPLE_IMPL(BGENTYPE, GENTYPE, UGENTYPE, GENSIZE) \ + _CLC_OVERLOAD _CLC_DEF BGENTYPE __clc_upsample(GENTYPE hi, UGENTYPE lo) { \ + return ((BGENTYPE)hi << GENSIZE) | lo; \ + } \ + __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##2, GENTYPE##2, UGENTYPE##2, GENSIZE) \ + __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##3, GENTYPE##3, UGENTYPE##3, GENSIZE) \ + __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##4, GENTYPE##4, UGENTYPE##4, GENSIZE) \ + __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##8, 
GENTYPE##8, UGENTYPE##8, GENSIZE) \ + __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##16, GENTYPE##16, UGENTYPE##16, GENSIZE) + +#define __CLC_UPSAMPLE_TYPES() \ + __CLC_UPSAMPLE_IMPL(short, char, uchar, 8) \ + __CLC_UPSAMPLE_IMPL(ushort, uchar, uchar, 8) \ + __CLC_UPSAMPLE_IMPL(int, short, ushort, 16) \ + __CLC_UPSAMPLE_IMPL(uint, ushort, ushort, 16) \ + __CLC_UPSAMPLE_IMPL(long, int, uint, 32) \ + __CLC_UPSAMPLE_IMPL(ulong, uint, uint, 32) + +__CLC_UPSAMPLE_TYPES() + +#undef __CLC_UPSAMPLE_TYPES +#undef __CLC_UPSAMPLE_IMPL +#undef __CLC_CONVERT_TY diff --git a/libclc/clc/lib/spirv/SOURCES b/libclc/clc/lib/spirv/SOURCES index a87223e8c622c5..ddc9e4c49d8626 100644 --- a/libclc/clc/lib/spirv/SOURCES +++ b/libclc/clc/lib/spirv/SOURCES @@ -3,7 +3,15 @@ ../generic/common/clc_smoothstep.cl ../generic/geometric/clc_dot.cl ../generic/integer/clc_add_sat.cl +../generic/integer/clc_clz.cl +../generic/integer/clc_hadd.cl +../generic/integer/clc_mad24.cl +../generic/integer/clc_mul24.cl +../generic/integer/clc_mul_hi.cl +../generic/integer/clc_popcount.cl +../generic/integer/clc_rhadd.cl ../generic/integer/clc_sub_sat.cl +../generic/integer/clc_upsample.cl ../generic/math/clc_ceil.cl ../generic/math/clc_copysign.cl ../generic/math/clc_fabs.cl diff --git a/libclc/clc/lib/spirv64/SOURCES b/libclc/clc/lib/spirv64/SOURCES index a87223e8c622c5..ddc9e4c49d8626 100644 --- a/libclc/clc/lib/spirv64/SOURCES +++ b/libclc/clc/lib/spirv64/SOURCES @@ -3,7 +3,15 @@ ../generic/common/clc_smoothstep.cl ../generic/geometric/clc_dot.cl ../generic/integer/clc_add_sat.cl +../generic/integer/clc_clz.cl +../generic/integer/clc_hadd.cl +../generic/integer/clc_mad24.cl +../generic/integer/clc_mul24.cl +../generic/integer/clc_mul_hi.cl +../generic/integer/clc_popcount.cl +../generic/integer/clc_rhadd.cl ../generic/integer/clc_sub_sat.cl +../generic/integer/clc_upsample.cl ../generic/math/clc_ceil.cl ../generic/math/clc_copysign.cl ../generic/math/clc_fabs.cl diff --git a/libclc/generic/include/clc/integer/clz.h 
b/libclc/generic/include/clc/integer/clz.h index f7cdbf78ec0607..5479279eb84a2d 100644 --- a/libclc/generic/include/clc/integer/clz.h +++ b/libclc/generic/include/clc/integer/clz.h @@ -1,2 +1,7 @@ -#define __CLC_BODY <clc/integer/clz.inc> +#define __CLC_FUNCTION clz +#define __CLC_BODY <clc/shared/unary_decl.inc> + #include <clc/integer/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION diff --git a/libclc/generic/include/clc/integer/clz.inc b/libclc/generic/include/clc/integer/clz.inc deleted file mode 100644 index 45826d10c9fafe..00000000000000 --- a/libclc/generic/include/clc/integer/clz.inc +++ /dev/null @@ -1 +0,0 @@ -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE clz(__CLC_GENTYPE x); diff --git a/libclc/generic/include/clc/integer/hadd.h b/libclc/generic/include/clc/integer/hadd.h index 37304e26cc2d62..0d73944ac11bae 100644 --- a/libclc/generic/include/clc/integer/hadd.h +++ b/libclc/generic/include/clc/integer/hadd.h @@ -1,2 +1,7 @@ -#define __CLC_BODY <clc/integer/hadd.inc> +#define __CLC_FUNCTION hadd +#define __CLC_BODY <clc/shared/binary_decl.inc> + #include <clc/integer/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION diff --git a/libclc/generic/include/clc/integer/hadd.inc b/libclc/generic/include/clc/integer/hadd.inc deleted file mode 100644 index f698989cef2026..00000000000000 --- a/libclc/generic/include/clc/integer/hadd.inc +++ /dev/null @@ -1 +0,0 @@ -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE hadd(__CLC_GENTYPE x, __CLC_GENTYPE y); diff --git a/libclc/generic/include/clc/integer/mad24.h b/libclc/generic/include/clc/integer/mad24.h index 0c120faac2b15b..f7292f79f821f0 100644 --- a/libclc/generic/include/clc/integer/mad24.h +++ b/libclc/generic/include/clc/integer/mad24.h @@ -1,3 +1,6 @@ -#define __CLC_BODY <clc/integer/mad24.inc> -#include <clc/integer/integer-gentype.inc> -#undef __CLC_BODY +#define __CLC_FUNCTION mad24 +#define __CLC_BODY <clc/shared/ternary_decl.inc> + +#include <clc/integer/gentype24.inc> + +#undef __CLC_FUNCTION diff --git 
a/libclc/generic/include/clc/integer/mad24.inc b/libclc/generic/include/clc/integer/mad24.inc deleted file mode 100644 index 81fe0c2a89266c..00000000000000 --- a/libclc/generic/include/clc/integer/mad24.inc +++ /dev/null @@ -1 +0,0 @@ -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mad24(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z); diff --git a/libclc/generic/include/clc/integer/mad_hi.h b/libclc/generic/include/clc/integer/mad_hi.h index 863ce92d9f2d8c..9f9de75df76136 100644 --- a/libclc/generic/include/clc/integer/mad_hi.h +++ b/libclc/generic/include/clc/integer/mad_hi.h @@ -1 +1,7 @@ -#define mad_hi(a, b, c) (mul_hi((a),(b))+(c)) +#define __CLC_FUNCTION mad_hi +#define __CLC_BODY <clc/shared/ternary_decl.inc> + +#include <clc/integer/gentype.inc> + +#undef __CLC_FUNCTION +#undef __CLC_BODY diff --git a/libclc/generic/include/clc/integer/mul24.h b/libclc/generic/include/clc/integer/mul24.h index 4f97098d70f0f4..27e0f20040cb9a 100644 --- a/libclc/generic/include/clc/integer/mul24.h +++ b/libclc/generic/include/clc/integer/mul24.h @@ -1,3 +1,7 @@ -#define __CLC_BODY <clc/integer/mul24.inc> -#include <clc/integer/integer-gentype.inc> +#define __CLC_FUNCTION mul24 +#define __CLC_BODY <clc/shared/binary_decl.inc> + +#include <clc/integer/gentype24.inc> + #undef __CLC_BODY +#undef __CLC_FUNCTION diff --git a/libclc/generic/include/clc/integer/mul24.inc b/libclc/generic/include/clc/integer/mul24.inc deleted file mode 100644 index 8cbf7c10ac447d..00000000000000 --- a/libclc/generic/include/clc/integer/mul24.inc +++ /dev/null @@ -1 +0,0 @@ -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mul24(__CLC_GENTYPE x, __CLC_GENTYPE y); diff --git a/libclc/generic/include/clc/integer/mul_hi.h b/libclc/generic/include/clc/integer/mul_hi.h index 27b95d83442f97..3607a80f439679 100644 --- a/libclc/generic/include/clc/integer/mul_hi.h +++ b/libclc/generic/include/clc/integer/mul_hi.h @@ -1,2 +1,7 @@ -#define __CLC_BODY <clc/integer/mul_hi.inc> +#define __CLC_FUNCTION mul_hi +#define __CLC_BODY 
<clc/shared/binary_decl.inc> + #include <clc/integer/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION diff --git a/libclc/generic/include/clc/integer/mul_hi.inc b/libclc/generic/include/clc/integer/mul_hi.inc deleted file mode 100644 index ce9e5c0b2c18c8..00000000000000 --- a/libclc/generic/include/clc/integer/mul_hi.inc +++ /dev/null @@ -1 +0,0 @@ -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mul_hi(__CLC_GENTYPE x, __CLC_GENTYPE y); diff --git a/libclc/generic/include/clc/integer/popcount.h b/libclc/generic/include/clc/integer/popcount.h index 23335f45b6fe3f..a3ce8ebb1df1f4 100644 --- a/libclc/generic/include/clc/integer/popcount.h +++ b/libclc/generic/include/clc/integer/popcount.h @@ -1,5 +1,7 @@ #define __CLC_FUNCTION popcount -#define __CLC_BODY <clc/integer/unary.inc> +#define __CLC_BODY <clc/shared/unary_decl.inc> + #include <clc/integer/gentype.inc> -#undef __CLC_FUNCTION + #undef __CLC_BODY +#undef __CLC_FUNCTION diff --git a/libclc/generic/include/clc/integer/rhadd.h b/libclc/generic/include/clc/integer/rhadd.h index 69b43faeebd246..a777d4b66f2f81 100644 --- a/libclc/generic/include/clc/integer/rhadd.h +++ b/libclc/generic/include/clc/integer/rhadd.h @@ -1,2 +1,7 @@ -#define __CLC_BODY <clc/integer/rhadd.inc> +#define __CLC_FUNCTION rhadd +#define __CLC_BODY <clc/shared/binary_decl.inc> + #include <clc/integer/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION diff --git a/libclc/generic/include/clc/integer/rhadd.inc b/libclc/generic/include/clc/integer/rhadd.inc deleted file mode 100644 index 88ccaf09fd5ef8..00000000000000 --- a/libclc/generic/include/clc/integer/rhadd.inc +++ /dev/null @@ -1 +0,0 @@ -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE rhadd(__CLC_GENTYPE x, __CLC_GENTYPE y); diff --git a/libclc/generic/include/clc/integer/upsample.h b/libclc/generic/include/clc/integer/upsample.h index 0b36b692a2c8d3..37038f6ad90344 100644 --- a/libclc/generic/include/clc/integer/upsample.h +++ b/libclc/generic/include/clc/integer/upsample.h @@ -1,25 +1,24 @@ 
-#define __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) \ - _CLC_OVERLOAD _CLC_DECL BGENTYPE upsample(GENTYPE hi, UGENTYPE lo); +#define __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) \ + _CLC_OVERLOAD _CLC_DECL BGENTYPE upsample(GENTYPE hi, UGENTYPE lo); -#define __CLC_UPSAMPLE_VEC(BGENTYPE, GENTYPE, UGENTYPE) \ - __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) \ - __CLC_UPSAMPLE_DECL(BGENTYPE##2, GENTYPE##2, UGENTYPE##2) \ - __CLC_UPSAMPLE_DECL(BGENTYPE##3, GENTYPE##3, UGENTYPE##3) \ - __CLC_UPSAMPLE_DECL(BGENTYPE##4, GENTYPE##4, UGENTYPE##4) \ - __CLC_UPSAMPLE_DECL(BGENTYPE##8, GENTYPE##8, UGENTYPE##8) \ - __CLC_UPSAMPLE_DECL(BGENTYPE##16, GENTYPE##16, UGENTYPE##16) \ +#define __CLC_UPSAMPLE_VEC(BGENTYPE, GENTYPE, UGENTYPE) \ + __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) \ + __CLC_UPSAMPLE_DECL(BGENTYPE##2, GENTYPE##2, UGENTYPE##2) \ + __CLC_UPSAMPLE_DECL(BGENTYPE##3, GENTYPE##3, UGENTYPE##3) \ + __CLC_UPSAMPLE_DECL(BGENTYPE##4, GENTYPE##4, UGENTYPE##4) \ + __CLC_UPSAMPLE_DECL(BGENTYPE##8, GENTYPE##8, UGENTYPE##8) \ + __CLC_UPSAMPLE_DECL(BGENTYPE##16, GENTYPE##16, UGENTYPE##16) -#define __CLC_UPSAMPLE_TYPES() \ - __CLC_UPSAMPLE_VEC(short, char, uchar) \ - __CLC_UPSAMPLE_VEC(ushort, uchar, uchar) \ - __CLC_UPSAMPLE_VEC(int, short, ushort) \ - __CLC_UPSAMPLE_VEC(uint, ushort, ushort) \ - __CLC_UPSAMPLE_VEC(long, int, uint) \ - __CLC_UPSAMPLE_VEC(ulong, uint, uint) \ +#define __CLC_UPSAMPLE_TYPES() \ + __CLC_UPSAMPLE_VEC(short, char, uchar) \ + __CLC_UPSAMPLE_VEC(ushort, uchar, uchar) \ + __CLC_UPSAMPLE_VEC(int, short, ushort) \ + __CLC_UPSAMPLE_VEC(uint, ushort, ushort) \ + __CLC_UPSAMPLE_VEC(long, int, uint) \ + __CLC_UPSAMPLE_VEC(ulong, uint, uint) __CLC_UPSAMPLE_TYPES() #undef __CLC_UPSAMPLE_TYPES #undef __CLC_UPSAMPLE_DECL #undef __CLC_UPSAMPLE_VEC - diff --git a/libclc/generic/include/clc/math/fma.h b/libclc/generic/include/clc/math/fma.h index c7c23eb2055f68..bedfc5414da002 100644 --- a/libclc/generic/include/clc/math/fma.h +++ 
b/libclc/generic/include/clc/math/fma.h @@ -1,4 +1,4 @@ -#define __CLC_BODY <clc/math/ternary_decl.inc> +#define __CLC_BODY <clc/shared/ternary_decl.inc> #define __CLC_FUNCTION fma #include <clc/math/gentype.inc> diff --git a/libclc/generic/include/clc/math/mad.h b/libclc/generic/include/clc/math/mad.h index 99b6484193012b..6f3f1f6201807b 100644 --- a/libclc/generic/include/clc/math/mad.h +++ b/libclc/generic/include/clc/math/mad.h @@ -1,4 +1,4 @@ -#define __CLC_BODY <clc/math/ternary_decl.inc> +#define __CLC_BODY <clc/shared/ternary_decl.inc> #define __CLC_FUNCTION mad #include <clc/math/gentype.inc> diff --git a/libclc/generic/include/integer/popcount.h b/libclc/generic/include/integer/popcount.h deleted file mode 100644 index 00c753753bb4e0..00000000000000 --- a/libclc/generic/include/integer/popcount.h +++ /dev/null @@ -1,3 +0,0 @@ -#define __CLC_FUNCTION __clc_native_popcount -#define __CLC_INTRINSIC "llvm.ctpop" -#include <integer/unary_intrin.inc> diff --git a/libclc/generic/include/integer/unary_intrin.inc b/libclc/generic/include/integer/unary_intrin.inc deleted file mode 100644 index ee9862a4c5b3a6..00000000000000 --- a/libclc/generic/include/integer/unary_intrin.inc +++ /dev/null @@ -1,20 +0,0 @@ -#define __CLC_INTRINSIC_DEF(SCALAR_TYPE, BIT_SIZE) \ -_CLC_OVERLOAD SCALAR_TYPE __CLC_FUNCTION(SCALAR_TYPE x) __asm(__CLC_INTRINSIC ".i" BIT_SIZE); \ -_CLC_OVERLOAD SCALAR_TYPE##2 __CLC_FUNCTION(SCALAR_TYPE##2 x) __asm(__CLC_INTRINSIC ".v2i" BIT_SIZE); \ -_CLC_OVERLOAD SCALAR_TYPE##3 __CLC_FUNCTION(SCALAR_TYPE##3 x) __asm(__CLC_INTRINSIC ".v3i" BIT_SIZE); \ -_CLC_OVERLOAD SCALAR_TYPE##4 __CLC_FUNCTION(SCALAR_TYPE##4 x) __asm(__CLC_INTRINSIC ".v4i" BIT_SIZE); \ -_CLC_OVERLOAD SCALAR_TYPE##8 __CLC_FUNCTION(SCALAR_TYPE##8 x) __asm(__CLC_INTRINSIC ".v8i" BIT_SIZE); \ -_CLC_OVERLOAD SCALAR_TYPE##16 __CLC_FUNCTION(SCALAR_TYPE##16 x) __asm(__CLC_INTRINSIC ".v16i" BIT_SIZE); - -__CLC_INTRINSIC_DEF(char, "8") -__CLC_INTRINSIC_DEF(uchar, "8") -__CLC_INTRINSIC_DEF(short, 
"16") -__CLC_INTRINSIC_DEF(ushort, "16") -__CLC_INTRINSIC_DEF(int, "32") -__CLC_INTRINSIC_DEF(uint, "32") -__CLC_INTRINSIC_DEF(long, "64") -__CLC_INTRINSIC_DEF(ulong, "64") - -#undef __CLC_FUNCTION -#undef __CLC_INTRINSIC -#undef __CLC_INTRINSIC_DEF diff --git a/libclc/generic/include/math/clc_fma.h b/libclc/generic/include/math/clc_fma.h index 490cb9bcefc86b..598df66cf72e95 100644 --- a/libclc/generic/include/math/clc_fma.h +++ b/libclc/generic/include/math/clc_fma.h @@ -4,7 +4,7 @@ #define __FLOAT_ONLY #define __CLC_FUNCTION __clc_sw_fma -#define __CLC_BODY <clc/math/ternary_decl.inc> +#define __CLC_BODY <clc/shared/ternary_decl.inc> #include <clc/math/gentype.inc> #undef __CLC_BODY #undef __CLC_FUNCTION diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES index 217e3bca48b7ac..a62c87902a6a7e 100644 --- a/libclc/generic/lib/SOURCES +++ b/libclc/generic/lib/SOURCES @@ -68,6 +68,7 @@ integer/add_sat.cl integer/clz.cl integer/hadd.cl integer/mad24.cl +integer/mad_hi.cl integer/mad_sat.cl integer/mul24.cl integer/mul_hi.cl diff --git a/libclc/generic/lib/integer/clz.cl b/libclc/generic/lib/integer/clz.cl index 904d027d376134..c2d423174b36cb 100644 --- a/libclc/generic/lib/integer/clz.cl +++ b/libclc/generic/lib/integer/clz.cl @@ -1,43 +1,7 @@ #include <clc/clc.h> -#include <clc/clcmacro.h> +#include <clc/integer/clc_clz.h> -_CLC_OVERLOAD _CLC_DEF char clz(char x) { - return clz((ushort)(uchar)x) - 8; -} +#define FUNCTION clz +#define __CLC_BODY <clc/shared/unary_def.inc> -_CLC_OVERLOAD _CLC_DEF uchar clz(uchar x) { - return clz((ushort)x) - 8; -} - -_CLC_OVERLOAD _CLC_DEF short clz(short x) { - return x ? __builtin_clzs(x) : 16; -} - -_CLC_OVERLOAD _CLC_DEF ushort clz(ushort x) { - return x ? __builtin_clzs(x) : 16; -} - -_CLC_OVERLOAD _CLC_DEF int clz(int x) { - return x ? __builtin_clz(x) : 32; -} - -_CLC_OVERLOAD _CLC_DEF uint clz(uint x) { - return x ? __builtin_clz(x) : 32; -} - -_CLC_OVERLOAD _CLC_DEF long clz(long x) { - return x ? 
__builtin_clzl(x) : 64; -} - -_CLC_OVERLOAD _CLC_DEF ulong clz(ulong x) { - return x ? __builtin_clzl(x) : 64; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, clz, char) -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, clz, uchar) -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, clz, short) -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, clz, ushort) -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, clz, int) -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, clz, uint) -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, clz, long) -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, clz, ulong) +#include <clc/integer/gentype.inc> diff --git a/libclc/generic/lib/integer/hadd.cl b/libclc/generic/lib/integer/hadd.cl index 749026e5a8ad81..d8461f1d5398b5 100644 --- a/libclc/generic/lib/integer/hadd.cl +++ b/libclc/generic/lib/integer/hadd.cl @@ -1,4 +1,7 @@ #include <clc/clc.h> +#include <clc/integer/clc_hadd.h> + +#define FUNCTION hadd +#define __CLC_BODY <clc/shared/binary_def.inc> -#define __CLC_BODY <hadd.inc> #include <clc/integer/gentype.inc> diff --git a/libclc/generic/lib/integer/hadd.inc b/libclc/generic/lib/integer/hadd.inc deleted file mode 100644 index ea59d9bd7db5f8..00000000000000 --- a/libclc/generic/lib/integer/hadd.inc +++ /dev/null @@ -1,6 +0,0 @@ -//hadd = (x+y)>>1 -//This can be simplified to x>>1 + y>>1 + (1 if both x and y have the 1s bit set) -//This saves us having to do any checks for overflow in the addition sum -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE hadd(__CLC_GENTYPE x, __CLC_GENTYPE y) { - return (x>>(__CLC_GENTYPE)1)+(y>>(__CLC_GENTYPE)1)+(x&y&(__CLC_GENTYPE)1); -} diff --git a/libclc/generic/lib/integer/mad24.cl b/libclc/generic/lib/integer/mad24.cl index e29e99f28b56fc..24935b54f24cd6 100644 --- a/libclc/generic/lib/integer/mad24.cl +++ b/libclc/generic/lib/integer/mad24.cl @@ -1,4 +1,7 @@ #include <clc/clc.h> +#include <clc/integer/clc_mad24.h> -#define __CLC_BODY <mad24.inc> -#include 
<clc/integer/integer-gentype.inc> +#define FUNCTION mad24 +#define __CLC_BODY <clc/shared/ternary_def.inc> + +#include <clc/integer/gentype24.inc> diff --git a/libclc/generic/lib/integer/mad24.inc b/libclc/generic/lib/integer/mad24.inc deleted file mode 100644 index 902b0aafe4c874..00000000000000 --- a/libclc/generic/lib/integer/mad24.inc +++ /dev/null @@ -1,3 +0,0 @@ -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mad24(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z){ - return mul24(x, y) + z; -} diff --git a/libclc/generic/lib/integer/mad_hi.cl b/libclc/generic/lib/integer/mad_hi.cl new file mode 100644 index 00000000000000..7915faa966ec40 --- /dev/null +++ b/libclc/generic/lib/integer/mad_hi.cl @@ -0,0 +1,7 @@ +#include <clc/clc.h> +#include <clc/integer/clc_mad_hi.h> + +#define FUNCTION mad_hi +#define __CLC_BODY <clc/shared/ternary_def.inc> + +#include <clc/integer/gentype.inc> diff --git a/libclc/generic/lib/integer/mul24.cl b/libclc/generic/lib/integer/mul24.cl index 8aedca64b85905..b8cec2c62c337f 100644 --- a/libclc/generic/lib/integer/mul24.cl +++ b/libclc/generic/lib/integer/mul24.cl @@ -1,4 +1,7 @@ #include <clc/clc.h> +#include <clc/integer/clc_mul24.h> -#define __CLC_BODY <mul24.inc> -#include <clc/integer/integer-gentype.inc> +#define FUNCTION mul24 +#define __CLC_BODY <clc/shared/binary_def.inc> + +#include <clc/integer/gentype24.inc> diff --git a/libclc/generic/lib/integer/mul_hi.cl b/libclc/generic/lib/integer/mul_hi.cl index 174d893afb14f9..a19ef7ecda2b9e 100644 --- a/libclc/generic/lib/integer/mul_hi.cl +++ b/libclc/generic/lib/integer/mul_hi.cl @@ -1,109 +1,7 @@ #include <clc/clc.h> +#include <clc/integer/clc_mul_hi.h> -//For all types EXCEPT long, which is implemented separately -#define __CLC_MUL_HI_IMPL(BGENTYPE, GENTYPE, GENSIZE) \ - _CLC_OVERLOAD _CLC_DEF GENTYPE mul_hi(GENTYPE x, GENTYPE y){ \ - return (GENTYPE)(((BGENTYPE)x * (BGENTYPE)y) >> GENSIZE); \ - } \ +#define FUNCTION mul_hi +#define __CLC_BODY <clc/shared/binary_def.inc> -//FOIL-based 
long mul_hi -// -// Summary: Treat mul_hi(long x, long y) as: -// (a+b) * (c+d) where a and c are the high-order parts of x and y respectively -// and b and d are the low-order parts of x and y. -// Thinking back to algebra, we use FOIL to do the work. - -_CLC_OVERLOAD _CLC_DEF long mul_hi(long x, long y){ - long f, o, i; - ulong l; - - //Move the high/low halves of x/y into the lower 32-bits of variables so - //that we can multiply them without worrying about overflow. - long x_hi = x >> 32; - long x_lo = x & UINT_MAX; - long y_hi = y >> 32; - long y_lo = y & UINT_MAX; - - //Multiply all of the components according to FOIL method - f = x_hi * y_hi; - o = x_hi * y_lo; - i = x_lo * y_hi; - l = x_lo * y_lo; - - //Now add the components back together in the following steps: - //F: doesn't need to be modified - //O/I: Need to be added together. - //L: Shift right by 32-bits, then add into the sum of O and I - //Once O/I/L are summed up, then shift the sum by 32-bits and add to F. - // - //We use hadd to give us a bit of extra precision for the intermediate sums - //but as a result, we shift by 31 bits instead of 32 - return (long)(f + (hadd(o, (i + (long)((ulong)l>>32))) >> 31)); -} - -_CLC_OVERLOAD _CLC_DEF ulong mul_hi(ulong x, ulong y){ - ulong f, o, i; - ulong l; - - //Move the high/low halves of x/y into the lower 32-bits of variables so - //that we can multiply them without worrying about overflow. - ulong x_hi = x >> 32; - ulong x_lo = x & UINT_MAX; - ulong y_hi = y >> 32; - ulong y_lo = y & UINT_MAX; - - //Multiply all of the components according to FOIL method - f = x_hi * y_hi; - o = x_hi * y_lo; - i = x_lo * y_hi; - l = x_lo * y_lo; - - //Now add the components back together, taking care to respect the fact that: - //F: doesn't need to be modified - //O/I: Need to be added together. - //L: Shift right by 32-bits, then add into the sum of O and I - //Once O/I/L are summed up, then shift the sum by 32-bits and add to F. 
- // - //We use hadd to give us a bit of extra precision for the intermediate sums - //but as a result, we shift by 31 bits instead of 32 - return (f + (hadd(o, (i + (l>>32))) >> 31)); -} - -#define __CLC_MUL_HI_VEC(GENTYPE) \ - _CLC_OVERLOAD _CLC_DEF GENTYPE##2 mul_hi(GENTYPE##2 x, GENTYPE##2 y){ \ - return (GENTYPE##2){mul_hi(x.s0, y.s0), mul_hi(x.s1, y.s1)}; \ - } \ - _CLC_OVERLOAD _CLC_DEF GENTYPE##3 mul_hi(GENTYPE##3 x, GENTYPE##3 y){ \ - return (GENTYPE##3){mul_hi(x.s0, y.s0), mul_hi(x.s1, y.s1), mul_hi(x.s2, y.s2)}; \ - } \ - _CLC_OVERLOAD _CLC_DEF GENTYPE##4 mul_hi(GENTYPE##4 x, GENTYPE##4 y){ \ - return (GENTYPE##4){mul_hi(x.lo, y.lo), mul_hi(x.hi, y.hi)}; \ - } \ - _CLC_OVERLOAD _CLC_DEF GENTYPE##8 mul_hi(GENTYPE##8 x, GENTYPE##8 y){ \ - return (GENTYPE##8){mul_hi(x.lo, y.lo), mul_hi(x.hi, y.hi)}; \ - } \ - _CLC_OVERLOAD _CLC_DEF GENTYPE##16 mul_hi(GENTYPE##16 x, GENTYPE##16 y){ \ - return (GENTYPE##16){mul_hi(x.lo, y.lo), mul_hi(x.hi, y.hi)}; \ - } \ - -#define __CLC_MUL_HI_DEC_IMPL(BTYPE, TYPE, BITS) \ - __CLC_MUL_HI_IMPL(BTYPE, TYPE, BITS) \ - __CLC_MUL_HI_VEC(TYPE) - -#define __CLC_MUL_HI_TYPES() \ - __CLC_MUL_HI_DEC_IMPL(short, char, 8) \ - __CLC_MUL_HI_DEC_IMPL(ushort, uchar, 8) \ - __CLC_MUL_HI_DEC_IMPL(int, short, 16) \ - __CLC_MUL_HI_DEC_IMPL(uint, ushort, 16) \ - __CLC_MUL_HI_DEC_IMPL(long, int, 32) \ - __CLC_MUL_HI_DEC_IMPL(ulong, uint, 32) \ - __CLC_MUL_HI_VEC(long) \ - __CLC_MUL_HI_VEC(ulong) - -__CLC_MUL_HI_TYPES() - -#undef __CLC_MUL_HI_TYPES -#undef __CLC_MUL_HI_DEC_IMPL -#undef __CLC_MUL_HI_IMPL -#undef __CLC_MUL_HI_VEC -#undef __CLC_B32 +#include <clc/integer/gentype.inc> diff --git a/libclc/generic/lib/integer/popcount.cl b/libclc/generic/lib/integer/popcount.cl index ca83b1afaf9dab..4e8a6ebab65967 100644 --- a/libclc/generic/lib/integer/popcount.cl +++ b/libclc/generic/lib/integer/popcount.cl @@ -1,8 +1,7 @@ #include <clc/clc.h> -#include <integer/popcount.h> +#include <clc/integer/clc_popcount.h> -#define __CLC_FUNC popcount -#define 
__CLC_IMPL_FUNC __clc_native_popcount +#define FUNCTION popcount +#define __CLC_BODY <clc/shared/unary_def.inc> -#define __CLC_BODY "../clc_unary.inc" #include <clc/integer/gentype.inc> diff --git a/libclc/generic/lib/integer/rhadd.cl b/libclc/generic/lib/integer/rhadd.cl index c985870f7c7a24..b1cb492cad8b86 100644 --- a/libclc/generic/lib/integer/rhadd.cl +++ b/libclc/generic/lib/integer/rhadd.cl @@ -1,4 +1,7 @@ #include <clc/clc.h> +#include <clc/integer/clc_rhadd.h> + +#define FUNCTION rhadd +#define __CLC_BODY <clc/shared/binary_def.inc> -#define __CLC_BODY <rhadd.inc> #include <clc/integer/gentype.inc> diff --git a/libclc/generic/lib/integer/rhadd.inc b/libclc/generic/lib/integer/rhadd.inc deleted file mode 100644 index 3d6076874808e6..00000000000000 --- a/libclc/generic/lib/integer/rhadd.inc +++ /dev/null @@ -1,6 +0,0 @@ -//rhadd = (x+y+1)>>1 -//This can be simplified to x>>1 + y>>1 + (1 if either x or y have the 1s bit set) -//This saves us having to do any checks for overflow in the addition sums -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE rhadd(__CLC_GENTYPE x, __CLC_GENTYPE y) { - return (x>>(__CLC_GENTYPE)1)+(y>>(__CLC_GENTYPE)1)+((x&(__CLC_GENTYPE)1)|(y&(__CLC_GENTYPE)1)); -} diff --git a/libclc/generic/lib/integer/upsample.cl b/libclc/generic/lib/integer/upsample.cl index da77315f8f9344..984a731e3b4d12 100644 --- a/libclc/generic/lib/integer/upsample.cl +++ b/libclc/generic/lib/integer/upsample.cl @@ -1,32 +1,34 @@ #include <clc/clc.h> +#include <clc/integer/clc_upsample.h> -#define __CLC_UPSAMPLE_IMPL(BGENTYPE, GENTYPE, UGENTYPE, GENSIZE) \ - _CLC_OVERLOAD _CLC_DEF BGENTYPE upsample(GENTYPE hi, UGENTYPE lo){ \ - return ((BGENTYPE)hi << GENSIZE) | lo; \ - } \ - _CLC_OVERLOAD _CLC_DEF BGENTYPE##2 upsample(GENTYPE##2 hi, UGENTYPE##2 lo){ \ - return (BGENTYPE##2){upsample(hi.s0, lo.s0), upsample(hi.s1, lo.s1)}; \ - } \ - _CLC_OVERLOAD _CLC_DEF BGENTYPE##3 upsample(GENTYPE##3 hi, UGENTYPE##3 lo){ \ - return (BGENTYPE##3){upsample(hi.s0, lo.s0), upsample(hi.s1, 
lo.s1), upsample(hi.s2, lo.s2)}; \ - } \ - _CLC_OVERLOAD _CLC_DEF BGENTYPE##4 upsample(GENTYPE##4 hi, UGENTYPE##4 lo){ \ - return (BGENTYPE##4){upsample(hi.lo, lo.lo), upsample(hi.hi, lo.hi)}; \ - } \ - _CLC_OVERLOAD _CLC_DEF BGENTYPE##8 upsample(GENTYPE##8 hi, UGENTYPE##8 lo){ \ - return (BGENTYPE##8){upsample(hi.lo, lo.lo), upsample(hi.hi, lo.hi)}; \ - } \ - _CLC_OVERLOAD _CLC_DEF BGENTYPE##16 upsample(GENTYPE##16 hi, UGENTYPE##16 lo){ \ - return (BGENTYPE##16){upsample(hi.lo, lo.lo), upsample(hi.hi, lo.hi)}; \ - } \ +#define __CLC_UPSAMPLE_IMPL(BGENTYPE, GENTYPE, UGENTYPE) \ + _CLC_OVERLOAD _CLC_DEF BGENTYPE upsample(GENTYPE hi, UGENTYPE lo) { \ + return __clc_upsample(hi, lo); \ + } \ + _CLC_OVERLOAD _CLC_DEF BGENTYPE##2 upsample(GENTYPE##2 hi, UGENTYPE##2 lo) { \ + return __clc_upsample(hi, lo); \ + } \ + _CLC_OVERLOAD _CLC_DEF BGENTYPE##3 upsample(GENTYPE##3 hi, UGENTYPE##3 lo) { \ + return __clc_upsample(hi, lo); \ + } \ + _CLC_OVERLOAD _CLC_DEF BGENTYPE##4 upsample(GENTYPE##4 hi, UGENTYPE##4 lo) { \ + return __clc_upsample(hi, lo); \ + } \ + _CLC_OVERLOAD _CLC_DEF BGENTYPE##8 upsample(GENTYPE##8 hi, UGENTYPE##8 lo) { \ + return __clc_upsample(hi, lo); \ + } \ + _CLC_OVERLOAD _CLC_DEF BGENTYPE##16 upsample(GENTYPE##16 hi, \ + UGENTYPE##16 lo) { \ + return __clc_upsample(hi, lo); \ + } -#define __CLC_UPSAMPLE_TYPES() \ - __CLC_UPSAMPLE_IMPL(short, char, uchar, 8) \ - __CLC_UPSAMPLE_IMPL(ushort, uchar, uchar, 8) \ - __CLC_UPSAMPLE_IMPL(int, short, ushort, 16) \ - __CLC_UPSAMPLE_IMPL(uint, ushort, ushort, 16) \ - __CLC_UPSAMPLE_IMPL(long, int, uint, 32) \ - __CLC_UPSAMPLE_IMPL(ulong, uint, uint, 32) \ +#define __CLC_UPSAMPLE_TYPES() \ + __CLC_UPSAMPLE_IMPL(short, char, uchar) \ + __CLC_UPSAMPLE_IMPL(ushort, uchar, uchar) \ + __CLC_UPSAMPLE_IMPL(int, short, ushort) \ + __CLC_UPSAMPLE_IMPL(uint, ushort, ushort) \ + __CLC_UPSAMPLE_IMPL(long, int, uint) \ + __CLC_UPSAMPLE_IMPL(ulong, uint, uint) __CLC_UPSAMPLE_TYPES() diff --git a/libclc/generic/lib/math/clc_fma.cl 
b/libclc/generic/lib/math/clc_fma.cl index 3f29e7f92615a9..33f5072425d986 100644 --- a/libclc/generic/lib/math/clc_fma.cl +++ b/libclc/generic/lib/math/clc_fma.cl @@ -23,6 +23,7 @@ #include <clc/clc.h> #include <clc/clcmacro.h> #include <clc/integer/clc_abs.h> +#include <clc/integer/clc_clz.h> #include <clc/math/clc_subnormal_config.h> #include <clc/math/math.h> #include <clc/relational/clc_isinf.h> @@ -118,7 +119,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, float c) { } // detect overflow/underflow - int overflow_bits = 3 - clz(st_fma.mantissa); + int overflow_bits = 3 - __clc_clz(st_fma.mantissa); // adjust exponent st_fma.exponent += overflow_bits; diff --git a/libclc/generic/lib/math/clc_fmod.cl b/libclc/generic/lib/math/clc_fmod.cl index db47536833342f..35298b7e42d5c0 100644 --- a/libclc/generic/lib/math/clc_fmod.cl +++ b/libclc/generic/lib/math/clc_fmod.cl @@ -22,6 +22,7 @@ #include <clc/clc.h> #include <clc/clcmacro.h> +#include <clc/integer/clc_clz.h> #include <clc/math/clc_floor.h> #include <clc/math/clc_subnormal_config.h> #include <clc/math/clc_trunc.h> @@ -87,14 +88,14 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y) ulong xsgn = ux ^ ax; double dx = as_double(ax); int xexp = convert_int(ax >> EXPSHIFTBITS_DP64); - int xexp1 = 11 - (int) clz(ax & MANTBITS_DP64); + int xexp1 = 11 - (int) __clc_clz(ax & MANTBITS_DP64); xexp1 = xexp < 1 ? xexp1 : xexp; ulong uy = as_ulong(y); ulong ay = uy & ~SIGNBIT_DP64; double dy = as_double(ay); int yexp = convert_int(ay >> EXPSHIFTBITS_DP64); - int yexp1 = 11 - (int) clz(ay & MANTBITS_DP64); + int yexp1 = 11 - (int) __clc_clz(ay & MANTBITS_DP64); yexp1 = yexp < 1 ? 
yexp1 : yexp; // First assume |x| > |y| diff --git a/libclc/generic/lib/math/clc_remainder.cl b/libclc/generic/lib/math/clc_remainder.cl index e9d2e382d9beb3..3a357de6f1962f 100644 --- a/libclc/generic/lib/math/clc_remainder.cl +++ b/libclc/generic/lib/math/clc_remainder.cl @@ -22,6 +22,7 @@ #include <clc/clc.h> #include <clc/clcmacro.h> +#include <clc/integer/clc_clz.h> #include <clc/math/clc_floor.h> #include <clc/math/clc_subnormal_config.h> #include <clc/math/clc_trunc.h> @@ -95,14 +96,14 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y) ulong xsgn = ux ^ ax; double dx = as_double(ax); int xexp = convert_int(ax >> EXPSHIFTBITS_DP64); - int xexp1 = 11 - (int) clz(ax & MANTBITS_DP64); + int xexp1 = 11 - (int) __clc_clz(ax & MANTBITS_DP64); xexp1 = xexp < 1 ? xexp1 : xexp; ulong uy = as_ulong(y); ulong ay = uy & ~SIGNBIT_DP64; double dy = as_double(ay); int yexp = convert_int(ay >> EXPSHIFTBITS_DP64); - int yexp1 = 11 - (int) clz(ay & MANTBITS_DP64); + int yexp1 = 11 - (int) __clc_clz(ay & MANTBITS_DP64); yexp1 = yexp < 1 ? yexp1 : yexp; int qsgn = ((ux ^ uy) & SIGNBIT_DP64) == 0UL ? 1 : -1; diff --git a/libclc/generic/lib/math/clc_remquo.cl b/libclc/generic/lib/math/clc_remquo.cl index 9cbda094294ad1..af3e7a2b07500d 100644 --- a/libclc/generic/lib/math/clc_remquo.cl +++ b/libclc/generic/lib/math/clc_remquo.cl @@ -22,6 +22,7 @@ #include <clc/clc.h> #include <clc/clcmacro.h> +#include <clc/integer/clc_clz.h> #include <clc/math/clc_floor.h> #include <clc/math/clc_subnormal_config.h> #include <clc/math/clc_trunc.h> @@ -134,14 +135,14 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remquo(double x, double y, ulong xsgn = ux ^ ax; double dx = as_double(ax); int xexp = convert_int(ax >> EXPSHIFTBITS_DP64); - int xexp1 = 11 - (int)clz(ax & MANTBITS_DP64); + int xexp1 = 11 - (int)__clc_clz(ax & MANTBITS_DP64); xexp1 = xexp < 1 ? 
xexp1 : xexp; ulong uy = as_ulong(y); ulong ay = uy & ~SIGNBIT_DP64; double dy = as_double(ay); int yexp = convert_int(ay >> EXPSHIFTBITS_DP64); - int yexp1 = 11 - (int)clz(ay & MANTBITS_DP64); + int yexp1 = 11 - (int)__clc_clz(ay & MANTBITS_DP64); yexp1 = yexp < 1 ? yexp1 : yexp; int qsgn = ((ux ^ uy) & SIGNBIT_DP64) == 0UL ? 1 : -1; diff --git a/libclc/generic/lib/math/sincos_helpers.cl b/libclc/generic/lib/math/sincos_helpers.cl index 6afb1887d77979..22f2bf61bf27d7 100644 --- a/libclc/generic/lib/math/sincos_helpers.cl +++ b/libclc/generic/lib/math/sincos_helpers.cl @@ -22,6 +22,8 @@ #include "sincos_helpers.h" #include <clc/clc.h> +#include <clc/integer/clc_clz.h> +#include <clc/integer/clc_mul_hi.h> #include <clc/math/clc_mad.h> #include <clc/math/clc_trunc.h> #include <clc/math/math.h> @@ -179,11 +181,11 @@ _CLC_DEF int __clc_argReductionSmallS(float *r, float *rr, float x) { #define FULL_MUL(A, B, HI, LO) \ LO = A * B; \ - HI = mul_hi(A, B) + HI = __clc_mul_hi(A, B) #define FULL_MAD(A, B, C, HI, LO) \ LO = ((A) * (B) + (C)); \ - HI = mul_hi(A, B); \ + HI = __clc_mul_hi(A, B); \ HI += LO < C _CLC_DEF int __clc_argReductionLargeS(float *r, float *rr, float x) { @@ -277,7 +279,7 @@ _CLC_DEF int __clc_argReductionLargeS(float *r, float *rr, float x) { p5 = p5 ^ flip; // Find exponent and shift away leading zeroes and hidden bit - xe = clz(p7) + 1; + xe = __clc_clz(p7) + 1; shift = 32 - xe; p7 = bitalign(p7, p6, shift); p6 = bitalign(p6, p5, shift); @@ -290,7 +292,7 @@ _CLC_DEF int __clc_argReductionLargeS(float *r, float *rr, float x) { // Get 24 more bits of fraction in another float, there are not long strings // of zeroes here - int xxe = clz(p7) + 1; + int xxe = __clc_clz(p7) + 1; p7 = bitalign(p7, p6, 32 - xxe); float q0 = as_float(sign | ((127 - (xe + 23 + xxe)) << 23) | (p7 >> 9)); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits