https://github.com/frasercrmck created https://github.com/llvm/llvm-project/pull/116786
This commit moves over the OpenCL clz, hadd, mad24, mad_hi, mul24, mul_hi, popcount, rhadd, and upsample builtins to the CLC library. There are no changes to any target's CLC libraries. The OpenCL mad_hi builtin wasn't previously publicly available from the CLC libraries, as it was hash-defined to mul_hi in the header files. That issue has been fixed, and mad_hi is now exposed. The custom AMD implementation/workaround for popcount has been removed as it was only valid for clang < 7. There are still three integer functions which haven't been moved over. The OpenCL add_sat, sub_sat, and mad_sat builtins require saturating conversion builtins which haven't yet been ported. >From 3f05aee5651a8364d4f3ba45bfc8024ff5beec8c Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Mon, 18 Nov 2024 17:29:42 +0000 Subject: [PATCH] [libclc] Move several integer functions to CLC library This commit moves over the OpenCL clz, hadd, mad24, mad_hi, mul24, mul_hi, popcount, rhadd, and upsample builtins to the CLC library. There are no changes to any target's CLC libraries. The OpenCL mad_hi builtin wasn't previously publicly available from the CLC libraries, as it was hash-defined to mul_hi in the header files. That issue has been fixed, and mad_hi is now exposed. The custom AMD implementation/workaround for popcount has been removed as it was only valid for clang < 7. There are still three integer functions which haven't been moved over. The OpenCL add_sat, sub_sat, and mad_sat builtins require saturating conversion builtins which haven't yet been ported. 
--- libclc/amdgcn/lib/SOURCES | 1 - libclc/amdgcn/lib/integer/popcount.cl | 6 - libclc/amdgcn/lib/integer/popcount.inc | 17 --- libclc/clc/include/clc/integer/binary_decl.h | 2 + libclc/clc/include/clc/integer/clc_clz.h | 21 +++ libclc/clc/include/clc/integer/clc_hadd.h | 21 +++ libclc/clc/include/clc/integer/clc_mad24.h | 21 +++ libclc/clc/include/clc/integer/clc_mad_hi.h | 8 ++ libclc/clc/include/clc/integer/clc_mul24.h | 21 +++ libclc/clc/include/clc/integer/clc_mul_hi.h | 21 +++ libclc/clc/include/clc/integer/clc_popcount.h | 19 +++ libclc/clc/include/clc/integer/clc_rhadd.h | 21 +++ libclc/clc/include/clc/integer/clc_upsample.h | 38 +++++ .../include/clc/integer/definitions.h | 7 +- libclc/clc/include/clc/integer/gentype24.inc | 134 ++++++++++++++++++ libclc/clc/include/clc/integer/ternary_decl.h | 2 + libclc/clc/include/clc/integer/unary_decl.h | 1 + .../clc/include/clc/integer/unary_intrin.inc | 26 ++++ libclc/clc/lib/generic/SOURCES | 7 + libclc/clc/lib/generic/integer/clc_clz.cl | 44 ++++++ libclc/clc/lib/generic/integer/clc_hadd.cl | 4 + libclc/clc/lib/generic/integer/clc_hadd.inc | 8 ++ libclc/clc/lib/generic/integer/clc_mad24.cl | 5 + libclc/clc/lib/generic/integer/clc_mad24.inc | 5 + libclc/clc/lib/generic/integer/clc_mul24.cl | 4 + .../lib/generic/integer/clc_mul24.inc} | 4 +- libclc/clc/lib/generic/integer/clc_mul_hi.cl | 113 +++++++++++++++ libclc/clc/lib/generic/integer/clc_rhadd.cl | 4 + libclc/clc/lib/generic/integer/clc_rhadd.inc | 8 ++ .../clc/lib/generic/integer/clc_upsample.cl | 45 ++++++ libclc/generic/include/clc/integer/clz.h | 6 +- libclc/generic/include/clc/integer/clz.inc | 1 - libclc/generic/include/clc/integer/hadd.h | 6 +- libclc/generic/include/clc/integer/hadd.inc | 1 - libclc/generic/include/clc/integer/mad24.h | 9 +- libclc/generic/include/clc/integer/mad24.inc | 1 - libclc/generic/include/clc/integer/mad_hi.h | 7 +- libclc/generic/include/clc/integer/mul24.h | 9 +- libclc/generic/include/clc/integer/mul24.inc | 1 - 
libclc/generic/include/clc/integer/mul_hi.h | 6 +- libclc/generic/include/clc/integer/mul_hi.inc | 1 - libclc/generic/include/clc/integer/popcount.h | 9 +- libclc/generic/include/clc/integer/rhadd.h | 6 +- libclc/generic/include/clc/integer/rhadd.inc | 1 - libclc/generic/include/clc/integer/upsample.h | 33 +++-- libclc/generic/include/integer/popcount.h | 3 - .../generic/include/integer/unary_intrin.inc | 20 --- libclc/generic/lib/SOURCES | 1 + libclc/generic/lib/integer/binary_def.inc | 8 ++ libclc/generic/lib/integer/clz.cl | 44 +----- libclc/generic/lib/integer/hadd.cl | 5 +- libclc/generic/lib/integer/hadd.inc | 6 - libclc/generic/lib/integer/mad24.cl | 7 +- libclc/generic/lib/integer/mad24.inc | 3 - libclc/generic/lib/integer/mad_hi.cl | 7 + libclc/generic/lib/integer/mul24.cl | 7 +- libclc/generic/lib/integer/mul_hi.cl | 110 +------------- libclc/generic/lib/integer/popcount.cl | 7 +- libclc/generic/lib/integer/rhadd.cl | 5 +- libclc/generic/lib/integer/rhadd.inc | 6 - libclc/generic/lib/integer/ternary_def.inc | 8 ++ libclc/generic/lib/integer/unary_def.inc | 7 + libclc/generic/lib/integer/upsample.cl | 54 +++---- libclc/generic/lib/math/clc_fma.cl | 3 +- libclc/generic/lib/math/clc_fmod.cl | 5 +- libclc/generic/lib/math/clc_remainder.cl | 5 +- libclc/generic/lib/math/clc_remquo.cl | 5 +- libclc/generic/lib/math/sincos_helpers.cl | 20 +-- 68 files changed, 780 insertions(+), 301 deletions(-) delete mode 100644 libclc/amdgcn/lib/integer/popcount.cl delete mode 100644 libclc/amdgcn/lib/integer/popcount.inc create mode 100644 libclc/clc/include/clc/integer/binary_decl.h create mode 100644 libclc/clc/include/clc/integer/clc_clz.h create mode 100644 libclc/clc/include/clc/integer/clc_hadd.h create mode 100644 libclc/clc/include/clc/integer/clc_mad24.h create mode 100644 libclc/clc/include/clc/integer/clc_mad_hi.h create mode 100644 libclc/clc/include/clc/integer/clc_mul24.h create mode 100644 libclc/clc/include/clc/integer/clc_mul_hi.h create mode 100644 
libclc/clc/include/clc/integer/clc_popcount.h create mode 100644 libclc/clc/include/clc/integer/clc_rhadd.h create mode 100644 libclc/clc/include/clc/integer/clc_upsample.h rename libclc/{generic => clc}/include/clc/integer/definitions.h (71%) create mode 100644 libclc/clc/include/clc/integer/gentype24.inc create mode 100644 libclc/clc/include/clc/integer/ternary_decl.h create mode 100644 libclc/clc/include/clc/integer/unary_decl.h create mode 100644 libclc/clc/include/clc/integer/unary_intrin.inc create mode 100644 libclc/clc/lib/generic/integer/clc_clz.cl create mode 100644 libclc/clc/lib/generic/integer/clc_hadd.cl create mode 100644 libclc/clc/lib/generic/integer/clc_hadd.inc create mode 100644 libclc/clc/lib/generic/integer/clc_mad24.cl create mode 100644 libclc/clc/lib/generic/integer/clc_mad24.inc create mode 100644 libclc/clc/lib/generic/integer/clc_mul24.cl rename libclc/{generic/lib/integer/mul24.inc => clc/lib/generic/integer/clc_mul24.inc} (68%) create mode 100644 libclc/clc/lib/generic/integer/clc_mul_hi.cl create mode 100644 libclc/clc/lib/generic/integer/clc_rhadd.cl create mode 100644 libclc/clc/lib/generic/integer/clc_rhadd.inc create mode 100644 libclc/clc/lib/generic/integer/clc_upsample.cl delete mode 100644 libclc/generic/include/clc/integer/clz.inc delete mode 100644 libclc/generic/include/clc/integer/hadd.inc delete mode 100644 libclc/generic/include/clc/integer/mad24.inc delete mode 100644 libclc/generic/include/clc/integer/mul24.inc delete mode 100644 libclc/generic/include/clc/integer/mul_hi.inc delete mode 100644 libclc/generic/include/clc/integer/rhadd.inc delete mode 100644 libclc/generic/include/integer/popcount.h delete mode 100644 libclc/generic/include/integer/unary_intrin.inc create mode 100644 libclc/generic/lib/integer/binary_def.inc delete mode 100644 libclc/generic/lib/integer/hadd.inc delete mode 100644 libclc/generic/lib/integer/mad24.inc create mode 100644 libclc/generic/lib/integer/mad_hi.cl delete mode 100644 
libclc/generic/lib/integer/rhadd.inc create mode 100644 libclc/generic/lib/integer/ternary_def.inc create mode 100644 libclc/generic/lib/integer/unary_def.inc diff --git a/libclc/amdgcn/lib/SOURCES b/libclc/amdgcn/lib/SOURCES index b235457f9ab7c3..4ea66385fe50ee 100644 --- a/libclc/amdgcn/lib/SOURCES +++ b/libclc/amdgcn/lib/SOURCES @@ -1,5 +1,4 @@ cl_khr_int64_extended_atomics/minmax_helpers.ll -integer/popcount.cl math/fmax.cl math/fmin.cl math/ldexp.cl diff --git a/libclc/amdgcn/lib/integer/popcount.cl b/libclc/amdgcn/lib/integer/popcount.cl deleted file mode 100644 index 3b493fbd146f01..00000000000000 --- a/libclc/amdgcn/lib/integer/popcount.cl +++ /dev/null @@ -1,6 +0,0 @@ -#include <clc/clc.h> -#include <clc/utils.h> -#include <integer/popcount.h> - -#define __CLC_BODY "popcount.inc" -#include <clc/integer/gentype.inc> diff --git a/libclc/amdgcn/lib/integer/popcount.inc b/libclc/amdgcn/lib/integer/popcount.inc deleted file mode 100644 index 402ddb768c6a6f..00000000000000 --- a/libclc/amdgcn/lib/integer/popcount.inc +++ /dev/null @@ -1,17 +0,0 @@ -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE popcount(__CLC_GENTYPE x) { -/* LLVM-4+ implements i16 ops for VI+ ASICs. However, ctpop implementation - * is missing until r326535. Therefore we have to convert sub i32 types to uint - * as a workaround. */ -#if __clang_major__ < 7 && __clang_major__ > 3 && __CLC_GENSIZE < 32 - /* Prevent sign extension on uint conversion */ - const __CLC_U_GENTYPE y = __CLC_XCONCAT(as_, __CLC_U_GENTYPE)(x); - /* Convert to uintX */ - const __CLC_XCONCAT(uint, __CLC_VECSIZE) z = __CLC_XCONCAT(convert_uint, __CLC_VECSIZE)(y); - /* Call popcount on uintX type */ - const __CLC_XCONCAT(uint, __CLC_VECSIZE) res = __clc_native_popcount(z); - /* Convert the result back to gentype. 
*/ - return __CLC_XCONCAT(convert_, __CLC_GENTYPE)(res); -#else - return __clc_native_popcount(x); -#endif -} diff --git a/libclc/clc/include/clc/integer/binary_decl.h b/libclc/clc/include/clc/integer/binary_decl.h new file mode 100644 index 00000000000000..b54f36ba9b6c3c --- /dev/null +++ b/libclc/clc/include/clc/integer/binary_decl.h @@ -0,0 +1,2 @@ +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x, + __CLC_GENTYPE y); diff --git a/libclc/clc/include/clc/integer/clc_clz.h b/libclc/clc/include/clc/integer/clc_clz.h new file mode 100644 index 00000000000000..1e2d23084bf7e3 --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_clz.h @@ -0,0 +1,21 @@ +#ifndef __CLC_INTEGER_CLC_CLZ_H__ +#define __CLC_INTEGER_CLC_CLZ_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible clz +#define __clc_clz clz +#else + +#include <clc/clcfunc.h> +#include <clc/clctypes.h> + +#define FUNCTION __clc_clz +#define __CLC_BODY "unary_decl.h" + +#include <clc/integer/gentype.inc> + +#undef FUNCTION + +#endif + +#endif // __CLC_INTEGER_CLC_CLZ_H__ diff --git a/libclc/clc/include/clc/integer/clc_hadd.h b/libclc/clc/include/clc/integer/clc_hadd.h new file mode 100644 index 00000000000000..7eb91ae45a8085 --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_hadd.h @@ -0,0 +1,21 @@ +#ifndef __CLC_INTEGER_CLC_HADD_H__ +#define __CLC_INTEGER_CLC_HADD_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible hadd +#define __clc_hadd hadd +#else + +#include <clc/clcfunc.h> +#include <clc/clctypes.h> + +#define FUNCTION __clc_hadd +#define __CLC_BODY "binary_decl.h" + +#include <clc/integer/gentype.inc> + +#undef FUNCTION + +#endif + +#endif // __CLC_INTEGER_CLC_HADD_H__ diff --git a/libclc/clc/include/clc/integer/clc_mad24.h b/libclc/clc/include/clc/integer/clc_mad24.h new file mode 100644 index 00000000000000..354b019e86688c --- /dev/null +++ 
b/libclc/clc/include/clc/integer/clc_mad24.h @@ -0,0 +1,21 @@ +#ifndef __CLC_INTEGER_CLC_MAD24_H__ +#define __CLC_INTEGER_CLC_MAD24_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible mad24 +#define __clc_mad24 mad24 +#else + +#include <clc/clcfunc.h> +#include <clc/clctypes.h> + +#define FUNCTION __clc_mad24 +#define __CLC_BODY "ternary_decl.h" + +#include <clc/integer/gentype24.inc> + +#undef FUNCTION + +#endif + +#endif // __CLC_INTEGER_CLC_MAD24_H__ diff --git a/libclc/clc/include/clc/integer/clc_mad_hi.h b/libclc/clc/include/clc/integer/clc_mad_hi.h new file mode 100644 index 00000000000000..24a590df6027a8 --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_mad_hi.h @@ -0,0 +1,8 @@ +#ifndef __CLC_INTEGER_CLC_MAD_HI_H__ +#define __CLC_INTEGER_CLC_MAD_HI_H__ + +#include <clc/integer/clc_mul_hi.h> + +#define __clc_mad_hi(a, b, c) (__clc_mul_hi((a), (b)) + (c)) + +#endif // __CLC_INTEGER_CLC_MAD_HI_H__ diff --git a/libclc/clc/include/clc/integer/clc_mul24.h b/libclc/clc/include/clc/integer/clc_mul24.h new file mode 100644 index 00000000000000..3355a97affea3c --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_mul24.h @@ -0,0 +1,21 @@ +#ifndef __CLC_INTEGER_CLC_MUL24_H__ +#define __CLC_INTEGER_CLC_MUL24_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible mul24 +#define __clc_mul24 mul24 +#else + +#include <clc/clcfunc.h> +#include <clc/clctypes.h> + +#define FUNCTION __clc_mul24 +#define __CLC_BODY "binary_decl.h" + +#include <clc/integer/gentype24.inc> + +#undef FUNCTION + +#endif + +#endif // __CLC_INTEGER_CLC_MUL24_H__ diff --git a/libclc/clc/include/clc/integer/clc_mul_hi.h b/libclc/clc/include/clc/integer/clc_mul_hi.h new file mode 100644 index 00000000000000..65b5dce04ab9fb --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_mul_hi.h @@ -0,0 +1,21 @@ +#ifndef __CLC_INTEGER_CLC_MUL_HI_H__ +#define 
__CLC_INTEGER_CLC_MUL_HI_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible mul_hi +#define __clc_mul_hi mul_hi +#else + +#include <clc/clcfunc.h> +#include <clc/clctypes.h> + +#define FUNCTION __clc_mul_hi +#define __CLC_BODY "binary_decl.h" + +#include <clc/integer/gentype.inc> + +#undef FUNCTION + +#endif + +#endif // __CLC_INTEGER_CLC_MUL_HI_H__ diff --git a/libclc/clc/include/clc/integer/clc_popcount.h b/libclc/clc/include/clc/integer/clc_popcount.h new file mode 100644 index 00000000000000..7e785a5c1ebe7e --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_popcount.h @@ -0,0 +1,19 @@ +#ifndef __CLC_INTEGER_CLC_POPCOUNT_H__ +#define __CLC_INTEGER_CLC_POPCOUNT_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible popcount +#define __clc_popcount popcount +#else + +// Map the function to an LLVM intrinsic +#define __CLC_FUNCTION __clc_popcount +#define __CLC_INTRINSIC "llvm.ctpop" +#include <clc/integer/unary_intrin.inc> + +#undef __CLC_INTRINSIC +#undef __CLC_FUNCTION + +#endif + +#endif // __CLC_INTEGER_CLC_POPCOUNT_H__ diff --git a/libclc/clc/include/clc/integer/clc_rhadd.h b/libclc/clc/include/clc/integer/clc_rhadd.h new file mode 100644 index 00000000000000..1fe3920a320ffb --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_rhadd.h @@ -0,0 +1,21 @@ +#ifndef __CLC_INTEGER_CLC_RHADD_H__ +#define __CLC_INTEGER_CLC_RHADD_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible rhadd +#define __clc_rhadd rhadd +#else + +#include <clc/clcfunc.h> +#include <clc/clctypes.h> + +#define FUNCTION __clc_rhadd +#define __CLC_BODY "binary_decl.h" + +#include <clc/integer/gentype.inc> + +#undef FUNCTION + +#endif + +#endif // __CLC_INTEGER_CLC_RHADD_H__ diff --git a/libclc/clc/include/clc/integer/clc_upsample.h b/libclc/clc/include/clc/integer/clc_upsample.h new file mode 100644 
index 00000000000000..aebda96434fb41 --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_upsample.h @@ -0,0 +1,38 @@ +#ifndef __CLC_INTEGER_CLC_UPSAMPLE_H__ +#define __CLC_INTEGER_CLC_UPSAMPLE_H__ + +#if defined(CLC_CLSPV) || defined(CLC_SPIRV) +// clspv and spir-v targets provide their own OpenCL-compatible upsample +#define __clc_upsample upsample +#else + +#include <clc/clctypes.h> + +#define __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) \ + _CLC_OVERLOAD _CLC_DECL BGENTYPE __clc_upsample(GENTYPE hi, UGENTYPE lo); + +#define __CLC_UPSAMPLE_VEC(BGENTYPE, GENTYPE, UGENTYPE) \ + __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) \ + __CLC_UPSAMPLE_DECL(BGENTYPE##2, GENTYPE##2, UGENTYPE##2) \ + __CLC_UPSAMPLE_DECL(BGENTYPE##3, GENTYPE##3, UGENTYPE##3) \ + __CLC_UPSAMPLE_DECL(BGENTYPE##4, GENTYPE##4, UGENTYPE##4) \ + __CLC_UPSAMPLE_DECL(BGENTYPE##8, GENTYPE##8, UGENTYPE##8) \ + __CLC_UPSAMPLE_DECL(BGENTYPE##16, GENTYPE##16, UGENTYPE##16) + +#define __CLC_UPSAMPLE_TYPES() \ + __CLC_UPSAMPLE_VEC(short, char, uchar) \ + __CLC_UPSAMPLE_VEC(ushort, uchar, uchar) \ + __CLC_UPSAMPLE_VEC(int, short, ushort) \ + __CLC_UPSAMPLE_VEC(uint, ushort, ushort) \ + __CLC_UPSAMPLE_VEC(long, int, uint) \ + __CLC_UPSAMPLE_VEC(ulong, uint, uint) + +__CLC_UPSAMPLE_TYPES() + +#undef __CLC_UPSAMPLE_TYPES +#undef __CLC_UPSAMPLE_DECL +#undef __CLC_UPSAMPLE_VEC + +#endif + +#endif // __CLC_INTEGER_CLC_UPSAMPLE_H__ diff --git a/libclc/generic/include/clc/integer/definitions.h b/libclc/clc/include/clc/integer/definitions.h similarity index 71% rename from libclc/generic/include/clc/integer/definitions.h rename to libclc/clc/include/clc/integer/definitions.h index 0079c30123db80..18a9e54dec75c6 100644 --- a/libclc/generic/include/clc/integer/definitions.h +++ b/libclc/clc/include/clc/integer/definitions.h @@ -1,7 +1,10 @@ +#ifndef __CLC_INTEGER_DEFINITIONS_H__ +#define __CLC_INTEGER_DEFINITIONS_H__ + #define CHAR_BIT 8 #define INT_MAX 2147483647 #define INT_MIN (-2147483647 - 1) -#define 
LONG_MAX 0x7fffffffffffffffL +#define LONG_MAX 0x7fffffffffffffffL #define LONG_MIN (-0x7fffffffffffffffL - 1) #define CHAR_MAX SCHAR_MAX #define CHAR_MIN SCHAR_MIN @@ -13,3 +16,5 @@ #define USHRT_MAX 65535 #define UINT_MAX 0xffffffff #define ULONG_MAX 0xffffffffffffffffUL + +#endif // __CLC_INTEGER_DEFINITIONS_H__ diff --git a/libclc/clc/include/clc/integer/gentype24.inc b/libclc/clc/include/clc/integer/gentype24.inc new file mode 100644 index 00000000000000..12859029312405 --- /dev/null +++ b/libclc/clc/include/clc/integer/gentype24.inc @@ -0,0 +1,134 @@ +#define __CLC_GENSIZE 32 +#undef __CLC_SCALAR_GENTYPE +#define __CLC_SCALAR_GENTYPE int + +#define __CLC_GENTYPE int +#define __CLC_U_GENTYPE uint +#define __CLC_S_GENTYPE int +#define __CLC_SCALAR 1 +#define __CLC_VECSIZE +#include __CLC_BODY +#undef __CLC_VECSIZE +#undef __CLC_SCALAR +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE int2 +#define __CLC_U_GENTYPE uint2 +#define __CLC_S_GENTYPE int2 +#define __CLC_VECSIZE 2 +#include __CLC_BODY +#undef __CLC_VECSIZE +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE int3 +#define __CLC_U_GENTYPE uint3 +#define __CLC_S_GENTYPE int3 +#define __CLC_VECSIZE 3 +#include __CLC_BODY +#undef __CLC_VECSIZE +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE int4 +#define __CLC_U_GENTYPE uint4 +#define __CLC_S_GENTYPE int4 +#define __CLC_VECSIZE 4 +#include __CLC_BODY +#undef __CLC_VECSIZE +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE int8 +#define __CLC_U_GENTYPE uint8 +#define __CLC_S_GENTYPE int8 +#define __CLC_VECSIZE 8 +#include __CLC_BODY +#undef __CLC_VECSIZE +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE int16 +#define __CLC_U_GENTYPE uint16 +#define __CLC_S_GENTYPE int16 +#define __CLC_VECSIZE 16 +#include __CLC_BODY +#undef __CLC_VECSIZE 
+#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#undef __CLC_SCALAR_GENTYPE +#define __CLC_SCALAR_GENTYPE uint + +#define __CLC_GENTYPE uint +#define __CLC_U_GENTYPE uint +#define __CLC_S_GENTYPE int +#define __CLC_SCALAR 1 +#define __CLC_VECSIZE +#include __CLC_BODY +#undef __CLC_VECSIZE +#undef __CLC_SCALAR +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE uint2 +#define __CLC_U_GENTYPE uint2 +#define __CLC_S_GENTYPE int2 +#define __CLC_VECSIZE 2 +#include __CLC_BODY +#undef __CLC_VECSIZE +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE uint3 +#define __CLC_U_GENTYPE uint3 +#define __CLC_S_GENTYPE int3 +#define __CLC_VECSIZE 3 +#include __CLC_BODY +#undef __CLC_VECSIZE +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE uint4 +#define __CLC_U_GENTYPE uint4 +#define __CLC_S_GENTYPE int4 +#define __CLC_VECSIZE 4 +#include __CLC_BODY +#undef __CLC_VECSIZE +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE uint8 +#define __CLC_U_GENTYPE uint8 +#define __CLC_S_GENTYPE int8 +#define __CLC_VECSIZE 8 +#include __CLC_BODY +#undef __CLC_VECSIZE +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#define __CLC_GENTYPE uint16 +#define __CLC_U_GENTYPE uint16 +#define __CLC_S_GENTYPE int16 +#define __CLC_VECSIZE 16 +#include __CLC_BODY +#undef __CLC_VECSIZE +#undef __CLC_GENTYPE +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE + +#undef __CLC_GENSIZE +#undef __CLC_SCALAR_GENTYPE +#undef __CLC_BODY diff --git a/libclc/clc/include/clc/integer/ternary_decl.h b/libclc/clc/include/clc/integer/ternary_decl.h new file mode 100644 index 00000000000000..495d5c800c62ad --- /dev/null +++ b/libclc/clc/include/clc/integer/ternary_decl.h @@ -0,0 +1,2 @@ +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x, __CLC_GENTYPE y, + __CLC_GENTYPE z); diff --git 
a/libclc/clc/include/clc/integer/unary_decl.h b/libclc/clc/include/clc/integer/unary_decl.h new file mode 100644 index 00000000000000..cf482efb55183e --- /dev/null +++ b/libclc/clc/include/clc/integer/unary_decl.h @@ -0,0 +1 @@ +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x); diff --git a/libclc/clc/include/clc/integer/unary_intrin.inc b/libclc/clc/include/clc/integer/unary_intrin.inc new file mode 100644 index 00000000000000..169999a8260485 --- /dev/null +++ b/libclc/clc/include/clc/integer/unary_intrin.inc @@ -0,0 +1,26 @@ +#define __CLC_INTRINSIC_DEF(SCALAR_TYPE, BIT_SIZE) \ + _CLC_OVERLOAD SCALAR_TYPE __CLC_FUNCTION(SCALAR_TYPE x) __asm( \ + __CLC_INTRINSIC ".i" BIT_SIZE); \ + _CLC_OVERLOAD SCALAR_TYPE##2 __CLC_FUNCTION(SCALAR_TYPE##2 x) __asm( \ + __CLC_INTRINSIC ".v2i" BIT_SIZE); \ + _CLC_OVERLOAD SCALAR_TYPE##3 __CLC_FUNCTION(SCALAR_TYPE##3 x) __asm( \ + __CLC_INTRINSIC ".v3i" BIT_SIZE); \ + _CLC_OVERLOAD SCALAR_TYPE##4 __CLC_FUNCTION(SCALAR_TYPE##4 x) __asm( \ + __CLC_INTRINSIC ".v4i" BIT_SIZE); \ + _CLC_OVERLOAD SCALAR_TYPE##8 __CLC_FUNCTION(SCALAR_TYPE##8 x) __asm( \ + __CLC_INTRINSIC ".v8i" BIT_SIZE); \ + _CLC_OVERLOAD SCALAR_TYPE##16 __CLC_FUNCTION(SCALAR_TYPE##16 x) __asm( \ + __CLC_INTRINSIC ".v16i" BIT_SIZE); + +__CLC_INTRINSIC_DEF(char, "8") +__CLC_INTRINSIC_DEF(uchar, "8") +__CLC_INTRINSIC_DEF(short, "16") +__CLC_INTRINSIC_DEF(ushort, "16") +__CLC_INTRINSIC_DEF(int, "32") +__CLC_INTRINSIC_DEF(uint, "32") +__CLC_INTRINSIC_DEF(long, "64") +__CLC_INTRINSIC_DEF(ulong, "64") + +#undef __CLC_FUNCTION +#undef __CLC_INTRINSIC +#undef __CLC_INTRINSIC_DEF diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES index d7ffaaf6dc3f42..72c5821176ce8e 100644 --- a/libclc/clc/lib/generic/SOURCES +++ b/libclc/clc/lib/generic/SOURCES @@ -1,6 +1,13 @@ geometric/clc_dot.cl integer/clc_abs.cl integer/clc_abs_diff.cl +integer/clc_clz.cl +integer/clc_hadd.cl +integer/clc_mad24.cl +integer/clc_mul24.cl +integer/clc_mul_hi.cl 
+integer/clc_rhadd.cl +integer/clc_upsample.cl relational/clc_all.cl relational/clc_any.cl relational/clc_bitselect.cl diff --git a/libclc/clc/lib/generic/integer/clc_clz.cl b/libclc/clc/lib/generic/integer/clc_clz.cl new file mode 100644 index 00000000000000..592b65f262bd6b --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_clz.cl @@ -0,0 +1,44 @@ +#include <clc/clcmacro.h> +#include <clc/integer/clc_clz.h> +#include <clc/internal/clc.h> + +_CLC_OVERLOAD _CLC_DEF char __clc_clz(char x) { + return __clc_clz((ushort)(uchar)x) - 8; +} + +_CLC_OVERLOAD _CLC_DEF uchar __clc_clz(uchar x) { + return __clc_clz((ushort)x) - 8; +} + +_CLC_OVERLOAD _CLC_DEF short __clc_clz(short x) { + return x ? __builtin_clzs(x) : 16; +} + +_CLC_OVERLOAD _CLC_DEF ushort __clc_clz(ushort x) { + return x ? __builtin_clzs(x) : 16; +} + +_CLC_OVERLOAD _CLC_DEF int __clc_clz(int x) { + return x ? __builtin_clz(x) : 32; +} + +_CLC_OVERLOAD _CLC_DEF uint __clc_clz(uint x) { + return x ? __builtin_clz(x) : 32; +} + +_CLC_OVERLOAD _CLC_DEF long __clc_clz(long x) { + return x ? __builtin_clzl(x) : 64; +} + +_CLC_OVERLOAD _CLC_DEF ulong __clc_clz(ulong x) { + return x ? 
__builtin_clzl(x) : 64; +} + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, __clc_clz, char) +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, __clc_clz, uchar) +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, __clc_clz, short) +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, __clc_clz, ushort) +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, __clc_clz, int) +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, __clc_clz, uint) +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, __clc_clz, long) +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, __clc_clz, ulong) diff --git a/libclc/clc/lib/generic/integer/clc_hadd.cl b/libclc/clc/lib/generic/integer/clc_hadd.cl new file mode 100644 index 00000000000000..8e91d41a843aaa --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_hadd.cl @@ -0,0 +1,4 @@ +#include <clc/internal/clc.h> + +#define __CLC_BODY <clc_hadd.inc> +#include <clc/integer/gentype.inc> diff --git a/libclc/clc/lib/generic/integer/clc_hadd.inc b/libclc/clc/lib/generic/integer/clc_hadd.inc new file mode 100644 index 00000000000000..14d921599446b3 --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_hadd.inc @@ -0,0 +1,8 @@ +// hadd = (x+y)>>1 +// This can be simplified to x>>1 + y>>1 + (1 if both x and y have the 1s bit +// set) This saves us having to do any checks for overflow in the addition sum +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_hadd(__CLC_GENTYPE x, + __CLC_GENTYPE y) { + return (x >> (__CLC_GENTYPE)1) + (y >> (__CLC_GENTYPE)1) + + (x & y & (__CLC_GENTYPE)1); +} diff --git a/libclc/clc/lib/generic/integer/clc_mad24.cl b/libclc/clc/lib/generic/integer/clc_mad24.cl new file mode 100644 index 00000000000000..86c319cff6d245 --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_mad24.cl @@ -0,0 +1,5 @@ +#include <clc/internal/clc.h> +#include <clc/integer/clc_mul24.h> + +#define __CLC_BODY <clc_mad24.inc> +#include <clc/integer/gentype24.inc> diff --git a/libclc/clc/lib/generic/integer/clc_mad24.inc 
b/libclc/clc/lib/generic/integer/clc_mad24.inc new file mode 100644 index 00000000000000..61c8587d4f86fc --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_mad24.inc @@ -0,0 +1,5 @@ +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_mad24(__CLC_GENTYPE x, + __CLC_GENTYPE y, + __CLC_GENTYPE z) { + return __clc_mul24(x, y) + z; +} diff --git a/libclc/clc/lib/generic/integer/clc_mul24.cl b/libclc/clc/lib/generic/integer/clc_mul24.cl new file mode 100644 index 00000000000000..6513a896a8b1d2 --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_mul24.cl @@ -0,0 +1,4 @@ +#include <clc/internal/clc.h> + +#define __CLC_BODY <clc_mul24.inc> +#include <clc/integer/gentype24.inc> diff --git a/libclc/generic/lib/integer/mul24.inc b/libclc/clc/lib/generic/integer/clc_mul24.inc similarity index 68% rename from libclc/generic/lib/integer/mul24.inc rename to libclc/clc/lib/generic/integer/clc_mul24.inc index 95a2f1d6f31bab..d7e8091c98a314 100644 --- a/libclc/generic/lib/integer/mul24.inc +++ b/libclc/clc/lib/generic/integer/clc_mul24.inc @@ -1,10 +1,10 @@ - // We need to use shifts here in order to mantain the sign bit for signed // integers. The compiler should optimize this to (x & 0x00FFFFFF) for // unsigned integers. 
#define CONVERT_TO_24BIT(x) (((x) << 8) >> 8) -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mul24(__CLC_GENTYPE x, __CLC_GENTYPE y){ +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_mul24(__CLC_GENTYPE x, + __CLC_GENTYPE y) { return CONVERT_TO_24BIT(x) * CONVERT_TO_24BIT(y); } diff --git a/libclc/clc/lib/generic/integer/clc_mul_hi.cl b/libclc/clc/lib/generic/integer/clc_mul_hi.cl new file mode 100644 index 00000000000000..07486abac52b73 --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_mul_hi.cl @@ -0,0 +1,113 @@ +#include <clc/integer/clc_hadd.h> +#include <clc/integer/definitions.h> +#include <clc/internal/clc.h> + +// For all types EXCEPT long, which is implemented separately +#define __CLC_MUL_HI_IMPL(BGENTYPE, GENTYPE, GENSIZE) \ + _CLC_OVERLOAD _CLC_DEF GENTYPE __clc_mul_hi(GENTYPE x, GENTYPE y) { \ + return (GENTYPE)(((BGENTYPE)x * (BGENTYPE)y) >> GENSIZE); \ + } + +// FOIL-based long mul_hi +// +// Summary: Treat mul_hi(long x, long y) as: +// (a+b) * (c+d) where a and c are the high-order parts of x and y respectively +// and b and d are the low-order parts of x and y. +// Thinking back to algebra, we use FOIL to do the work. + +_CLC_OVERLOAD _CLC_DEF long __clc_mul_hi(long x, long y) { + long f, o, i; + ulong l; + + // Move the high/low halves of x/y into the lower 32-bits of variables so + // that we can multiply them without worrying about overflow. + long x_hi = x >> 32; + long x_lo = x & UINT_MAX; + long y_hi = y >> 32; + long y_lo = y & UINT_MAX; + + // Multiply all of the components according to FOIL method + f = x_hi * y_hi; + o = x_hi * y_lo; + i = x_lo * y_hi; + l = x_lo * y_lo; + + // Now add the components back together in the following steps: + // F: doesn't need to be modified + // O/I: Need to be added together. + // L: Shift right by 32-bits, then add into the sum of O and I + // Once O/I/L are summed up, then shift the sum by 32-bits and add to F. 
+ // + // We use hadd to give us a bit of extra precision for the intermediate sums + // but as a result, we shift by 31 bits instead of 32 + return (long)(f + (__clc_hadd(o, (i + (long)((ulong)l >> 32))) >> 31)); +} + +_CLC_OVERLOAD _CLC_DEF ulong __clc_mul_hi(ulong x, ulong y) { + ulong f, o, i; + ulong l; + + // Move the high/low halves of x/y into the lower 32-bits of variables so + // that we can multiply them without worrying about overflow. + ulong x_hi = x >> 32; + ulong x_lo = x & UINT_MAX; + ulong y_hi = y >> 32; + ulong y_lo = y & UINT_MAX; + + // Multiply all of the components according to FOIL method + f = x_hi * y_hi; + o = x_hi * y_lo; + i = x_lo * y_hi; + l = x_lo * y_lo; + + // Now add the components back together, taking care to respect the fact that: + // F: doesn't need to be modified + // O/I: Need to be added together. + // L: Shift right by 32-bits, then add into the sum of O and I + // Once O/I/L are summed up, then shift the sum by 32-bits and add to F. + // + // We use hadd to give us a bit of extra precision for the intermediate sums + // but as a result, we shift by 31 bits instead of 32 + return (f + (__clc_hadd(o, (i + (l >> 32))) >> 31)); +} + +#define __CLC_MUL_HI_VEC(GENTYPE) \ + _CLC_OVERLOAD _CLC_DEF GENTYPE##2 __clc_mul_hi(GENTYPE##2 x, GENTYPE##2 y) { \ + return (GENTYPE##2){__clc_mul_hi(x.s0, y.s0), __clc_mul_hi(x.s1, y.s1)}; \ + } \ + _CLC_OVERLOAD _CLC_DEF GENTYPE##3 __clc_mul_hi(GENTYPE##3 x, GENTYPE##3 y) { \ + return (GENTYPE##3){__clc_mul_hi(x.s0, y.s0), __clc_mul_hi(x.s1, y.s1), \ + __clc_mul_hi(x.s2, y.s2)}; \ + } \ + _CLC_OVERLOAD _CLC_DEF GENTYPE##4 __clc_mul_hi(GENTYPE##4 x, GENTYPE##4 y) { \ + return (GENTYPE##4){__clc_mul_hi(x.lo, y.lo), __clc_mul_hi(x.hi, y.hi)}; \ + } \ + _CLC_OVERLOAD _CLC_DEF GENTYPE##8 __clc_mul_hi(GENTYPE##8 x, GENTYPE##8 y) { \ + return (GENTYPE##8){__clc_mul_hi(x.lo, y.lo), __clc_mul_hi(x.hi, y.hi)}; \ + } \ + _CLC_OVERLOAD _CLC_DEF GENTYPE##16 __clc_mul_hi(GENTYPE##16 x, \ + GENTYPE##16 y) 
{ \ + return (GENTYPE##16){__clc_mul_hi(x.lo, y.lo), __clc_mul_hi(x.hi, y.hi)}; \ + } + +#define __CLC_MUL_HI_DEC_IMPL(BTYPE, TYPE, BITS) \ + __CLC_MUL_HI_IMPL(BTYPE, TYPE, BITS) \ + __CLC_MUL_HI_VEC(TYPE) + +#define __CLC_MUL_HI_TYPES() \ + __CLC_MUL_HI_DEC_IMPL(short, char, 8) \ + __CLC_MUL_HI_DEC_IMPL(ushort, uchar, 8) \ + __CLC_MUL_HI_DEC_IMPL(int, short, 16) \ + __CLC_MUL_HI_DEC_IMPL(uint, ushort, 16) \ + __CLC_MUL_HI_DEC_IMPL(long, int, 32) \ + __CLC_MUL_HI_DEC_IMPL(ulong, uint, 32) \ + __CLC_MUL_HI_VEC(long) \ + __CLC_MUL_HI_VEC(ulong) + +__CLC_MUL_HI_TYPES() + +#undef __CLC_MUL_HI_TYPES +#undef __CLC_MUL_HI_DEC_IMPL +#undef __CLC_MUL_HI_IMPL +#undef __CLC_MUL_HI_VEC +#undef __CLC_B32 diff --git a/libclc/clc/lib/generic/integer/clc_rhadd.cl b/libclc/clc/lib/generic/integer/clc_rhadd.cl new file mode 100644 index 00000000000000..00bd2f0ac8058a --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_rhadd.cl @@ -0,0 +1,4 @@ +#include <clc/internal/clc.h> + +#define __CLC_BODY <clc_rhadd.inc> +#include <clc/integer/gentype.inc> diff --git a/libclc/clc/lib/generic/integer/clc_rhadd.inc b/libclc/clc/lib/generic/integer/clc_rhadd.inc new file mode 100644 index 00000000000000..d363c42061ffe1 --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_rhadd.inc @@ -0,0 +1,8 @@ +// rhadd = (x+y+1)>>1 +// This simplifies to (x>>1) + (y>>1) + (1 if either x or y has the 1s bit +// set). This saves us having to do any checks for overflow in the addition sums +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_rhadd(__CLC_GENTYPE x, + __CLC_GENTYPE y) { + return (x >> (__CLC_GENTYPE)1) + (y >> (__CLC_GENTYPE)1) + + ((x & (__CLC_GENTYPE)1) | (y & (__CLC_GENTYPE)1)); +} diff --git a/libclc/clc/lib/generic/integer/clc_upsample.cl b/libclc/clc/lib/generic/integer/clc_upsample.cl new file mode 100644 index 00000000000000..303bb4aa39330a --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_upsample.cl @@ -0,0 +1,45 @@ +#include <clc/internal/clc.h> + +#define 
__CLC_UPSAMPLE_IMPL(BGENTYPE, GENTYPE, UGENTYPE, GENSIZE) \ + _CLC_OVERLOAD _CLC_DEF BGENTYPE __clc_upsample(GENTYPE hi, UGENTYPE lo) { \ + return ((BGENTYPE)hi << GENSIZE) | lo; \ + } \ + _CLC_OVERLOAD _CLC_DEF BGENTYPE##2 __clc_upsample(GENTYPE##2 hi, \ + UGENTYPE##2 lo) { \ + return (BGENTYPE##2){__clc_upsample(hi.s0, lo.s0), \ + __clc_upsample(hi.s1, lo.s1)}; \ + } \ + _CLC_OVERLOAD _CLC_DEF BGENTYPE##3 __clc_upsample(GENTYPE##3 hi, \ + UGENTYPE##3 lo) { \ + return (BGENTYPE##3){__clc_upsample(hi.s0, lo.s0), \ + __clc_upsample(hi.s1, lo.s1), \ + __clc_upsample(hi.s2, lo.s2)}; \ + } \ + _CLC_OVERLOAD _CLC_DEF BGENTYPE##4 __clc_upsample(GENTYPE##4 hi, \ + UGENTYPE##4 lo) { \ + return (BGENTYPE##4){__clc_upsample(hi.lo, lo.lo), \ + __clc_upsample(hi.hi, lo.hi)}; \ + } \ + _CLC_OVERLOAD _CLC_DEF BGENTYPE##8 __clc_upsample(GENTYPE##8 hi, \ + UGENTYPE##8 lo) { \ + return (BGENTYPE##8){__clc_upsample(hi.lo, lo.lo), \ + __clc_upsample(hi.hi, lo.hi)}; \ + } \ + _CLC_OVERLOAD _CLC_DEF BGENTYPE##16 __clc_upsample(GENTYPE##16 hi, \ + UGENTYPE##16 lo) { \ + return (BGENTYPE##16){__clc_upsample(hi.lo, lo.lo), \ + __clc_upsample(hi.hi, lo.hi)}; \ + } + +#define __CLC_UPSAMPLE_TYPES() \ + __CLC_UPSAMPLE_IMPL(short, char, uchar, 8) \ + __CLC_UPSAMPLE_IMPL(ushort, uchar, uchar, 8) \ + __CLC_UPSAMPLE_IMPL(int, short, ushort, 16) \ + __CLC_UPSAMPLE_IMPL(uint, ushort, ushort, 16) \ + __CLC_UPSAMPLE_IMPL(long, int, uint, 32) \ + __CLC_UPSAMPLE_IMPL(ulong, uint, uint, 32) + +__CLC_UPSAMPLE_TYPES() + +#undef __CLC_UPSAMPLE_TYPES +#undef __CLC_UPSAMPLE_IMPL diff --git a/libclc/generic/include/clc/integer/clz.h b/libclc/generic/include/clc/integer/clz.h index f7cdbf78ec0607..c50e7878810ec9 100644 --- a/libclc/generic/include/clc/integer/clz.h +++ b/libclc/generic/include/clc/integer/clz.h @@ -1,2 +1,6 @@ -#define __CLC_BODY <clc/integer/clz.inc> +#define FUNCTION clz +#define __CLC_BODY "unary_decl.h" + #include <clc/integer/gentype.inc> + +#undef FUNCTION diff --git 
a/libclc/generic/include/clc/integer/clz.inc b/libclc/generic/include/clc/integer/clz.inc deleted file mode 100644 index 45826d10c9fafe..00000000000000 --- a/libclc/generic/include/clc/integer/clz.inc +++ /dev/null @@ -1 +0,0 @@ -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE clz(__CLC_GENTYPE x); diff --git a/libclc/generic/include/clc/integer/hadd.h b/libclc/generic/include/clc/integer/hadd.h index 37304e26cc2d62..24ce4604c88e42 100644 --- a/libclc/generic/include/clc/integer/hadd.h +++ b/libclc/generic/include/clc/integer/hadd.h @@ -1,2 +1,6 @@ -#define __CLC_BODY <clc/integer/hadd.inc> +#define FUNCTION hadd +#define __CLC_BODY "binary_decl.h" + #include <clc/integer/gentype.inc> + +#undef FUNCTION diff --git a/libclc/generic/include/clc/integer/hadd.inc b/libclc/generic/include/clc/integer/hadd.inc deleted file mode 100644 index f698989cef2026..00000000000000 --- a/libclc/generic/include/clc/integer/hadd.inc +++ /dev/null @@ -1 +0,0 @@ -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE hadd(__CLC_GENTYPE x, __CLC_GENTYPE y); diff --git a/libclc/generic/include/clc/integer/mad24.h b/libclc/generic/include/clc/integer/mad24.h index 0c120faac2b15b..7166f3df509cc0 100644 --- a/libclc/generic/include/clc/integer/mad24.h +++ b/libclc/generic/include/clc/integer/mad24.h @@ -1,3 +1,6 @@ -#define __CLC_BODY <clc/integer/mad24.inc> -#include <clc/integer/integer-gentype.inc> -#undef __CLC_BODY +#define FUNCTION mad24 +#define __CLC_BODY "ternary_decl.h" + +#include <clc/integer/gentype24.inc> + +#undef FUNCTION diff --git a/libclc/generic/include/clc/integer/mad24.inc b/libclc/generic/include/clc/integer/mad24.inc deleted file mode 100644 index 81fe0c2a89266c..00000000000000 --- a/libclc/generic/include/clc/integer/mad24.inc +++ /dev/null @@ -1 +0,0 @@ -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mad24(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z); diff --git a/libclc/generic/include/clc/integer/mad_hi.h b/libclc/generic/include/clc/integer/mad_hi.h index 863ce92d9f2d8c..9ff70851a738ad 
100644 --- a/libclc/generic/include/clc/integer/mad_hi.h +++ b/libclc/generic/include/clc/integer/mad_hi.h @@ -1 +1,6 @@ -#define mad_hi(a, b, c) (mul_hi((a),(b))+(c)) +#define FUNCTION mad_hi +#define __CLC_BODY "ternary_decl.h" + +#include <clc/integer/gentype.inc> + +#undef FUNCTION diff --git a/libclc/generic/include/clc/integer/mul24.h b/libclc/generic/include/clc/integer/mul24.h index 4f97098d70f0f4..5a84b039260851 100644 --- a/libclc/generic/include/clc/integer/mul24.h +++ b/libclc/generic/include/clc/integer/mul24.h @@ -1,3 +1,6 @@ -#define __CLC_BODY <clc/integer/mul24.inc> -#include <clc/integer/integer-gentype.inc> -#undef __CLC_BODY +#define FUNCTION mul24 +#define __CLC_BODY "binary_decl.h" + +#include <clc/integer/gentype24.inc> + +#undef FUNCTION diff --git a/libclc/generic/include/clc/integer/mul24.inc b/libclc/generic/include/clc/integer/mul24.inc deleted file mode 100644 index 8cbf7c10ac447d..00000000000000 --- a/libclc/generic/include/clc/integer/mul24.inc +++ /dev/null @@ -1 +0,0 @@ -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mul24(__CLC_GENTYPE x, __CLC_GENTYPE y); diff --git a/libclc/generic/include/clc/integer/mul_hi.h b/libclc/generic/include/clc/integer/mul_hi.h index 27b95d83442f97..89afdead91f173 100644 --- a/libclc/generic/include/clc/integer/mul_hi.h +++ b/libclc/generic/include/clc/integer/mul_hi.h @@ -1,2 +1,6 @@ -#define __CLC_BODY <clc/integer/mul_hi.inc> +#define FUNCTION mul_hi +#define __CLC_BODY "binary_decl.h" + #include <clc/integer/gentype.inc> + +#undef FUNCTION diff --git a/libclc/generic/include/clc/integer/mul_hi.inc b/libclc/generic/include/clc/integer/mul_hi.inc deleted file mode 100644 index ce9e5c0b2c18c8..00000000000000 --- a/libclc/generic/include/clc/integer/mul_hi.inc +++ /dev/null @@ -1 +0,0 @@ -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mul_hi(__CLC_GENTYPE x, __CLC_GENTYPE y); diff --git a/libclc/generic/include/clc/integer/popcount.h b/libclc/generic/include/clc/integer/popcount.h index 23335f45b6fe3f..1706822794ffb7 
100644 --- a/libclc/generic/include/clc/integer/popcount.h +++ b/libclc/generic/include/clc/integer/popcount.h @@ -1,5 +1,6 @@ -#define __CLC_FUNCTION popcount -#define __CLC_BODY <clc/integer/unary.inc> +#define FUNCTION popcount +#define __CLC_BODY "unary_decl.h" + #include <clc/integer/gentype.inc> -#undef __CLC_FUNCTION -#undef __CLC_BODY + +#undef FUNCTION diff --git a/libclc/generic/include/clc/integer/rhadd.h b/libclc/generic/include/clc/integer/rhadd.h index 69b43faeebd246..8ea537a932ef1a 100644 --- a/libclc/generic/include/clc/integer/rhadd.h +++ b/libclc/generic/include/clc/integer/rhadd.h @@ -1,2 +1,6 @@ -#define __CLC_BODY <clc/integer/rhadd.inc> +#define FUNCTION rhadd +#define __CLC_BODY "binary_decl.h" + #include <clc/integer/gentype.inc> + +#undef FUNCTION diff --git a/libclc/generic/include/clc/integer/rhadd.inc b/libclc/generic/include/clc/integer/rhadd.inc deleted file mode 100644 index 88ccaf09fd5ef8..00000000000000 --- a/libclc/generic/include/clc/integer/rhadd.inc +++ /dev/null @@ -1 +0,0 @@ -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE rhadd(__CLC_GENTYPE x, __CLC_GENTYPE y); diff --git a/libclc/generic/include/clc/integer/upsample.h b/libclc/generic/include/clc/integer/upsample.h index 0b36b692a2c8d3..37038f6ad90344 100644 --- a/libclc/generic/include/clc/integer/upsample.h +++ b/libclc/generic/include/clc/integer/upsample.h @@ -1,25 +1,24 @@ -#define __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) \ - _CLC_OVERLOAD _CLC_DECL BGENTYPE upsample(GENTYPE hi, UGENTYPE lo); +#define __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) \ + _CLC_OVERLOAD _CLC_DECL BGENTYPE upsample(GENTYPE hi, UGENTYPE lo); -#define __CLC_UPSAMPLE_VEC(BGENTYPE, GENTYPE, UGENTYPE) \ - __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) \ - __CLC_UPSAMPLE_DECL(BGENTYPE##2, GENTYPE##2, UGENTYPE##2) \ - __CLC_UPSAMPLE_DECL(BGENTYPE##3, GENTYPE##3, UGENTYPE##3) \ - __CLC_UPSAMPLE_DECL(BGENTYPE##4, GENTYPE##4, UGENTYPE##4) \ - __CLC_UPSAMPLE_DECL(BGENTYPE##8, GENTYPE##8, UGENTYPE##8) 
\ - __CLC_UPSAMPLE_DECL(BGENTYPE##16, GENTYPE##16, UGENTYPE##16) \ +#define __CLC_UPSAMPLE_VEC(BGENTYPE, GENTYPE, UGENTYPE) \ + __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) \ + __CLC_UPSAMPLE_DECL(BGENTYPE##2, GENTYPE##2, UGENTYPE##2) \ + __CLC_UPSAMPLE_DECL(BGENTYPE##3, GENTYPE##3, UGENTYPE##3) \ + __CLC_UPSAMPLE_DECL(BGENTYPE##4, GENTYPE##4, UGENTYPE##4) \ + __CLC_UPSAMPLE_DECL(BGENTYPE##8, GENTYPE##8, UGENTYPE##8) \ + __CLC_UPSAMPLE_DECL(BGENTYPE##16, GENTYPE##16, UGENTYPE##16) -#define __CLC_UPSAMPLE_TYPES() \ - __CLC_UPSAMPLE_VEC(short, char, uchar) \ - __CLC_UPSAMPLE_VEC(ushort, uchar, uchar) \ - __CLC_UPSAMPLE_VEC(int, short, ushort) \ - __CLC_UPSAMPLE_VEC(uint, ushort, ushort) \ - __CLC_UPSAMPLE_VEC(long, int, uint) \ - __CLC_UPSAMPLE_VEC(ulong, uint, uint) \ +#define __CLC_UPSAMPLE_TYPES() \ + __CLC_UPSAMPLE_VEC(short, char, uchar) \ + __CLC_UPSAMPLE_VEC(ushort, uchar, uchar) \ + __CLC_UPSAMPLE_VEC(int, short, ushort) \ + __CLC_UPSAMPLE_VEC(uint, ushort, ushort) \ + __CLC_UPSAMPLE_VEC(long, int, uint) \ + __CLC_UPSAMPLE_VEC(ulong, uint, uint) __CLC_UPSAMPLE_TYPES() #undef __CLC_UPSAMPLE_TYPES #undef __CLC_UPSAMPLE_DECL #undef __CLC_UPSAMPLE_VEC - diff --git a/libclc/generic/include/integer/popcount.h b/libclc/generic/include/integer/popcount.h deleted file mode 100644 index 00c753753bb4e0..00000000000000 --- a/libclc/generic/include/integer/popcount.h +++ /dev/null @@ -1,3 +0,0 @@ -#define __CLC_FUNCTION __clc_native_popcount -#define __CLC_INTRINSIC "llvm.ctpop" -#include <integer/unary_intrin.inc> diff --git a/libclc/generic/include/integer/unary_intrin.inc b/libclc/generic/include/integer/unary_intrin.inc deleted file mode 100644 index ee9862a4c5b3a6..00000000000000 --- a/libclc/generic/include/integer/unary_intrin.inc +++ /dev/null @@ -1,20 +0,0 @@ -#define __CLC_INTRINSIC_DEF(SCALAR_TYPE, BIT_SIZE) \ -_CLC_OVERLOAD SCALAR_TYPE __CLC_FUNCTION(SCALAR_TYPE x) __asm(__CLC_INTRINSIC ".i" BIT_SIZE); \ -_CLC_OVERLOAD SCALAR_TYPE##2 
__CLC_FUNCTION(SCALAR_TYPE##2 x) __asm(__CLC_INTRINSIC ".v2i" BIT_SIZE); \ -_CLC_OVERLOAD SCALAR_TYPE##3 __CLC_FUNCTION(SCALAR_TYPE##3 x) __asm(__CLC_INTRINSIC ".v3i" BIT_SIZE); \ -_CLC_OVERLOAD SCALAR_TYPE##4 __CLC_FUNCTION(SCALAR_TYPE##4 x) __asm(__CLC_INTRINSIC ".v4i" BIT_SIZE); \ -_CLC_OVERLOAD SCALAR_TYPE##8 __CLC_FUNCTION(SCALAR_TYPE##8 x) __asm(__CLC_INTRINSIC ".v8i" BIT_SIZE); \ -_CLC_OVERLOAD SCALAR_TYPE##16 __CLC_FUNCTION(SCALAR_TYPE##16 x) __asm(__CLC_INTRINSIC ".v16i" BIT_SIZE); - -__CLC_INTRINSIC_DEF(char, "8") -__CLC_INTRINSIC_DEF(uchar, "8") -__CLC_INTRINSIC_DEF(short, "16") -__CLC_INTRINSIC_DEF(ushort, "16") -__CLC_INTRINSIC_DEF(int, "32") -__CLC_INTRINSIC_DEF(uint, "32") -__CLC_INTRINSIC_DEF(long, "64") -__CLC_INTRINSIC_DEF(ulong, "64") - -#undef __CLC_FUNCTION -#undef __CLC_INTRINSIC -#undef __CLC_INTRINSIC_DEF diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES index 579e909e53d462..b862e6aa54b996 100644 --- a/libclc/generic/lib/SOURCES +++ b/libclc/generic/lib/SOURCES @@ -68,6 +68,7 @@ integer/add_sat.cl integer/clz.cl integer/hadd.cl integer/mad24.cl +integer/mad_hi.cl integer/mad_sat.cl integer/mul24.cl integer/mul_hi.cl diff --git a/libclc/generic/lib/integer/binary_def.inc b/libclc/generic/lib/integer/binary_def.inc new file mode 100644 index 00000000000000..0f14a8c5c35f41 --- /dev/null +++ b/libclc/generic/lib/integer/binary_def.inc @@ -0,0 +1,8 @@ +#include <clc/utils.h> + +#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x) + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE a, + __CLC_GENTYPE b) { + return __CLC_FUNCTION(FUNCTION)(a, b); +} diff --git a/libclc/generic/lib/integer/clz.cl b/libclc/generic/lib/integer/clz.cl index 904d027d376134..bbbe8b4c642d80 100644 --- a/libclc/generic/lib/integer/clz.cl +++ b/libclc/generic/lib/integer/clz.cl @@ -1,43 +1,7 @@ #include <clc/clc.h> -#include <clc/clcmacro.h> +#include <clc/integer/clc_clz.h> -_CLC_OVERLOAD _CLC_DEF char clz(char x) { - return 
clz((ushort)(uchar)x) - 8; -} +#define FUNCTION clz +#define __CLC_BODY "unary_def.inc" -_CLC_OVERLOAD _CLC_DEF uchar clz(uchar x) { - return clz((ushort)x) - 8; -} - -_CLC_OVERLOAD _CLC_DEF short clz(short x) { - return x ? __builtin_clzs(x) : 16; -} - -_CLC_OVERLOAD _CLC_DEF ushort clz(ushort x) { - return x ? __builtin_clzs(x) : 16; -} - -_CLC_OVERLOAD _CLC_DEF int clz(int x) { - return x ? __builtin_clz(x) : 32; -} - -_CLC_OVERLOAD _CLC_DEF uint clz(uint x) { - return x ? __builtin_clz(x) : 32; -} - -_CLC_OVERLOAD _CLC_DEF long clz(long x) { - return x ? __builtin_clzl(x) : 64; -} - -_CLC_OVERLOAD _CLC_DEF ulong clz(ulong x) { - return x ? __builtin_clzl(x) : 64; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, clz, char) -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, clz, uchar) -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, clz, short) -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, clz, ushort) -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, clz, int) -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, clz, uint) -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, clz, long) -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, clz, ulong) +#include <clc/integer/gentype.inc> diff --git a/libclc/generic/lib/integer/hadd.cl b/libclc/generic/lib/integer/hadd.cl index 749026e5a8ad81..9fd53422b76cc3 100644 --- a/libclc/generic/lib/integer/hadd.cl +++ b/libclc/generic/lib/integer/hadd.cl @@ -1,4 +1,7 @@ #include <clc/clc.h> +#include <clc/integer/clc_hadd.h> + +#define FUNCTION hadd +#define __CLC_BODY "binary_def.inc" -#define __CLC_BODY <hadd.inc> #include <clc/integer/gentype.inc> diff --git a/libclc/generic/lib/integer/hadd.inc b/libclc/generic/lib/integer/hadd.inc deleted file mode 100644 index ea59d9bd7db5f8..00000000000000 --- a/libclc/generic/lib/integer/hadd.inc +++ /dev/null @@ -1,6 +0,0 @@ -//hadd = (x+y)>>1 -//This can be simplified to x>>1 + y>>1 + (1 if both x and y have the 1s bit set) -//This saves us having to do any 
checks for overflow in the addition sum -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE hadd(__CLC_GENTYPE x, __CLC_GENTYPE y) { - return (x>>(__CLC_GENTYPE)1)+(y>>(__CLC_GENTYPE)1)+(x&y&(__CLC_GENTYPE)1); -} diff --git a/libclc/generic/lib/integer/mad24.cl b/libclc/generic/lib/integer/mad24.cl index e29e99f28b56fc..db49c4aa7f10b0 100644 --- a/libclc/generic/lib/integer/mad24.cl +++ b/libclc/generic/lib/integer/mad24.cl @@ -1,4 +1,7 @@ #include <clc/clc.h> +#include <clc/integer/clc_mad24.h> -#define __CLC_BODY <mad24.inc> -#include <clc/integer/integer-gentype.inc> +#define FUNCTION mad24 +#define __CLC_BODY "ternary_def.inc" + +#include <clc/integer/gentype24.inc> diff --git a/libclc/generic/lib/integer/mad24.inc b/libclc/generic/lib/integer/mad24.inc deleted file mode 100644 index 902b0aafe4c874..00000000000000 --- a/libclc/generic/lib/integer/mad24.inc +++ /dev/null @@ -1,3 +0,0 @@ -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mad24(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z){ - return mul24(x, y) + z; -} diff --git a/libclc/generic/lib/integer/mad_hi.cl b/libclc/generic/lib/integer/mad_hi.cl new file mode 100644 index 00000000000000..21a084cc2fb31d --- /dev/null +++ b/libclc/generic/lib/integer/mad_hi.cl @@ -0,0 +1,7 @@ +#include <clc/clc.h> +#include <clc/integer/clc_mad_hi.h> + +#define FUNCTION mad_hi +#define __CLC_BODY "ternary_def.inc" + +#include <clc/integer/gentype.inc> diff --git a/libclc/generic/lib/integer/mul24.cl b/libclc/generic/lib/integer/mul24.cl index 8aedca64b85905..b4a5eca148f2b2 100644 --- a/libclc/generic/lib/integer/mul24.cl +++ b/libclc/generic/lib/integer/mul24.cl @@ -1,4 +1,7 @@ #include <clc/clc.h> +#include <clc/integer/clc_mul24.h> -#define __CLC_BODY <mul24.inc> -#include <clc/integer/integer-gentype.inc> +#define FUNCTION mul24 +#define __CLC_BODY "binary_def.inc" + +#include <clc/integer/gentype24.inc> diff --git a/libclc/generic/lib/integer/mul_hi.cl b/libclc/generic/lib/integer/mul_hi.cl index 174d893afb14f9..249e7a713f67f5 100644 --- 
a/libclc/generic/lib/integer/mul_hi.cl +++ b/libclc/generic/lib/integer/mul_hi.cl @@ -1,109 +1,7 @@ #include <clc/clc.h> +#include <clc/integer/clc_mul_hi.h> -//For all types EXCEPT long, which is implemented separately -#define __CLC_MUL_HI_IMPL(BGENTYPE, GENTYPE, GENSIZE) \ - _CLC_OVERLOAD _CLC_DEF GENTYPE mul_hi(GENTYPE x, GENTYPE y){ \ - return (GENTYPE)(((BGENTYPE)x * (BGENTYPE)y) >> GENSIZE); \ - } \ +#define FUNCTION mul_hi +#define __CLC_BODY "binary_def.inc" -//FOIL-based long mul_hi -// -// Summary: Treat mul_hi(long x, long y) as: -// (a+b) * (c+d) where a and c are the high-order parts of x and y respectively -// and b and d are the low-order parts of x and y. -// Thinking back to algebra, we use FOIL to do the work. - -_CLC_OVERLOAD _CLC_DEF long mul_hi(long x, long y){ - long f, o, i; - ulong l; - - //Move the high/low halves of x/y into the lower 32-bits of variables so - //that we can multiply them without worrying about overflow. - long x_hi = x >> 32; - long x_lo = x & UINT_MAX; - long y_hi = y >> 32; - long y_lo = y & UINT_MAX; - - //Multiply all of the components according to FOIL method - f = x_hi * y_hi; - o = x_hi * y_lo; - i = x_lo * y_hi; - l = x_lo * y_lo; - - //Now add the components back together in the following steps: - //F: doesn't need to be modified - //O/I: Need to be added together. - //L: Shift right by 32-bits, then add into the sum of O and I - //Once O/I/L are summed up, then shift the sum by 32-bits and add to F. - // - //We use hadd to give us a bit of extra precision for the intermediate sums - //but as a result, we shift by 31 bits instead of 32 - return (long)(f + (hadd(o, (i + (long)((ulong)l>>32))) >> 31)); -} - -_CLC_OVERLOAD _CLC_DEF ulong mul_hi(ulong x, ulong y){ - ulong f, o, i; - ulong l; - - //Move the high/low halves of x/y into the lower 32-bits of variables so - //that we can multiply them without worrying about overflow. 
- ulong x_hi = x >> 32; - ulong x_lo = x & UINT_MAX; - ulong y_hi = y >> 32; - ulong y_lo = y & UINT_MAX; - - //Multiply all of the components according to FOIL method - f = x_hi * y_hi; - o = x_hi * y_lo; - i = x_lo * y_hi; - l = x_lo * y_lo; - - //Now add the components back together, taking care to respect the fact that: - //F: doesn't need to be modified - //O/I: Need to be added together. - //L: Shift right by 32-bits, then add into the sum of O and I - //Once O/I/L are summed up, then shift the sum by 32-bits and add to F. - // - //We use hadd to give us a bit of extra precision for the intermediate sums - //but as a result, we shift by 31 bits instead of 32 - return (f + (hadd(o, (i + (l>>32))) >> 31)); -} - -#define __CLC_MUL_HI_VEC(GENTYPE) \ - _CLC_OVERLOAD _CLC_DEF GENTYPE##2 mul_hi(GENTYPE##2 x, GENTYPE##2 y){ \ - return (GENTYPE##2){mul_hi(x.s0, y.s0), mul_hi(x.s1, y.s1)}; \ - } \ - _CLC_OVERLOAD _CLC_DEF GENTYPE##3 mul_hi(GENTYPE##3 x, GENTYPE##3 y){ \ - return (GENTYPE##3){mul_hi(x.s0, y.s0), mul_hi(x.s1, y.s1), mul_hi(x.s2, y.s2)}; \ - } \ - _CLC_OVERLOAD _CLC_DEF GENTYPE##4 mul_hi(GENTYPE##4 x, GENTYPE##4 y){ \ - return (GENTYPE##4){mul_hi(x.lo, y.lo), mul_hi(x.hi, y.hi)}; \ - } \ - _CLC_OVERLOAD _CLC_DEF GENTYPE##8 mul_hi(GENTYPE##8 x, GENTYPE##8 y){ \ - return (GENTYPE##8){mul_hi(x.lo, y.lo), mul_hi(x.hi, y.hi)}; \ - } \ - _CLC_OVERLOAD _CLC_DEF GENTYPE##16 mul_hi(GENTYPE##16 x, GENTYPE##16 y){ \ - return (GENTYPE##16){mul_hi(x.lo, y.lo), mul_hi(x.hi, y.hi)}; \ - } \ - -#define __CLC_MUL_HI_DEC_IMPL(BTYPE, TYPE, BITS) \ - __CLC_MUL_HI_IMPL(BTYPE, TYPE, BITS) \ - __CLC_MUL_HI_VEC(TYPE) - -#define __CLC_MUL_HI_TYPES() \ - __CLC_MUL_HI_DEC_IMPL(short, char, 8) \ - __CLC_MUL_HI_DEC_IMPL(ushort, uchar, 8) \ - __CLC_MUL_HI_DEC_IMPL(int, short, 16) \ - __CLC_MUL_HI_DEC_IMPL(uint, ushort, 16) \ - __CLC_MUL_HI_DEC_IMPL(long, int, 32) \ - __CLC_MUL_HI_DEC_IMPL(ulong, uint, 32) \ - __CLC_MUL_HI_VEC(long) \ - __CLC_MUL_HI_VEC(ulong) - -__CLC_MUL_HI_TYPES() - 
-#undef __CLC_MUL_HI_TYPES -#undef __CLC_MUL_HI_DEC_IMPL -#undef __CLC_MUL_HI_IMPL -#undef __CLC_MUL_HI_VEC -#undef __CLC_B32 +#include <clc/integer/gentype.inc> diff --git a/libclc/generic/lib/integer/popcount.cl b/libclc/generic/lib/integer/popcount.cl index ca83b1afaf9dab..f646e838351767 100644 --- a/libclc/generic/lib/integer/popcount.cl +++ b/libclc/generic/lib/integer/popcount.cl @@ -1,8 +1,7 @@ #include <clc/clc.h> -#include <integer/popcount.h> +#include <clc/integer/clc_popcount.h> -#define __CLC_FUNC popcount -#define __CLC_IMPL_FUNC __clc_native_popcount +#define FUNCTION popcount +#define __CLC_BODY "unary_def.inc" -#define __CLC_BODY "../clc_unary.inc" #include <clc/integer/gentype.inc> diff --git a/libclc/generic/lib/integer/rhadd.cl b/libclc/generic/lib/integer/rhadd.cl index c985870f7c7a24..a919bd33f4a6cf 100644 --- a/libclc/generic/lib/integer/rhadd.cl +++ b/libclc/generic/lib/integer/rhadd.cl @@ -1,4 +1,7 @@ #include <clc/clc.h> +#include <clc/integer/clc_rhadd.h> + +#define FUNCTION rhadd +#define __CLC_BODY "binary_def.inc" -#define __CLC_BODY <rhadd.inc> #include <clc/integer/gentype.inc> diff --git a/libclc/generic/lib/integer/rhadd.inc b/libclc/generic/lib/integer/rhadd.inc deleted file mode 100644 index 3d6076874808e6..00000000000000 --- a/libclc/generic/lib/integer/rhadd.inc +++ /dev/null @@ -1,6 +0,0 @@ -//rhadd = (x+y+1)>>1 -//This can be simplified to x>>1 + y>>1 + (1 if either x or y have the 1s bit set) -//This saves us having to do any checks for overflow in the addition sums -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE rhadd(__CLC_GENTYPE x, __CLC_GENTYPE y) { - return (x>>(__CLC_GENTYPE)1)+(y>>(__CLC_GENTYPE)1)+((x&(__CLC_GENTYPE)1)|(y&(__CLC_GENTYPE)1)); -} diff --git a/libclc/generic/lib/integer/ternary_def.inc b/libclc/generic/lib/integer/ternary_def.inc new file mode 100644 index 00000000000000..2c43b486685b8f --- /dev/null +++ b/libclc/generic/lib/integer/ternary_def.inc @@ -0,0 +1,8 @@ +#include <clc/utils.h> + +#define 
__CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x) + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE a, __CLC_GENTYPE b, + __CLC_GENTYPE c) { + return __CLC_FUNCTION(FUNCTION)(a, b, c); +} diff --git a/libclc/generic/lib/integer/unary_def.inc b/libclc/generic/lib/integer/unary_def.inc new file mode 100644 index 00000000000000..762f85eedead1b --- /dev/null +++ b/libclc/generic/lib/integer/unary_def.inc @@ -0,0 +1,7 @@ +#include <clc/utils.h> + +#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x) + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE a) { + return __CLC_FUNCTION(FUNCTION)(a); +} diff --git a/libclc/generic/lib/integer/upsample.cl b/libclc/generic/lib/integer/upsample.cl index da77315f8f9344..984a731e3b4d12 100644 --- a/libclc/generic/lib/integer/upsample.cl +++ b/libclc/generic/lib/integer/upsample.cl @@ -1,32 +1,34 @@ #include <clc/clc.h> +#include <clc/integer/clc_upsample.h> -#define __CLC_UPSAMPLE_IMPL(BGENTYPE, GENTYPE, UGENTYPE, GENSIZE) \ - _CLC_OVERLOAD _CLC_DEF BGENTYPE upsample(GENTYPE hi, UGENTYPE lo){ \ - return ((BGENTYPE)hi << GENSIZE) | lo; \ - } \ - _CLC_OVERLOAD _CLC_DEF BGENTYPE##2 upsample(GENTYPE##2 hi, UGENTYPE##2 lo){ \ - return (BGENTYPE##2){upsample(hi.s0, lo.s0), upsample(hi.s1, lo.s1)}; \ - } \ - _CLC_OVERLOAD _CLC_DEF BGENTYPE##3 upsample(GENTYPE##3 hi, UGENTYPE##3 lo){ \ - return (BGENTYPE##3){upsample(hi.s0, lo.s0), upsample(hi.s1, lo.s1), upsample(hi.s2, lo.s2)}; \ - } \ - _CLC_OVERLOAD _CLC_DEF BGENTYPE##4 upsample(GENTYPE##4 hi, UGENTYPE##4 lo){ \ - return (BGENTYPE##4){upsample(hi.lo, lo.lo), upsample(hi.hi, lo.hi)}; \ - } \ - _CLC_OVERLOAD _CLC_DEF BGENTYPE##8 upsample(GENTYPE##8 hi, UGENTYPE##8 lo){ \ - return (BGENTYPE##8){upsample(hi.lo, lo.lo), upsample(hi.hi, lo.hi)}; \ - } \ - _CLC_OVERLOAD _CLC_DEF BGENTYPE##16 upsample(GENTYPE##16 hi, UGENTYPE##16 lo){ \ - return (BGENTYPE##16){upsample(hi.lo, lo.lo), upsample(hi.hi, lo.hi)}; \ - } \ +#define __CLC_UPSAMPLE_IMPL(BGENTYPE, GENTYPE, UGENTYPE) \ + 
_CLC_OVERLOAD _CLC_DEF BGENTYPE upsample(GENTYPE hi, UGENTYPE lo) { \ + return __clc_upsample(hi, lo); \ + } \ + _CLC_OVERLOAD _CLC_DEF BGENTYPE##2 upsample(GENTYPE##2 hi, UGENTYPE##2 lo) { \ + return __clc_upsample(hi, lo); \ + } \ + _CLC_OVERLOAD _CLC_DEF BGENTYPE##3 upsample(GENTYPE##3 hi, UGENTYPE##3 lo) { \ + return __clc_upsample(hi, lo); \ + } \ + _CLC_OVERLOAD _CLC_DEF BGENTYPE##4 upsample(GENTYPE##4 hi, UGENTYPE##4 lo) { \ + return __clc_upsample(hi, lo); \ + } \ + _CLC_OVERLOAD _CLC_DEF BGENTYPE##8 upsample(GENTYPE##8 hi, UGENTYPE##8 lo) { \ + return __clc_upsample(hi, lo); \ + } \ + _CLC_OVERLOAD _CLC_DEF BGENTYPE##16 upsample(GENTYPE##16 hi, \ + UGENTYPE##16 lo) { \ + return __clc_upsample(hi, lo); \ + } -#define __CLC_UPSAMPLE_TYPES() \ - __CLC_UPSAMPLE_IMPL(short, char, uchar, 8) \ - __CLC_UPSAMPLE_IMPL(ushort, uchar, uchar, 8) \ - __CLC_UPSAMPLE_IMPL(int, short, ushort, 16) \ - __CLC_UPSAMPLE_IMPL(uint, ushort, ushort, 16) \ - __CLC_UPSAMPLE_IMPL(long, int, uint, 32) \ - __CLC_UPSAMPLE_IMPL(ulong, uint, uint, 32) \ +#define __CLC_UPSAMPLE_TYPES() \ + __CLC_UPSAMPLE_IMPL(short, char, uchar) \ + __CLC_UPSAMPLE_IMPL(ushort, uchar, uchar) \ + __CLC_UPSAMPLE_IMPL(int, short, ushort) \ + __CLC_UPSAMPLE_IMPL(uint, ushort, ushort) \ + __CLC_UPSAMPLE_IMPL(long, int, uint) \ + __CLC_UPSAMPLE_IMPL(ulong, uint, uint) __CLC_UPSAMPLE_TYPES() diff --git a/libclc/generic/lib/math/clc_fma.cl b/libclc/generic/lib/math/clc_fma.cl index 15de4c8032a932..ed23b3eba26a58 100644 --- a/libclc/generic/lib/math/clc_fma.cl +++ b/libclc/generic/lib/math/clc_fma.cl @@ -23,6 +23,7 @@ #include <clc/clc.h> #include <clc/clcmacro.h> #include <clc/integer/clc_abs.h> +#include <clc/integer/clc_clz.h> #include <clc/relational/clc_isinf.h> #include <clc/relational/clc_isnan.h> #include <clc/shared/clc_max.h> @@ -119,7 +120,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, float c) { } // detect overflow/underflow - int overflow_bits = 3 - clz(st_fma.mantissa); + int 
overflow_bits = 3 - __clc_clz(st_fma.mantissa); // adjust exponent st_fma.exponent += overflow_bits; diff --git a/libclc/generic/lib/math/clc_fmod.cl b/libclc/generic/lib/math/clc_fmod.cl index 5d101373178dd1..efe3422c475265 100644 --- a/libclc/generic/lib/math/clc_fmod.cl +++ b/libclc/generic/lib/math/clc_fmod.cl @@ -22,6 +22,7 @@ #include <clc/clc.h> #include <clc/clcmacro.h> +#include <clc/integer/clc_clz.h> #include <clc/math/clc_floor.h> #include <clc/math/clc_trunc.h> #include <clc/shared/clc_max.h> @@ -88,14 +89,14 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y) ulong xsgn = ux ^ ax; double dx = as_double(ax); int xexp = convert_int(ax >> EXPSHIFTBITS_DP64); - int xexp1 = 11 - (int) clz(ax & MANTBITS_DP64); + int xexp1 = 11 - (int) __clc_clz(ax & MANTBITS_DP64); xexp1 = xexp < 1 ? xexp1 : xexp; ulong uy = as_ulong(y); ulong ay = uy & ~SIGNBIT_DP64; double dy = as_double(ay); int yexp = convert_int(ay >> EXPSHIFTBITS_DP64); - int yexp1 = 11 - (int) clz(ay & MANTBITS_DP64); + int yexp1 = 11 - (int) __clc_clz(ay & MANTBITS_DP64); yexp1 = yexp < 1 ? yexp1 : yexp; // First assume |x| > |y| diff --git a/libclc/generic/lib/math/clc_remainder.cl b/libclc/generic/lib/math/clc_remainder.cl index 8a0ce8816fcb38..e88a2ff91e9896 100644 --- a/libclc/generic/lib/math/clc_remainder.cl +++ b/libclc/generic/lib/math/clc_remainder.cl @@ -22,6 +22,7 @@ #include <clc/clc.h> #include <clc/clcmacro.h> +#include <clc/integer/clc_clz.h> #include <clc/math/clc_floor.h> #include <clc/math/clc_trunc.h> #include <clc/shared/clc_max.h> @@ -96,14 +97,14 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y) ulong xsgn = ux ^ ax; double dx = as_double(ax); int xexp = convert_int(ax >> EXPSHIFTBITS_DP64); - int xexp1 = 11 - (int) clz(ax & MANTBITS_DP64); + int xexp1 = 11 - (int) __clc_clz(ax & MANTBITS_DP64); xexp1 = xexp < 1 ? 
xexp1 : xexp; ulong uy = as_ulong(y); ulong ay = uy & ~SIGNBIT_DP64; double dy = as_double(ay); int yexp = convert_int(ay >> EXPSHIFTBITS_DP64); - int yexp1 = 11 - (int) clz(ay & MANTBITS_DP64); + int yexp1 = 11 - (int) __clc_clz(ay & MANTBITS_DP64); yexp1 = yexp < 1 ? yexp1 : yexp; int qsgn = ((ux ^ uy) & SIGNBIT_DP64) == 0UL ? 1 : -1; diff --git a/libclc/generic/lib/math/clc_remquo.cl b/libclc/generic/lib/math/clc_remquo.cl index 8d2e5f9a74bfef..f0f69e9e192259 100644 --- a/libclc/generic/lib/math/clc_remquo.cl +++ b/libclc/generic/lib/math/clc_remquo.cl @@ -22,6 +22,7 @@ #include <clc/clc.h> #include <clc/clcmacro.h> +#include <clc/integer/clc_clz.h> #include <clc/math/clc_floor.h> #include <clc/math/clc_trunc.h> #include <clc/shared/clc_max.h> @@ -135,14 +136,14 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remquo(double x, double y, ulong xsgn = ux ^ ax; double dx = as_double(ax); int xexp = convert_int(ax >> EXPSHIFTBITS_DP64); - int xexp1 = 11 - (int)clz(ax & MANTBITS_DP64); + int xexp1 = 11 - (int)__clc_clz(ax & MANTBITS_DP64); xexp1 = xexp < 1 ? xexp1 : xexp; ulong uy = as_ulong(y); ulong ay = uy & ~SIGNBIT_DP64; double dy = as_double(ay); int yexp = convert_int(ay >> EXPSHIFTBITS_DP64); - int yexp1 = 11 - (int)clz(ay & MANTBITS_DP64); + int yexp1 = 11 - (int)__clc_clz(ay & MANTBITS_DP64); yexp1 = yexp < 1 ? yexp1 : yexp; int qsgn = ((ux ^ uy) & SIGNBIT_DP64) == 0UL ? 
1 : -1; diff --git a/libclc/generic/lib/math/sincos_helpers.cl b/libclc/generic/lib/math/sincos_helpers.cl index 0adecf6978bcab..c9e04e8b00e79f 100644 --- a/libclc/generic/lib/math/sincos_helpers.cl +++ b/libclc/generic/lib/math/sincos_helpers.cl @@ -21,6 +21,8 @@ */ #include <clc/clc.h> +#include <clc/integer/clc_clz.h> +#include <clc/integer/clc_mul_hi.h> #include <clc/shared/clc_max.h> #include "math.h" @@ -169,14 +171,14 @@ _CLC_DEF int __clc_argReductionSmallS(float *r, float *rr, float x) return (int)fnpi2 & 0x3; } -#define FULL_MUL(A, B, HI, LO) \ - LO = A * B; \ - HI = mul_hi(A, B) +#define FULL_MUL(A, B, HI, LO) \ + LO = A * B; \ + HI = __clc_mul_hi(A, B) -#define FULL_MAD(A, B, C, HI, LO) \ - LO = ((A) * (B) + (C)); \ - HI = mul_hi(A, B); \ - HI += LO < C +#define FULL_MAD(A, B, C, HI, LO) \ + LO = ((A) * (B) + (C)); \ + HI = __clc_mul_hi(A, B); \ + HI += LO < C _CLC_DEF int __clc_argReductionLargeS(float *r, float *rr, float x) { @@ -269,7 +271,7 @@ _CLC_DEF int __clc_argReductionLargeS(float *r, float *rr, float x) p5 = p5 ^ flip; // Find exponent and shift away leading zeroes and hidden bit - xe = clz(p7) + 1; + xe = __clc_clz(p7) + 1; shift = 32 - xe; p7 = bitalign(p7, p6, shift); p6 = bitalign(p6, p5, shift); @@ -281,7 +283,7 @@ _CLC_DEF int __clc_argReductionLargeS(float *r, float *rr, float x) p7 = bitalign(p7, p6, 32-23); // Get 24 more bits of fraction in another float, there are not long strings of zeroes here - int xxe = clz(p7) + 1; + int xxe = __clc_clz(p7) + 1; p7 = bitalign(p7, p6, 32-xxe); float q0 = as_float(sign | ((127 - (xe + 23 + xxe)) << 23) | (p7 >> 9)); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits