[libclc] [libclc] Move several integer functions to CLC library (PR #116786)

Fraser Cormack via cfe-commits Wed, 29 Jan 2025 03:53:36 -0800

https://github.com/frasercrmck updated 
https://github.com/llvm/llvm-project/pull/116786


>From 00f8980411e74a3071e6efbf553f78363ac92bb2 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Tue, 17 Dec 2024 16:52:51 +0000
Subject: [PATCH] [libclc] Move several integer functions to CLC library

This commit moves over the OpenCL clz, hadd, mad24, mad_hi, mul24,
mul_hi, popcount, rhadd, and upsample builtins to the CLC library. There
are no changes to any target's CLC libraries.

The OpenCL mad_hi builtin wasn't previously publicly available from the
CLC libraries, as it was hash-defined to mul_hi in the header files.
That issue has been fixed, and mad_hi is now exposed.

The custom AMD implementation/workaround for popcount has been removed
as it was only valid for clang < 7.

There are still two integer functions which haven't been moved over. The
OpenCL add_sat, sub_sat, and mad_sat builtins require saturating
conversion builtins which haven't yet been ported.
---
 libclc/amdgcn/lib/SOURCES                     |   1 -
 libclc/amdgcn/lib/integer/popcount.cl         |   6 -
 libclc/amdgcn/lib/integer/popcount.inc        |  17 ---
 libclc/clc/include/clc/integer/clc_clz.h      |  12 ++
 libclc/clc/include/clc/integer/clc_hadd.h     |  12 ++
 libclc/clc/include/clc/integer/clc_mad24.h    |  12 ++
 libclc/clc/include/clc/integer/clc_mad_hi.h   |   8 +
 libclc/clc/include/clc/integer/clc_mul24.h    |  12 ++
 libclc/clc/include/clc/integer/clc_mul_hi.h   |  12 ++
 libclc/clc/include/clc/integer/clc_popcount.h |  12 ++
 libclc/clc/include/clc/integer/clc_rhadd.h    |  12 ++
 libclc/clc/include/clc/integer/clc_upsample.h |  32 ++++
 .../include/clc/integer/definitions.h         |   7 +-
 libclc/clc/include/clc/integer/gentype24.inc  | 137 ++++++++++++++++++
 libclc/clc/include/clc/math/clc_mad.h         |   2 +-
 .../clc/{math => shared}/ternary_decl.inc     |   0
 libclc/clc/include/clc/shared/ternary_def.inc |  10 ++
 libclc/clc/include/clc/shared/unary_decl.inc  |   1 +
 libclc/clc/include/clc/shared/unary_def.inc   |   9 ++
 libclc/clc/lib/clspv/SOURCES                  |   8 +
 libclc/clc/lib/generic/SOURCES                |   8 +
 libclc/clc/lib/generic/integer/clc_clz.cl     |  44 ++++++
 libclc/clc/lib/generic/integer/clc_hadd.cl    |   4 +
 libclc/clc/lib/generic/integer/clc_hadd.inc   |   8 +
 libclc/clc/lib/generic/integer/clc_mad24.cl   |   5 +
 libclc/clc/lib/generic/integer/clc_mad24.inc  |   5 +
 libclc/clc/lib/generic/integer/clc_mul24.cl   |   4 +
 .../lib/generic/integer/clc_mul24.inc}        |   4 +-
 libclc/clc/lib/generic/integer/clc_mul_hi.cl  | 132 +++++++++++++++++
 .../clc/lib/generic/integer/clc_popcount.cl   |   7 +
 libclc/clc/lib/generic/integer/clc_rhadd.cl   |   4 +
 libclc/clc/lib/generic/integer/clc_rhadd.inc  |   8 +
 .../clc/lib/generic/integer/clc_upsample.cl   |  35 +++++
 libclc/clc/lib/spirv/SOURCES                  |   8 +
 libclc/clc/lib/spirv64/SOURCES                |   8 +
 libclc/generic/include/clc/integer/clz.h      |   7 +-
 libclc/generic/include/clc/integer/clz.inc    |   1 -
 libclc/generic/include/clc/integer/hadd.h     |   7 +-
 libclc/generic/include/clc/integer/hadd.inc   |   1 -
 libclc/generic/include/clc/integer/mad24.h    |   9 +-
 libclc/generic/include/clc/integer/mad24.inc  |   1 -
 libclc/generic/include/clc/integer/mad_hi.h   |   8 +-
 libclc/generic/include/clc/integer/mul24.h    |   8 +-
 libclc/generic/include/clc/integer/mul24.inc  |   1 -
 libclc/generic/include/clc/integer/mul_hi.h   |   7 +-
 libclc/generic/include/clc/integer/mul_hi.inc |   1 -
 libclc/generic/include/clc/integer/popcount.h |   6 +-
 libclc/generic/include/clc/integer/rhadd.h    |   7 +-
 libclc/generic/include/clc/integer/rhadd.inc  |   1 -
 libclc/generic/include/clc/integer/upsample.h |  33 ++---
 libclc/generic/include/clc/math/fma.h         |   2 +-
 libclc/generic/include/clc/math/mad.h         |   2 +-
 libclc/generic/include/integer/popcount.h     |   3 -
 .../generic/include/integer/unary_intrin.inc  |  20 ---
 libclc/generic/include/math/clc_fma.h         |   2 +-
 libclc/generic/lib/SOURCES                    |   1 +
 libclc/generic/lib/integer/clz.cl             |  44 +-----
 libclc/generic/lib/integer/hadd.cl            |   5 +-
 libclc/generic/lib/integer/hadd.inc           |   6 -
 libclc/generic/lib/integer/mad24.cl           |   7 +-
 libclc/generic/lib/integer/mad24.inc          |   3 -
 libclc/generic/lib/integer/mad_hi.cl          |   7 +
 libclc/generic/lib/integer/mul24.cl           |   7 +-
 libclc/generic/lib/integer/mul_hi.cl          | 110 +-------------
 libclc/generic/lib/integer/popcount.cl        |   7 +-
 libclc/generic/lib/integer/rhadd.cl           |   5 +-
 libclc/generic/lib/integer/rhadd.inc          |   6 -
 libclc/generic/lib/integer/upsample.cl        |  54 +++----
 libclc/generic/lib/math/clc_fma.cl            |   3 +-
 libclc/generic/lib/math/clc_fmod.cl           |   5 +-
 libclc/generic/lib/math/clc_remainder.cl      |   5 +-
 libclc/generic/lib/math/clc_remquo.cl         |   5 +-
 libclc/generic/lib/math/sincos_helpers.cl     |  10 +-
 73 files changed, 726 insertions(+), 297 deletions(-)
 delete mode 100644 libclc/amdgcn/lib/integer/popcount.cl
 delete mode 100644 libclc/amdgcn/lib/integer/popcount.inc
 create mode 100644 libclc/clc/include/clc/integer/clc_clz.h
 create mode 100644 libclc/clc/include/clc/integer/clc_hadd.h
 create mode 100644 libclc/clc/include/clc/integer/clc_mad24.h
 create mode 100644 libclc/clc/include/clc/integer/clc_mad_hi.h
 create mode 100644 libclc/clc/include/clc/integer/clc_mul24.h
 create mode 100644 libclc/clc/include/clc/integer/clc_mul_hi.h
 create mode 100644 libclc/clc/include/clc/integer/clc_popcount.h
 create mode 100644 libclc/clc/include/clc/integer/clc_rhadd.h
 create mode 100644 libclc/clc/include/clc/integer/clc_upsample.h
 rename libclc/{generic => clc}/include/clc/integer/definitions.h (71%)
 create mode 100644 libclc/clc/include/clc/integer/gentype24.inc
 rename libclc/clc/include/clc/{math => shared}/ternary_decl.inc (100%)
 create mode 100644 libclc/clc/include/clc/shared/ternary_def.inc
 create mode 100644 libclc/clc/include/clc/shared/unary_decl.inc
 create mode 100644 libclc/clc/include/clc/shared/unary_def.inc
 create mode 100644 libclc/clc/lib/generic/integer/clc_clz.cl
 create mode 100644 libclc/clc/lib/generic/integer/clc_hadd.cl
 create mode 100644 libclc/clc/lib/generic/integer/clc_hadd.inc
 create mode 100644 libclc/clc/lib/generic/integer/clc_mad24.cl
 create mode 100644 libclc/clc/lib/generic/integer/clc_mad24.inc
 create mode 100644 libclc/clc/lib/generic/integer/clc_mul24.cl
 rename libclc/{generic/lib/integer/mul24.inc => 
clc/lib/generic/integer/clc_mul24.inc} (68%)
 create mode 100644 libclc/clc/lib/generic/integer/clc_mul_hi.cl
 create mode 100644 libclc/clc/lib/generic/integer/clc_popcount.cl
 create mode 100644 libclc/clc/lib/generic/integer/clc_rhadd.cl
 create mode 100644 libclc/clc/lib/generic/integer/clc_rhadd.inc
 create mode 100644 libclc/clc/lib/generic/integer/clc_upsample.cl
 delete mode 100644 libclc/generic/include/clc/integer/clz.inc
 delete mode 100644 libclc/generic/include/clc/integer/hadd.inc
 delete mode 100644 libclc/generic/include/clc/integer/mad24.inc
 delete mode 100644 libclc/generic/include/clc/integer/mul24.inc
 delete mode 100644 libclc/generic/include/clc/integer/mul_hi.inc
 delete mode 100644 libclc/generic/include/clc/integer/rhadd.inc
 delete mode 100644 libclc/generic/include/integer/popcount.h
 delete mode 100644 libclc/generic/include/integer/unary_intrin.inc
 delete mode 100644 libclc/generic/lib/integer/hadd.inc
 delete mode 100644 libclc/generic/lib/integer/mad24.inc
 create mode 100644 libclc/generic/lib/integer/mad_hi.cl
 delete mode 100644 libclc/generic/lib/integer/rhadd.inc

diff --git a/libclc/amdgcn/lib/SOURCES b/libclc/amdgcn/lib/SOURCES
index b235457f9ab7c3..4ea66385fe50ee 100644
--- a/libclc/amdgcn/lib/SOURCES
+++ b/libclc/amdgcn/lib/SOURCES
@@ -1,5 +1,4 @@
 cl_khr_int64_extended_atomics/minmax_helpers.ll
-integer/popcount.cl
 math/fmax.cl
 math/fmin.cl
 math/ldexp.cl
diff --git a/libclc/amdgcn/lib/integer/popcount.cl 
b/libclc/amdgcn/lib/integer/popcount.cl
deleted file mode 100644
index 3b493fbd146f01..00000000000000
--- a/libclc/amdgcn/lib/integer/popcount.cl
+++ /dev/null
@@ -1,6 +0,0 @@
-#include <clc/clc.h>
-#include <clc/utils.h>
-#include <integer/popcount.h>
-
-#define __CLC_BODY "popcount.inc"
-#include <clc/integer/gentype.inc>
diff --git a/libclc/amdgcn/lib/integer/popcount.inc 
b/libclc/amdgcn/lib/integer/popcount.inc
deleted file mode 100644
index 402ddb768c6a6f..00000000000000
--- a/libclc/amdgcn/lib/integer/popcount.inc
+++ /dev/null
@@ -1,17 +0,0 @@
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE popcount(__CLC_GENTYPE x) {
-/* LLVM-4+ implements i16 ops for VI+ ASICs. However, ctpop implementation
- * is missing until r326535. Therefore we have to convert sub i32 types to uint
- * as a workaround. */
-#if __clang_major__ < 7 && __clang_major__ > 3 && __CLC_GENSIZE < 32
-       /* Prevent sign extension on uint conversion */
-       const __CLC_U_GENTYPE y = __CLC_XCONCAT(as_, __CLC_U_GENTYPE)(x);
-       /* Convert to uintX */
-       const __CLC_XCONCAT(uint, __CLC_VECSIZE) z = 
__CLC_XCONCAT(convert_uint, __CLC_VECSIZE)(y);
-       /* Call popcount on uintX type */
-       const __CLC_XCONCAT(uint, __CLC_VECSIZE) res = __clc_native_popcount(z);
-       /* Convert the result back to gentype. */
-       return __CLC_XCONCAT(convert_, __CLC_GENTYPE)(res);
-#else
-       return __clc_native_popcount(x);
-#endif
-}
diff --git a/libclc/clc/include/clc/integer/clc_clz.h 
b/libclc/clc/include/clc/integer/clc_clz.h
new file mode 100644
index 00000000000000..a642391e46960c
--- /dev/null
+++ b/libclc/clc/include/clc/integer/clc_clz.h
@@ -0,0 +1,12 @@
+#ifndef __CLC_INTEGER_CLC_CLZ_H__
+#define __CLC_INTEGER_CLC_CLZ_H__
+
+#define __CLC_FUNCTION __clc_clz
+#define __CLC_BODY <clc/shared/unary_decl.inc>
+
+#include <clc/integer/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_INTEGER_CLC_CLZ_H__
diff --git a/libclc/clc/include/clc/integer/clc_hadd.h 
b/libclc/clc/include/clc/integer/clc_hadd.h
new file mode 100644
index 00000000000000..6693afbb8b8037
--- /dev/null
+++ b/libclc/clc/include/clc/integer/clc_hadd.h
@@ -0,0 +1,12 @@
+#ifndef __CLC_INTEGER_CLC_HADD_H__
+#define __CLC_INTEGER_CLC_HADD_H__
+
+#define __CLC_FUNCTION __clc_hadd
+#define __CLC_BODY <clc/shared/binary_decl.inc>
+
+#include <clc/integer/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_INTEGER_CLC_HADD_H__
diff --git a/libclc/clc/include/clc/integer/clc_mad24.h 
b/libclc/clc/include/clc/integer/clc_mad24.h
new file mode 100644
index 00000000000000..13d39d04d1a2a6
--- /dev/null
+++ b/libclc/clc/include/clc/integer/clc_mad24.h
@@ -0,0 +1,12 @@
+#ifndef __CLC_INTEGER_CLC_MAD24_H__
+#define __CLC_INTEGER_CLC_MAD24_H__
+
+#define __CLC_FUNCTION __clc_mad24
+#define __CLC_BODY <clc/shared/ternary_decl.inc>
+
+#include <clc/integer/gentype24.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_INTEGER_CLC_MAD24_H__
diff --git a/libclc/clc/include/clc/integer/clc_mad_hi.h 
b/libclc/clc/include/clc/integer/clc_mad_hi.h
new file mode 100644
index 00000000000000..24a590df6027a8
--- /dev/null
+++ b/libclc/clc/include/clc/integer/clc_mad_hi.h
@@ -0,0 +1,8 @@
+#ifndef __CLC_INTEGER_CLC_MAD_HI_H__
+#define __CLC_INTEGER_CLC_MAD_HI_H__
+
+#include <clc/integer/clc_mul_hi.h>
+
+#define __clc_mad_hi(a, b, c) (__clc_mul_hi((a), (b)) + (c))
+
+#endif // __CLC_INTEGER_CLC_MAD_HI_H__
diff --git a/libclc/clc/include/clc/integer/clc_mul24.h 
b/libclc/clc/include/clc/integer/clc_mul24.h
new file mode 100644
index 00000000000000..acab4e9da9eaa7
--- /dev/null
+++ b/libclc/clc/include/clc/integer/clc_mul24.h
@@ -0,0 +1,12 @@
+#ifndef __CLC_INTEGER_CLC_MUL24_H__
+#define __CLC_INTEGER_CLC_MUL24_H__
+
+#define __CLC_FUNCTION __clc_mul24
+#define __CLC_BODY <clc/shared/binary_decl.inc>
+
+#include <clc/integer/gentype24.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_INTEGER_CLC_MUL24_H__
diff --git a/libclc/clc/include/clc/integer/clc_mul_hi.h 
b/libclc/clc/include/clc/integer/clc_mul_hi.h
new file mode 100644
index 00000000000000..e395794066f96a
--- /dev/null
+++ b/libclc/clc/include/clc/integer/clc_mul_hi.h
@@ -0,0 +1,12 @@
+#ifndef __CLC_INTEGER_CLC_MUL_HI_H__
+#define __CLC_INTEGER_CLC_MUL_HI_H__
+
+#define __CLC_FUNCTION __clc_mul_hi
+#define __CLC_BODY <clc/shared/binary_decl.inc>
+
+#include <clc/integer/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_INTEGER_CLC_MUL_HI_H__
diff --git a/libclc/clc/include/clc/integer/clc_popcount.h 
b/libclc/clc/include/clc/integer/clc_popcount.h
new file mode 100644
index 00000000000000..50f6d173c70b42
--- /dev/null
+++ b/libclc/clc/include/clc/integer/clc_popcount.h
@@ -0,0 +1,12 @@
+#ifndef __CLC_INTEGER_CLC_POPCOUNT_H__
+#define __CLC_INTEGER_CLC_POPCOUNT_H__
+
+#define __CLC_FUNCTION __clc_popcount
+#define __CLC_BODY <clc/shared/unary_decl.inc>
+
+#include <clc/integer/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_INTEGER_CLC_POPCOUNT_H__
diff --git a/libclc/clc/include/clc/integer/clc_rhadd.h 
b/libclc/clc/include/clc/integer/clc_rhadd.h
new file mode 100644
index 00000000000000..9f8d1f367ac053
--- /dev/null
+++ b/libclc/clc/include/clc/integer/clc_rhadd.h
@@ -0,0 +1,12 @@
+#ifndef __CLC_INTEGER_CLC_RHADD_H__
+#define __CLC_INTEGER_CLC_RHADD_H__
+
+#define __CLC_FUNCTION __clc_rhadd
+#define __CLC_BODY <clc/shared/binary_decl.inc>
+
+#include <clc/integer/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_INTEGER_CLC_RHADD_H__
diff --git a/libclc/clc/include/clc/integer/clc_upsample.h 
b/libclc/clc/include/clc/integer/clc_upsample.h
new file mode 100644
index 00000000000000..262e33af954889
--- /dev/null
+++ b/libclc/clc/include/clc/integer/clc_upsample.h
@@ -0,0 +1,32 @@
+#ifndef __CLC_INTEGER_CLC_UPSAMPLE_H__
+#define __CLC_INTEGER_CLC_UPSAMPLE_H__
+
+#include <clc/clcfunc.h>
+#include <clc/clctypes.h>
+
+#define __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE)                       
\
+  _CLC_OVERLOAD _CLC_DECL BGENTYPE __clc_upsample(GENTYPE hi, UGENTYPE lo);
+
+#define __CLC_UPSAMPLE_VEC(BGENTYPE, GENTYPE, UGENTYPE)                        
\
+  __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE)                             
\
+  __CLC_UPSAMPLE_DECL(BGENTYPE##2, GENTYPE##2, UGENTYPE##2)                    
\
+  __CLC_UPSAMPLE_DECL(BGENTYPE##3, GENTYPE##3, UGENTYPE##3)                    
\
+  __CLC_UPSAMPLE_DECL(BGENTYPE##4, GENTYPE##4, UGENTYPE##4)                    
\
+  __CLC_UPSAMPLE_DECL(BGENTYPE##8, GENTYPE##8, UGENTYPE##8)                    
\
+  __CLC_UPSAMPLE_DECL(BGENTYPE##16, GENTYPE##16, UGENTYPE##16)
+
+#define __CLC_UPSAMPLE_TYPES()                                                 
\
+  __CLC_UPSAMPLE_VEC(short, char, uchar)                                       
\
+  __CLC_UPSAMPLE_VEC(ushort, uchar, uchar)                                     
\
+  __CLC_UPSAMPLE_VEC(int, short, ushort)                                       
\
+  __CLC_UPSAMPLE_VEC(uint, ushort, ushort)                                     
\
+  __CLC_UPSAMPLE_VEC(long, int, uint)                                          
\
+  __CLC_UPSAMPLE_VEC(ulong, uint, uint)
+
+__CLC_UPSAMPLE_TYPES()
+
+#undef __CLC_UPSAMPLE_TYPES
+#undef __CLC_UPSAMPLE_DECL
+#undef __CLC_UPSAMPLE_VEC
+
+#endif // __CLC_INTEGER_CLC_UPSAMPLE_H__
diff --git a/libclc/generic/include/clc/integer/definitions.h 
b/libclc/clc/include/clc/integer/definitions.h
similarity index 71%
rename from libclc/generic/include/clc/integer/definitions.h
rename to libclc/clc/include/clc/integer/definitions.h
index 0079c30123db80..18a9e54dec75c6 100644
--- a/libclc/generic/include/clc/integer/definitions.h
+++ b/libclc/clc/include/clc/integer/definitions.h
@@ -1,7 +1,10 @@
+#ifndef __CLC_INTEGER_DEFINITIONS_H__
+#define __CLC_INTEGER_DEFINITIONS_H__
+
 #define CHAR_BIT 8
 #define INT_MAX 2147483647
 #define INT_MIN (-2147483647 - 1)
-#define LONG_MAX  0x7fffffffffffffffL
+#define LONG_MAX 0x7fffffffffffffffL
 #define LONG_MIN (-0x7fffffffffffffffL - 1)
 #define CHAR_MAX SCHAR_MAX
 #define CHAR_MIN SCHAR_MIN
@@ -13,3 +16,5 @@
 #define USHRT_MAX 65535
 #define UINT_MAX 0xffffffff
 #define ULONG_MAX 0xffffffffffffffffUL
+
+#endif // __CLC_INTEGER_DEFINITIONS_H__
diff --git a/libclc/clc/include/clc/integer/gentype24.inc 
b/libclc/clc/include/clc/integer/gentype24.inc
new file mode 100644
index 00000000000000..ad34c3818b1d60
--- /dev/null
+++ b/libclc/clc/include/clc/integer/gentype24.inc
@@ -0,0 +1,137 @@
+#include <clc/clcfunc.h>
+#include <clc/clctypes.h>
+
+#define __CLC_GENSIZE 32
+#undef __CLC_SCALAR_GENTYPE
+#define __CLC_SCALAR_GENTYPE int
+
+#define __CLC_GENTYPE int
+#define __CLC_U_GENTYPE uint
+#define __CLC_S_GENTYPE int
+#define __CLC_SCALAR 1
+#define __CLC_VECSIZE
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_SCALAR
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE int2
+#define __CLC_U_GENTYPE uint2
+#define __CLC_S_GENTYPE int2
+#define __CLC_VECSIZE 2
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE int3
+#define __CLC_U_GENTYPE uint3
+#define __CLC_S_GENTYPE int3
+#define __CLC_VECSIZE 3
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE int4
+#define __CLC_U_GENTYPE uint4
+#define __CLC_S_GENTYPE int4
+#define __CLC_VECSIZE 4
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE int8
+#define __CLC_U_GENTYPE uint8
+#define __CLC_S_GENTYPE int8
+#define __CLC_VECSIZE 8
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE int16
+#define __CLC_U_GENTYPE uint16
+#define __CLC_S_GENTYPE int16
+#define __CLC_VECSIZE 16
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#undef __CLC_SCALAR_GENTYPE
+#define __CLC_SCALAR_GENTYPE uint
+
+#define __CLC_GENTYPE uint
+#define __CLC_U_GENTYPE uint
+#define __CLC_S_GENTYPE int
+#define __CLC_SCALAR 1
+#define __CLC_VECSIZE
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_SCALAR
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE uint2
+#define __CLC_U_GENTYPE uint2
+#define __CLC_S_GENTYPE int2
+#define __CLC_VECSIZE 2
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE uint3
+#define __CLC_U_GENTYPE uint3
+#define __CLC_S_GENTYPE int3
+#define __CLC_VECSIZE 3
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE uint4
+#define __CLC_U_GENTYPE uint4
+#define __CLC_S_GENTYPE int4
+#define __CLC_VECSIZE 4
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE uint8
+#define __CLC_U_GENTYPE uint8
+#define __CLC_S_GENTYPE int8
+#define __CLC_VECSIZE 8
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#define __CLC_GENTYPE uint16
+#define __CLC_U_GENTYPE uint16
+#define __CLC_S_GENTYPE int16
+#define __CLC_VECSIZE 16
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_U_GENTYPE
+#undef __CLC_S_GENTYPE
+
+#undef __CLC_GENSIZE
+#undef __CLC_SCALAR_GENTYPE
+#undef __CLC_BODY
diff --git a/libclc/clc/include/clc/math/clc_mad.h 
b/libclc/clc/include/clc/math/clc_mad.h
index 3eb718e87f3705..5427c0215688e8 100644
--- a/libclc/clc/include/clc/math/clc_mad.h
+++ b/libclc/clc/include/clc/math/clc_mad.h
@@ -1,7 +1,7 @@
 #ifndef __CLC_MATH_CLC_MAD_H__
 #define __CLC_MATH_CLC_MAD_H__
 
-#define __CLC_BODY <clc/math/ternary_decl.inc>
+#define __CLC_BODY <clc/shared/ternary_decl.inc>
 #define __CLC_FUNCTION __clc_mad
 
 #include <clc/math/gentype.inc>
diff --git a/libclc/clc/include/clc/math/ternary_decl.inc 
b/libclc/clc/include/clc/shared/ternary_decl.inc
similarity index 100%
rename from libclc/clc/include/clc/math/ternary_decl.inc
rename to libclc/clc/include/clc/shared/ternary_decl.inc
diff --git a/libclc/clc/include/clc/shared/ternary_def.inc 
b/libclc/clc/include/clc/shared/ternary_def.inc
new file mode 100644
index 00000000000000..59528d825a1715
--- /dev/null
+++ b/libclc/clc/include/clc/shared/ternary_def.inc
@@ -0,0 +1,10 @@
+#include <clc/utils.h>
+
+#ifndef __CLC_FUNCTION
+#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x)
+#endif
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE a, __CLC_GENTYPE b,
+                                              __CLC_GENTYPE c) {
+  return __CLC_FUNCTION(FUNCTION)(a, b, c);
+}
diff --git a/libclc/clc/include/clc/shared/unary_decl.inc 
b/libclc/clc/include/clc/shared/unary_decl.inc
new file mode 100644
index 00000000000000..9858d908da09fe
--- /dev/null
+++ b/libclc/clc/include/clc/shared/unary_decl.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x);
diff --git a/libclc/clc/include/clc/shared/unary_def.inc 
b/libclc/clc/include/clc/shared/unary_def.inc
new file mode 100644
index 00000000000000..e0f34ad817e8b0
--- /dev/null
+++ b/libclc/clc/include/clc/shared/unary_def.inc
@@ -0,0 +1,9 @@
+#include <clc/utils.h>
+
+#ifndef __CLC_FUNCTION
+#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x)
+#endif
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE a) {
+  return __CLC_FUNCTION(FUNCTION)(a);
+}
diff --git a/libclc/clc/lib/clspv/SOURCES b/libclc/clc/lib/clspv/SOURCES
index 6efa3c59b53e70..2581abe64f1443 100644
--- a/libclc/clc/lib/clspv/SOURCES
+++ b/libclc/clc/lib/clspv/SOURCES
@@ -1,5 +1,13 @@
 ../generic/integer/clc_add_sat.cl
+../generic/integer/clc_clz.cl
+../generic/integer/clc_hadd.cl
+../generic/integer/clc_mad24.cl
+../generic/integer/clc_mul24.cl
+../generic/integer/clc_mul_hi.cl
+../generic/integer/clc_popcount.cl
+../generic/integer/clc_rhadd.cl
 ../generic/integer/clc_sub_sat.cl
+../generic/integer/clc_upsample.cl
 ../generic/math/clc_ceil.cl
 ../generic/math/clc_copysign.cl
 ../generic/math/clc_fabs.cl
diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES
index 1ef6636be90b62..2f4df168f70745 100644
--- a/libclc/clc/lib/generic/SOURCES
+++ b/libclc/clc/lib/generic/SOURCES
@@ -5,7 +5,15 @@ geometric/clc_dot.cl
 integer/clc_abs.cl
 integer/clc_abs_diff.cl
 integer/clc_add_sat.cl
+integer/clc_clz.cl
+integer/clc_hadd.cl
+integer/clc_mad24.cl
+integer/clc_mul24.cl
+integer/clc_mul_hi.cl
+integer/clc_popcount.cl
+integer/clc_rhadd.cl
 integer/clc_sub_sat.cl
+integer/clc_upsample.cl
 math/clc_ceil.cl
 math/clc_copysign.cl
 math/clc_fabs.cl
diff --git a/libclc/clc/lib/generic/integer/clc_clz.cl 
b/libclc/clc/lib/generic/integer/clc_clz.cl
new file mode 100644
index 00000000000000..592b65f262bd6b
--- /dev/null
+++ b/libclc/clc/lib/generic/integer/clc_clz.cl
@@ -0,0 +1,44 @@
+#include <clc/clcmacro.h>
+#include <clc/integer/clc_clz.h>
+#include <clc/internal/clc.h>
+
+_CLC_OVERLOAD _CLC_DEF char __clc_clz(char x) {
+  return __clc_clz((ushort)(uchar)x) - 8;
+}
+
+_CLC_OVERLOAD _CLC_DEF uchar __clc_clz(uchar x) {
+  return __clc_clz((ushort)x) - 8;
+}
+
+_CLC_OVERLOAD _CLC_DEF short __clc_clz(short x) {
+  return x ? __builtin_clzs(x) : 16;
+}
+
+_CLC_OVERLOAD _CLC_DEF ushort __clc_clz(ushort x) {
+  return x ? __builtin_clzs(x) : 16;
+}
+
+_CLC_OVERLOAD _CLC_DEF int __clc_clz(int x) {
+  return x ? __builtin_clz(x) : 32;
+}
+
+_CLC_OVERLOAD _CLC_DEF uint __clc_clz(uint x) {
+  return x ? __builtin_clz(x) : 32;
+}
+
+_CLC_OVERLOAD _CLC_DEF long __clc_clz(long x) {
+  return x ? __builtin_clzl(x) : 64;
+}
+
+_CLC_OVERLOAD _CLC_DEF ulong __clc_clz(ulong x) {
+  return x ? __builtin_clzl(x) : 64;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, __clc_clz, char)
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, __clc_clz, uchar)
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, __clc_clz, short)
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, __clc_clz, ushort)
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, __clc_clz, int)
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, __clc_clz, uint)
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, __clc_clz, long)
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, __clc_clz, ulong)
diff --git a/libclc/clc/lib/generic/integer/clc_hadd.cl 
b/libclc/clc/lib/generic/integer/clc_hadd.cl
new file mode 100644
index 00000000000000..8e91d41a843aaa
--- /dev/null
+++ b/libclc/clc/lib/generic/integer/clc_hadd.cl
@@ -0,0 +1,4 @@
+#include <clc/internal/clc.h>
+
+#define __CLC_BODY <clc_hadd.inc>
+#include <clc/integer/gentype.inc>
diff --git a/libclc/clc/lib/generic/integer/clc_hadd.inc 
b/libclc/clc/lib/generic/integer/clc_hadd.inc
new file mode 100644
index 00000000000000..14d921599446b3
--- /dev/null
+++ b/libclc/clc/lib/generic/integer/clc_hadd.inc
@@ -0,0 +1,8 @@
+// hadd = (x+y)>>1
+// This can be simplified to x>>1 + y>>1 + (1 if both x and y have the 1s bit
+// set). This saves us having to do any checks for overflow in the addition sum
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_hadd(__CLC_GENTYPE x,
+                                                __CLC_GENTYPE y) {
+  return (x >> (__CLC_GENTYPE)1) + (y >> (__CLC_GENTYPE)1) +
+         (x & y & (__CLC_GENTYPE)1);
+}
diff --git a/libclc/clc/lib/generic/integer/clc_mad24.cl 
b/libclc/clc/lib/generic/integer/clc_mad24.cl
new file mode 100644
index 00000000000000..86c319cff6d245
--- /dev/null
+++ b/libclc/clc/lib/generic/integer/clc_mad24.cl
@@ -0,0 +1,5 @@
+#include <clc/internal/clc.h>
+#include <clc/integer/clc_mul24.h>
+
+#define __CLC_BODY <clc_mad24.inc>
+#include <clc/integer/gentype24.inc>
diff --git a/libclc/clc/lib/generic/integer/clc_mad24.inc 
b/libclc/clc/lib/generic/integer/clc_mad24.inc
new file mode 100644
index 00000000000000..61c8587d4f86fc
--- /dev/null
+++ b/libclc/clc/lib/generic/integer/clc_mad24.inc
@@ -0,0 +1,5 @@
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_mad24(__CLC_GENTYPE x,
+                                                 __CLC_GENTYPE y,
+                                                 __CLC_GENTYPE z) {
+  return __clc_mul24(x, y) + z;
+}
diff --git a/libclc/clc/lib/generic/integer/clc_mul24.cl 
b/libclc/clc/lib/generic/integer/clc_mul24.cl
new file mode 100644
index 00000000000000..6513a896a8b1d2
--- /dev/null
+++ b/libclc/clc/lib/generic/integer/clc_mul24.cl
@@ -0,0 +1,4 @@
+#include <clc/internal/clc.h>
+
+#define __CLC_BODY <clc_mul24.inc>
+#include <clc/integer/gentype24.inc>
diff --git a/libclc/generic/lib/integer/mul24.inc 
b/libclc/clc/lib/generic/integer/clc_mul24.inc
similarity index 68%
rename from libclc/generic/lib/integer/mul24.inc
rename to libclc/clc/lib/generic/integer/clc_mul24.inc
index 95a2f1d6f31bab..d7e8091c98a314 100644
--- a/libclc/generic/lib/integer/mul24.inc
+++ b/libclc/clc/lib/generic/integer/clc_mul24.inc
@@ -1,10 +1,10 @@
-
 // We need to use shifts here in order to mantain the sign bit for signed
 // integers.  The compiler should optimize this to (x & 0x00FFFFFF) for
 // unsigned integers.
 #define CONVERT_TO_24BIT(x) (((x) << 8) >> 8)
 
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mul24(__CLC_GENTYPE x, __CLC_GENTYPE y){
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_mul24(__CLC_GENTYPE x,
+                                                 __CLC_GENTYPE y) {
   return CONVERT_TO_24BIT(x) * CONVERT_TO_24BIT(y);
 }
 
diff --git a/libclc/clc/lib/generic/integer/clc_mul_hi.cl 
b/libclc/clc/lib/generic/integer/clc_mul_hi.cl
new file mode 100644
index 00000000000000..cf4acc5429cb45
--- /dev/null
+++ b/libclc/clc/lib/generic/integer/clc_mul_hi.cl
@@ -0,0 +1,132 @@
+#include <clc/integer/clc_hadd.h>
+#include <clc/integer/definitions.h>
+#include <clc/internal/clc.h>
+
+// TODO: Replace with __clc_convert_<type> when available
+#define __CLC_CONVERT_TY(X, TY) __builtin_convertvector(X, TY)
+
+#define __CLC_MUL_HI_VEC_IMPL(BGENTYPE, GENTYPE, GENSIZE)                      
\
+  _CLC_OVERLOAD _CLC_DEF GENTYPE __clc_mul_hi(GENTYPE x, GENTYPE y) {          
\
+    BGENTYPE large_x = __CLC_CONVERT_TY(x, BGENTYPE);                          
\
+    BGENTYPE large_y = __CLC_CONVERT_TY(y, BGENTYPE);                          
\
+    BGENTYPE large_mul_hi = (large_x * large_y) >> (BGENTYPE)GENSIZE;          
\
+    return __CLC_CONVERT_TY(large_mul_hi, GENTYPE);                            
\
+  }
+
+// For all types EXCEPT long, which is implemented separately
+#define __CLC_MUL_HI_IMPL(BGENTYPE, GENTYPE, GENSIZE)                          
\
+  _CLC_OVERLOAD _CLC_DEF GENTYPE __clc_mul_hi(GENTYPE x, GENTYPE y) {          
\
+    return (GENTYPE)(((BGENTYPE)x * (BGENTYPE)y) >> GENSIZE);                  
\
+  }
+
+#define __CLC_MUL_HI_DEC_IMPL(BTYPE, TYPE, BITS)                               
\
+  __CLC_MUL_HI_IMPL(BTYPE, TYPE, BITS)                                         
\
+  __CLC_MUL_HI_VEC_IMPL(BTYPE##2, TYPE##2, BITS)                               
\
+  __CLC_MUL_HI_VEC_IMPL(BTYPE##3, TYPE##3, BITS)                               
\
+  __CLC_MUL_HI_VEC_IMPL(BTYPE##4, TYPE##4, BITS)                               
\
+  __CLC_MUL_HI_VEC_IMPL(BTYPE##8, TYPE##8, BITS)                               
\
+  __CLC_MUL_HI_VEC_IMPL(BTYPE##16, TYPE##16, BITS)
+
+_CLC_OVERLOAD _CLC_DEF long __clc_mul_hi(long x, long y) {
+  long f, o, i;
+  ulong l;
+
+  // Move the high/low halves of x/y into the lower 32-bits of variables so
+  // that we can multiply them without worrying about overflow.
+  long x_hi = x >> 32;
+  long x_lo = x & UINT_MAX;
+  long y_hi = y >> 32;
+  long y_lo = y & UINT_MAX;
+
+  // Multiply all of the components according to FOIL method
+  f = x_hi * y_hi;
+  o = x_hi * y_lo;
+  i = x_lo * y_hi;
+  l = x_lo * y_lo;
+
+  // Now add the components back together in the following steps:
+  // F: doesn't need to be modified
+  // O/I: Need to be added together.
+  // L: Shift right by 32-bits, then add into the sum of O and I
+  // Once O/I/L are summed up, then shift the sum by 32-bits and add to F.
+  //
+  // We use hadd to give us a bit of extra precision for the intermediate sums
+  // but as a result, we shift by 31 bits instead of 32
+  return (long)(f + (__clc_hadd(o, (i + (long)((ulong)l >> 32))) >> 31));
+}
+
+_CLC_OVERLOAD _CLC_DEF ulong __clc_mul_hi(ulong x, ulong y) {
+  ulong f, o, i;
+  ulong l;
+
+  // Move the high/low halves of x/y into the lower 32-bits of variables so
+  // that we can multiply them without worrying about overflow.
+  ulong x_hi = x >> 32;
+  ulong x_lo = x & UINT_MAX;
+  ulong y_hi = y >> 32;
+  ulong y_lo = y & UINT_MAX;
+
+  // Multiply all of the components according to FOIL method
+  f = x_hi * y_hi;
+  o = x_hi * y_lo;
+  i = x_lo * y_hi;
+  l = x_lo * y_lo;
+
+  // Now add the components back together, taking care to respect the fact 
that:
+  // F: doesn't need to be modified
+  // O/I: Need to be added together.
+  // L: Shift right by 32-bits, then add into the sum of O and I
+  // Once O/I/L are summed up, then shift the sum by 32-bits and add to F.
+  //
+  // We use hadd to give us a bit of extra precision for the intermediate sums
+  // but as a result, we shift by 31 bits instead of 32
+  return (f + (__clc_hadd(o, (i + (l >> 32))) >> 31));
+}
+
+// Vector-based mul_hi implementation for long/ulong. See comments in the 
scalar
+// versions for more detail.
+#define __CLC_MUL_HI_LONG_VEC_IMPL(TY, UTY)                                    
\
+  _CLC_OVERLOAD _CLC_DEF TY __clc_mul_hi(TY x, TY y) {                         
\
+    TY f, o, i;                                                                
\
+    UTY l;                                                                     
\
+                                                                               
\
+    TY x_hi = x >> 32;                                                         
\
+    TY x_lo = x & UINT_MAX;                                                    
\
+    TY y_hi = y >> 32;                                                         
\
+    TY y_lo = y & UINT_MAX;                                                    
\
+                                                                               
\
+    f = x_hi * y_hi;                                                           
\
+    o = x_hi * y_lo;                                                           
\
+    i = x_lo * y_hi;                                                           
\
+    l = __CLC_CONVERT_TY(x_lo * y_lo, UTY);                                    
\
+    i += __CLC_CONVERT_TY(l >> (UTY)32, TY);                                   
\
+                                                                               
\
+    return f + (__clc_hadd(o, i) >> (TY)31);                                   
\
+  }
+
+#define __CLC_MUL_HI_LONG_IMPL(BTYPE, UBTYPE)                                  
\
+  __CLC_MUL_HI_LONG_VEC_IMPL(BTYPE##2, UBTYPE##2)                              
\
+  __CLC_MUL_HI_LONG_VEC_IMPL(BTYPE##3, UBTYPE##3)                              
\
+  __CLC_MUL_HI_LONG_VEC_IMPL(BTYPE##4, UBTYPE##4)                              
\
+  __CLC_MUL_HI_LONG_VEC_IMPL(BTYPE##8, UBTYPE##8)                              
\
+  __CLC_MUL_HI_LONG_VEC_IMPL(BTYPE##16, UBTYPE##16)
+
+#define __CLC_MUL_HI_TYPES()                                                   
\
+  __CLC_MUL_HI_DEC_IMPL(short, char, 8)                                        
\
+  __CLC_MUL_HI_DEC_IMPL(ushort, uchar, 8)                                      
\
+  __CLC_MUL_HI_DEC_IMPL(int, short, 16)                                        
\
+  __CLC_MUL_HI_DEC_IMPL(uint, ushort, 16)                                      
\
+  __CLC_MUL_HI_DEC_IMPL(long, int, 32)                                         
\
+  __CLC_MUL_HI_DEC_IMPL(ulong, uint, 32)                                       
\
+  __CLC_MUL_HI_LONG_IMPL(long, ulong)                                          
\
+  __CLC_MUL_HI_LONG_IMPL(ulong, ulong)
+
+__CLC_MUL_HI_TYPES()
+
+#undef __CLC_MUL_HI_TYPES
+#undef __CLC_MUL_HI_LONG_IMPL
+#undef __CLC_MUL_HI_LONG_VEC_IMPL
+#undef __CLC_MUL_HI_DEC_IMPL
+#undef __CLC_MUL_HI_IMPL
+#undef __CLC_MUL_HI_VEC_IMPL
+#undef __CLC_CONVERT_TY
diff --git a/libclc/clc/lib/generic/integer/clc_popcount.cl 
b/libclc/clc/lib/generic/integer/clc_popcount.cl
new file mode 100644
index 00000000000000..12e851c15d7959
--- /dev/null
+++ b/libclc/clc/lib/generic/integer/clc_popcount.cl
@@ -0,0 +1,7 @@
+#include <clc/internal/clc.h>
+
+#define FUNCTION __clc_popcount
+#define __CLC_FUNCTION(x) __builtin_elementwise_popcount
+#define __CLC_BODY <clc/shared/unary_def.inc>
+
+#include <clc/integer/gentype.inc>
diff --git a/libclc/clc/lib/generic/integer/clc_rhadd.cl 
b/libclc/clc/lib/generic/integer/clc_rhadd.cl
new file mode 100644
index 00000000000000..00bd2f0ac8058a
--- /dev/null
+++ b/libclc/clc/lib/generic/integer/clc_rhadd.cl
@@ -0,0 +1,4 @@
+#include <clc/internal/clc.h>
+
+#define __CLC_BODY <clc_rhadd.inc>
+#include <clc/integer/gentype.inc>
diff --git a/libclc/clc/lib/generic/integer/clc_rhadd.inc 
b/libclc/clc/lib/generic/integer/clc_rhadd.inc
new file mode 100644
index 00000000000000..d363c42061ffe1
--- /dev/null
+++ b/libclc/clc/lib/generic/integer/clc_rhadd.inc
@@ -0,0 +1,8 @@
+// rhadd = (x+y+1)>>1
+// This is computed as x>>1 + y>>1 + (1 if either x or y has the 1s bit set),
+// so the intermediate sum x+y+1, which could overflow, is never formed.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_rhadd(__CLC_GENTYPE x,
+                                                 __CLC_GENTYPE y) {
+  return (x >> (__CLC_GENTYPE)1) + (y >> (__CLC_GENTYPE)1) +
+         ((x & (__CLC_GENTYPE)1) | (y & (__CLC_GENTYPE)1));
+}
diff --git a/libclc/clc/lib/generic/integer/clc_upsample.cl 
b/libclc/clc/lib/generic/integer/clc_upsample.cl
new file mode 100644
index 00000000000000..d53ef7240bfc22
--- /dev/null
+++ b/libclc/clc/lib/generic/integer/clc_upsample.cl
@@ -0,0 +1,42 @@
+#include <clc/internal/clc.h>
+
+// TODO: Replace with __clc_convert_<type> when available
+#define __CLC_CONVERT_TY(X, TY) __builtin_convertvector(X, TY)
+
+// Vector overload: hi and lo are widened element-wise to BGENTYPE, then hi is
+// shifted left by GENSIZE bits and ORed with lo.
+#define __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE, GENTYPE, UGENTYPE, GENSIZE)          \
+  _CLC_OVERLOAD _CLC_DEF BGENTYPE __clc_upsample(GENTYPE hi, UGENTYPE lo) {    \
+    BGENTYPE large_hi = __CLC_CONVERT_TY(hi, BGENTYPE);                        \
+    BGENTYPE large_lo = __CLC_CONVERT_TY(lo, BGENTYPE);                        \
+    return (large_hi << (BGENTYPE)GENSIZE) | large_lo;                         \
+  }
+
+// Scalar overload, followed by the 2-, 3-, 4-, 8- and 16-element vector
+// overloads for the same type triple.
+#define __CLC_UPSAMPLE_IMPL(BGENTYPE, GENTYPE, UGENTYPE, GENSIZE)              \
+  _CLC_OVERLOAD _CLC_DEF BGENTYPE __clc_upsample(GENTYPE hi, UGENTYPE lo) {    \
+    return ((BGENTYPE)hi << GENSIZE) | lo;                                     \
+  }                                                                            \
+  __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##2, GENTYPE##2, UGENTYPE##2, GENSIZE)       \
+  __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##3, GENTYPE##3, UGENTYPE##3, GENSIZE)       \
+  __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##4, GENTYPE##4, UGENTYPE##4, GENSIZE)       \
+  __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##8, GENTYPE##8, UGENTYPE##8, GENSIZE)       \
+  __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##16, GENTYPE##16, UGENTYPE##16, GENSIZE)
+
+// One instantiation per (result, hi, lo) type triple; GENSIZE is the shift
+// applied to hi (8, 16 or 32 bits).
+#define __CLC_UPSAMPLE_TYPES()                                                 \
+  __CLC_UPSAMPLE_IMPL(short, char, uchar, 8)                                   \
+  __CLC_UPSAMPLE_IMPL(ushort, uchar, uchar, 8)                                 \
+  __CLC_UPSAMPLE_IMPL(int, short, ushort, 16)                                  \
+  __CLC_UPSAMPLE_IMPL(uint, ushort, ushort, 16)                                \
+  __CLC_UPSAMPLE_IMPL(long, int, uint, 32)                                     \
+  __CLC_UPSAMPLE_IMPL(ulong, uint, uint, 32)
+
+__CLC_UPSAMPLE_TYPES()
+
+#undef __CLC_UPSAMPLE_TYPES
+#undef __CLC_UPSAMPLE_IMPL
+#undef __CLC_UPSAMPLE_VEC_IMPL
+#undef __CLC_CONVERT_TY
diff --git a/libclc/clc/lib/spirv/SOURCES b/libclc/clc/lib/spirv/SOURCES
index a87223e8c622c5..ddc9e4c49d8626 100644
--- a/libclc/clc/lib/spirv/SOURCES
+++ b/libclc/clc/lib/spirv/SOURCES
@@ -3,7 +3,15 @@
 ../generic/common/clc_smoothstep.cl
 ../generic/geometric/clc_dot.cl
 ../generic/integer/clc_add_sat.cl
+../generic/integer/clc_clz.cl
+../generic/integer/clc_hadd.cl
+../generic/integer/clc_mad24.cl
+../generic/integer/clc_mul24.cl
+../generic/integer/clc_mul_hi.cl
+../generic/integer/clc_popcount.cl
+../generic/integer/clc_rhadd.cl
 ../generic/integer/clc_sub_sat.cl
+../generic/integer/clc_upsample.cl
 ../generic/math/clc_ceil.cl
 ../generic/math/clc_copysign.cl
 ../generic/math/clc_fabs.cl
diff --git a/libclc/clc/lib/spirv64/SOURCES b/libclc/clc/lib/spirv64/SOURCES
index a87223e8c622c5..ddc9e4c49d8626 100644
--- a/libclc/clc/lib/spirv64/SOURCES
+++ b/libclc/clc/lib/spirv64/SOURCES
@@ -3,7 +3,15 @@
 ../generic/common/clc_smoothstep.cl
 ../generic/geometric/clc_dot.cl
 ../generic/integer/clc_add_sat.cl
+../generic/integer/clc_clz.cl
+../generic/integer/clc_hadd.cl
+../generic/integer/clc_mad24.cl
+../generic/integer/clc_mul24.cl
+../generic/integer/clc_mul_hi.cl
+../generic/integer/clc_popcount.cl
+../generic/integer/clc_rhadd.cl
 ../generic/integer/clc_sub_sat.cl
+../generic/integer/clc_upsample.cl
 ../generic/math/clc_ceil.cl
 ../generic/math/clc_copysign.cl
 ../generic/math/clc_fabs.cl
diff --git a/libclc/generic/include/clc/integer/clz.h 
b/libclc/generic/include/clc/integer/clz.h
index f7cdbf78ec0607..5479279eb84a2d 100644
--- a/libclc/generic/include/clc/integer/clz.h
+++ b/libclc/generic/include/clc/integer/clz.h
@@ -1,2 +1,7 @@
-#define __CLC_BODY <clc/integer/clz.inc>
+#define __CLC_FUNCTION clz
+#define __CLC_BODY <clc/shared/unary_decl.inc>
+
 #include <clc/integer/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/integer/clz.inc 
b/libclc/generic/include/clc/integer/clz.inc
deleted file mode 100644
index 45826d10c9fafe..00000000000000
--- a/libclc/generic/include/clc/integer/clz.inc
+++ /dev/null
@@ -1 +0,0 @@
-_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE clz(__CLC_GENTYPE x);
diff --git a/libclc/generic/include/clc/integer/hadd.h 
b/libclc/generic/include/clc/integer/hadd.h
index 37304e26cc2d62..0d73944ac11bae 100644
--- a/libclc/generic/include/clc/integer/hadd.h
+++ b/libclc/generic/include/clc/integer/hadd.h
@@ -1,2 +1,7 @@
-#define __CLC_BODY <clc/integer/hadd.inc>
+#define __CLC_FUNCTION hadd
+#define __CLC_BODY <clc/shared/binary_decl.inc>
+
 #include <clc/integer/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/integer/hadd.inc 
b/libclc/generic/include/clc/integer/hadd.inc
deleted file mode 100644
index f698989cef2026..00000000000000
--- a/libclc/generic/include/clc/integer/hadd.inc
+++ /dev/null
@@ -1 +0,0 @@
-_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE hadd(__CLC_GENTYPE x, __CLC_GENTYPE y);
diff --git a/libclc/generic/include/clc/integer/mad24.h 
b/libclc/generic/include/clc/integer/mad24.h
index 0c120faac2b15b..f7292f79f821f0 100644
--- a/libclc/generic/include/clc/integer/mad24.h
+++ b/libclc/generic/include/clc/integer/mad24.h
@@ -1,3 +1,6 @@
-#define __CLC_BODY <clc/integer/mad24.inc>
-#include <clc/integer/integer-gentype.inc>
-#undef __CLC_BODY
+#define __CLC_FUNCTION mad24
+#define __CLC_BODY <clc/shared/ternary_decl.inc>
+
+#include <clc/integer/gentype24.inc>
+
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/integer/mad24.inc 
b/libclc/generic/include/clc/integer/mad24.inc
deleted file mode 100644
index 81fe0c2a89266c..00000000000000
--- a/libclc/generic/include/clc/integer/mad24.inc
+++ /dev/null
@@ -1 +0,0 @@
-_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mad24(__CLC_GENTYPE x, __CLC_GENTYPE y, 
__CLC_GENTYPE z);
diff --git a/libclc/generic/include/clc/integer/mad_hi.h 
b/libclc/generic/include/clc/integer/mad_hi.h
index 863ce92d9f2d8c..9f9de75df76136 100644
--- a/libclc/generic/include/clc/integer/mad_hi.h
+++ b/libclc/generic/include/clc/integer/mad_hi.h
@@ -1 +1,7 @@
-#define mad_hi(a, b, c) (mul_hi((a),(b))+(c))
+#define __CLC_FUNCTION mad_hi
+#define __CLC_BODY <clc/shared/ternary_decl.inc>
+
+#include <clc/integer/gentype.inc>
+
+#undef __CLC_FUNCTION
+#undef __CLC_BODY
diff --git a/libclc/generic/include/clc/integer/mul24.h 
b/libclc/generic/include/clc/integer/mul24.h
index 4f97098d70f0f4..27e0f20040cb9a 100644
--- a/libclc/generic/include/clc/integer/mul24.h
+++ b/libclc/generic/include/clc/integer/mul24.h
@@ -1,3 +1,7 @@
-#define __CLC_BODY <clc/integer/mul24.inc>
-#include <clc/integer/integer-gentype.inc>
+#define __CLC_FUNCTION mul24
+#define __CLC_BODY <clc/shared/binary_decl.inc>
+
+#include <clc/integer/gentype24.inc>
+
 #undef __CLC_BODY
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/integer/mul24.inc 
b/libclc/generic/include/clc/integer/mul24.inc
deleted file mode 100644
index 8cbf7c10ac447d..00000000000000
--- a/libclc/generic/include/clc/integer/mul24.inc
+++ /dev/null
@@ -1 +0,0 @@
-_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mul24(__CLC_GENTYPE x, __CLC_GENTYPE y);
diff --git a/libclc/generic/include/clc/integer/mul_hi.h 
b/libclc/generic/include/clc/integer/mul_hi.h
index 27b95d83442f97..3607a80f439679 100644
--- a/libclc/generic/include/clc/integer/mul_hi.h
+++ b/libclc/generic/include/clc/integer/mul_hi.h
@@ -1,2 +1,7 @@
-#define __CLC_BODY <clc/integer/mul_hi.inc>
+#define __CLC_FUNCTION mul_hi
+#define __CLC_BODY <clc/shared/binary_decl.inc>
+
 #include <clc/integer/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/integer/mul_hi.inc 
b/libclc/generic/include/clc/integer/mul_hi.inc
deleted file mode 100644
index ce9e5c0b2c18c8..00000000000000
--- a/libclc/generic/include/clc/integer/mul_hi.inc
+++ /dev/null
@@ -1 +0,0 @@
-_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mul_hi(__CLC_GENTYPE x, __CLC_GENTYPE y);
diff --git a/libclc/generic/include/clc/integer/popcount.h 
b/libclc/generic/include/clc/integer/popcount.h
index 23335f45b6fe3f..a3ce8ebb1df1f4 100644
--- a/libclc/generic/include/clc/integer/popcount.h
+++ b/libclc/generic/include/clc/integer/popcount.h
@@ -1,5 +1,7 @@
 #define __CLC_FUNCTION popcount
-#define __CLC_BODY <clc/integer/unary.inc>
+#define __CLC_BODY <clc/shared/unary_decl.inc>
+
 #include <clc/integer/gentype.inc>
-#undef __CLC_FUNCTION
+
 #undef __CLC_BODY
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/integer/rhadd.h 
b/libclc/generic/include/clc/integer/rhadd.h
index 69b43faeebd246..a777d4b66f2f81 100644
--- a/libclc/generic/include/clc/integer/rhadd.h
+++ b/libclc/generic/include/clc/integer/rhadd.h
@@ -1,2 +1,7 @@
-#define __CLC_BODY <clc/integer/rhadd.inc>
+#define __CLC_FUNCTION rhadd
+#define __CLC_BODY <clc/shared/binary_decl.inc>
+
 #include <clc/integer/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/include/clc/integer/rhadd.inc 
b/libclc/generic/include/clc/integer/rhadd.inc
deleted file mode 100644
index 88ccaf09fd5ef8..00000000000000
--- a/libclc/generic/include/clc/integer/rhadd.inc
+++ /dev/null
@@ -1 +0,0 @@
-_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE rhadd(__CLC_GENTYPE x, __CLC_GENTYPE y);
diff --git a/libclc/generic/include/clc/integer/upsample.h 
b/libclc/generic/include/clc/integer/upsample.h
index 0b36b692a2c8d3..37038f6ad90344 100644
--- a/libclc/generic/include/clc/integer/upsample.h
+++ b/libclc/generic/include/clc/integer/upsample.h
@@ -1,25 +1,24 @@
-#define __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) \
-    _CLC_OVERLOAD _CLC_DECL BGENTYPE upsample(GENTYPE hi, UGENTYPE lo);
+#define __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE)                       
\
+  _CLC_OVERLOAD _CLC_DECL BGENTYPE upsample(GENTYPE hi, UGENTYPE lo);
 
-#define __CLC_UPSAMPLE_VEC(BGENTYPE, GENTYPE, UGENTYPE) \
-    __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) \
-    __CLC_UPSAMPLE_DECL(BGENTYPE##2, GENTYPE##2, UGENTYPE##2) \
-    __CLC_UPSAMPLE_DECL(BGENTYPE##3, GENTYPE##3, UGENTYPE##3) \
-    __CLC_UPSAMPLE_DECL(BGENTYPE##4, GENTYPE##4, UGENTYPE##4) \
-    __CLC_UPSAMPLE_DECL(BGENTYPE##8, GENTYPE##8, UGENTYPE##8) \
-    __CLC_UPSAMPLE_DECL(BGENTYPE##16, GENTYPE##16, UGENTYPE##16) \
+#define __CLC_UPSAMPLE_VEC(BGENTYPE, GENTYPE, UGENTYPE)                        
\
+  __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE)                             
\
+  __CLC_UPSAMPLE_DECL(BGENTYPE##2, GENTYPE##2, UGENTYPE##2)                    
\
+  __CLC_UPSAMPLE_DECL(BGENTYPE##3, GENTYPE##3, UGENTYPE##3)                    
\
+  __CLC_UPSAMPLE_DECL(BGENTYPE##4, GENTYPE##4, UGENTYPE##4)                    
\
+  __CLC_UPSAMPLE_DECL(BGENTYPE##8, GENTYPE##8, UGENTYPE##8)                    
\
+  __CLC_UPSAMPLE_DECL(BGENTYPE##16, GENTYPE##16, UGENTYPE##16)
 
-#define __CLC_UPSAMPLE_TYPES() \
-    __CLC_UPSAMPLE_VEC(short, char, uchar) \
-    __CLC_UPSAMPLE_VEC(ushort, uchar, uchar) \
-    __CLC_UPSAMPLE_VEC(int, short, ushort) \
-    __CLC_UPSAMPLE_VEC(uint, ushort, ushort) \
-    __CLC_UPSAMPLE_VEC(long, int, uint) \
-    __CLC_UPSAMPLE_VEC(ulong, uint, uint) \
+#define __CLC_UPSAMPLE_TYPES()                                                 
\
+  __CLC_UPSAMPLE_VEC(short, char, uchar)                                       
\
+  __CLC_UPSAMPLE_VEC(ushort, uchar, uchar)                                     
\
+  __CLC_UPSAMPLE_VEC(int, short, ushort)                                       
\
+  __CLC_UPSAMPLE_VEC(uint, ushort, ushort)                                     
\
+  __CLC_UPSAMPLE_VEC(long, int, uint)                                          
\
+  __CLC_UPSAMPLE_VEC(ulong, uint, uint)
 
 __CLC_UPSAMPLE_TYPES()
 
 #undef __CLC_UPSAMPLE_TYPES
 #undef __CLC_UPSAMPLE_DECL
 #undef __CLC_UPSAMPLE_VEC
-
diff --git a/libclc/generic/include/clc/math/fma.h 
b/libclc/generic/include/clc/math/fma.h
index c7c23eb2055f68..bedfc5414da002 100644
--- a/libclc/generic/include/clc/math/fma.h
+++ b/libclc/generic/include/clc/math/fma.h
@@ -1,4 +1,4 @@
-#define __CLC_BODY <clc/math/ternary_decl.inc>
+#define __CLC_BODY <clc/shared/ternary_decl.inc>
 #define __CLC_FUNCTION fma
 
 #include <clc/math/gentype.inc>
diff --git a/libclc/generic/include/clc/math/mad.h 
b/libclc/generic/include/clc/math/mad.h
index 99b6484193012b..6f3f1f6201807b 100644
--- a/libclc/generic/include/clc/math/mad.h
+++ b/libclc/generic/include/clc/math/mad.h
@@ -1,4 +1,4 @@
-#define __CLC_BODY <clc/math/ternary_decl.inc>
+#define __CLC_BODY <clc/shared/ternary_decl.inc>
 #define __CLC_FUNCTION mad
 
 #include <clc/math/gentype.inc>
diff --git a/libclc/generic/include/integer/popcount.h 
b/libclc/generic/include/integer/popcount.h
deleted file mode 100644
index 00c753753bb4e0..00000000000000
--- a/libclc/generic/include/integer/popcount.h
+++ /dev/null
@@ -1,3 +0,0 @@
-#define __CLC_FUNCTION __clc_native_popcount
-#define __CLC_INTRINSIC "llvm.ctpop"
-#include <integer/unary_intrin.inc>
diff --git a/libclc/generic/include/integer/unary_intrin.inc 
b/libclc/generic/include/integer/unary_intrin.inc
deleted file mode 100644
index ee9862a4c5b3a6..00000000000000
--- a/libclc/generic/include/integer/unary_intrin.inc
+++ /dev/null
@@ -1,20 +0,0 @@
-#define __CLC_INTRINSIC_DEF(SCALAR_TYPE, BIT_SIZE) \
-_CLC_OVERLOAD SCALAR_TYPE __CLC_FUNCTION(SCALAR_TYPE x) __asm(__CLC_INTRINSIC 
".i" BIT_SIZE); \
-_CLC_OVERLOAD SCALAR_TYPE##2 __CLC_FUNCTION(SCALAR_TYPE##2 x) 
__asm(__CLC_INTRINSIC ".v2i" BIT_SIZE); \
-_CLC_OVERLOAD SCALAR_TYPE##3 __CLC_FUNCTION(SCALAR_TYPE##3 x) 
__asm(__CLC_INTRINSIC ".v3i" BIT_SIZE); \
-_CLC_OVERLOAD SCALAR_TYPE##4 __CLC_FUNCTION(SCALAR_TYPE##4 x) 
__asm(__CLC_INTRINSIC ".v4i" BIT_SIZE); \
-_CLC_OVERLOAD SCALAR_TYPE##8 __CLC_FUNCTION(SCALAR_TYPE##8 x) 
__asm(__CLC_INTRINSIC ".v8i" BIT_SIZE); \
-_CLC_OVERLOAD SCALAR_TYPE##16 __CLC_FUNCTION(SCALAR_TYPE##16 x) 
__asm(__CLC_INTRINSIC ".v16i" BIT_SIZE);
-
-__CLC_INTRINSIC_DEF(char, "8")
-__CLC_INTRINSIC_DEF(uchar, "8")
-__CLC_INTRINSIC_DEF(short, "16")
-__CLC_INTRINSIC_DEF(ushort, "16")
-__CLC_INTRINSIC_DEF(int, "32")
-__CLC_INTRINSIC_DEF(uint, "32")
-__CLC_INTRINSIC_DEF(long, "64")
-__CLC_INTRINSIC_DEF(ulong, "64")
-
-#undef __CLC_FUNCTION
-#undef __CLC_INTRINSIC
-#undef __CLC_INTRINSIC_DEF
diff --git a/libclc/generic/include/math/clc_fma.h 
b/libclc/generic/include/math/clc_fma.h
index 490cb9bcefc86b..598df66cf72e95 100644
--- a/libclc/generic/include/math/clc_fma.h
+++ b/libclc/generic/include/math/clc_fma.h
@@ -4,7 +4,7 @@
 
 #define __FLOAT_ONLY
 #define __CLC_FUNCTION __clc_sw_fma
-#define __CLC_BODY <clc/math/ternary_decl.inc>
+#define __CLC_BODY <clc/shared/ternary_decl.inc>
 #include <clc/math/gentype.inc>
 #undef __CLC_BODY
 #undef __CLC_FUNCTION
diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES
index 217e3bca48b7ac..a62c87902a6a7e 100644
--- a/libclc/generic/lib/SOURCES
+++ b/libclc/generic/lib/SOURCES
@@ -68,6 +68,7 @@ integer/add_sat.cl
 integer/clz.cl
 integer/hadd.cl
 integer/mad24.cl
+integer/mad_hi.cl
 integer/mad_sat.cl
 integer/mul24.cl
 integer/mul_hi.cl
diff --git a/libclc/generic/lib/integer/clz.cl 
b/libclc/generic/lib/integer/clz.cl
index 904d027d376134..c2d423174b36cb 100644
--- a/libclc/generic/lib/integer/clz.cl
+++ b/libclc/generic/lib/integer/clz.cl
@@ -1,43 +1,7 @@
 #include <clc/clc.h>
-#include <clc/clcmacro.h>
+#include <clc/integer/clc_clz.h>
 
-_CLC_OVERLOAD _CLC_DEF char clz(char x) {
-  return clz((ushort)(uchar)x) - 8;
-}
+#define FUNCTION clz
+#define __CLC_BODY <clc/shared/unary_def.inc>
 
-_CLC_OVERLOAD _CLC_DEF uchar clz(uchar x) {
-  return clz((ushort)x) - 8;
-}
-
-_CLC_OVERLOAD _CLC_DEF short clz(short x) {
-  return x ? __builtin_clzs(x) : 16;
-}
-
-_CLC_OVERLOAD _CLC_DEF ushort clz(ushort x) {
-  return x ? __builtin_clzs(x) : 16;
-}
-
-_CLC_OVERLOAD _CLC_DEF int clz(int x) {
-  return x ? __builtin_clz(x) : 32;
-}
-
-_CLC_OVERLOAD _CLC_DEF uint clz(uint x) {
-  return x ? __builtin_clz(x) : 32;
-}
-
-_CLC_OVERLOAD _CLC_DEF long clz(long x) {
-  return x ? __builtin_clzl(x) : 64;
-}
-
-_CLC_OVERLOAD _CLC_DEF ulong clz(ulong x) {
-  return x ? __builtin_clzl(x) : 64;
-}
-
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, clz, char)
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, clz, uchar)
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, clz, short)
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, clz, ushort)
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, clz, int)
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, clz, uint)
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, clz, long)
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, clz, ulong)
+#include <clc/integer/gentype.inc>
diff --git a/libclc/generic/lib/integer/hadd.cl 
b/libclc/generic/lib/integer/hadd.cl
index 749026e5a8ad81..d8461f1d5398b5 100644
--- a/libclc/generic/lib/integer/hadd.cl
+++ b/libclc/generic/lib/integer/hadd.cl
@@ -1,4 +1,7 @@
 #include <clc/clc.h>
+#include <clc/integer/clc_hadd.h>
+
+#define FUNCTION hadd
+#define __CLC_BODY <clc/shared/binary_def.inc>
 
-#define __CLC_BODY <hadd.inc>
 #include <clc/integer/gentype.inc>
diff --git a/libclc/generic/lib/integer/hadd.inc 
b/libclc/generic/lib/integer/hadd.inc
deleted file mode 100644
index ea59d9bd7db5f8..00000000000000
--- a/libclc/generic/lib/integer/hadd.inc
+++ /dev/null
@@ -1,6 +0,0 @@
-//hadd = (x+y)>>1
-//This can be simplified to x>>1 + y>>1 + (1 if both x and y have the 1s bit set)
-//This saves us having to do any checks for overflow in the addition sum
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE hadd(__CLC_GENTYPE x, __CLC_GENTYPE y) {
-    return (x>>(__CLC_GENTYPE)1)+(y>>(__CLC_GENTYPE)1)+(x&y&(__CLC_GENTYPE)1);
-}
diff --git a/libclc/generic/lib/integer/mad24.cl 
b/libclc/generic/lib/integer/mad24.cl
index e29e99f28b56fc..24935b54f24cd6 100644
--- a/libclc/generic/lib/integer/mad24.cl
+++ b/libclc/generic/lib/integer/mad24.cl
@@ -1,4 +1,7 @@
 #include <clc/clc.h>
+#include <clc/integer/clc_mad24.h>
 
-#define __CLC_BODY <mad24.inc>
-#include <clc/integer/integer-gentype.inc>
+#define FUNCTION mad24
+#define __CLC_BODY <clc/shared/ternary_def.inc>
+
+#include <clc/integer/gentype24.inc>
diff --git a/libclc/generic/lib/integer/mad24.inc 
b/libclc/generic/lib/integer/mad24.inc
deleted file mode 100644
index 902b0aafe4c874..00000000000000
--- a/libclc/generic/lib/integer/mad24.inc
+++ /dev/null
@@ -1,3 +0,0 @@
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mad24(__CLC_GENTYPE x, __CLC_GENTYPE y, 
__CLC_GENTYPE z){
-  return mul24(x, y) + z;
-}
diff --git a/libclc/generic/lib/integer/mad_hi.cl 
b/libclc/generic/lib/integer/mad_hi.cl
new file mode 100644
index 00000000000000..7915faa966ec40
--- /dev/null
+++ b/libclc/generic/lib/integer/mad_hi.cl
@@ -0,0 +1,7 @@
+#include <clc/clc.h>
+#include <clc/integer/clc_mad_hi.h>
+
+#define FUNCTION mad_hi
+#define __CLC_BODY <clc/shared/ternary_def.inc>
+
+#include <clc/integer/gentype.inc>
diff --git a/libclc/generic/lib/integer/mul24.cl 
b/libclc/generic/lib/integer/mul24.cl
index 8aedca64b85905..b8cec2c62c337f 100644
--- a/libclc/generic/lib/integer/mul24.cl
+++ b/libclc/generic/lib/integer/mul24.cl
@@ -1,4 +1,7 @@
 #include <clc/clc.h>
+#include <clc/integer/clc_mul24.h>
 
-#define __CLC_BODY <mul24.inc>
-#include <clc/integer/integer-gentype.inc>
+#define FUNCTION mul24
+#define __CLC_BODY <clc/shared/binary_def.inc>
+
+#include <clc/integer/gentype24.inc>
diff --git a/libclc/generic/lib/integer/mul_hi.cl 
b/libclc/generic/lib/integer/mul_hi.cl
index 174d893afb14f9..a19ef7ecda2b9e 100644
--- a/libclc/generic/lib/integer/mul_hi.cl
+++ b/libclc/generic/lib/integer/mul_hi.cl
@@ -1,109 +1,7 @@
 #include <clc/clc.h>
+#include <clc/integer/clc_mul_hi.h>
 
-//For all types EXCEPT long, which is implemented separately
-#define __CLC_MUL_HI_IMPL(BGENTYPE, GENTYPE, GENSIZE) \
-    _CLC_OVERLOAD _CLC_DEF GENTYPE mul_hi(GENTYPE x, GENTYPE y){ \
-        return (GENTYPE)(((BGENTYPE)x * (BGENTYPE)y) >> GENSIZE); \
-    } \
+#define FUNCTION mul_hi
+#define __CLC_BODY <clc/shared/binary_def.inc>
 
-//FOIL-based long mul_hi
-//
-// Summary: Treat mul_hi(long x, long y) as:
-// (a+b) * (c+d) where a and c are the high-order parts of x and y respectively
-// and b and d are the low-order parts of x and y.
-// Thinking back to algebra, we use FOIL to do the work.
-
-_CLC_OVERLOAD _CLC_DEF long mul_hi(long x, long y){
-    long f, o, i;
-    ulong l;
-
-    //Move the high/low halves of x/y into the lower 32-bits of variables so
-    //that we can multiply them without worrying about overflow.
-    long x_hi = x >> 32;
-    long x_lo = x & UINT_MAX;
-    long y_hi = y >> 32;
-    long y_lo = y & UINT_MAX;
-
-    //Multiply all of the components according to FOIL method
-    f = x_hi * y_hi;
-    o = x_hi * y_lo;
-    i = x_lo * y_hi;
-    l = x_lo * y_lo;
-
-    //Now add the components back together in the following steps:
-    //F: doesn't need to be modified
-    //O/I: Need to be added together.
-    //L: Shift right by 32-bits, then add into the sum of O and I
-    //Once O/I/L are summed up, then shift the sum by 32-bits and add to F.
-    //
-    //We use hadd to give us a bit of extra precision for the intermediate sums
-    //but as a result, we shift by 31 bits instead of 32
-    return (long)(f + (hadd(o, (i + (long)((ulong)l>>32))) >> 31));
-}
-
-_CLC_OVERLOAD _CLC_DEF ulong mul_hi(ulong x, ulong y){
-    ulong f, o, i;
-    ulong l;
-
-    //Move the high/low halves of x/y into the lower 32-bits of variables so
-    //that we can multiply them without worrying about overflow.
-    ulong x_hi = x >> 32;
-    ulong x_lo = x & UINT_MAX;
-    ulong y_hi = y >> 32;
-    ulong y_lo = y & UINT_MAX;
-
-    //Multiply all of the components according to FOIL method
-    f = x_hi * y_hi;
-    o = x_hi * y_lo;
-    i = x_lo * y_hi;
-    l = x_lo * y_lo;
-
-    //Now add the components back together, taking care to respect the fact that:
-    //F: doesn't need to be modified
-    //O/I: Need to be added together.
-    //L: Shift right by 32-bits, then add into the sum of O and I
-    //Once O/I/L are summed up, then shift the sum by 32-bits and add to F.
-    //
-    //We use hadd to give us a bit of extra precision for the intermediate sums
-    //but as a result, we shift by 31 bits instead of 32
-    return (f + (hadd(o, (i + (l>>32))) >> 31));
-}
-
-#define __CLC_MUL_HI_VEC(GENTYPE) \
-    _CLC_OVERLOAD _CLC_DEF GENTYPE##2 mul_hi(GENTYPE##2 x, GENTYPE##2 y){ \
-        return (GENTYPE##2){mul_hi(x.s0, y.s0), mul_hi(x.s1, y.s1)}; \
-    } \
-    _CLC_OVERLOAD _CLC_DEF GENTYPE##3 mul_hi(GENTYPE##3 x, GENTYPE##3 y){ \
-        return (GENTYPE##3){mul_hi(x.s0, y.s0), mul_hi(x.s1, y.s1), 
mul_hi(x.s2, y.s2)}; \
-    } \
-    _CLC_OVERLOAD _CLC_DEF GENTYPE##4 mul_hi(GENTYPE##4 x, GENTYPE##4 y){ \
-        return (GENTYPE##4){mul_hi(x.lo, y.lo), mul_hi(x.hi, y.hi)}; \
-    } \
-    _CLC_OVERLOAD _CLC_DEF GENTYPE##8 mul_hi(GENTYPE##8 x, GENTYPE##8 y){ \
-        return (GENTYPE##8){mul_hi(x.lo, y.lo), mul_hi(x.hi, y.hi)}; \
-    } \
-    _CLC_OVERLOAD _CLC_DEF GENTYPE##16 mul_hi(GENTYPE##16 x, GENTYPE##16 y){ \
-        return (GENTYPE##16){mul_hi(x.lo, y.lo), mul_hi(x.hi, y.hi)}; \
-    } \
-
-#define __CLC_MUL_HI_DEC_IMPL(BTYPE, TYPE, BITS) \
-    __CLC_MUL_HI_IMPL(BTYPE, TYPE, BITS) \
-    __CLC_MUL_HI_VEC(TYPE)
-
-#define __CLC_MUL_HI_TYPES() \
-    __CLC_MUL_HI_DEC_IMPL(short, char, 8) \
-    __CLC_MUL_HI_DEC_IMPL(ushort, uchar, 8) \
-    __CLC_MUL_HI_DEC_IMPL(int, short, 16) \
-    __CLC_MUL_HI_DEC_IMPL(uint, ushort, 16) \
-    __CLC_MUL_HI_DEC_IMPL(long, int, 32) \
-    __CLC_MUL_HI_DEC_IMPL(ulong, uint, 32) \
-    __CLC_MUL_HI_VEC(long) \
-    __CLC_MUL_HI_VEC(ulong)
-
-__CLC_MUL_HI_TYPES()
-
-#undef __CLC_MUL_HI_TYPES
-#undef __CLC_MUL_HI_DEC_IMPL
-#undef __CLC_MUL_HI_IMPL
-#undef __CLC_MUL_HI_VEC
-#undef __CLC_B32
+#include <clc/integer/gentype.inc>
diff --git a/libclc/generic/lib/integer/popcount.cl 
b/libclc/generic/lib/integer/popcount.cl
index ca83b1afaf9dab..4e8a6ebab65967 100644
--- a/libclc/generic/lib/integer/popcount.cl
+++ b/libclc/generic/lib/integer/popcount.cl
@@ -1,8 +1,7 @@
 #include <clc/clc.h>
-#include <integer/popcount.h>
+#include <clc/integer/clc_popcount.h>
 
-#define __CLC_FUNC popcount
-#define __CLC_IMPL_FUNC __clc_native_popcount
+#define FUNCTION popcount
+#define __CLC_BODY <clc/shared/unary_def.inc>
 
-#define __CLC_BODY "../clc_unary.inc"
 #include <clc/integer/gentype.inc>
diff --git a/libclc/generic/lib/integer/rhadd.cl 
b/libclc/generic/lib/integer/rhadd.cl
index c985870f7c7a24..b1cb492cad8b86 100644
--- a/libclc/generic/lib/integer/rhadd.cl
+++ b/libclc/generic/lib/integer/rhadd.cl
@@ -1,4 +1,7 @@
 #include <clc/clc.h>
+#include <clc/integer/clc_rhadd.h>
+
+#define FUNCTION rhadd
+#define __CLC_BODY <clc/shared/binary_def.inc>
 
-#define __CLC_BODY <rhadd.inc>
 #include <clc/integer/gentype.inc>
diff --git a/libclc/generic/lib/integer/rhadd.inc 
b/libclc/generic/lib/integer/rhadd.inc
deleted file mode 100644
index 3d6076874808e6..00000000000000
--- a/libclc/generic/lib/integer/rhadd.inc
+++ /dev/null
@@ -1,6 +0,0 @@
-//rhadd = (x+y+1)>>1
-//This can be simplified to x>>1 + y>>1 + (1 if either x or y have the 1s bit set)
-//This saves us having to do any checks for overflow in the addition sums
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE rhadd(__CLC_GENTYPE x, __CLC_GENTYPE y) {
-    return 
(x>>(__CLC_GENTYPE)1)+(y>>(__CLC_GENTYPE)1)+((x&(__CLC_GENTYPE)1)|(y&(__CLC_GENTYPE)1));
-}
diff --git a/libclc/generic/lib/integer/upsample.cl 
b/libclc/generic/lib/integer/upsample.cl
index da77315f8f9344..984a731e3b4d12 100644
--- a/libclc/generic/lib/integer/upsample.cl
+++ b/libclc/generic/lib/integer/upsample.cl
@@ -1,32 +1,34 @@
 #include <clc/clc.h>
+#include <clc/integer/clc_upsample.h>
 
-#define __CLC_UPSAMPLE_IMPL(BGENTYPE, GENTYPE, UGENTYPE, GENSIZE) \
-    _CLC_OVERLOAD _CLC_DEF BGENTYPE upsample(GENTYPE hi, UGENTYPE lo){ \
-        return ((BGENTYPE)hi << GENSIZE) | lo; \
-    } \
-    _CLC_OVERLOAD _CLC_DEF BGENTYPE##2 upsample(GENTYPE##2 hi, UGENTYPE##2 
lo){ \
-        return (BGENTYPE##2){upsample(hi.s0, lo.s0), upsample(hi.s1, lo.s1)}; \
-    } \
-    _CLC_OVERLOAD _CLC_DEF BGENTYPE##3 upsample(GENTYPE##3 hi, UGENTYPE##3 
lo){ \
-        return (BGENTYPE##3){upsample(hi.s0, lo.s0), upsample(hi.s1, lo.s1), 
upsample(hi.s2, lo.s2)}; \
-    } \
-    _CLC_OVERLOAD _CLC_DEF BGENTYPE##4 upsample(GENTYPE##4 hi, UGENTYPE##4 
lo){ \
-        return (BGENTYPE##4){upsample(hi.lo, lo.lo), upsample(hi.hi, lo.hi)}; \
-    } \
-    _CLC_OVERLOAD _CLC_DEF BGENTYPE##8 upsample(GENTYPE##8 hi, UGENTYPE##8 
lo){ \
-        return (BGENTYPE##8){upsample(hi.lo, lo.lo), upsample(hi.hi, lo.hi)}; \
-    } \
-    _CLC_OVERLOAD _CLC_DEF BGENTYPE##16 upsample(GENTYPE##16 hi, UGENTYPE##16 
lo){ \
-        return (BGENTYPE##16){upsample(hi.lo, lo.lo), upsample(hi.hi, lo.hi)}; 
\
-    } \
+#define __CLC_UPSAMPLE_IMPL(BGENTYPE, GENTYPE, UGENTYPE)                       
\
+  _CLC_OVERLOAD _CLC_DEF BGENTYPE upsample(GENTYPE hi, UGENTYPE lo) {          
\
+    return __clc_upsample(hi, lo);                                             
\
+  }                                                                            
\
+  _CLC_OVERLOAD _CLC_DEF BGENTYPE##2 upsample(GENTYPE##2 hi, UGENTYPE##2 lo) { 
\
+    return __clc_upsample(hi, lo);                                             
\
+  }                                                                            
\
+  _CLC_OVERLOAD _CLC_DEF BGENTYPE##3 upsample(GENTYPE##3 hi, UGENTYPE##3 lo) { 
\
+    return __clc_upsample(hi, lo);                                             
\
+  }                                                                            
\
+  _CLC_OVERLOAD _CLC_DEF BGENTYPE##4 upsample(GENTYPE##4 hi, UGENTYPE##4 lo) { 
\
+    return __clc_upsample(hi, lo);                                             
\
+  }                                                                            
\
+  _CLC_OVERLOAD _CLC_DEF BGENTYPE##8 upsample(GENTYPE##8 hi, UGENTYPE##8 lo) { 
\
+    return __clc_upsample(hi, lo);                                             
\
+  }                                                                            
\
+  _CLC_OVERLOAD _CLC_DEF BGENTYPE##16 upsample(GENTYPE##16 hi,                 
\
+                                               UGENTYPE##16 lo) {              
\
+    return __clc_upsample(hi, lo);                                             
\
+  }
 
-#define __CLC_UPSAMPLE_TYPES() \
-    __CLC_UPSAMPLE_IMPL(short, char, uchar, 8) \
-    __CLC_UPSAMPLE_IMPL(ushort, uchar, uchar, 8) \
-    __CLC_UPSAMPLE_IMPL(int, short, ushort, 16) \
-    __CLC_UPSAMPLE_IMPL(uint, ushort, ushort, 16) \
-    __CLC_UPSAMPLE_IMPL(long, int, uint, 32) \
-    __CLC_UPSAMPLE_IMPL(ulong, uint, uint, 32) \
+#define __CLC_UPSAMPLE_TYPES()                                                 
\
+  __CLC_UPSAMPLE_IMPL(short, char, uchar)                                      
\
+  __CLC_UPSAMPLE_IMPL(ushort, uchar, uchar)                                    
\
+  __CLC_UPSAMPLE_IMPL(int, short, ushort)                                      
\
+  __CLC_UPSAMPLE_IMPL(uint, ushort, ushort)                                    
\
+  __CLC_UPSAMPLE_IMPL(long, int, uint)                                         
\
+  __CLC_UPSAMPLE_IMPL(ulong, uint, uint)
 
 __CLC_UPSAMPLE_TYPES()
 
diff --git a/libclc/generic/lib/math/clc_fma.cl 
b/libclc/generic/lib/math/clc_fma.cl
index 3f29e7f92615a9..33f5072425d986 100644
--- a/libclc/generic/lib/math/clc_fma.cl
+++ b/libclc/generic/lib/math/clc_fma.cl
@@ -23,6 +23,7 @@
 #include <clc/clc.h>
 #include <clc/clcmacro.h>
 #include <clc/integer/clc_abs.h>
+#include <clc/integer/clc_clz.h>
 #include <clc/math/clc_subnormal_config.h>
 #include <clc/math/math.h>
 #include <clc/relational/clc_isinf.h>
@@ -118,7 +119,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, 
float c) {
   }
 
   // detect overflow/underflow
-  int overflow_bits = 3 - clz(st_fma.mantissa);
+  int overflow_bits = 3 - __clc_clz(st_fma.mantissa);
 
   // adjust exponent
   st_fma.exponent += overflow_bits;
diff --git a/libclc/generic/lib/math/clc_fmod.cl 
b/libclc/generic/lib/math/clc_fmod.cl
index db47536833342f..35298b7e42d5c0 100644
--- a/libclc/generic/lib/math/clc_fmod.cl
+++ b/libclc/generic/lib/math/clc_fmod.cl
@@ -22,6 +22,7 @@
 
 #include <clc/clc.h>
 #include <clc/clcmacro.h>
+#include <clc/integer/clc_clz.h>
 #include <clc/math/clc_floor.h>
 #include <clc/math/clc_subnormal_config.h>
 #include <clc/math/clc_trunc.h>
@@ -87,14 +88,14 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y)
     ulong xsgn = ux ^ ax;
     double dx = as_double(ax);
     int xexp = convert_int(ax >> EXPSHIFTBITS_DP64);
-    int xexp1 = 11 - (int) clz(ax & MANTBITS_DP64);
+    int xexp1 = 11 - (int) __clc_clz(ax & MANTBITS_DP64);
     xexp1 = xexp < 1 ? xexp1 : xexp;
 
     ulong uy = as_ulong(y);
     ulong ay = uy & ~SIGNBIT_DP64;
     double dy = as_double(ay);
     int yexp = convert_int(ay >> EXPSHIFTBITS_DP64);
-    int yexp1 = 11 - (int) clz(ay & MANTBITS_DP64);
+    int yexp1 = 11 - (int) __clc_clz(ay & MANTBITS_DP64);
     yexp1 = yexp < 1 ? yexp1 : yexp;
 
     // First assume |x| > |y|
diff --git a/libclc/generic/lib/math/clc_remainder.cl b/libclc/generic/lib/math/clc_remainder.cl
index e9d2e382d9beb3..3a357de6f1962f 100644
--- a/libclc/generic/lib/math/clc_remainder.cl
+++ b/libclc/generic/lib/math/clc_remainder.cl
@@ -22,6 +22,7 @@
 
 #include <clc/clc.h>
 #include <clc/clcmacro.h>
+#include <clc/integer/clc_clz.h>
 #include <clc/math/clc_floor.h>
 #include <clc/math/clc_subnormal_config.h>
 #include <clc/math/clc_trunc.h>
@@ -95,14 +96,14 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y)
     ulong xsgn = ux ^ ax;
     double dx = as_double(ax);
     int xexp = convert_int(ax >> EXPSHIFTBITS_DP64);
-    int xexp1 = 11 - (int) clz(ax & MANTBITS_DP64);
+    int xexp1 = 11 - (int) __clc_clz(ax & MANTBITS_DP64);
     xexp1 = xexp < 1 ? xexp1 : xexp;
 
     ulong uy = as_ulong(y);
     ulong ay = uy & ~SIGNBIT_DP64;
     double dy = as_double(ay);
     int yexp = convert_int(ay >> EXPSHIFTBITS_DP64);
-    int yexp1 = 11 - (int) clz(ay & MANTBITS_DP64);
+    int yexp1 = 11 - (int) __clc_clz(ay & MANTBITS_DP64);
     yexp1 = yexp < 1 ? yexp1 : yexp;
 
     int qsgn = ((ux ^ uy) & SIGNBIT_DP64) == 0UL ? 1 : -1;
diff --git a/libclc/generic/lib/math/clc_remquo.cl b/libclc/generic/lib/math/clc_remquo.cl
index 9cbda094294ad1..af3e7a2b07500d 100644
--- a/libclc/generic/lib/math/clc_remquo.cl
+++ b/libclc/generic/lib/math/clc_remquo.cl
@@ -22,6 +22,7 @@
 
 #include <clc/clc.h>
 #include <clc/clcmacro.h>
+#include <clc/integer/clc_clz.h>
 #include <clc/math/clc_floor.h>
 #include <clc/math/clc_subnormal_config.h>
 #include <clc/math/clc_trunc.h>
@@ -134,14 +135,14 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remquo(double x, double y,
   ulong xsgn = ux ^ ax;
   double dx = as_double(ax);
   int xexp = convert_int(ax >> EXPSHIFTBITS_DP64);
-  int xexp1 = 11 - (int)clz(ax & MANTBITS_DP64);
+  int xexp1 = 11 - (int)__clc_clz(ax & MANTBITS_DP64);
   xexp1 = xexp < 1 ? xexp1 : xexp;
 
   ulong uy = as_ulong(y);
   ulong ay = uy & ~SIGNBIT_DP64;
   double dy = as_double(ay);
   int yexp = convert_int(ay >> EXPSHIFTBITS_DP64);
-  int yexp1 = 11 - (int)clz(ay & MANTBITS_DP64);
+  int yexp1 = 11 - (int)__clc_clz(ay & MANTBITS_DP64);
   yexp1 = yexp < 1 ? yexp1 : yexp;
 
   int qsgn = ((ux ^ uy) & SIGNBIT_DP64) == 0UL ? 1 : -1;
diff --git a/libclc/generic/lib/math/sincos_helpers.cl b/libclc/generic/lib/math/sincos_helpers.cl
index 6afb1887d77979..22f2bf61bf27d7 100644
--- a/libclc/generic/lib/math/sincos_helpers.cl
+++ b/libclc/generic/lib/math/sincos_helpers.cl
@@ -22,6 +22,8 @@
 
 #include "sincos_helpers.h"
 #include <clc/clc.h>
+#include <clc/integer/clc_clz.h>
+#include <clc/integer/clc_mul_hi.h>
 #include <clc/math/clc_mad.h>
 #include <clc/math/clc_trunc.h>
 #include <clc/math/math.h>
@@ -179,11 +181,11 @@ _CLC_DEF int __clc_argReductionSmallS(float *r, float *rr, float x) {
 
 #define FULL_MUL(A, B, HI, LO)                                                 \
   LO = A * B;                                                                  \
-  HI = mul_hi(A, B)
+  HI = __clc_mul_hi(A, B)
 
 #define FULL_MAD(A, B, C, HI, LO)                                              \
   LO = ((A) * (B) + (C));                                                      \
-  HI = mul_hi(A, B);                                                           \
+  HI = __clc_mul_hi(A, B);                                                     \
   HI += LO < C
 
 _CLC_DEF int __clc_argReductionLargeS(float *r, float *rr, float x) {
@@ -277,7 +279,7 @@ _CLC_DEF int __clc_argReductionLargeS(float *r, float *rr, float x) {
   p5 = p5 ^ flip;
 
   // Find exponent and shift away leading zeroes and hidden bit
-  xe = clz(p7) + 1;
+  xe = __clc_clz(p7) + 1;
   shift = 32 - xe;
   p7 = bitalign(p7, p6, shift);
   p6 = bitalign(p6, p5, shift);
@@ -290,7 +292,7 @@ _CLC_DEF int __clc_argReductionLargeS(float *r, float *rr, float x) {
 
   // Get 24 more bits of fraction in another float, there are not long strings
   // of zeroes here
-  int xxe = clz(p7) + 1;
+  int xxe = __clc_clz(p7) + 1;
   p7 = bitalign(p7, p6, 32 - xxe);
   float q0 = as_float(sign | ((127 - (xe + 23 + xxe)) << 23) | (p7 >> 9));
 

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[libclc] [libclc] Move several integer functions to CLC library (PR #116786)

Reply via email to