Author: Fraser Cormack Date: 2025-02-03T17:50:42Z New Revision: fe694b18dc518b86eae9aab85ff03abc54e1662f
URL: https://github.com/llvm/llvm-project/commit/fe694b18dc518b86eae9aab85ff03abc54e1662f DIFF: https://github.com/llvm/llvm-project/commit/fe694b18dc518b86eae9aab85ff03abc54e1662f.diff LOG: [libclc] Move mad_sat to CLC; optimize for vector types (#125517) This commit moves the mad_sat builtin to the CLC library. It also optimizes it for vector types by avoiding scalarization. To help do this it transforms the previous control-flow code into vector select code. This has also been done for the scalar versions for simplicity. Added: libclc/clc/include/clc/integer/clc_mad_sat.h libclc/clc/lib/generic/integer/clc_mad_sat.cl Modified: libclc/clc/include/clc/integer/definitions.h libclc/clc/lib/clspv/SOURCES libclc/clc/lib/generic/SOURCES libclc/clc/lib/spirv/SOURCES libclc/generic/lib/integer/mad_sat.cl Removed: ################################################################################ diff --git a/libclc/clc/include/clc/integer/clc_mad_sat.h b/libclc/clc/include/clc/integer/clc_mad_sat.h new file mode 100644 index 00000000000000..c474d067b0f628 --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_mad_sat.h @@ -0,0 +1,12 @@ +#ifndef __CLC_INTEGER_CLC_MAD_SAT_H__ +#define __CLC_INTEGER_CLC_MAD_SAT_H__ + +#define __CLC_FUNCTION __clc_mad_sat +#define __CLC_BODY <clc/shared/ternary_decl.inc> + +#include <clc/integer/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_INTEGER_CLC_MAD_SAT_H__ diff --git a/libclc/clc/include/clc/integer/definitions.h b/libclc/clc/include/clc/integer/definitions.h index 18a9e54dec75c6..1b51c11a93f801 100644 --- a/libclc/clc/include/clc/integer/definitions.h +++ b/libclc/clc/include/clc/integer/definitions.h @@ -13,8 +13,12 @@ #define SHRT_MAX 32767 #define SHRT_MIN (-32767 - 1) #define UCHAR_MAX 255 +#define UCHAR_MIN 0 #define USHRT_MAX 65535 +#define USHRT_MIN 0 #define UINT_MAX 0xffffffff +#define UINT_MIN 0 #define ULONG_MAX 0xffffffffffffffffUL +#define ULONG_MIN 0UL #endif // __CLC_INTEGER_DEFINITIONS_H__ diff --git a/libclc/clc/lib/clspv/SOURCES b/libclc/clc/lib/clspv/SOURCES index 2581abe64f1443..c96a6fc15bf677 100644 --- a/libclc/clc/lib/clspv/SOURCES +++ b/libclc/clc/lib/clspv/SOURCES @@ -2,6 +2,7 @@ ../generic/integer/clc_clz.cl ../generic/integer/clc_hadd.cl ../generic/integer/clc_mad24.cl +../generic/integer/clc_mad_sat.cl ../generic/integer/clc_mul24.cl ../generic/integer/clc_mul_hi.cl ../generic/integer/clc_popcount.cl diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES index 2f4df168f70745..5fd882eb1bb256 100644 --- a/libclc/clc/lib/generic/SOURCES +++ b/libclc/clc/lib/generic/SOURCES @@ -8,6 +8,7 @@ integer/clc_add_sat.cl integer/clc_clz.cl integer/clc_hadd.cl integer/clc_mad24.cl +integer/clc_mad_sat.cl integer/clc_mul24.cl integer/clc_mul_hi.cl integer/clc_popcount.cl diff --git a/libclc/clc/lib/generic/integer/clc_mad_sat.cl b/libclc/clc/lib/generic/integer/clc_mad_sat.cl new file mode 100644 index 00000000000000..4e559dba2b2f57 --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_mad_sat.cl @@ -0,0 +1,119 @@ +#include <clc/clcmacro.h> +#include <clc/integer/clc_add_sat.h> +#include <clc/integer/clc_mad24.h> +#include <clc/integer/clc_mul_hi.h> +#include <clc/integer/clc_upsample.h> +#include <clc/integer/definitions.h> +#include <clc/internal/clc.h> +#include <clc/relational/clc_select.h> +#include <clc/shared/clc_clamp.h> + +#define __CLC_CONVERT_TY(X, TY) __builtin_convertvector(X, TY) + +// Macro for defining mad_sat variants for char/uchar/short/ushort +// FIXME: Once using __clc_convert_ty, can easily unify scalar and vector defs +#define __CLC_DEFINE_SIMPLE_MAD_SAT(TYPE, UP_TYPE, LIT_PREFIX) \ + _CLC_OVERLOAD _CLC_DEF TYPE __clc_mad_sat(TYPE x, TYPE y, TYPE z) { \ + return __clc_clamp( \ + (UP_TYPE)__clc_mad24((UP_TYPE)x, (UP_TYPE)y, (UP_TYPE)z), \ + (UP_TYPE)LIT_PREFIX##_MIN, (UP_TYPE)LIT_PREFIX##_MAX); \ + } + +#define __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE, UP_TYPE, LIT_PREFIX) \ + _CLC_OVERLOAD _CLC_DEF TYPE __clc_mad_sat(TYPE x, TYPE y, TYPE z) { \ + UP_TYPE upscaled_mad = __clc_mad24(__CLC_CONVERT_TY(x, UP_TYPE), \ + __CLC_CONVERT_TY(y, UP_TYPE), \ + __CLC_CONVERT_TY(z, UP_TYPE)); \ + UP_TYPE clamped_mad = __clc_clamp(upscaled_mad, (UP_TYPE)LIT_PREFIX##_MIN, \ + (UP_TYPE)LIT_PREFIX##_MAX); \ + return __CLC_CONVERT_TY(clamped_mad, TYPE); \ + } + +#define __CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(TYPE, UP_TYPE, LIT_PREFIX) \ + __CLC_DEFINE_SIMPLE_MAD_SAT(TYPE, UP_TYPE, LIT_PREFIX) \ + __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##2, UP_TYPE##2, LIT_PREFIX) \ + __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##3, UP_TYPE##3, LIT_PREFIX) \ + __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##4, UP_TYPE##4, LIT_PREFIX) \ + __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##8, UP_TYPE##8, LIT_PREFIX) \ + __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##16, UP_TYPE##16, LIT_PREFIX) + +__CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(char, int, CHAR) +__CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(uchar, uint, UCHAR) +__CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(short, int, SHRT) +__CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(ushort, uint, USHRT) + +// Macro for defining mad_sat variants for uint/ulong +#define __CLC_DEFINE_UINTLONG_MAD_SAT(UTYPE, STYPE, ULIT_PREFIX) \ + _CLC_OVERLOAD _CLC_DEF UTYPE __clc_mad_sat(UTYPE x, UTYPE y, UTYPE z) { \ + STYPE has_mul_hi = __clc_mul_hi(x, y) != (UTYPE)0; \ + return __clc_select(__clc_add_sat(x * y, z), (UTYPE)ULIT_PREFIX##_MAX, \ + has_mul_hi); \ + } + +#define __CLC_DEFINE_UINTLONG_MAD_SAT_ALL_TYS(UTY, STY, ULIT_PREFIX) \ + __CLC_DEFINE_UINTLONG_MAD_SAT(UTY, STY, ULIT_PREFIX) \ + __CLC_DEFINE_UINTLONG_MAD_SAT(UTY##2, STY##2, ULIT_PREFIX) \ + __CLC_DEFINE_UINTLONG_MAD_SAT(UTY##3, STY##3, ULIT_PREFIX) \ + __CLC_DEFINE_UINTLONG_MAD_SAT(UTY##4, STY##4, ULIT_PREFIX) \ + __CLC_DEFINE_UINTLONG_MAD_SAT(UTY##8, STY##8, ULIT_PREFIX) \ + __CLC_DEFINE_UINTLONG_MAD_SAT(UTY##16, STY##16, ULIT_PREFIX) + +__CLC_DEFINE_UINTLONG_MAD_SAT_ALL_TYS(uint, int, UINT) +__CLC_DEFINE_UINTLONG_MAD_SAT_ALL_TYS(ulong, long, ULONG) + +// Macro for defining mad_sat variants for int +#define __CLC_DEFINE_SINT_MAD_SAT(INTTY, UINTTY, SLONGTY) \ + _CLC_OVERLOAD _CLC_DEF INTTY __clc_mad_sat(INTTY x, INTTY y, INTTY z) { \ + INTTY mhi = __clc_mul_hi(x, y); \ + UINTTY mlo = __clc_as_##UINTTY(x * y); \ + SLONGTY m = __clc_upsample(mhi, mlo); \ + m += __CLC_CONVERT_TY(z, SLONGTY); \ + m = __clc_clamp(m, (SLONGTY)INT_MIN, (SLONGTY)INT_MAX); \ + return __CLC_CONVERT_TY(m, INTTY); \ + } + +// FIXME: Once using __clc_convert_ty, can easily unify scalar and vector defs +#define __CLC_DEFINE_SINT_MAD_SAT_ALL_TYS(INTTY, UINTTY, SLONGTY) \ + _CLC_OVERLOAD _CLC_DEF INTTY __clc_mad_sat(INTTY x, INTTY y, INTTY z) { \ + INTTY mhi = __clc_mul_hi(x, y); \ + UINTTY mlo = __clc_as_##UINTTY(x * y); \ + SLONGTY m = __clc_upsample(mhi, mlo); \ + m += z; \ + return __clc_clamp(m, (SLONGTY)INT_MIN, (SLONGTY)INT_MAX); \ + } \ + __CLC_DEFINE_SINT_MAD_SAT(INTTY##2, UINTTY##2, SLONGTY##2) \ + __CLC_DEFINE_SINT_MAD_SAT(INTTY##3, UINTTY##3, SLONGTY##3) \ + __CLC_DEFINE_SINT_MAD_SAT(INTTY##4, UINTTY##4, SLONGTY##4) \ + __CLC_DEFINE_SINT_MAD_SAT(INTTY##8, UINTTY##8, SLONGTY##8) \ + __CLC_DEFINE_SINT_MAD_SAT(INTTY##16, UINTTY##16, SLONGTY##16) + +__CLC_DEFINE_SINT_MAD_SAT_ALL_TYS(int, uint, long) + +// Macro for defining mad_sat variants for long +#define __CLC_DEFINE_SLONG_MAD_SAT(SLONGTY, ULONGTY) \ + _CLC_OVERLOAD _CLC_DEF SLONGTY __clc_mad_sat(SLONGTY x, SLONGTY y, \ + SLONGTY z) { \ + SLONGTY hi = __clc_mul_hi(x, y); \ + ULONGTY ulo = __clc_as_##ULONGTY(x * y); \ + SLONGTY max1 = (x < 0) == (y < 0) && hi != 0; \ + SLONGTY max2 = hi == 0 && ulo >= LONG_MAX && \ + (z > 0 || (ulo + __clc_as_##ULONGTY(z)) > LONG_MAX); \ + SLONGTY min1 = (((x < 0) != (y < 0)) && hi != -1); \ + SLONGTY min2 = \ + hi == -1 && ulo <= ((ULONGTY)LONG_MAX + 1UL) && \ + (z < 0 || __clc_as_##ULONGTY(z) < ((ULONGTY)LONG_MAX - ulo)); \ + SLONGTY ret = __clc_as_##SLONGTY(ulo + __clc_as_##ULONGTY(z)); \ + ret = __clc_select(ret, (SLONGTY)LONG_MAX, (SLONGTY)(max1 || max2)); \ + ret = __clc_select(ret, (SLONGTY)LONG_MIN, (SLONGTY)(min1 || min2)); \ + return ret; \ + } + +#define __CLC_DEFINE_SLONG_MAD_SAT_ALL_TYS(SLONGTY, ULONGTY) \ + __CLC_DEFINE_SLONG_MAD_SAT(SLONGTY, ULONGTY) \ + __CLC_DEFINE_SLONG_MAD_SAT(SLONGTY##2, ULONGTY##2) \ + __CLC_DEFINE_SLONG_MAD_SAT(SLONGTY##3, ULONGTY##3) \ + __CLC_DEFINE_SLONG_MAD_SAT(SLONGTY##4, ULONGTY##4) \ + __CLC_DEFINE_SLONG_MAD_SAT(SLONGTY##8, ULONGTY##8) \ + __CLC_DEFINE_SLONG_MAD_SAT(SLONGTY##16, ULONGTY##16) + +__CLC_DEFINE_SLONG_MAD_SAT_ALL_TYS(long, ulong) diff --git a/libclc/clc/lib/spirv/SOURCES b/libclc/clc/lib/spirv/SOURCES index ddc9e4c49d8626..c3cc4068225d81 100644 --- a/libclc/clc/lib/spirv/SOURCES +++ b/libclc/clc/lib/spirv/SOURCES @@ -6,6 +6,7 @@ ../generic/integer/clc_clz.cl ../generic/integer/clc_hadd.cl ../generic/integer/clc_mad24.cl +../generic/integer/clc_mad_sat.cl ../generic/integer/clc_mul24.cl ../generic/integer/clc_mul_hi.cl ../generic/integer/clc_popcount.cl diff --git a/libclc/generic/lib/integer/mad_sat.cl b/libclc/generic/lib/integer/mad_sat.cl index 2372eaacd6da82..e9c44364deee0c 100644 --- a/libclc/generic/lib/integer/mad_sat.cl +++ b/libclc/generic/lib/integer/mad_sat.cl @@ -1,72 +1,7 @@ #include <clc/clc.h> -#include <clc/clcmacro.h> +#include <clc/integer/clc_mad_sat.h> -_CLC_OVERLOAD _CLC_DEF char mad_sat(char x, char y, char z) { - return clamp((short)mad24((short)x, (short)y, (short)z), (short)CHAR_MIN, (short) CHAR_MAX); -} +#define FUNCTION mad_sat +#define __CLC_BODY <clc/shared/ternary_def.inc> -_CLC_OVERLOAD _CLC_DEF uchar mad_sat(uchar x, uchar y, uchar z) { - return clamp((ushort)mad24((ushort)x, (ushort)y, (ushort)z), (ushort)0, (ushort) UCHAR_MAX); -} - -_CLC_OVERLOAD _CLC_DEF short mad_sat(short x, short y, short z) { - return clamp((int)mad24((int)x, (int)y, (int)z), (int)SHRT_MIN, (int) SHRT_MAX); -} - -_CLC_OVERLOAD _CLC_DEF ushort mad_sat(ushort x, ushort y, ushort z) { - return clamp((uint)mad24((uint)x, (uint)y, (uint)z), (uint)0, (uint) USHRT_MAX); -} - -_CLC_OVERLOAD _CLC_DEF int mad_sat(int x, int y, int z) { - int mhi = mul_hi(x, y); - uint mlo = x * y; - long m = upsample(mhi, mlo); - m += z; - if (m > INT_MAX) - return INT_MAX; - if (m < INT_MIN) - return INT_MIN; - return m; -} - -_CLC_OVERLOAD _CLC_DEF uint mad_sat(uint x, uint y, uint z) { - if (mul_hi(x, y) != 0) - return UINT_MAX; - return add_sat(x * y, z); -} - -_CLC_OVERLOAD _CLC_DEF long mad_sat(long x, long y, long z) { - long hi = mul_hi(x, y); - ulong ulo = x * y; - long slo = x * y; - /* Big overflow of more than 2 bits, add can't fix this */ - if (((x < 0) == (y < 0)) && hi != 0) - return LONG_MAX; - /* Low overflow in mul and z not neg enough to correct it */ - if (hi == 0 && ulo >= LONG_MAX && (z > 0 || (ulo + z) > LONG_MAX)) - return LONG_MAX; - /* Big overflow of more than 2 bits, add can't fix this */ - if (((x < 0) != (y < 0)) && hi != -1) - return LONG_MIN; - /* Low overflow in mul and z not pos enough to correct it */ - if (hi == -1 && ulo <= ((ulong)LONG_MAX + 1UL) && (z < 0 || z < (LONG_MAX - ulo))) - return LONG_MIN; - /* We have checked all conditions, any overflow in addition returns - * the correct value */ - return ulo + z; -} - -_CLC_OVERLOAD _CLC_DEF ulong mad_sat(ulong x, ulong y, ulong z) { - if (mul_hi(x, y) != 0) - return ULONG_MAX; - return add_sat(x * y, z); -} - -_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, mad_sat, char, char, char) -_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, mad_sat, uchar, uchar, uchar) -_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, mad_sat, short, short, short) -_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, mad_sat, ushort, ushort, ushort) -_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, mad_sat, int, int, int) -_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, mad_sat, uint, uint, uint) -_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, mad_sat, long, long, long) -_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, mad_sat, ulong, ulong, ulong) +#include <clc/integer/gentype.inc> _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits