Author: Fraser Cormack Date: 2025-01-29T11:12:40Z New Revision: 12cdf4330d32ce073f88dfaa1ab9a32327b9ef38
URL: https://github.com/llvm/llvm-project/commit/12cdf4330d32ce073f88dfaa1ab9a32327b9ef38 DIFF: https://github.com/llvm/llvm-project/commit/12cdf4330d32ce073f88dfaa1ab9a32327b9ef38.diff LOG: [libclc] Move (add|sub)_sat to CLC; optimize (#124903) Using the `__builtin_elementwise_(add|sub)_sat` functions allows us to directly optimize to the desired intrinsic, and avoid scalarization for vector types. Added: libclc/clc/include/clc/integer/clc_add_sat.h libclc/clc/include/clc/integer/clc_sub_sat.h libclc/clc/lib/generic/integer/clc_add_sat.cl libclc/clc/lib/generic/integer/clc_sub_sat.cl Modified: libclc/clc/lib/clspv/SOURCES libclc/clc/lib/generic/SOURCES libclc/clc/lib/spirv/SOURCES libclc/clc/lib/spirv64/SOURCES libclc/generic/lib/integer/add_sat.cl libclc/generic/lib/integer/sub_sat.cl libclc/generic/lib/math/clc_ldexp.cl Removed: ################################################################################ diff --git a/libclc/clc/include/clc/integer/clc_add_sat.h b/libclc/clc/include/clc/integer/clc_add_sat.h new file mode 100644 index 00000000000000..8db19627f18e31 --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_add_sat.h @@ -0,0 +1,12 @@ +#ifndef __CLC_INTEGER_CLC_ADD_SAT_H__ +#define __CLC_INTEGER_CLC_ADD_SAT_H__ + +#define __CLC_FUNCTION __clc_add_sat +#define __CLC_BODY <clc/shared/binary_decl.inc> + +#include <clc/integer/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_INTEGER_CLC_ADD_SAT_H__ diff --git a/libclc/clc/include/clc/integer/clc_sub_sat.h b/libclc/clc/include/clc/integer/clc_sub_sat.h new file mode 100644 index 00000000000000..95a8fd8873e996 --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_sub_sat.h @@ -0,0 +1,12 @@ +#ifndef __CLC_INTEGER_CLC_SUB_SAT_H__ +#define __CLC_INTEGER_CLC_SUB_SAT_H__ + +#define __CLC_FUNCTION __clc_sub_sat +#define __CLC_BODY <clc/shared/binary_decl.inc> + +#include <clc/integer/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_INTEGER_CLC_SUB_SAT_H__ 
diff --git a/libclc/clc/lib/clspv/SOURCES b/libclc/clc/lib/clspv/SOURCES index 66818590100630..6efa3c59b53e70 100644 --- a/libclc/clc/lib/clspv/SOURCES +++ b/libclc/clc/lib/clspv/SOURCES @@ -1,3 +1,5 @@ +../generic/integer/clc_add_sat.cl +../generic/integer/clc_sub_sat.cl ../generic/math/clc_ceil.cl ../generic/math/clc_copysign.cl ../generic/math/clc_fabs.cl diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES index 9feda65c45f4bf..1ef6636be90b62 100644 --- a/libclc/clc/lib/generic/SOURCES +++ b/libclc/clc/lib/generic/SOURCES @@ -4,6 +4,8 @@ common/clc_smoothstep.cl geometric/clc_dot.cl integer/clc_abs.cl integer/clc_abs_diff.cl +integer/clc_add_sat.cl +integer/clc_sub_sat.cl math/clc_ceil.cl math/clc_copysign.cl math/clc_fabs.cl diff --git a/libclc/clc/lib/generic/integer/clc_add_sat.cl b/libclc/clc/lib/generic/integer/clc_add_sat.cl new file mode 100644 index 00000000000000..344c105720e080 --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_add_sat.cl @@ -0,0 +1,7 @@ +#include <clc/internal/clc.h> + +#define FUNCTION __clc_add_sat +#define __CLC_FUNCTION(x) __builtin_elementwise_add_sat +#define __CLC_BODY <clc/shared/binary_def.inc> + +#include <clc/integer/gentype.inc> diff --git a/libclc/clc/lib/generic/integer/clc_sub_sat.cl b/libclc/clc/lib/generic/integer/clc_sub_sat.cl new file mode 100644 index 00000000000000..184713531fdbe7 --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_sub_sat.cl @@ -0,0 +1,7 @@ +#include <clc/internal/clc.h> + +#define FUNCTION __clc_sub_sat +#define __CLC_FUNCTION(x) __builtin_elementwise_sub_sat +#define __CLC_BODY <clc/shared/binary_def.inc> + +#include <clc/integer/gentype.inc> diff --git a/libclc/clc/lib/spirv/SOURCES b/libclc/clc/lib/spirv/SOURCES index 509236d587cd01..a87223e8c622c5 100644 --- a/libclc/clc/lib/spirv/SOURCES +++ b/libclc/clc/lib/spirv/SOURCES @@ -2,6 +2,8 @@ ../generic/common/clc_radians.cl ../generic/common/clc_smoothstep.cl ../generic/geometric/clc_dot.cl 
+../generic/integer/clc_add_sat.cl +../generic/integer/clc_sub_sat.cl ../generic/math/clc_ceil.cl ../generic/math/clc_copysign.cl ../generic/math/clc_fabs.cl diff --git a/libclc/clc/lib/spirv64/SOURCES b/libclc/clc/lib/spirv64/SOURCES index 509236d587cd01..a87223e8c622c5 100644 --- a/libclc/clc/lib/spirv64/SOURCES +++ b/libclc/clc/lib/spirv64/SOURCES @@ -2,6 +2,8 @@ ../generic/common/clc_radians.cl ../generic/common/clc_smoothstep.cl ../generic/geometric/clc_dot.cl +../generic/integer/clc_add_sat.cl +../generic/integer/clc_sub_sat.cl ../generic/math/clc_ceil.cl ../generic/math/clc_copysign.cl ../generic/math/clc_fabs.cl diff --git a/libclc/generic/lib/integer/add_sat.cl b/libclc/generic/lib/integer/add_sat.cl index 11a4a331fbd02a..895eeeba438264 100644 --- a/libclc/generic/lib/integer/add_sat.cl +++ b/libclc/generic/lib/integer/add_sat.cl @@ -1,73 +1,7 @@ #include <clc/clc.h> -#include <clc/clcmacro.h> +#include <clc/integer/clc_add_sat.h> -// From add_sat.ll -_CLC_DECL char __clc_add_sat_s8(char, char); -_CLC_DECL uchar __clc_add_sat_u8(uchar, uchar); -_CLC_DECL short __clc_add_sat_s16(short, short); -_CLC_DECL ushort __clc_add_sat_u16(ushort, ushort); -_CLC_DECL int __clc_add_sat_s32(int, int); -_CLC_DECL uint __clc_add_sat_u32(uint, uint); -_CLC_DECL long __clc_add_sat_s64(long, long); -_CLC_DECL ulong __clc_add_sat_u64(ulong, ulong); +#define FUNCTION add_sat +#define __CLC_BODY <clc/shared/binary_def.inc> -_CLC_OVERLOAD _CLC_DEF char add_sat(char x, char y) { - short r = x + y; - return convert_char_sat(r); -} - -_CLC_OVERLOAD _CLC_DEF uchar add_sat(uchar x, uchar y) { - ushort r = x + y; - return convert_uchar_sat(r); -} - -_CLC_OVERLOAD _CLC_DEF short add_sat(short x, short y) { - int r = x + y; - return convert_short_sat(r); -} - -_CLC_OVERLOAD _CLC_DEF ushort add_sat(ushort x, ushort y) { - uint r = x + y; - return convert_ushort_sat(r); -} - -_CLC_OVERLOAD _CLC_DEF int add_sat(int x, int y) { - int r; - if (__builtin_sadd_overflow(x, y, &r)) - // The 
oveflow can only occur if both are pos or both are neg, - // thus we only need to check one operand - return x > 0 ? INT_MAX : INT_MIN; - return r; -} - -_CLC_OVERLOAD _CLC_DEF uint add_sat(uint x, uint y) { - uint r; - if (__builtin_uadd_overflow(x, y, &r)) - return UINT_MAX; - return r; -} - -_CLC_OVERLOAD _CLC_DEF long add_sat(long x, long y) { - long r; - if (__builtin_saddl_overflow(x, y, &r)) - // The oveflow can only occur if both are pos or both are neg, - // thus we only need to check one operand - return x > 0 ? LONG_MAX : LONG_MIN; - return r; -} - -_CLC_OVERLOAD _CLC_DEF ulong add_sat(ulong x, ulong y) { - ulong r; - if (__builtin_uaddl_overflow(x, y, &r)) - return ULONG_MAX; - return r; -} - -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, add_sat, char, char) -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, add_sat, uchar, uchar) -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, add_sat, short, short) -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, add_sat, ushort, ushort) -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, add_sat, int, int) -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, add_sat, uint, uint) -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, add_sat, long, long) -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, add_sat, ulong, ulong) +#include <clc/integer/gentype.inc> diff --git a/libclc/generic/lib/integer/sub_sat.cl b/libclc/generic/lib/integer/sub_sat.cl index e6beef787c0366..e153738169b94c 100644 --- a/libclc/generic/lib/integer/sub_sat.cl +++ b/libclc/generic/lib/integer/sub_sat.cl @@ -1,61 +1,7 @@ #include <clc/clc.h> -#include <clc/clcmacro.h> +#include <clc/integer/clc_sub_sat.h> -_CLC_OVERLOAD _CLC_DEF char sub_sat(char x, char y) { - short r = x - y; - return convert_char_sat(r); -} +#define FUNCTION sub_sat +#define __CLC_BODY <clc/shared/binary_def.inc> -_CLC_OVERLOAD _CLC_DEF uchar sub_sat(uchar x, uchar y) { - short r = x - y; - return convert_uchar_sat(r); -} - -_CLC_OVERLOAD _CLC_DEF 
short sub_sat(short x, short y) { - int r = x - y; - return convert_short_sat(r); -} - -_CLC_OVERLOAD _CLC_DEF ushort sub_sat(ushort x, ushort y) { - int r = x - y; - return convert_ushort_sat(r); -} - -_CLC_OVERLOAD _CLC_DEF int sub_sat(int x, int y) { - int r; - if (__builtin_ssub_overflow(x, y, &r)) - // The oveflow can only occur in the direction of the first operand - return x > 0 ? INT_MAX : INT_MIN; - return r; -} - -_CLC_OVERLOAD _CLC_DEF uint sub_sat(uint x, uint y) { - uint r; - if (__builtin_usub_overflow(x, y, &r)) - return 0; - return r; -} - -_CLC_OVERLOAD _CLC_DEF long sub_sat(long x, long y) { - long r; - if (__builtin_ssubl_overflow(x, y, &r)) - // The oveflow can only occur in the direction of the first operand - return x > 0 ? LONG_MAX : LONG_MIN; - return r; -} - -_CLC_OVERLOAD _CLC_DEF ulong sub_sat(ulong x, ulong y) { - ulong r; - if (__builtin_usubl_overflow(x, y, &r)) - return 0; - return r; -} - -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, sub_sat, char, char) -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, sub_sat, uchar, uchar) -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, sub_sat, short, short) -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, sub_sat, ushort, ushort) -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, sub_sat, int, int) -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, sub_sat, uint, uint) -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, sub_sat, long, long) -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, sub_sat, ulong, ulong) +#include <clc/integer/gentype.inc> diff --git a/libclc/generic/lib/math/clc_ldexp.cl b/libclc/generic/lib/math/clc_ldexp.cl index e052aa812244bf..a03f05d2177561 100644 --- a/libclc/generic/lib/math/clc_ldexp.cl +++ b/libclc/generic/lib/math/clc_ldexp.cl @@ -22,6 +22,7 @@ #include <clc/clc.h> #include <clc/clcmacro.h> +#include <clc/integer/clc_add_sat.h> #include <clc/math/clc_subnormal_config.h> #include <clc/math/math.h> #include 
<clc/relational/clc_isinf.h> @@ -37,7 +38,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_ldexp(float x, int n) { int e = (i >> 23) & 0xff; int m = i & 0x007fffff; int s = i & 0x80000000; - int v = add_sat(e, n); + int v = __clc_add_sat(e, n); v = __clc_clamp(v, 0, 0xff); int mr = e == 0 | v == 0 | v == 0xff ? 0 : m; int c = e == 0xff; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits