Author: Fraser Cormack
Date: 2025-02-05T10:38:23Z
New Revision: 76d1cb22c1b9460c0abfba943d7cc202dc30fca3
URL: https://github.com/llvm/llvm-project/commit/76d1cb22c1b9460c0abfba943d7cc202dc30fca3 DIFF: https://github.com/llvm/llvm-project/commit/76d1cb22c1b9460c0abfba943d7cc202dc30fca3.diff LOG: [libclc] Move rotate to CLC library; optimize (#125713) This commit moves the rotate builtin to the CLC library. It also optimizes rotate(x, n) to generate the @llvm.fshl(x, x, n) intrinsic, for both scalar and vector types. The previous implementation was too cautious in its handling of the shift amount; the OpenCL rules state that the shift amount is always treated as an unsigned value modulo the bitwidth. Added: libclc/clc/include/clc/integer/clc_rotate.h libclc/clc/lib/generic/integer/clc_rotate.cl libclc/clc/lib/generic/integer/clc_rotate.inc Modified: libclc/clc/lib/clspv/SOURCES libclc/clc/lib/generic/SOURCES libclc/clc/lib/spirv/SOURCES libclc/generic/lib/integer/rotate.cl Removed: libclc/generic/lib/integer/rotate.inc ################################################################################ diff --git a/libclc/clc/include/clc/integer/clc_rotate.h b/libclc/clc/include/clc/integer/clc_rotate.h new file mode 100644 index 000000000000000..21c945a9ae1bdc1 --- /dev/null +++ b/libclc/clc/include/clc/integer/clc_rotate.h @@ -0,0 +1,12 @@ +#ifndef __CLC_INTEGER_CLC_ROTATE_H__ +#define __CLC_INTEGER_CLC_ROTATE_H__ + +#define __CLC_FUNCTION __clc_rotate +#define __CLC_BODY <clc/shared/binary_decl.inc> + +#include <clc/integer/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_INTEGER_CLC_ROTATE_H__ diff --git a/libclc/clc/lib/clspv/SOURCES b/libclc/clc/lib/clspv/SOURCES index c96a6fc15bf6778..2fe07f62a328ca0 100644 --- a/libclc/clc/lib/clspv/SOURCES +++ b/libclc/clc/lib/clspv/SOURCES @@ -7,6 +7,7 @@ ../generic/integer/clc_mul_hi.cl ../generic/integer/clc_popcount.cl ../generic/integer/clc_rhadd.cl +../generic/integer/clc_rotate.cl ../generic/integer/clc_sub_sat.cl ../generic/integer/clc_upsample.cl ../generic/math/clc_ceil.cl diff --git 
a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES index 5fd882eb1bb256d..2d31f9ac23fec27 100644 --- a/libclc/clc/lib/generic/SOURCES +++ b/libclc/clc/lib/generic/SOURCES @@ -13,6 +13,7 @@ integer/clc_mul24.cl integer/clc_mul_hi.cl integer/clc_popcount.cl integer/clc_rhadd.cl +integer/clc_rotate.cl integer/clc_sub_sat.cl integer/clc_upsample.cl math/clc_ceil.cl diff --git a/libclc/clc/lib/generic/integer/clc_rotate.cl b/libclc/clc/lib/generic/integer/clc_rotate.cl new file mode 100644 index 000000000000000..7546862fe401e47 --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_rotate.cl @@ -0,0 +1,5 @@ +#include <clc/internal/clc.h> +#include <clc/utils.h> + +#define __CLC_BODY <clc_rotate.inc> +#include <clc/integer/gentype.inc> diff --git a/libclc/clc/lib/generic/integer/clc_rotate.inc b/libclc/clc/lib/generic/integer/clc_rotate.inc new file mode 100644 index 000000000000000..f144553eabd5252 --- /dev/null +++ b/libclc/clc/lib/generic/integer/clc_rotate.inc @@ -0,0 +1,22 @@ +#define __CLC_AS_GENTYPE(x) __CLC_XCONCAT(__clc_as_, __CLC_GENTYPE)(x) +#define __CLC_AS_U_GENTYPE(x) __CLC_XCONCAT(__clc_as_, __CLC_U_GENTYPE)(x) + +// The rotate(A, B) builtin left-shifts corresponding to the usual OpenCL shift +// modulo rules. These rules state that A is left-shifted by the log2(N) least +// significant bits in B when viewed as an unsigned integer value. Thus we don't +// have to worry about signed shift amounts, and can perform the computation in +// unsigned types. 
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_rotate(__CLC_GENTYPE x, + __CLC_GENTYPE n) { + __CLC_U_GENTYPE x_as_u = __CLC_AS_U_GENTYPE(x); + __CLC_U_GENTYPE mask = (__CLC_U_GENTYPE)(__CLC_GENSIZE - 1); + + __CLC_U_GENTYPE lshift_amt = __CLC_AS_U_GENTYPE(n) & mask; + + __CLC_U_GENTYPE rshift_amt = + (((__CLC_U_GENTYPE)__CLC_GENSIZE - lshift_amt) & mask); + + __CLC_U_GENTYPE result = (x_as_u << lshift_amt) | (x_as_u >> rshift_amt); + + return __CLC_AS_GENTYPE(result); +} diff --git a/libclc/clc/lib/spirv/SOURCES b/libclc/clc/lib/spirv/SOURCES index c3cc4068225d811..96040a3aebd8355 100644 --- a/libclc/clc/lib/spirv/SOURCES +++ b/libclc/clc/lib/spirv/SOURCES @@ -11,6 +11,7 @@ ../generic/integer/clc_mul_hi.cl ../generic/integer/clc_popcount.cl ../generic/integer/clc_rhadd.cl +../generic/integer/clc_rotate.cl ../generic/integer/clc_sub_sat.cl ../generic/integer/clc_upsample.cl ../generic/math/clc_ceil.cl diff --git a/libclc/generic/lib/integer/rotate.cl b/libclc/generic/lib/integer/rotate.cl index 27ce515c729331b..1e72af30f33f2bd 100644 --- a/libclc/generic/lib/integer/rotate.cl +++ b/libclc/generic/lib/integer/rotate.cl @@ -1,4 +1,7 @@ #include <clc/clc.h> +#include <clc/integer/clc_rotate.h> + +#define FUNCTION rotate +#define __CLC_BODY <clc/shared/binary_def.inc> -#define __CLC_BODY <rotate.inc> #include <clc/integer/gentype.inc> diff --git a/libclc/generic/lib/integer/rotate.inc b/libclc/generic/lib/integer/rotate.inc deleted file mode 100644 index 33bb0a85241d20b..000000000000000 --- a/libclc/generic/lib/integer/rotate.inc +++ /dev/null @@ -1,42 +0,0 @@ -/** - * Not necessarily optimal... 
but it produces correct results (at least for int) - * If we're lucky, LLVM will recognize the pattern and produce rotate - * instructions: - * http://llvm.1065342.n5.nabble.com/rotate-td47679.html - * - * Eventually, someone should feel free to implement an llvm-specific version - */ - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE rotate(__CLC_GENTYPE x, __CLC_GENTYPE n){ - //Try to avoid extra work if someone's spinning the value through multiple - //full rotations - n = n % (__CLC_GENTYPE)__CLC_GENSIZE; - -#ifdef __CLC_SCALAR - if (n > 0){ - return (x << n) | (((__CLC_U_GENTYPE)x) >> (__CLC_GENSIZE - n)); - } else if (n == 0){ - return x; - } else { - return ( (((__CLC_U_GENTYPE)x) >> -n) | (x << (__CLC_GENSIZE + n)) ); - } -#else - //XXX: There's a lot of __builtin_astype calls to cast everything to - // unsigned ... This should be improved so that if __CLC_GENTYPE==__CLC_U_GENTYPE, no - // casts are required. - - __CLC_U_GENTYPE x_1 = __builtin_astype(x, __CLC_U_GENTYPE); - - //XXX: Is (__CLC_U_GENTYPE >> S__CLC_GENTYPE) | (__CLC_U_GENTYPE << S__CLC_GENTYPE) legal? - // If so, then combine the amt and shifts into a single set of statements - - __CLC_U_GENTYPE amt; - amt = (n < (__CLC_GENTYPE)0 ? __builtin_astype((__CLC_GENTYPE)0-n, __CLC_U_GENTYPE) : (__CLC_U_GENTYPE)0); - x_1 = (x_1 >> amt) | (x_1 << ((__CLC_U_GENTYPE)__CLC_GENSIZE - amt)); - - amt = (n < (__CLC_GENTYPE)0 ? (__CLC_U_GENTYPE)0 : __builtin_astype(n, __CLC_U_GENTYPE)); - x_1 = (x_1 << amt) | (x_1 >> ((__CLC_U_GENTYPE)__CLC_GENSIZE - amt)); - - return __builtin_astype(x_1, __CLC_GENTYPE); -#endif -} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits