https://github.com/frasercrmck created https://github.com/llvm/llvm-project/pull/124598
This commit moves the implementation of the copysign builtin to the CLC library. It simultaneously optimizes it for vector types by avoiding scalarization. It does so by using the __builtin_elementwise_copysign Clang builtin, which can handle vector types. It also fixes a bug in the half/fp16 implementation of the builtin. The previous version was using an incorrect mask (0x7FFFF instead of 0x7FFF) and was thus preserving the original sign bit, rather than masking it out. From a5d4532410199464f87d0cd5011f9b73573d5935 Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Mon, 27 Jan 2025 17:44:14 +0000 Subject: [PATCH] [libclc] Move copysign to CLC library; optimize & fix This commit moves the implementation of the copysign builtin to the CLC library. It simultaneously optimizes it for vector types by avoiding scalarization. It does so by using the __builtin_elementwise_copysign Clang builtin, which can handle vector types. It also fixes a bug in the half/fp16 implementation of the builtin. The previous version was using an incorrect mask (0x7FFFF instead of 0x7FFF) and was thus preserving the original sign bit, rather than masking it out. 
--- libclc/clc/include/clc/math/clc_copysign.h | 12 +++++++++ libclc/clc/include/clc/shared/binary_def.inc | 10 +++++++ libclc/clc/lib/clspv/SOURCES | 1 + libclc/clc/lib/generic/SOURCES | 1 + libclc/clc/lib/generic/math/clc_copysign.cl | 27 +++++++++++++++++++ libclc/clc/lib/spirv/SOURCES | 1 + libclc/clc/lib/spirv64/SOURCES | 1 + libclc/generic/lib/math/copysign.cl | 28 +++----------------- 8 files changed, 57 insertions(+), 24 deletions(-) create mode 100644 libclc/clc/include/clc/math/clc_copysign.h create mode 100644 libclc/clc/include/clc/shared/binary_def.inc create mode 100644 libclc/clc/lib/generic/math/clc_copysign.cl diff --git a/libclc/clc/include/clc/math/clc_copysign.h b/libclc/clc/include/clc/math/clc_copysign.h new file mode 100644 index 00000000000000..2b08acf73795f5 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_copysign.h @@ -0,0 +1,12 @@ +#ifndef __CLC_MATH_CLC_COPYSIGN_H__ +#define __CLC_MATH_CLC_COPYSIGN_H__ + +#define __CLC_BODY <clc/shared/binary_decl.inc> +#define __CLC_FUNCTION __clc_copysign + +#include <clc/math/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_MATH_CLC_COPYSIGN_H__ diff --git a/libclc/clc/include/clc/shared/binary_def.inc b/libclc/clc/include/clc/shared/binary_def.inc new file mode 100644 index 00000000000000..e6ef867f12c6b6 --- /dev/null +++ b/libclc/clc/include/clc/shared/binary_def.inc @@ -0,0 +1,10 @@ +#include <clc/utils.h> + +#ifndef __CLC_FUNCTION +#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x) +#endif + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE a, + __CLC_GENTYPE b) { + return __CLC_FUNCTION(FUNCTION)(a, b); +} diff --git a/libclc/clc/lib/clspv/SOURCES b/libclc/clc/lib/clspv/SOURCES index 81f90a24d00d60..d5f5c68fd7ebfa 100644 --- a/libclc/clc/lib/clspv/SOURCES +++ b/libclc/clc/lib/clspv/SOURCES @@ -1,4 +1,5 @@ ../generic/math/clc_ceil.cl +../generic/math/clc_copysign.cl ../generic/math/clc_fabs.cl ../generic/math/clc_floor.cl ../generic/math/clc_mad.cl diff 
--git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES index 59dad8e8606891..9feda65c45f4bf 100644 --- a/libclc/clc/lib/generic/SOURCES +++ b/libclc/clc/lib/generic/SOURCES @@ -5,6 +5,7 @@ geometric/clc_dot.cl integer/clc_abs.cl integer/clc_abs_diff.cl math/clc_ceil.cl +math/clc_copysign.cl math/clc_fabs.cl math/clc_floor.cl math/clc_mad.cl diff --git a/libclc/clc/lib/generic/math/clc_copysign.cl b/libclc/clc/lib/generic/math/clc_copysign.cl new file mode 100644 index 00000000000000..e225ec80692fbd --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_copysign.cl @@ -0,0 +1,27 @@ +#include <clc/clcmacro.h> +#include <clc/internal/clc.h> + +_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(float, __clc_copysign, + __builtin_elementwise_copysign, float, + float) + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(double, __clc_copysign, + __builtin_elementwise_copysign, double, + double) + +#endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(half, __clc_copysign, + __builtin_elementwise_copysign, half, + half) + +#endif + diff --git a/libclc/clc/lib/spirv/SOURCES b/libclc/clc/lib/spirv/SOURCES index 813b1e3d699378..509236d587cd01 100644 --- a/libclc/clc/lib/spirv/SOURCES +++ b/libclc/clc/lib/spirv/SOURCES @@ -3,6 +3,7 @@ ../generic/common/clc_smoothstep.cl ../generic/geometric/clc_dot.cl ../generic/math/clc_ceil.cl +../generic/math/clc_copysign.cl ../generic/math/clc_fabs.cl ../generic/math/clc_floor.cl ../generic/math/clc_mad.cl diff --git a/libclc/clc/lib/spirv64/SOURCES b/libclc/clc/lib/spirv64/SOURCES index 813b1e3d699378..509236d587cd01 100644 --- a/libclc/clc/lib/spirv64/SOURCES +++ b/libclc/clc/lib/spirv64/SOURCES @@ -3,6 +3,7 @@ ../generic/common/clc_smoothstep.cl ../generic/geometric/clc_dot.cl ../generic/math/clc_ceil.cl +../generic/math/clc_copysign.cl ../generic/math/clc_fabs.cl ../generic/math/clc_floor.cl 
../generic/math/clc_mad.cl diff --git a/libclc/generic/lib/math/copysign.cl b/libclc/generic/lib/math/copysign.cl index 08045bebf88a1c..59cd4f65203052 100644 --- a/libclc/generic/lib/math/copysign.cl +++ b/libclc/generic/lib/math/copysign.cl @@ -1,27 +1,7 @@ #include <clc/clc.h> -#include <clc/clcmacro.h> +#include <clc/math/clc_copysign.h> -_CLC_DEFINE_BINARY_BUILTIN(float, copysign, __builtin_copysignf, float, float) +#define FUNCTION copysign +#define __CLC_BODY <clc/shared/binary_def.inc> -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_DEFINE_BINARY_BUILTIN(double, copysign, __builtin_copysign, double, double) - -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEF _CLC_OVERLOAD half copysign(half x, half y) -{ - ushort sign_x = as_ushort(x) & 0x8000u; - ushort unsigned_y = as_ushort(y) & 0x7ffffu; - - return as_half((ushort)(sign_x | unsigned_y)); -} -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, copysign, half, half) - -#endif +#include <clc/math/gentype.inc> _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits