https://github.com/frasercrmck created https://github.com/llvm/llvm-project/pull/128506
Note that the CLC versions of these builtins don't offer the vector/scalar forms, for simplicity. The OpenCL layer converts the vector/scalar form to vector/vector. The CLC builtins use clang's __builtin_elementwise_(min|max), which helps us generate llvm.(min|max)num intrinsics directly. These intrinsics select the non-NaN input over the NaN input, which adheres to the OpenCL specification. Note that the OpenCL specification doesn't require support for sNaN, so returning qNaN over sNaN is acceptable. Note also that the intrinsics don't differentiate between -0.0 and +0.0; this does not appear to be required - going by the OpenCL CTS, at least. These intrinsics maintain the vector types, as opposed to scalarizing, which was previously happening. This commit therefore helps to optimize codegen for targets that can operate on vector types directly. From 43d4d7d1fd03e3692c957ff4b7609cfe0b03a9ad Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Mon, 24 Feb 2025 12:25:22 +0000 Subject: [PATCH] [libclc] Move fmin/fmax to the CLC library Note the CLC versions of these builtins don't offer the vector/scalar forms, for simplicity. The OpenCL layer converts the vector/scalar form to vector/vector. The CLC builtins use clang's __builtin_elementwise_(min|max) which helps us generate llvm.(min|max)num intrinsics directly. These intrinsics select the non-NAN input over the NAN input, which adheres to the OpenCL specification. Note that the OpenCL specification doesn't require support for sNAN, so returning qNAN over sNAN is acceptable. Note also that the intrinsics don't differentiate between -0.0 and +0.0; this does not appear to be required - going by the OpenCL CTS, at least. These intrinsics maintain the vector types, as opposed to scalarizing, which was previously happening. This commit therefore helps to optimize codegen for those targets. 
--- .../clc/include/clc/math/binary_builtin.inc | 27 +++++++++++++++++ libclc/clc/include/clc/math/clc_fmax.h | 12 ++++++++ libclc/clc/include/clc/math/clc_fmin.h | 12 ++++++++ .../binary_decl_with_scalar_second_arg.inc | 7 +++++ .../binary_def_with_scalar_second_arg.inc | 17 +++++++++++ libclc/clc/lib/generic/SOURCES | 4 ++- libclc/clc/lib/generic/math/clc_fmax.cl | 6 ++++ libclc/clc/lib/generic/math/clc_fmin.cl | 6 ++++ libclc/generic/lib/math/fmax.cl | 29 ++----------------- libclc/generic/lib/math/fmin.cl | 28 ++---------------- 10 files changed, 96 insertions(+), 52 deletions(-) create mode 100644 libclc/clc/include/clc/math/binary_builtin.inc create mode 100644 libclc/clc/include/clc/math/clc_fmax.h create mode 100644 libclc/clc/include/clc/math/clc_fmin.h create mode 100644 libclc/clc/include/clc/shared/binary_decl_with_scalar_second_arg.inc create mode 100644 libclc/clc/include/clc/shared/binary_def_with_scalar_second_arg.inc create mode 100644 libclc/clc/lib/generic/math/clc_fmax.cl create mode 100644 libclc/clc/lib/generic/math/clc_fmin.cl diff --git a/libclc/clc/include/clc/math/binary_builtin.inc b/libclc/clc/include/clc/math/binary_builtin.inc new file mode 100644 index 0000000000000..da1d8d9cc92a6 --- /dev/null +++ b/libclc/clc/include/clc/math/binary_builtin.inc @@ -0,0 +1,27 @@ +#include <clc/clcmacro.h> +#include <clc/utils.h> + +#ifndef __CLC_BUILTIN +#define __CLC_BUILTIN __CLC_XCONCAT(__clc_, __CLC_FUNCTION) +#endif + +_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(float, __CLC_FUNCTION, __CLC_BUILTIN, + float, float) + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(double, __CLC_FUNCTION, __CLC_BUILTIN, + double, double) + +#endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(half, __CLC_FUNCTION, __CLC_BUILTIN, + half, half) + +#endif diff --git a/libclc/clc/include/clc/math/clc_fmax.h 
b/libclc/clc/include/clc/math/clc_fmax.h new file mode 100644 index 0000000000000..4f13794beb399 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_fmax.h @@ -0,0 +1,12 @@ +#ifndef __CLC_MATH_CLC_FMAX_H__ +#define __CLC_MATH_CLC_FMAX_H__ + +#define __CLC_FUNCTION __clc_fmax +#define __CLC_BODY <clc/shared/binary_decl_with_scalar_second_arg.inc> + +#include <clc/math/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_MATH_CLC_FMAX_H__ diff --git a/libclc/clc/include/clc/math/clc_fmin.h b/libclc/clc/include/clc/math/clc_fmin.h new file mode 100644 index 0000000000000..f8d0012098840 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_fmin.h @@ -0,0 +1,12 @@ +#ifndef __CLC_MATH_CLC_FMIN_H__ +#define __CLC_MATH_CLC_FMIN_H__ + +#define __CLC_FUNCTION __clc_fmin +#define __CLC_BODY <clc/shared/binary_decl_with_scalar_second_arg.inc> + +#include <clc/math/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_MATH_CLC_FMIN_H__ diff --git a/libclc/clc/include/clc/shared/binary_decl_with_scalar_second_arg.inc b/libclc/clc/include/clc/shared/binary_decl_with_scalar_second_arg.inc new file mode 100644 index 0000000000000..bec8575d8cc24 --- /dev/null +++ b/libclc/clc/include/clc/shared/binary_decl_with_scalar_second_arg.inc @@ -0,0 +1,7 @@ +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x, + __CLC_GENTYPE y); + +#ifndef __CLC_SCALAR +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x, + __CLC_SCALAR_GENTYPE y); +#endif diff --git a/libclc/clc/include/clc/shared/binary_def_with_scalar_second_arg.inc b/libclc/clc/include/clc/shared/binary_def_with_scalar_second_arg.inc new file mode 100644 index 0000000000000..c02f3ea0f7017 --- /dev/null +++ b/libclc/clc/include/clc/shared/binary_def_with_scalar_second_arg.inc @@ -0,0 +1,17 @@ +#include <clc/utils.h> + +#ifndef __CLC_FUNCTION +#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x) +#endif + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE 
a, + __CLC_GENTYPE b) { + return __CLC_FUNCTION(FUNCTION)(a, b); +} + +#ifndef __CLC_SCALAR +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE a, + __CLC_SCALAR_GENTYPE b) { + return __CLC_FUNCTION(FUNCTION)(a, (__CLC_GENTYPE)b); +} +#endif diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES index 0ab563df6a274..8d917461ccee0 100644 --- a/libclc/clc/lib/generic/SOURCES +++ b/libclc/clc/lib/generic/SOURCES @@ -20,8 +20,10 @@ integer/clc_upsample.cl math/clc_ceil.cl math/clc_copysign.cl math/clc_fabs.cl -math/clc_fma.cl math/clc_floor.cl +math/clc_fma.cl +math/clc_fmax.cl +math/clc_fmin.cl math/clc_frexp.cl math/clc_mad.cl math/clc_modf.cl diff --git a/libclc/clc/lib/generic/math/clc_fmax.cl b/libclc/clc/lib/generic/math/clc_fmax.cl new file mode 100644 index 0000000000000..82b1c5894d3ec --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_fmax.cl @@ -0,0 +1,6 @@ +#include <clc/internal/clc.h> + +#undef __CLC_FUNCTION +#define __CLC_FUNCTION __clc_fmax +#define __CLC_BUILTIN __builtin_elementwise_max +#include <clc/math/binary_builtin.inc> diff --git a/libclc/clc/lib/generic/math/clc_fmin.cl b/libclc/clc/lib/generic/math/clc_fmin.cl new file mode 100644 index 0000000000000..5411280835b1a --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_fmin.cl @@ -0,0 +1,6 @@ +#include <clc/internal/clc.h> + +#undef __CLC_FUNCTION +#define __CLC_FUNCTION __clc_fmin +#define __CLC_BUILTIN __builtin_elementwise_min +#include <clc/math/binary_builtin.inc> diff --git a/libclc/generic/lib/math/fmax.cl b/libclc/generic/lib/math/fmax.cl index c42fe4f54a9e6..44a27b8b7bdec 100644 --- a/libclc/generic/lib/math/fmax.cl +++ b/libclc/generic/lib/math/fmax.cl @@ -1,31 +1,8 @@ #include <clc/clc.h> #include <clc/clcmacro.h> +#include <clc/math/clc_fmax.h> -_CLC_DEFINE_BINARY_BUILTIN(float, fmax, __builtin_fmaxf, float, float); +#define FUNCTION fmax +#define __CLC_BODY <clc/shared/binary_def_with_scalar_second_arg.inc> -#ifdef cl_khr_fp64 - -#pragma OPENCL 
EXTENSION cl_khr_fp64 : enable - -_CLC_DEFINE_BINARY_BUILTIN(double, fmax, __builtin_fmax, double, double); - -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEF _CLC_OVERLOAD half fmax(half x, half y) -{ - if (isnan(x)) - return y; - if (isnan(y)) - return x; - return (x < y) ? y : x; -} -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmax, half, half) - -#endif - -#define __CLC_BODY <fmax.inc> #include <clc/math/gentype.inc> diff --git a/libclc/generic/lib/math/fmin.cl b/libclc/generic/lib/math/fmin.cl index 55575d0486b60..40615ea89c9a0 100644 --- a/libclc/generic/lib/math/fmin.cl +++ b/libclc/generic/lib/math/fmin.cl @@ -1,30 +1,8 @@ #include <clc/clc.h> #include <clc/clcmacro.h> +#include <clc/math/clc_fmin.h> -_CLC_DEFINE_BINARY_BUILTIN(float, fmin, __builtin_fminf, float, float); +#define FUNCTION fmin +#define __CLC_BODY <clc/shared/binary_def_with_scalar_second_arg.inc> -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_DEFINE_BINARY_BUILTIN(double, fmin, __builtin_fmin, double, double); - -#endif -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEF _CLC_OVERLOAD half fmin(half x, half y) -{ - if (isnan(x)) - return y; - if (isnan(y)) - return x; - return (y < x) ? y : x; -} -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmin, half, half) - -#endif - -#define __CLC_BODY <fmin.inc> #include <clc/math/gentype.inc> _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits