https://github.com/fineg74 updated https://github.com/llvm/llvm-project/pull/179846
>From 05956a95c3ecb6408654fb26957e61697c61ea33 Mon Sep 17 00:00:00 2001 From: "Fine, Gregory" <[email protected]> Date: Wed, 4 Feb 2026 18:11:08 -0800 Subject: [PATCH 1/3] Add headers to support complex math for SPIR-V --- clang/lib/Headers/CMakeLists.txt | 1 + .../Headers/__clang_spirv_complex_builtins.h | 267 ++++++++++++++++++ clang/lib/Headers/openmp_wrappers/complex | 8 +- clang/lib/Headers/openmp_wrappers/complex.h | 6 + 4 files changed, 281 insertions(+), 1 deletion(-) create mode 100644 clang/lib/Headers/__clang_spirv_complex_builtins.h diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index c92b370b88d2d..9e1c3988b4b83 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -141,6 +141,7 @@ set(riscv_files set(spirv_files __clang_spirv_builtins.h + __clang_spirv_complex_builtins.h ) set(systemz_files diff --git a/clang/lib/Headers/__clang_spirv_complex_builtins.h b/clang/lib/Headers/__clang_spirv_complex_builtins.h new file mode 100644 index 0000000000000..fd7a492b5744d --- /dev/null +++ b/clang/lib/Headers/__clang_spirv_complex_builtins.h @@ -0,0 +1,267 @@ +/*===-- __clang_spirv_complex_builtins - SPIRV impls of runtime complex fns ---=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __CLANG_SPIRV_COMPLEX_BUILTINS +#define __CLANG_SPIRV_COMPLEX_BUILTINS + +// This header defines __muldc3, __mulsc3, __divdc3, and __divsc3. These are +// libgcc functions that clang assumes are available when compiling c99 complex +// operations. (These implementations come from libc++, and have been modified +// to work with SPIRV and OpenMP target offloading [in C and C++ mode].) 
+ +#pragma push_macro("__DEVICE__") +#if defined(__OPENMP_SPIRV__) +#include <__clang_spirv_libdevice_declares.h> +#pragma omp declare target +#define __DEVICE__ __attribute__((noinline, nothrow, cold, weak)) +#else +#define __DEVICE__ __device__ inline +#endif + +// To make the algorithms available for C and C++ in SPIRV and OpenMP we select +// different but equivalent function versions. TODO: For OpenMP we currently +// select the native builtins as the overload support for templates is lacking. +#if !defined(__OPENMP_SPIRV__) +#define _ISNANd std::isnan +#define _ISNANf std::isnan +#define _ISINFd std::isinf +#define _ISINFf std::isinf +#define _ISFINITEd std::isfinite +#define _ISFINITEf std::isfinite +#define _COPYSIGNd std::copysign +#define _COPYSIGNf std::copysign +#define _SCALBNd std::scalbn +#define _SCALBNf std::scalbn +#define _ABSd std::abs +#define _ABSf std::abs +#define _LOGBd std::logb +#define _LOGBf std::logb +// Rather than pulling in std::max from algorithm everytime, use available ::max. 
+#define _fmaxd max +#define _fmaxf max +#else +#define _ISNANd __spirv_IsNan +#define _ISNANf __spirv_IsNan +#define _ISINFd __spirv_IsInf +#define _ISINFf __spirv_IsInf +#define _ISFINITEd __spirv_IsFinite +#define _ISFINITEf __spirv_IsFinite +#define _COPYSIGNd __spirv_ocl_copysign +#define _COPYSIGNf __spirv_ocl_copysign +#define _SCALBNd __spirv_ocl_ldexp +#define _SCALBNf __spirv_ocl_ldexp +#define _ABSd __spirv_ocl_fabs +#define _ABSf __spirv_ocl_fabs +#define _LOGBd __spirv_ocl_logb +#define _LOGBf __spirv_ocl_logb +#define _fmaxd __spirv_ocl_fmax +#define _fmaxf __spirv_ocl_fmax +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + +__DEVICE__ double _Complex __muldc3(double __a, double __b, double __c, + double __d) { + double __ac = __a * __c; + double __bd = __b * __d; + double __ad = __a * __d; + double __bc = __b * __c; + double _Complex z; + __real__(z) = __ac - __bd; + __imag__(z) = __ad + __bc; + if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) { + int __recalc = 0; + if (_ISINFd(__a) || _ISINFd(__b)) { + __a = _COPYSIGNd(_ISINFd(__a) ? 1 : 0, __a); + __b = _COPYSIGNd(_ISINFd(__b) ? 1 : 0, __b); + if (_ISNANd(__c)) + __c = _COPYSIGNd(0, __c); + if (_ISNANd(__d)) + __d = _COPYSIGNd(0, __d); + __recalc = 1; + } + if (_ISINFd(__c) || _ISINFd(__d)) { + __c = _COPYSIGNd(_ISINFd(__c) ? 1 : 0, __c); + __d = _COPYSIGNd(_ISINFd(__d) ? 1 : 0, __d); + if (_ISNANd(__a)) + __a = _COPYSIGNd(0, __a); + if (_ISNANd(__b)) + __b = _COPYSIGNd(0, __b); + __recalc = 1; + } + if (!__recalc && + (_ISINFd(__ac) || _ISINFd(__bd) || _ISINFd(__ad) || _ISINFd(__bc))) { + if (_ISNANd(__a)) + __a = _COPYSIGNd(0, __a); + if (_ISNANd(__b)) + __b = _COPYSIGNd(0, __b); + if (_ISNANd(__c)) + __c = _COPYSIGNd(0, __c); + if (_ISNANd(__d)) + __d = _COPYSIGNd(0, __d); + __recalc = 1; + } + if (__recalc) { + // Can't use std::numeric_limits<double>::infinity() -- that doesn't have + // a device overload (and isn't constexpr before C++11, naturally). 
+ __real__(z) = __builtin_huge_val() * (__a * __c - __b * __d); + __imag__(z) = __builtin_huge_val() * (__a * __d + __b * __c); + } + } + return z; +} + +__DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) { + float __ac = __a * __c; + float __bd = __b * __d; + float __ad = __a * __d; + float __bc = __b * __c; + float _Complex z; + __real__(z) = __ac - __bd; + __imag__(z) = __ad + __bc; + if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) { + int __recalc = 0; + if (_ISINFf(__a) || _ISINFf(__b)) { + __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a); + __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b); + if (_ISNANf(__c)) + __c = _COPYSIGNf(0, __c); + if (_ISNANf(__d)) + __d = _COPYSIGNf(0, __d); + __recalc = 1; + } + if (_ISINFf(__c) || _ISINFf(__d)) { + __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c); + __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d); + if (_ISNANf(__a)) + __a = _COPYSIGNf(0, __a); + if (_ISNANf(__b)) + __b = _COPYSIGNf(0, __b); + __recalc = 1; + } + if (!__recalc && + (_ISINFf(__ac) || _ISINFf(__bd) || _ISINFf(__ad) || _ISINFf(__bc))) { + if (_ISNANf(__a)) + __a = _COPYSIGNf(0, __a); + if (_ISNANf(__b)) + __b = _COPYSIGNf(0, __b); + if (_ISNANf(__c)) + __c = _COPYSIGNf(0, __c); + if (_ISNANf(__d)) + __d = _COPYSIGNf(0, __d); + __recalc = 1; + } + if (__recalc) { + __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d); + __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c); + } + } + return z; +} + +__DEVICE__ double _Complex __divdc3(double __a, double __b, double __c, + double __d) { + int __ilogbw = 0; + // Can't use std::max, because that's defined in <algorithm>, and we don't + // want to pull that in for every compile. The CUDA headers define + // ::max(float, float) and ::max(double, double), which is sufficient for us. 
+ double __logbw = _LOGBd(_fmaxd(_ABSd(__c), _ABSd(__d))); + if (_ISFINITEd(__logbw)) { + __ilogbw = (int)__logbw; + __c = _SCALBNd(__c, -__ilogbw); + __d = _SCALBNd(__d, -__ilogbw); + } + double __denom = __c * __c + __d * __d; + double _Complex z; + __real__(z) = _SCALBNd((__a * __c + __b * __d) / __denom, -__ilogbw); + __imag__(z) = _SCALBNd((__b * __c - __a * __d) / __denom, -__ilogbw); + if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) { + if ((__denom == 0.0) && (!_ISNANd(__a) || !_ISNANd(__b))) { + __real__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __a; + __imag__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __b; + } else if ((_ISINFd(__a) || _ISINFd(__b)) && _ISFINITEd(__c) && + _ISFINITEd(__d)) { + __a = _COPYSIGNd(_ISINFd(__a) ? 1.0 : 0.0, __a); + __b = _COPYSIGNd(_ISINFd(__b) ? 1.0 : 0.0, __b); + __real__(z) = __builtin_huge_val() * (__a * __c + __b * __d); + __imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d); + } else if (_ISINFd(__logbw) && __logbw > 0.0 && _ISFINITEd(__a) && + _ISFINITEd(__b)) { + __c = _COPYSIGNd(_ISINFd(__c) ? 1.0 : 0.0, __c); + __d = _COPYSIGNd(_ISINFd(__d) ? 
1.0 : 0.0, __d); + __real__(z) = 0.0 * (__a * __c + __b * __d); + __imag__(z) = 0.0 * (__b * __c - __a * __d); + } + } + return z; +} + +__DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) { + int __ilogbw = 0; + float __logbw = _LOGBf(_fmaxf(_ABSf(__c), _ABSf(__d))); + if (_ISFINITEf(__logbw)) { + __ilogbw = (int)__logbw; + __c = _SCALBNf(__c, -__ilogbw); + __d = _SCALBNf(__d, -__ilogbw); + } + float __denom = __c * __c + __d * __d; + float _Complex z; + __real__(z) = _SCALBNf((__a * __c + __b * __d) / __denom, -__ilogbw); + __imag__(z) = _SCALBNf((__b * __c - __a * __d) / __denom, -__ilogbw); + if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) { + if ((__denom == 0) && (!_ISNANf(__a) || !_ISNANf(__b))) { + __real__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __a; + __imag__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __b; + } else if ((_ISINFf(__a) || _ISINFf(__b)) && _ISFINITEf(__c) && + _ISFINITEf(__d)) { + __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a); + __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b); + __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d); + __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d); + } else if (_ISINFf(__logbw) && __logbw > 0 && _ISFINITEf(__a) && + _ISFINITEf(__b)) { + __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c); + __d = _COPYSIGNf(_ISINFf(__d) ? 
1 : 0, __d); + __real__(z) = 0 * (__a * __c + __b * __d); + __imag__(z) = 0 * (__b * __c - __a * __d); + } + } + return z; +} + +#if defined(__cplusplus) +} // extern "C" +#endif + +#undef _ISNANd +#undef _ISNANf +#undef _ISINFd +#undef _ISINFf +#undef _COPYSIGNd +#undef _COPYSIGNf +#undef _ISFINITEd +#undef _ISFINITEf +#undef _SCALBNd +#undef _SCALBNf +#undef _ABSd +#undef _ABSf +#undef _LOGBd +#undef _LOGBf +#undef _fmaxd +#undef _fmaxf + +#if defined(__OPENMP_SPIRV__) +#pragma omp end declare target +#endif + +#pragma pop_macro("__DEVICE__") + +#endif // __CLANG_SPIRV_COMPLEX_BUILTINS \ No newline at end of file diff --git a/clang/lib/Headers/openmp_wrappers/complex b/clang/lib/Headers/openmp_wrappers/complex index 1ceecc1af8aec..b23c4b6d7d2be 100644 --- a/clang/lib/Headers/openmp_wrappers/complex +++ b/clang/lib/Headers/openmp_wrappers/complex @@ -29,6 +29,12 @@ #undef __OPENMP_AMDGCN__ #endif // __AMDGCN__ +#ifdef __SPIRV__ +#define __OPENMP_SPIRV__ +#include <__clang_spirv_complex_builtins.h> +#undef __OPENMP_SPIRV__ +#endif // __SPIRV__ + #endif // Grab the host header too. @@ -45,7 +51,7 @@ #ifndef _LIBCPP_STD_VER #pragma omp begin declare variant match( \ - device = {arch(amdgcn, nvptx, nvptx64)}, \ + device = {arch(amdgcn, nvptx, nvptx64, spirv64)}, \ implementation = {extension(match_any, allow_templates)}) #include <complex_cmath.h> diff --git a/clang/lib/Headers/openmp_wrappers/complex.h b/clang/lib/Headers/openmp_wrappers/complex.h index 7e7c0866426bc..81501428b0f90 100644 --- a/clang/lib/Headers/openmp_wrappers/complex.h +++ b/clang/lib/Headers/openmp_wrappers/complex.h @@ -29,6 +29,12 @@ #undef __OPENMP_AMDGCN__ #endif +#ifdef __SPIRV__ +#define __OPENMP_SPIRV__ +#include <__clang_spirv_complex_builtins.h> +#undef __OPENMP_SPIRV__ +#endif // __SPIRV__ + #endif // Grab the host header too. 
>From 1c95f420df1e797085ba9b7b611d7eed3a2dde46 Mon Sep 17 00:00:00 2001 From: "Fine, Gregory" <[email protected]> Date: Wed, 4 Feb 2026 18:40:22 -0800 Subject: [PATCH 2/3] Fix formatting issues --- clang/lib/Headers/__clang_spirv_complex_builtins.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/clang/lib/Headers/__clang_spirv_complex_builtins.h b/clang/lib/Headers/__clang_spirv_complex_builtins.h index fd7a492b5744d..43ad92fd5972b 100644 --- a/clang/lib/Headers/__clang_spirv_complex_builtins.h +++ b/clang/lib/Headers/__clang_spirv_complex_builtins.h @@ -1,4 +1,4 @@ -/*===-- __clang_spirv_complex_builtins - SPIRV impls of runtime complex fns ---=== +/*==-- __clang_spirv_complex_builtins - SPIRV impls of runtime complex fns --== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. @@ -17,7 +17,6 @@ #pragma push_macro("__DEVICE__") #if defined(__OPENMP_SPIRV__) -#include <__clang_spirv_libdevice_declares.h> #pragma omp declare target #define __DEVICE__ __attribute__((noinline, nothrow, cold, weak)) #else @@ -42,7 +41,6 @@ #define _ABSf std::abs #define _LOGBd std::logb #define _LOGBf std::logb -// Rather than pulling in std::max from algorithm everytime, use available ::max. 
#define _fmaxd max #define _fmaxf max #else >From d7025af1e1f91b4a80a11be2881e2dbd44a003ea Mon Sep 17 00:00:00 2001 From: "Fine, Gregory" <[email protected]> Date: Mon, 16 Feb 2026 12:11:51 -0800 Subject: [PATCH 3/3] Refactor implementation to reuse existing implementation --- clang/lib/Headers/CMakeLists.txt | 1 - .../Headers/__clang_cuda_complex_builtins.h | 11 +- .../Headers/__clang_spirv_complex_builtins.h | 265 ------------------ clang/lib/Headers/openmp_wrappers/complex | 2 +- clang/lib/Headers/openmp_wrappers/complex.h | 2 +- 5 files changed, 10 insertions(+), 271 deletions(-) delete mode 100644 clang/lib/Headers/__clang_spirv_complex_builtins.h diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 9e1c3988b4b83..c92b370b88d2d 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -141,7 +141,6 @@ set(riscv_files set(spirv_files __clang_spirv_builtins.h - __clang_spirv_complex_builtins.h ) set(systemz_files diff --git a/clang/lib/Headers/__clang_cuda_complex_builtins.h b/clang/lib/Headers/__clang_cuda_complex_builtins.h index e3038dcb8fd36..9f0a08d7f9698 100644 --- a/clang/lib/Headers/__clang_cuda_complex_builtins.h +++ b/clang/lib/Headers/__clang_cuda_complex_builtins.h @@ -16,19 +16,24 @@ // to work with CUDA and OpenMP target offloading [in C and C++ mode].) #pragma push_macro("__DEVICE__") -#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__) +#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__) || defined(__OPENMP_SPIRV__) #pragma omp declare target #define __DEVICE__ __attribute__((noinline, nothrow, cold, weak)) #else #define __DEVICE__ __device__ inline #endif -#ifdef __NVPTX__ +#if defined(__NVPTX__) // FIXME: NVPTX should use generic builtins. 
#define _SCALBNd __nv_scalbn #define _SCALBNf __nv_scalbnf #define _LOGBd __nv_logb #define _LOGBf __nv_logbf +#elif defined(__OPENMP_SPIRV__) +#define _SCALBNd __spirv_ocl_ldexp +#define _SCALBNf __spirv_ocl_ldexp +#define _LOGBd __spirv_ocl_logb +#define _LOGBf __spirv_ocl_logb #else #define _SCALBNd __builtin_scalbn #define _SCALBNf __builtin_scalbnf @@ -220,7 +225,7 @@ __DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) { #undef _LOGBd #undef _LOGBf -#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__) +#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__) || defined(__OPENMP_SPIRV__) #pragma omp end declare target #endif diff --git a/clang/lib/Headers/__clang_spirv_complex_builtins.h b/clang/lib/Headers/__clang_spirv_complex_builtins.h deleted file mode 100644 index 43ad92fd5972b..0000000000000 --- a/clang/lib/Headers/__clang_spirv_complex_builtins.h +++ /dev/null @@ -1,265 +0,0 @@ -/*==-- __clang_spirv_complex_builtins - SPIRV impls of runtime complex fns --== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===-----------------------------------------------------------------------=== - */ - -#ifndef __CLANG_SPIRV_COMPLEX_BUILTINS -#define __CLANG_SPIRV_COMPLEX_BUILTINS - -// This header defines __muldc3, __mulsc3, __divdc3, and __divsc3. These are -// libgcc functions that clang assumes are available when compiling c99 complex -// operations. (These implementations come from libc++, and have been modified -// to work with SPIRV and OpenMP target offloading [in C and C++ mode].) 
- -#pragma push_macro("__DEVICE__") -#if defined(__OPENMP_SPIRV__) -#pragma omp declare target -#define __DEVICE__ __attribute__((noinline, nothrow, cold, weak)) -#else -#define __DEVICE__ __device__ inline -#endif - -// To make the algorithms available for C and C++ in SPIRV and OpenMP we select -// different but equivalent function versions. TODO: For OpenMP we currently -// select the native builtins as the overload support for templates is lacking. -#if !defined(__OPENMP_SPIRV__) -#define _ISNANd std::isnan -#define _ISNANf std::isnan -#define _ISINFd std::isinf -#define _ISINFf std::isinf -#define _ISFINITEd std::isfinite -#define _ISFINITEf std::isfinite -#define _COPYSIGNd std::copysign -#define _COPYSIGNf std::copysign -#define _SCALBNd std::scalbn -#define _SCALBNf std::scalbn -#define _ABSd std::abs -#define _ABSf std::abs -#define _LOGBd std::logb -#define _LOGBf std::logb -#define _fmaxd max -#define _fmaxf max -#else -#define _ISNANd __spirv_IsNan -#define _ISNANf __spirv_IsNan -#define _ISINFd __spirv_IsInf -#define _ISINFf __spirv_IsInf -#define _ISFINITEd __spirv_IsFinite -#define _ISFINITEf __spirv_IsFinite -#define _COPYSIGNd __spirv_ocl_copysign -#define _COPYSIGNf __spirv_ocl_copysign -#define _SCALBNd __spirv_ocl_ldexp -#define _SCALBNf __spirv_ocl_ldexp -#define _ABSd __spirv_ocl_fabs -#define _ABSf __spirv_ocl_fabs -#define _LOGBd __spirv_ocl_logb -#define _LOGBf __spirv_ocl_logb -#define _fmaxd __spirv_ocl_fmax -#define _fmaxf __spirv_ocl_fmax -#endif - -#if defined(__cplusplus) -extern "C" { -#endif - -__DEVICE__ double _Complex __muldc3(double __a, double __b, double __c, - double __d) { - double __ac = __a * __c; - double __bd = __b * __d; - double __ad = __a * __d; - double __bc = __b * __c; - double _Complex z; - __real__(z) = __ac - __bd; - __imag__(z) = __ad + __bc; - if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) { - int __recalc = 0; - if (_ISINFd(__a) || _ISINFd(__b)) { - __a = _COPYSIGNd(_ISINFd(__a) ? 
1 : 0, __a); - __b = _COPYSIGNd(_ISINFd(__b) ? 1 : 0, __b); - if (_ISNANd(__c)) - __c = _COPYSIGNd(0, __c); - if (_ISNANd(__d)) - __d = _COPYSIGNd(0, __d); - __recalc = 1; - } - if (_ISINFd(__c) || _ISINFd(__d)) { - __c = _COPYSIGNd(_ISINFd(__c) ? 1 : 0, __c); - __d = _COPYSIGNd(_ISINFd(__d) ? 1 : 0, __d); - if (_ISNANd(__a)) - __a = _COPYSIGNd(0, __a); - if (_ISNANd(__b)) - __b = _COPYSIGNd(0, __b); - __recalc = 1; - } - if (!__recalc && - (_ISINFd(__ac) || _ISINFd(__bd) || _ISINFd(__ad) || _ISINFd(__bc))) { - if (_ISNANd(__a)) - __a = _COPYSIGNd(0, __a); - if (_ISNANd(__b)) - __b = _COPYSIGNd(0, __b); - if (_ISNANd(__c)) - __c = _COPYSIGNd(0, __c); - if (_ISNANd(__d)) - __d = _COPYSIGNd(0, __d); - __recalc = 1; - } - if (__recalc) { - // Can't use std::numeric_limits<double>::infinity() -- that doesn't have - // a device overload (and isn't constexpr before C++11, naturally). - __real__(z) = __builtin_huge_val() * (__a * __c - __b * __d); - __imag__(z) = __builtin_huge_val() * (__a * __d + __b * __c); - } - } - return z; -} - -__DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) { - float __ac = __a * __c; - float __bd = __b * __d; - float __ad = __a * __d; - float __bc = __b * __c; - float _Complex z; - __real__(z) = __ac - __bd; - __imag__(z) = __ad + __bc; - if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) { - int __recalc = 0; - if (_ISINFf(__a) || _ISINFf(__b)) { - __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a); - __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b); - if (_ISNANf(__c)) - __c = _COPYSIGNf(0, __c); - if (_ISNANf(__d)) - __d = _COPYSIGNf(0, __d); - __recalc = 1; - } - if (_ISINFf(__c) || _ISINFf(__d)) { - __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c); - __d = _COPYSIGNf(_ISINFf(__d) ? 
1 : 0, __d); - if (_ISNANf(__a)) - __a = _COPYSIGNf(0, __a); - if (_ISNANf(__b)) - __b = _COPYSIGNf(0, __b); - __recalc = 1; - } - if (!__recalc && - (_ISINFf(__ac) || _ISINFf(__bd) || _ISINFf(__ad) || _ISINFf(__bc))) { - if (_ISNANf(__a)) - __a = _COPYSIGNf(0, __a); - if (_ISNANf(__b)) - __b = _COPYSIGNf(0, __b); - if (_ISNANf(__c)) - __c = _COPYSIGNf(0, __c); - if (_ISNANf(__d)) - __d = _COPYSIGNf(0, __d); - __recalc = 1; - } - if (__recalc) { - __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d); - __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c); - } - } - return z; -} - -__DEVICE__ double _Complex __divdc3(double __a, double __b, double __c, - double __d) { - int __ilogbw = 0; - // Can't use std::max, because that's defined in <algorithm>, and we don't - // want to pull that in for every compile. The CUDA headers define - // ::max(float, float) and ::max(double, double), which is sufficient for us. - double __logbw = _LOGBd(_fmaxd(_ABSd(__c), _ABSd(__d))); - if (_ISFINITEd(__logbw)) { - __ilogbw = (int)__logbw; - __c = _SCALBNd(__c, -__ilogbw); - __d = _SCALBNd(__d, -__ilogbw); - } - double __denom = __c * __c + __d * __d; - double _Complex z; - __real__(z) = _SCALBNd((__a * __c + __b * __d) / __denom, -__ilogbw); - __imag__(z) = _SCALBNd((__b * __c - __a * __d) / __denom, -__ilogbw); - if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) { - if ((__denom == 0.0) && (!_ISNANd(__a) || !_ISNANd(__b))) { - __real__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __a; - __imag__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __b; - } else if ((_ISINFd(__a) || _ISINFd(__b)) && _ISFINITEd(__c) && - _ISFINITEd(__d)) { - __a = _COPYSIGNd(_ISINFd(__a) ? 1.0 : 0.0, __a); - __b = _COPYSIGNd(_ISINFd(__b) ? 
1.0 : 0.0, __b); - __real__(z) = __builtin_huge_val() * (__a * __c + __b * __d); - __imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d); - } else if (_ISINFd(__logbw) && __logbw > 0.0 && _ISFINITEd(__a) && - _ISFINITEd(__b)) { - __c = _COPYSIGNd(_ISINFd(__c) ? 1.0 : 0.0, __c); - __d = _COPYSIGNd(_ISINFd(__d) ? 1.0 : 0.0, __d); - __real__(z) = 0.0 * (__a * __c + __b * __d); - __imag__(z) = 0.0 * (__b * __c - __a * __d); - } - } - return z; -} - -__DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) { - int __ilogbw = 0; - float __logbw = _LOGBf(_fmaxf(_ABSf(__c), _ABSf(__d))); - if (_ISFINITEf(__logbw)) { - __ilogbw = (int)__logbw; - __c = _SCALBNf(__c, -__ilogbw); - __d = _SCALBNf(__d, -__ilogbw); - } - float __denom = __c * __c + __d * __d; - float _Complex z; - __real__(z) = _SCALBNf((__a * __c + __b * __d) / __denom, -__ilogbw); - __imag__(z) = _SCALBNf((__b * __c - __a * __d) / __denom, -__ilogbw); - if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) { - if ((__denom == 0) && (!_ISNANf(__a) || !_ISNANf(__b))) { - __real__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __a; - __imag__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __b; - } else if ((_ISINFf(__a) || _ISINFf(__b)) && _ISFINITEf(__c) && - _ISFINITEf(__d)) { - __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a); - __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b); - __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d); - __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d); - } else if (_ISINFf(__logbw) && __logbw > 0 && _ISFINITEf(__a) && - _ISFINITEf(__b)) { - __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c); - __d = _COPYSIGNf(_ISINFf(__d) ? 
1 : 0, __d); - __real__(z) = 0 * (__a * __c + __b * __d); - __imag__(z) = 0 * (__b * __c - __a * __d); - } - } - return z; -} - -#if defined(__cplusplus) -} // extern "C" -#endif - -#undef _ISNANd -#undef _ISNANf -#undef _ISINFd -#undef _ISINFf -#undef _COPYSIGNd -#undef _COPYSIGNf -#undef _ISFINITEd -#undef _ISFINITEf -#undef _SCALBNd -#undef _SCALBNf -#undef _ABSd -#undef _ABSf -#undef _LOGBd -#undef _LOGBf -#undef _fmaxd -#undef _fmaxf - -#if defined(__OPENMP_SPIRV__) -#pragma omp end declare target -#endif - -#pragma pop_macro("__DEVICE__") - -#endif // __CLANG_SPIRV_COMPLEX_BUILTINS \ No newline at end of file diff --git a/clang/lib/Headers/openmp_wrappers/complex b/clang/lib/Headers/openmp_wrappers/complex index b23c4b6d7d2be..afa8e3eb3d835 100644 --- a/clang/lib/Headers/openmp_wrappers/complex +++ b/clang/lib/Headers/openmp_wrappers/complex @@ -31,7 +31,7 @@ #ifdef __SPIRV__ #define __OPENMP_SPIRV__ -#include <__clang_spirv_complex_builtins.h> +#include <__clang_cuda_complex_builtins.h> #undef __OPENMP_SPIRV__ #endif // __SPIRV__ diff --git a/clang/lib/Headers/openmp_wrappers/complex.h b/clang/lib/Headers/openmp_wrappers/complex.h index 81501428b0f90..92839088b47d5 100644 --- a/clang/lib/Headers/openmp_wrappers/complex.h +++ b/clang/lib/Headers/openmp_wrappers/complex.h @@ -31,7 +31,7 @@ #ifdef __SPIRV__ #define __OPENMP_SPIRV__ -#include <__clang_spirv_complex_builtins.h> +#include <__clang_cuda_complex_builtins.h> #undef __OPENMP_SPIRV__ #endif // __SPIRV__ _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
