[PATCH] D83591: [OpenMP][CUDA] Fix std::complex in GPU regions

Johannes Doerfert via Phabricator via cfe-commits Fri, 10 Jul 2020 15:06:33 -0700

jdoerfert created this revision.
jdoerfert added reviewers: tra, hfinkel, JonChesterfield.
Herald added subscribers: sstefan1, guansong, bollu, yaxunl.
Herald added a project: clang.


The old way worked to some degree in C++ mode, but in C mode we actually
tried to introduce variants of macros (e.g., isinf). To make both modes
work reliably, we get rid of those extra variants and directly use NVIDIA
intrinsics in the complex implementation. While this has to be revisited
as we add other GPU targets that want to reuse the code, it should be
fine for now.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D83591

Files:
  clang/lib/Headers/__clang_cuda_complex_builtins.h
  clang/lib/Headers/__clang_cuda_math.h
  clang/test/Headers/nvptx_device_math_complex.c
  clang/test/Headers/nvptx_device_math_complex.cpp

Index: clang/test/Headers/nvptx_device_math_complex.cpp
===================================================================
--- clang/test/Headers/nvptx_device_math_complex.cpp
+++ clang/test/Headers/nvptx_device_math_complex.cpp
@@ -5,12 +5,34 @@
 
 #include <complex>
 
-// CHECK-DAG: define weak {{.*}} @__mulsc3
-// CHECK-DAG: define weak {{.*}} @__muldc3
-// CHECK-DAG: define weak {{.*}} @__divsc3
-// CHECK-DAG: define weak {{.*}} @__divdc3
+// CHECK: define weak {{.*}} @__muldc3
+// CHECK-DAG: call i32 @__nv_isnand(
+// CHECK-DAG: call i32 @__nv_isinfd(
+// CHECK-DAG: call double @__nv_copysign(
 
+// CHECK: define weak {{.*}} @__mulsc3
+// CHECK-DAG: call i32 @__nv_isnanf(
+// CHECK-DAG: call i32 @__nv_isinff(
+// CHECK-DAG: call float @__nv_copysignf(
+
+// CHECK: define weak {{.*}} @__divdc3
+// CHECK-DAG: call i32 @__nv_isnand(
+// CHECK-DAG: call i32 @__nv_isinfd(
+// CHECK-DAG: call i32 @__nv_isfinited(
+// CHECK-DAG: call double @__nv_copysign(
+// CHECK-DAG: call double @__nv_scalbn(
+// CHECK-DAG: call double @__nv_fabs(
+// CHECK-DAG: call double @__nv_logb(
+
+// CHECK: define weak {{.*}} @__divsc3
+// CHECK-DAG: call i32 @__nv_isnanf(
+// CHECK-DAG: call i32 @__nv_isinff(
+// CHECK-DAG: call i32 @__nv_finitef(
+// CHECK-DAG: call float @__nv_copysignf(
 // CHECK-DAG: call float @__nv_scalbnf(
+// CHECK-DAG: call float @__nv_fabsf(
+// CHECK-DAG: call float @__nv_logbf(
+
 void test_scmplx(std::complex<float> a) {
 #pragma omp target
   {
@@ -18,7 +40,6 @@
   }
 }
 
-// CHECK-DAG: call double @__nv_scalbn(
 void test_dcmplx(std::complex<double> a) {
 #pragma omp target
   {
Index: clang/test/Headers/nvptx_device_math_complex.c
===================================================================
--- clang/test/Headers/nvptx_device_math_complex.c
+++ clang/test/Headers/nvptx_device_math_complex.c
@@ -11,12 +11,34 @@
 #include <complex.h>
 #endif
 
-// CHECK-DAG: define weak {{.*}} @__mulsc3
-// CHECK-DAG: define weak {{.*}} @__muldc3
-// CHECK-DAG: define weak {{.*}} @__divsc3
-// CHECK-DAG: define weak {{.*}} @__divdc3
+// CHECK: define weak {{.*}} @__muldc3
+// CHECK-DAG: call i32 @__nv_isnand(
+// CHECK-DAG: call i32 @__nv_isinfd(
+// CHECK-DAG: call double @__nv_copysign(
 
+// CHECK: define weak {{.*}} @__mulsc3
+// CHECK-DAG: call i32 @__nv_isnanf(
+// CHECK-DAG: call i32 @__nv_isinff(
+// CHECK-DAG: call float @__nv_copysignf(
+
+// CHECK: define weak {{.*}} @__divdc3
+// CHECK-DAG: call i32 @__nv_isnand(
+// CHECK-DAG: call i32 @__nv_isinfd(
+// CHECK-DAG: call i32 @__nv_isfinited(
+// CHECK-DAG: call double @__nv_copysign(
+// CHECK-DAG: call double @__nv_scalbn(
+// CHECK-DAG: call double @__nv_fabs(
+// CHECK-DAG: call double @__nv_logb(
+
+// CHECK: define weak {{.*}} @__divsc3
+// CHECK-DAG: call i32 @__nv_isnanf(
+// CHECK-DAG: call i32 @__nv_isinff(
+// CHECK-DAG: call i32 @__nv_finitef(
+// CHECK-DAG: call float @__nv_copysignf(
 // CHECK-DAG: call float @__nv_scalbnf(
+// CHECK-DAG: call float @__nv_fabsf(
+// CHECK-DAG: call float @__nv_logbf(
+
 void test_scmplx(float _Complex a) {
 #pragma omp target
   {
@@ -24,7 +46,6 @@
   }
 }
 
-// CHECK-DAG: call double @__nv_scalbn(
 void test_dcmplx(double _Complex a) {
 #pragma omp target
   {
Index: clang/lib/Headers/__clang_cuda_math.h
===================================================================
--- clang/lib/Headers/__clang_cuda_math.h
+++ clang/lib/Headers/__clang_cuda_math.h
@@ -340,16 +340,6 @@
 __DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); }
 __DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); }
 
-// In C++ mode OpenMP takes the system versions of these because some math
-// headers provide the wrong return type. This cannot happen in C and we can and
-// want to use the specialized versions right away.
-#if defined(_OPENMP) && !defined(__cplusplus)
-__DEVICE__ int isinff(float __x) { return __nv_isinff(__x); }
-__DEVICE__ int isinf(double __x) { return __nv_isinfd(__x); }
-__DEVICE__ int isnanf(float __x) { return __nv_isnanf(__x); }
-__DEVICE__ int isnan(double __x) { return __nv_isnand(__x); }
-#endif
-
 #pragma pop_macro("__DEVICE__")
 #pragma pop_macro("__DEVICE_VOID__")
 #pragma pop_macro("__FAST_OR_SLOW")
Index: clang/lib/Headers/__clang_cuda_complex_builtins.h
===================================================================
--- clang/lib/Headers/__clang_cuda_complex_builtins.h
+++ clang/lib/Headers/__clang_cuda_complex_builtins.h
@@ -23,44 +23,22 @@
 #define __DEVICE__ __device__ inline
 #endif
 
-// Make the algorithms available for C and C++ by selecting the right functions.
-#if defined(__cplusplus)
-// TODO: In OpenMP mode we cannot overload isinf/isnan/isfinite the way we
-// overload all other math functions because old math system headers and not
-// always conformant and return an integer instead of a boolean. Until that has
-// been addressed we need to work around it. For now, we substituate with the
-// calls we would have used to implement those three functions. Note that we
-// could use the C alternatives as well.
-#define _ISNANd ::__isnan
-#define _ISNANf ::__isnanf
-#define _ISINFd ::__isinf
-#define _ISINFf ::__isinff
-#define _ISFINITEd ::__isfinited
-#define _ISFINITEf ::__finitef
-#define _COPYSIGNd std::copysign
-#define _COPYSIGNf std::copysign
-#define _SCALBNd std::scalbn
-#define _SCALBNf std::scalbn
-#define _ABSd std::abs
-#define _ABSf std::abs
-#define _LOGBd std::logb
-#define _LOGBf std::logb
-#else
-#define _ISNANd isnan
-#define _ISNANf isnanf
-#define _ISINFd isinf
-#define _ISINFf isinff
-#define _ISFINITEd isfinite
-#define _ISFINITEf isfinitef
-#define _COPYSIGNd copysign
-#define _COPYSIGNf copysignf
-#define _SCALBNd scalbn
-#define _SCALBNf scalbnf
-#define _ABSd abs
-#define _ABSf absf
-#define _LOGBd logb
-#define _LOGBf logbf
-#endif
+// Make the algorithms available for C and C++ in CUDA and OpenMP mode by
+// directly selecting the native builtin or device function.
+#define _ISNANd __nv_isnand
+#define _ISNANf __nv_isnanf
+#define _ISINFd __nv_isinfd
+#define _ISINFf __nv_isinff
+#define _ISFINITEd __nv_isfinited
+#define _ISFINITEf __nv_finitef
+#define _COPYSIGNd __nv_copysign
+#define _COPYSIGNf __nv_copysignf
+#define _SCALBNd __nv_scalbn
+#define _SCALBNf __nv_scalbnf
+#define _ABSd __nv_fabs
+#define _ABSf __nv_fabsf
+#define _LOGBd __nv_logb
+#define _LOGBf __nv_logbf
 
 #if defined(__cplusplus)
 extern "C" {

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D83591: [OpenMP][CUDA] Fix std::complex in GPU regions

Reply via email to