ABataev created this revision.
ABataev added reviewers: tra, hfinkel.
Herald added a subscriber: guansong.
Herald added a project: clang.

Inherited support for complex math functions from CUDA implementation.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D75209

Files:
  clang/lib/Headers/__clang_cuda_complex_builtins.h
  clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h
  clang/test/Headers/nvptx_device_cmath_functions.c
  clang/test/Headers/nvptx_device_cmath_functions.cpp
  clang/test/Headers/nvptx_device_cmath_functions_cxx17.cpp

Index: clang/test/Headers/nvptx_device_cmath_functions_cxx17.cpp
===================================================================
--- clang/test/Headers/nvptx_device_cmath_functions_cxx17.cpp
+++ clang/test/Headers/nvptx_device_cmath_functions_cxx17.cpp
@@ -24,3 +24,15 @@
     double l5 = abs((int)a1);
   }
 }
+
+// CHECK-YES-DAG: call { double, double } @__muldc3(
+// CHECK-YES-DAG: call { double, double } @__divdc3(
+void test_cmplx(double _Complex a) {
+#pragma omp target
+  {
+    (void)(a * (a / a));
+  }
+}
+// CHECK-YES-DAG: define {{.*}}hidden { double, double } @__muldc3(
+// CHECK-YES-DAG: define {{.*}}hidden { double, double } @__divdc3(
+
Index: clang/test/Headers/nvptx_device_cmath_functions.cpp
===================================================================
--- clang/test/Headers/nvptx_device_cmath_functions.cpp
+++ clang/test/Headers/nvptx_device_cmath_functions.cpp
@@ -24,3 +24,15 @@
     double l5 = abs((int)a1);
   }
 }
+
+// CHECK-YES-DAG: call { float, float } @__mulsc3(
+// CHECK-YES-DAG: call { float, float } @__divsc3(
+void test_cmplx(float _Complex a) {
+#pragma omp target
+  {
+    (void)(a * (a / a));
+  }
+}
+// CHECK-YES-DAG: define {{.*}}hidden { float, float } @__mulsc3(
+// CHECK-YES-DAG: define {{.*}}hidden { float, float } @__divsc3(
+
Index: clang/test/Headers/nvptx_device_cmath_functions.c
===================================================================
--- clang/test/Headers/nvptx_device_cmath_functions.c
+++ clang/test/Headers/nvptx_device_cmath_functions.c
@@ -8,6 +8,11 @@
 
 #include <cmath>
 
+// CHECK-YES-DAG: define {{.*}}hidden { double, double } @__muldc3(
+// CHECK-YES-DAG: define {{.*}}hidden { float, float } @__mulsc3(
+// CHECK-YES-DAG: define {{.*}}hidden { double, double } @__divdc3(
+// CHECK-YES-DAG: define {{.*}}hidden { float, float } @__divsc3(
+
 void test_sqrt(double a1) {
   #pragma omp target
   {
@@ -23,3 +28,11 @@
     double l5 = abs((int)a1);
   }
 }
+
+// CHECK-YES-NOT: @{{__mulsc3|__divsc3}}(
+void test_cmplx(float _Complex a) {
+#pragma omp target
+  {
+    (void)(a * (a / a));
+  }
+}
Index: clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h
===================================================================
--- clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h
+++ clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h
@@ -26,6 +26,8 @@
 #include <__clang_cuda_libdevice_declares.h>
 /// Provide definitions for these functions.
 #include <__clang_cuda_device_functions.h>
+/// Provide definitions for complex math functions.
+#include <__clang_cuda_complex_builtins.h>
 
 #undef __CUDA__
 
Index: clang/lib/Headers/__clang_cuda_complex_builtins.h
===================================================================
--- clang/lib/Headers/__clang_cuda_complex_builtins.h
+++ clang/lib/Headers/__clang_cuda_complex_builtins.h
@@ -15,8 +15,61 @@
 // operations.  (These implementations come from libc++, and have been modified
 // to work with CUDA.)
 
-extern "C" inline __device__ double _Complex __muldc3(double __a, double __b,
-                                                      double __c, double __d) {
+#ifdef _OPENMP
+#ifdef __cplusplus
+#define __DEVICE__ extern "C" inline
+#else
+#define __DEVICE__ __attribute__((always_inline))
+#endif // __cplusplus
+#else
+#define __DEVICE__ extern "C" inline __device__
+#endif // _OPENMP
+
+#ifdef _OPENMP
+#define MAX(x, y) fmax((x), (y))
+#define MAXF(x, y) fmaxf((x), (y))
+#define IS_NAN(x) __isnan(x)
+#define IS_NANF(x) __isnanf(x)
+#define IS_INF(x) __isinf(x)
+#define IS_INFF(x) __isinff(x)
+#define IS_FINITE(x) __finite(x)
+#define IS_FINITEF(x) __finitef(x)
+#define COPYSIGN(x, y) copysign((x), (y))
+#define COPYSIGNF(x, y) copysignf((x), (y))
+#define LOGB(x) logb(x)
+#define LOGBF(x) logbf(x)
+#define ABS(x) fabs(x)
+#define ABSF(x) fabsf(x)
+#define SCALBN(x, y) scalbn((x), (y))
+#define SCALBNF(x, y) scalbnf((x), (y))
+#else
+// Can't use std::max, because that's defined in <algorithm>, and we don't
+// want to pull that in for every compile.  The CUDA headers define
+// ::max(float, float) and ::max(double, double), which is sufficient for us.
+#define MAX(x, y) max((x), (y))
+#define MAXF(x, y) max((x), (y))
+#define IS_NAN(x) std::isnan(x)
+#define IS_NANF(x) std::isnan(x)
+#define IS_INF(x) std::isinf(x)
+#define IS_INFF(x) std::isinf(x)
+#define IS_FINITE(x) std::isfinite(x)
+#define IS_FINITEF(x) std::isfinite(x)
+#define COPYSIGN(x, y) std::copysign((x), (y))
+#define COPYSIGNF(x, y) std::copysign((x), (y))
+#define LOGB(x) std::logb(x)
+#define LOGBF(x) std::logb(x)
+#define ABS(x) std::abs(x)
+#define ABSF(x) std::abs(x)
+#define SCALBN(x, y) std::scalbn((x), (y))
+#define SCALBNF(x, y) std::scalbn((x), (y))
+#endif // _OPENMP
+
+#ifdef _OPENMP
+#pragma omp declare target
+#endif // _OPENMP
+
+__DEVICE__ double _Complex __muldc3(double __a, double __b, double __c,
+                                    double __d) {
   double __ac = __a * __c;
   double __bd = __b * __d;
   double __ad = __a * __d;
@@ -24,50 +77,49 @@
   double _Complex z;
   __real__(z) = __ac - __bd;
   __imag__(z) = __ad + __bc;
-  if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {
+  if (IS_NAN(__real__(z)) && IS_NAN(__imag__(z))) {
     int __recalc = 0;
-    if (std::isinf(__a) || std::isinf(__b)) {
-      __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);
-      __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);
-      if (std::isnan(__c))
-        __c = std::copysign(0, __c);
-      if (std::isnan(__d))
-        __d = std::copysign(0, __d);
+    if (IS_INF(__a) || IS_INF(__b)) {
+      __a = COPYSIGN(IS_INF(__a) ? 1 : 0, __a);
+      __b = COPYSIGN(IS_INF(__b) ? 1 : 0, __b);
+      if (IS_NAN(__c))
+        __c = COPYSIGN(0, __c);
+      if (IS_NAN(__d))
+        __d = COPYSIGN(0, __d);
       __recalc = 1;
     }
-    if (std::isinf(__c) || std::isinf(__d)) {
-      __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);
-      __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);
-      if (std::isnan(__a))
-        __a = std::copysign(0, __a);
-      if (std::isnan(__b))
-        __b = std::copysign(0, __b);
+    if (IS_INF(__c) || IS_INF(__d)) {
+      __c = COPYSIGN(IS_INF(__c) ? 1 : 0, __c);
+      __d = COPYSIGN(IS_INF(__d) ? 1 : 0, __d);
+      if (IS_NAN(__a))
+        __a = COPYSIGN(0, __a);
+      if (IS_NAN(__b))
+        __b = COPYSIGN(0, __b);
       __recalc = 1;
     }
-    if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) ||
-                      std::isinf(__ad) || std::isinf(__bc))) {
-      if (std::isnan(__a))
-        __a = std::copysign(0, __a);
-      if (std::isnan(__b))
-        __b = std::copysign(0, __b);
-      if (std::isnan(__c))
-        __c = std::copysign(0, __c);
-      if (std::isnan(__d))
-        __d = std::copysign(0, __d);
+    if (!__recalc && (IS_INF(__ac) || IS_INF(__bd) ||
+                      IS_INF(__ad) || IS_INF(__bc))) {
+      if (IS_NAN(__a))
+        __a = COPYSIGN(0, __a);
+      if (IS_NAN(__b))
+        __b = COPYSIGN(0, __b);
+      if (IS_NAN(__c))
+        __c = COPYSIGN(0, __c);
+      if (IS_NAN(__d))
+        __d = COPYSIGN(0, __d);
       __recalc = 1;
     }
     if (__recalc) {
       // Can't use std::numeric_limits<double>::infinity() -- that doesn't have
       // a device overload (and isn't constexpr before C++11, naturally).
-      __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d);
-      __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c);
+      __real__(z) = __builtin_huge_val() * (__a * __c - __b * __d);
+      __imag__(z) = __builtin_huge_val() * (__a * __d + __b * __c);
     }
   }
   return z;
 }
 
-extern "C" inline __device__ float _Complex __mulsc3(float __a, float __b,
-                                                     float __c, float __d) {
+__DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) {
   float __ac = __a * __c;
   float __bd = __b * __d;
   float __ad = __a * __d;
@@ -75,36 +127,36 @@
   float _Complex z;
   __real__(z) = __ac - __bd;
   __imag__(z) = __ad + __bc;
-  if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {
+  if (IS_NANF(__real__(z)) && IS_NANF(__imag__(z))) {
     int __recalc = 0;
-    if (std::isinf(__a) || std::isinf(__b)) {
-      __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);
-      __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);
-      if (std::isnan(__c))
-        __c = std::copysign(0, __c);
-      if (std::isnan(__d))
-        __d = std::copysign(0, __d);
+    if (IS_INFF(__a) || IS_INFF(__b)) {
+      __a = COPYSIGNF(IS_INFF(__a) ? 1 : 0, __a);
+      __b = COPYSIGNF(IS_INFF(__b) ? 1 : 0, __b);
+      if (IS_NANF(__c))
+        __c = COPYSIGNF(0, __c);
+      if (IS_NANF(__d))
+        __d = COPYSIGNF(0, __d);
       __recalc = 1;
     }
-    if (std::isinf(__c) || std::isinf(__d)) {
-      __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);
-      __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);
-      if (std::isnan(__a))
-        __a = std::copysign(0, __a);
-      if (std::isnan(__b))
-        __b = std::copysign(0, __b);
+    if (IS_INFF(__c) || IS_INFF(__d)) {
+      __c = COPYSIGNF(IS_INFF(__c) ? 1 : 0, __c);
+      __d = COPYSIGNF(IS_INFF(__d) ? 1 : 0, __d);
+      if (IS_NANF(__a))
+        __a = COPYSIGNF(0, __a);
+      if (IS_NANF(__b))
+        __b = COPYSIGNF(0, __b);
       __recalc = 1;
     }
-    if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) ||
-                      std::isinf(__ad) || std::isinf(__bc))) {
-      if (std::isnan(__a))
-        __a = std::copysign(0, __a);
-      if (std::isnan(__b))
-        __b = std::copysign(0, __b);
-      if (std::isnan(__c))
-        __c = std::copysign(0, __c);
-      if (std::isnan(__d))
-        __d = std::copysign(0, __d);
+    if (!__recalc && (IS_INFF(__ac) || IS_INFF(__bd) ||
+                      IS_INFF(__ad) || IS_INFF(__bc))) {
+      if (IS_NANF(__a))
+        __a = COPYSIGNF(0, __a);
+      if (IS_NANF(__b))
+        __b = COPYSIGNF(0, __b);
+      if (IS_NANF(__c))
+        __c = COPYSIGNF(0, __c);
+      if (IS_NANF(__d))
+        __d = COPYSIGNF(0, __d);
       __recalc = 1;
     }
     if (__recalc) {
@@ -115,36 +167,33 @@
   return z;
 }
 
-extern "C" inline __device__ double _Complex __divdc3(double __a, double __b,
-                                                      double __c, double __d) {
+__DEVICE__ double _Complex __divdc3(double __a, double __b, double __c,
+                                    double __d) {
   int __ilogbw = 0;
-  // Can't use std::max, because that's defined in <algorithm>, and we don't
-  // want to pull that in for every compile.  The CUDA headers define
-  // ::max(float, float) and ::max(double, double), which is sufficient for us.
-  double __logbw = std::logb(max(std::abs(__c), std::abs(__d)));
-  if (std::isfinite(__logbw)) {
+  double __logbw = LOGB(MAX(ABS(__c), ABS(__d)));
+  if (IS_FINITE(__logbw)) {
     __ilogbw = (int)__logbw;
-    __c = std::scalbn(__c, -__ilogbw);
-    __d = std::scalbn(__d, -__ilogbw);
+    __c = SCALBN(__c, -__ilogbw);
+    __d = SCALBN(__d, -__ilogbw);
   }
   double __denom = __c * __c + __d * __d;
   double _Complex z;
-  __real__(z) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
-  __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
-  if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {
-    if ((__denom == 0.0) && (!std::isnan(__a) || !std::isnan(__b))) {
-      __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a;
-      __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b;
-    } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&
-               std::isfinite(__d)) {
-      __a = std::copysign(std::isinf(__a) ? 1.0 : 0.0, __a);
-      __b = std::copysign(std::isinf(__b) ? 1.0 : 0.0, __b);
-      __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);
-      __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);
-    } else if (std::isinf(__logbw) && __logbw > 0.0 && std::isfinite(__a) &&
-               std::isfinite(__b)) {
-      __c = std::copysign(std::isinf(__c) ? 1.0 : 0.0, __c);
-      __d = std::copysign(std::isinf(__d) ? 1.0 : 0.0, __d);
+  __real__(z) = SCALBN((__a * __c + __b * __d) / __denom, -__ilogbw);
+  __imag__(z) = SCALBN((__b * __c - __a * __d) / __denom, -__ilogbw);
+  if (IS_NAN(__real__(z)) && IS_NAN(__imag__(z))) {
+    if ((__denom == 0.0) && (!IS_NAN(__a) || !IS_NAN(__b))) {
+      __real__(z) = COPYSIGN(__builtin_huge_val(), __c) * __a;
+      __imag__(z) = COPYSIGN(__builtin_huge_val(), __c) * __b;
+    } else if ((IS_INF(__a) || IS_INF(__b)) && IS_FINITE(__c) &&
+               IS_FINITE(__d)) {
+      __a = COPYSIGN(IS_INF(__a) ? 1.0 : 0.0, __a);
+      __b = COPYSIGN(IS_INF(__b) ? 1.0 : 0.0, __b);
+      __real__(z) = __builtin_huge_val() * (__a * __c + __b * __d);
+      __imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d);
+    } else if (IS_INF(__logbw) && __logbw > 0.0 && IS_FINITE(__a) &&
+               IS_FINITE(__b)) {
+      __c = COPYSIGN(IS_INF(__c) ? 1.0 : 0.0, __c);
+      __d = COPYSIGN(IS_INF(__d) ? 1.0 : 0.0, __d);
       __real__(z) = 0.0 * (__a * __c + __b * __d);
       __imag__(z) = 0.0 * (__b * __c - __a * __d);
     }
@@ -152,33 +201,32 @@
   return z;
 }
 
-extern "C" inline __device__ float _Complex __divsc3(float __a, float __b,
-                                                     float __c, float __d) {
+__DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) {
   int __ilogbw = 0;
-  float __logbw = std::logb(max(std::abs(__c), std::abs(__d)));
-  if (std::isfinite(__logbw)) {
+  float __logbw = LOGBF(MAXF(ABSF(__c), ABSF(__d)));
+  if (IS_FINITEF(__logbw)) {
     __ilogbw = (int)__logbw;
-    __c = std::scalbn(__c, -__ilogbw);
-    __d = std::scalbn(__d, -__ilogbw);
+    __c = SCALBNF(__c, -__ilogbw);
+    __d = SCALBNF(__d, -__ilogbw);
   }
   float __denom = __c * __c + __d * __d;
   float _Complex z;
-  __real__(z) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
-  __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
-  if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {
-    if ((__denom == 0) && (!std::isnan(__a) || !std::isnan(__b))) {
-      __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a;
-      __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b;
-    } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&
-               std::isfinite(__d)) {
-      __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);
-      __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);
+  __real__(z) = SCALBNF((__a * __c + __b * __d) / __denom, -__ilogbw);
+  __imag__(z) = SCALBNF((__b * __c - __a * __d) / __denom, -__ilogbw);
+  if (IS_NANF(__real__(z)) && IS_NANF(__imag__(z))) {
+    if ((__denom == 0) && (!IS_NANF(__a) || !IS_NANF(__b))) {
+      __real__(z) = COPYSIGNF(__builtin_huge_valf(), __c) * __a;
+      __imag__(z) = COPYSIGNF(__builtin_huge_valf(), __c) * __b;
+    } else if ((IS_INFF(__a) || IS_INFF(__b)) && IS_FINITEF(__c) &&
+               IS_FINITEF(__d)) {
+      __a = COPYSIGNF(IS_INFF(__a) ? 1 : 0, __a);
+      __b = COPYSIGNF(IS_INFF(__b) ? 1 : 0, __b);
       __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);
       __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);
-    } else if (std::isinf(__logbw) && __logbw > 0 && std::isfinite(__a) &&
-               std::isfinite(__b)) {
-      __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);
-      __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);
+    } else if (IS_INFF(__logbw) && __logbw > 0 && IS_FINITEF(__a) &&
+               IS_FINITEF(__b)) {
+      __c = COPYSIGNF(IS_INFF(__c) ? 1 : 0, __c);
+      __d = COPYSIGNF(IS_INFF(__d) ? 1 : 0, __d);
       __real__(z) = 0 * (__a * __c + __b * __d);
       __imag__(z) = 0 * (__b * __c - __a * __d);
     }
@@ -186,4 +234,23 @@
   return z;
 }
 
+#ifdef _OPENMP
+#pragma omp end declare target
+#endif // _OPENMP
+
+#undef IS_NAN(x)
+#undef IS_NANF(x)
+#undef IS_INF(x)
+#undef IS_INFF(x)
+#undef IS_FINITE(x)
+#undef IS_FINITEF(x)
+#undef COPYSIGN(x, y)
+#undef COPYSIGNF(x, y)
+#undef LOGB(x)
+#undef LOGBF(x)
+#undef ABS(x)
+#undef ABSF(x)
+#undef SCALBN(x, y)
+#undef SCALBNF(x, y)
+
 #endif // __CLANG_CUDA_COMPLEX_BUILTINS
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to