[PATCH] D85879: [OpenMP] Overload `std::isnan` and friends multiple times for the GPU

Johannes Doerfert via Phabricator via cfe-commits Wed, 16 Sep 2020 11:41:22 -0700

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG97652202d1e6: [OpenMP] Overload `std::isnan` and friends 
multiple times for the GPU (authored by jdoerfert).


Changed prior to commit:
  https://reviews.llvm.org/D85879?vs=285385&id=292292#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D85879/new/

https://reviews.llvm.org/D85879

Files:
  clang/lib/Headers/__clang_cuda_cmath.h
  clang/test/Headers/Inputs/include/cmath
  clang/test/Headers/openmp_device_math_isnan.cpp

Index: clang/test/Headers/openmp_device_math_isnan.cpp
===================================================================
--- /dev/null
+++ clang/test/Headers/openmp_device_math_isnan.cpp
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=BOOL_RETURN
+// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast
+// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast | FileCheck %s --check-prefix=BOOL_RETURN
+// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -DUSE_ISNAN_WITH_INT_RETURN
+// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=INT_RETURN
+// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN
+// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=INT_RETURN
+// expected-no-diagnostics
+
+#include <cmath>
+
+double math(float f, double d) {
+  double r = 0;
+  // INT_RETURN: call i32 @__nv_isnanf(float
+  // BOOL_RETURN: call i32 @__nv_isnanf(float
+  r += std::isnan(f);
+  // INT_RETURN: call i32 @__nv_isnand(double
+  // BOOL_RETURN: call i32 @__nv_isnand(double
+  r += std::isnan(d);
+  return r;
+}
+
+long double foo(float f, double d, long double ld) {
+  double r = ld;
+  r += math(f, d);
+#pragma omp target map(r)
+  { r += math(f, d); }
+  return r;
+}
Index: clang/test/Headers/Inputs/include/cmath
===================================================================
--- clang/test/Headers/Inputs/include/cmath
+++ clang/test/Headers/Inputs/include/cmath
@@ -82,8 +82,13 @@
 bool islessgreater(double, double);
 bool islessgreater(float, float);
 bool isnan(long double);
+#ifdef USE_ISNAN_WITH_INT_RETURN
+int isnan(double);
+int isnan(float);
+#else
 bool isnan(double);
 bool isnan(float);
+#endif
 bool isnormal(double);
 bool isnormal(float);
 bool isunordered(double, double);
Index: clang/lib/Headers/__clang_cuda_cmath.h
===================================================================
--- clang/lib/Headers/__clang_cuda_cmath.h
+++ clang/lib/Headers/__clang_cuda_cmath.h
@@ -66,10 +66,38 @@
 }
 
 // For inscrutable reasons, the CUDA headers define these functions for us on
-// Windows. For OpenMP we omit these as some old system headers have
-// non-conforming `isinf(float)` and `isnan(float)` implementations that return
-// an `int`. The system versions of these functions should be fine anyway.
-#if !defined(_MSC_VER) && !defined(__OPENMP_NVPTX__)
+// Windows.
+#if !defined(_MSC_VER) || defined(__OPENMP_NVPTX__)
+
+// For OpenMP we work around some old system headers that have non-conforming
+// `isinf(float)` and `isnan(float)` implementations that return an `int`. We do
+// this by providing two versions of these functions, differing only in the
+// return type. To avoid conflicting definitions we disable implicit base
+// function generation. That means we will end up with two specializations, one
+// per type, but only one has a base function defined by the system header.
+#if defined(__OPENMP_NVPTX__)
+#pragma omp begin declare variant match(                                       \
+    implementation = {extension(disable_implicit_base)})
+
+// FIXME: We lack an extension to customize the mangling of the variants, e.g.,
+//        add a suffix. This means we would clash with the names of the variants
+//        (note that we do not create implicit base functions here). To avoid
+//        this clash we add a new trait to some of them that is always true
+//        (this is LLVM after all ;)). It will only influence the mangled name
+//        of the variants inside the inner region and avoid the clash.
+#pragma omp begin declare variant match(implementation = {vendor(llvm)})
+
+__DEVICE__ int isinf(float __x) { return ::__isinff(__x); }
+__DEVICE__ int isinf(double __x) { return ::__isinf(__x); }
+__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); }
+__DEVICE__ int isfinite(double __x) { return ::__isfinited(__x); }
+__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); }
+__DEVICE__ int isnan(double __x) { return ::__isnan(__x); }
+
+#pragma omp end declare variant
+
+#endif
+
 __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
 __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
 __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
@@ -79,6 +107,11 @@
 __DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
 __DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
 __DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
+
+#if defined(__OPENMP_NVPTX__)
+#pragma omp end declare variant
+#endif
+
 #endif
 
 __DEVICE__ bool isgreater(float __x, float __y) {

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D85879: [OpenMP] Overload `std::isnan` and friends multiple times for the GPU

Reply via email to