[PATCH] D75788: [WIP][OpenMP] Reuse CUDA wrappers in `nvptx` target regions.

Johannes Doerfert via Phabricator via cfe-commits Fri, 06 Mar 2020 17:40:12 -0800

jdoerfert created this revision.
jdoerfert added reviewers: kiranchandramohan, ABataev, RaviNarayanaswamy, 
gtbercea, grokos, sdmitriev, JonChesterfield, hfinkel, fghanim, aaron.ballman.
Herald added subscribers: guansong, bollu, mgorny.
Herald added a project: clang.


This is a WIP patch to show one way of implementing math support (and
support for `new`, `algorithm`, ...) in OpenMP target regions by piggy
backing on the CUDA implementation. All CUDA "code" should be in
`omp begin/end declare variant match(device={arch(nvptx)})` at the end
of the day while the "host" code is not.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D75788

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/__clang_cuda_cmath.h
  clang/lib/Headers/__clang_cuda_device_functions.h
  clang/lib/Headers/__clang_cuda_libdevice_declares.h
  clang/lib/Headers/__clang_cuda_math_forward_declares.h
  clang/lib/Headers/__clang_cuda_runtime_wrapper.h
  clang/lib/Headers/cuda_wrappers/new
  clang/lib/Headers/openmp_wrappers/__clang_openmp_math.h
  clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h
  clang/lib/Headers/openmp_wrappers/cmath
  clang/lib/Headers/openmp_wrappers/math.h
  clang/test/OpenMP/target_nvptx_math_fp_macro.cpp
  clang/test/OpenMP/target_nvptx_math_sin.c

Index: clang/test/OpenMP/target_nvptx_math_sin.c
===================================================================
--- /dev/null
+++ clang/test/OpenMP/target_nvptx_math_sin.c
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// expected-no-diagnostics
+
+#include <cmath>
+// TODO: How to include a "mock systme cmath" here for testing?
+
+double math(short s, int i, float f, double d, double ld) {
+  double r = 0;
+  r += sin(s);
+  r += sin(i);
+  r += sin(f);
+  r += sin(d);
+  return r;
+}
+
+long double foo(short s, int i, float f, double d, long double ld) {
+  double r = sin(ld);
+  r += math(s, i, f, d, ld);
+#pragma omp target map(r)
+  { r += math(s, i, f, d, ld); }
+  return r;
+}
Index: clang/test/OpenMP/target_nvptx_math_fp_macro.cpp
===================================================================
--- /dev/null
+++ clang/test/OpenMP/target_nvptx_math_fp_macro.cpp
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// expected-no-diagnostics
+
+#include <cmath>
+// TODO: How to include a "mock systme cmath" here for testing?
+
+int main() {
+  double a(0);
+  return (std::fpclassify(a) != FP_ZERO);
+}
Index: clang/lib/Headers/openmp_wrappers/math.h
===================================================================
--- clang/lib/Headers/openmp_wrappers/math.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*===------------- math.h - Alternative math.h header ----------------------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===-----------------------------------------------------------------------===
- */
-
-#include <__clang_openmp_math.h>
-
-#ifndef __CLANG_NO_HOST_MATH__
-#include_next <math.h>
-#else
-#undef __CLANG_NO_HOST_MATH__
-#endif
-
Index: clang/lib/Headers/openmp_wrappers/cmath
===================================================================
--- clang/lib/Headers/openmp_wrappers/cmath
+++ /dev/null
@@ -1,16 +0,0 @@
-/*===-------------- cmath - Alternative cmath header -----------------------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===-----------------------------------------------------------------------===
- */
-
-#include <__clang_openmp_math.h>
-
-#ifndef __CLANG_NO_HOST_MATH__
-#include_next <cmath>
-#else
-#undef __CLANG_NO_HOST_MATH__
-#endif
Index: clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h
===================================================================
--- clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*===---- __clang_openmp_math_declares.h - OpenMP math declares ------------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===-----------------------------------------------------------------------===
- */
-
-#ifndef __CLANG_OPENMP_MATH_DECLARES_H__
-#define __CLANG_OPENMP_MATH_DECLARES_H__
-
-#ifndef _OPENMP
-#error "This file is for OpenMP compilation only."
-#endif
-
-#if defined(__NVPTX__) && defined(_OPENMP)
-
-#define __CUDA__
-
-#if defined(__cplusplus)
-  #include <__clang_cuda_math_forward_declares.h>
-#endif
-
-/// Include declarations for libdevice functions.
-#include <__clang_cuda_libdevice_declares.h>
-/// Provide definitions for these functions.
-#include <__clang_cuda_device_functions.h>
-
-#undef __CUDA__
-
-#endif
-#endif
Index: clang/lib/Headers/openmp_wrappers/__clang_openmp_math.h
===================================================================
--- clang/lib/Headers/openmp_wrappers/__clang_openmp_math.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*===---- __clang_openmp_math.h - OpenMP target math support ---------------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===-----------------------------------------------------------------------===
- */
-
-#if defined(__NVPTX__) && defined(_OPENMP)
-/// TODO:
-/// We are currently reusing the functionality of the Clang-CUDA code path
-/// as an alternative to the host declarations provided by math.h and cmath.
-/// This is suboptimal.
-///
-/// We should instead declare the device functions in a similar way, e.g.,
-/// through OpenMP 5.0 variants, and afterwards populate the module with the
-/// host declarations by unconditionally including the host math.h or cmath,
-/// respectively. This is actually what the Clang-CUDA code path does, using
-/// __device__ instead of variants to avoid redeclarations and get the desired
-/// overload resolution.
-
-#define __CUDA__
-
-#if defined(__cplusplus)
-  #include <__clang_cuda_cmath.h>
-#endif
-
-#undef __CUDA__
-
-/// Magic macro for stopping the math.h/cmath host header from being included.
-#define __CLANG_NO_HOST_MATH__
-
-#endif
-
Index: clang/lib/Headers/cuda_wrappers/new
===================================================================
--- clang/lib/Headers/cuda_wrappers/new
+++ clang/lib/Headers/cuda_wrappers/new
@@ -33,66 +33,77 @@
 #define CUDA_NOEXCEPT
 #endif
 
+#ifdef _OPENMP
+#define __DEVICE__
+#pragma omp begin declare variant match(device={arch(nvptx)})
+#else
+#define __DEVICE__ __device__
+#endif
+
 // Device overrides for non-placement new and delete.
-__device__ inline void *operator new(__SIZE_TYPE__ size) {
+DEVICE inline void *operator new(__SIZE_TYPE__ size) {
   if (size == 0) {
     size = 1;
   }
   return ::malloc(size);
 }
-__device__ inline void *operator new(__SIZE_TYPE__ size,
+DEVICE inline void *operator new(__SIZE_TYPE__ size,
                                      const std::nothrow_t &) CUDA_NOEXCEPT {
   return ::operator new(size);
 }
 
-__device__ inline void *operator new[](__SIZE_TYPE__ size) {
+DEVICE inline void *operator new[](__SIZE_TYPE__ size) {
   return ::operator new(size);
 }
-__device__ inline void *operator new[](__SIZE_TYPE__ size,
+DEVICE inline void *operator new[](__SIZE_TYPE__ size,
                                        const std::nothrow_t &) {
   return ::operator new(size);
 }
 
-__device__ inline void operator delete(void* ptr) CUDA_NOEXCEPT {
+DEVICE inline void operator delete(void* ptr) CUDA_NOEXCEPT {
   if (ptr) {
     ::free(ptr);
   }
 }
-__device__ inline void operator delete(void *ptr,
+DEVICE inline void operator delete(void *ptr,
                                        const std::nothrow_t &) CUDA_NOEXCEPT {
   ::operator delete(ptr);
 }
 
-__device__ inline void operator delete[](void* ptr) CUDA_NOEXCEPT {
+DEVICE inline void operator delete[](void* ptr) CUDA_NOEXCEPT {
   ::operator delete(ptr);
 }
-__device__ inline void operator delete[](void *ptr,
+DEVICE inline void operator delete[](void *ptr,
                                          const std::nothrow_t &) CUDA_NOEXCEPT {
   ::operator delete(ptr);
 }
 
 // Sized delete, C++14 only.
 #if __cplusplus >= 201402L
-__device__ inline void operator delete(void *ptr,
+DEVICE inline void operator delete(void *ptr,
                                        __SIZE_TYPE__ size) CUDA_NOEXCEPT {
   ::operator delete(ptr);
 }
-__device__ inline void operator delete[](void *ptr,
+DEVICE inline void operator delete[](void *ptr,
                                          __SIZE_TYPE__ size) CUDA_NOEXCEPT {
   ::operator delete(ptr);
 }
 #endif
 
 // Device overrides for placement new and delete.
-__device__ inline void *operator new(__SIZE_TYPE__, void *__ptr) CUDA_NOEXCEPT {
+DEVICE inline void *operator new(__SIZE_TYPE__, void *__ptr) CUDA_NOEXCEPT {
   return __ptr;
 }
-__device__ inline void *operator new[](__SIZE_TYPE__, void *__ptr) CUDA_NOEXCEPT {
+DEVICE inline void *operator new[](__SIZE_TYPE__, void *__ptr) CUDA_NOEXCEPT {
   return __ptr;
 }
-__device__ inline void operator delete(void *, void *) CUDA_NOEXCEPT {}
-__device__ inline void operator delete[](void *, void *) CUDA_NOEXCEPT {}
+DEVICE inline void operator delete(void *, void *) CUDA_NOEXCEPT {}
+DEVICE inline void operator delete[](void *, void *) CUDA_NOEXCEPT {}
 
 #pragma pop_macro("CUDA_NOEXCEPT")
 
+#ifdef _OPENMP
+#pragma omp end declare variant
+#endif
+
 #endif // include guard
Index: clang/lib/Headers/__clang_cuda_runtime_wrapper.h
===================================================================
--- clang/lib/Headers/__clang_cuda_runtime_wrapper.h
+++ clang/lib/Headers/__clang_cuda_runtime_wrapper.h
@@ -220,6 +220,9 @@
 #if CUDA_VERSION < 9000
 #include "math_functions.hpp"
 #endif
+#ifdef _OPENMP
+#pragma omp begin declare variant match(device={arch(nvptx)})
+#endif
 // Alas, additional overloads for these functions are hard to get to.
 // Considering that we only need these overloads for a few functions,
 // we can provide them here.
@@ -240,6 +243,9 @@
 // CUDA headers. Alas, device_functions.hpp included below needs it.
 static inline __device__ void __brkpt(int __c) { __brkpt(); }
 #endif
+#ifdef _OPENMP
+#pragma omp end declare variant
+#endif
 
 // Now include *.hpp with definitions of various GPU functions.  Alas,
 // a lot of thins get declared/defined with __host__ attribute which
@@ -331,6 +337,10 @@
 #undef __CUDABE__
 #define __CUDACC__
 
+#ifdef _OPENMP
+#pragma omp begin declare variant match(device={arch(nvptx)})
+#endif
+
 extern "C" {
 // Device-side CUDA system calls.
 // http://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls
@@ -392,6 +402,10 @@
   return dim3(x, y, z);
 }
 
+#ifdef _OPENMP
+#pragma omp end declare variant
+#endif
+
 #include <__clang_cuda_cmath.h>
 #include <__clang_cuda_intrinsics.h>
 #include <__clang_cuda_complex_builtins.h>
Index: clang/lib/Headers/__clang_cuda_math_forward_declares.h
===================================================================
--- clang/lib/Headers/__clang_cuda_math_forward_declares.h
+++ clang/lib/Headers/__clang_cuda_math_forward_declares.h
@@ -22,35 +22,15 @@
 #pragma push_macro("__DEVICE__")
 #ifdef _OPENMP
 #define __DEVICE__ static __inline__ __attribute__((always_inline))
+#pragma omp begin declare variant match(device={arch(nvptx)})
 #else
 #define __DEVICE__                                                             \
   static __inline__ __attribute__((always_inline)) __attribute__((device))
 #endif
 
-// For C++ 17 we need to include noexcept attribute to be compatible
-// with the header-defined version. This may be removed once
-// variant is supported.
-#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L
-#define __NOEXCEPT noexcept
-#else
-#define __NOEXCEPT
-#endif
-
-#if !(defined(_OPENMP) && defined(__cplusplus))
 __DEVICE__ long abs(long);
 __DEVICE__ long long abs(long long);
-__DEVICE__ double abs(double);
-__DEVICE__ float abs(float);
-#endif
-// While providing the CUDA declarations and definitions for math functions,
-// we may manually define additional functions.
-// TODO: Once variant is supported the additional functions will have
-// to be removed.
-#if defined(_OPENMP) && defined(__cplusplus)
-__DEVICE__ const double abs(const double);
-__DEVICE__ const float abs(const float);
-#endif
-__DEVICE__ int abs(int) __NOEXCEPT;
+__DEVICE__ int abs(int);
 __DEVICE__ double acos(double);
 __DEVICE__ float acos(float);
 __DEVICE__ double acosh(double);
@@ -85,8 +65,8 @@
 __DEVICE__ float exp(float);
 __DEVICE__ double expm1(double);
 __DEVICE__ float expm1(float);
-__DEVICE__ double fabs(double) __NOEXCEPT;
-__DEVICE__ float fabs(float) __NOEXCEPT;
+__DEVICE__ double fabs(double);
+__DEVICE__ float fabs(float);
 __DEVICE__ double fdim(double, double);
 __DEVICE__ float fdim(float, float);
 __DEVICE__ double floor(double);
@@ -136,12 +116,12 @@
 __DEVICE__ bool isnormal(float);
 __DEVICE__ bool isunordered(double, double);
 __DEVICE__ bool isunordered(float, float);
-__DEVICE__ long labs(long) __NOEXCEPT;
+__DEVICE__ long labs(long);
 __DEVICE__ double ldexp(double, int);
 __DEVICE__ float ldexp(float, int);
 __DEVICE__ double lgamma(double);
 __DEVICE__ float lgamma(float);
-__DEVICE__ long long llabs(long long) __NOEXCEPT;
+__DEVICE__ long long llabs(long long);
 __DEVICE__ long long llrint(double);
 __DEVICE__ long long llrint(float);
 __DEVICE__ double log10(double);
@@ -152,9 +132,6 @@
 __DEVICE__ float log2(float);
 __DEVICE__ double logb(double);
 __DEVICE__ float logb(float);
-#if defined(_OPENMP) && defined(__cplusplus)
-__DEVICE__ long double log(long double);
-#endif
 __DEVICE__ double log(double);
 __DEVICE__ float log(float);
 __DEVICE__ long lrint(double);
@@ -302,7 +279,10 @@
 } // namespace std
 #endif
 
-#undef __NOEXCEPT
 #pragma pop_macro("__DEVICE__")
 
+#ifdef _OPENMP
+#pragma omp end declare variant
+#endif
+
 #endif
Index: clang/lib/Headers/__clang_cuda_libdevice_declares.h
===================================================================
--- clang/lib/Headers/__clang_cuda_libdevice_declares.h
+++ clang/lib/Headers/__clang_cuda_libdevice_declares.h
@@ -16,6 +16,7 @@
 
 #if defined(_OPENMP)
 #define __DEVICE__
+#pragma omp begin declare variant match(device={arch(nvptx)})
 #elif defined(__CUDA__)
 #define __DEVICE__ __device__
 #endif
@@ -459,4 +460,9 @@
 #if defined(__cplusplus)
 } // extern "C"
 #endif
+
+#ifdef _OPENMP
+#pragma omp end declare variant
+#endif
+
 #endif // __CLANG_CUDA_LIBDEVICE_DECLARES_H__
Index: clang/lib/Headers/__clang_cuda_device_functions.h
===================================================================
--- clang/lib/Headers/__clang_cuda_device_functions.h
+++ clang/lib/Headers/__clang_cuda_device_functions.h
@@ -22,6 +22,7 @@
 #pragma push_macro("__DEVICE__")
 #ifdef _OPENMP
 #define __DEVICE__ static __attribute__((always_inline))
+#pragma omp begin declare variant match(device={arch(nvptx)})
 #else
 #define __DEVICE__ static __device__ __forceinline__
 #endif
@@ -37,15 +38,6 @@
 #define __FAST_OR_SLOW(fast, slow) slow
 #endif
 
-// For C++ 17 we need to include noexcept attribute to be compatible
-// with the header-defined version. This may be removed once
-// variant is supported.
-#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L
-#define __NOEXCEPT noexcept
-#else
-#define __NOEXCEPT
-#endif
-
 __DEVICE__ int __all(int __a) { return __nvvm_vote_all(__a); }
 __DEVICE__ int __any(int __a) { return __nvvm_vote_any(__a); }
 __DEVICE__ unsigned int __ballot(int __a) { return __nvvm_vote_ballot(__a); }
@@ -53,13 +45,8 @@
 __DEVICE__ unsigned long long __brevll(unsigned long long __a) {
   return __nv_brevll(__a);
 }
-#if defined(__cplusplus)
 __DEVICE__ void __brkpt() { asm volatile("brkpt;"); }
 __DEVICE__ void __brkpt(int __a) { __brkpt(); }
-#else
-__DEVICE__ void __attribute__((overloadable)) __brkpt(void) { asm volatile("brkpt;"); }
-__DEVICE__ void __attribute__((overloadable)) __brkpt(int __a) { __brkpt(); }
-#endif
 __DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b,
                                     unsigned int __c) {
   return __nv_byte_perm(__a, __b, __c);
@@ -1483,8 +1470,8 @@
   return r;
 }
 #endif // CUDA_VERSION >= 9020
-__DEVICE__ int abs(int __a) __NOEXCEPT { return __nv_abs(__a); }
-__DEVICE__ double fabs(double __a) __NOEXCEPT { return __nv_fabs(__a); }
+__DEVICE__ int abs(int __a) { return __nv_abs(__a); }
+__DEVICE__ double fabs(double __a) { return __nv_fabs(__a); }
 __DEVICE__ double acos(double __a) { return __nv_acos(__a); }
 __DEVICE__ float acosf(float __a) { return __nv_acosf(__a); }
 __DEVICE__ double acosh(double __a) { return __nv_acosh(__a); }
@@ -1503,10 +1490,8 @@
 __DEVICE__ float cbrtf(float __a) { return __nv_cbrtf(__a); }
 __DEVICE__ double ceil(double __a) { return __nv_ceil(__a); }
 __DEVICE__ float ceilf(float __a) { return __nv_ceilf(__a); }
-#ifndef _OPENMP
 __DEVICE__ int clock() { return __nvvm_read_ptx_sreg_clock(); }
 __DEVICE__ long long clock64() { return __nvvm_read_ptx_sreg_clock64(); }
-#endif
 __DEVICE__ double copysign(double __a, double __b) {
   return __nv_copysign(__a, __b);
 }
@@ -1581,15 +1566,15 @@
 __DEVICE__ double jn(int __n, double __a) { return __nv_jn(__n, __a); }
 __DEVICE__ float jnf(int __n, float __a) { return __nv_jnf(__n, __a); }
 #if defined(__LP64__) || defined(_WIN64)
-__DEVICE__ long labs(long __a) __NOEXCEPT { return __nv_llabs(__a); };
+__DEVICE__ long labs(long __a) { return __nv_llabs(__a); };
 #else
-__DEVICE__ long labs(long __a) __NOEXCEPT { return __nv_abs(__a); };
+__DEVICE__ long labs(long __a) { return __nv_abs(__a); };
 #endif
 __DEVICE__ double ldexp(double __a, int __b) { return __nv_ldexp(__a, __b); }
 __DEVICE__ float ldexpf(float __a, int __b) { return __nv_ldexpf(__a, __b); }
 __DEVICE__ double lgamma(double __a) { return __nv_lgamma(__a); }
 __DEVICE__ float lgammaf(float __a) { return __nv_lgammaf(__a); }
-__DEVICE__ long long llabs(long long __a) __NOEXCEPT { return __nv_llabs(__a); }
+__DEVICE__ long long llabs(long long __a) { return __nv_llabs(__a); }
 __DEVICE__ long long llmax(long long __a, long long __b) {
   return __nv_llmax(__a, __b);
 }
@@ -1626,16 +1611,12 @@
 __DEVICE__ long lroundf(float __a) { return roundf(__a); }
 #endif
 __DEVICE__ int max(int __a, int __b) { return __nv_max(__a, __b); }
-// These functions shouldn't be declared when including this header
-// for math function resolution purposes.
-#ifndef _OPENMP
 __DEVICE__ void *memcpy(void *__a, const void *__b, size_t __c) {
   return __builtin_memcpy(__a, __b, __c);
 }
 __DEVICE__ void *memset(void *__a, int __b, size_t __c) {
   return __builtin_memset(__a, __b, __c);
 }
-#endif
 __DEVICE__ int min(int __a, int __b) { return __nv_min(__a, __b); }
 __DEVICE__ double modf(double __a, double *__b) { return __nv_modf(__a, __b); }
 __DEVICE__ float modff(float __a, float *__b) { return __nv_modff(__a, __b); }
@@ -1719,8 +1700,6 @@
 __DEVICE__ float rsqrtf(float __a) { return __nv_rsqrtf(__a); }
 __DEVICE__ double scalbn(double __a, int __b) { return __nv_scalbn(__a, __b); }
 __DEVICE__ float scalbnf(float __a, int __b) { return __nv_scalbnf(__a, __b); }
-// TODO: remove once variant is supported
-#ifndef _OPENMP
 __DEVICE__ double scalbln(double __a, long __b) {
   if (__b > INT_MAX)
     return __a > 0 ? HUGE_VAL : -HUGE_VAL;
@@ -1735,7 +1714,6 @@
     return __a > 0 ? 0.f : -0.f;
   return scalbnf(__a, (int)__b);
 }
-#endif
 __DEVICE__ double sin(double __a) { return __nv_sin(__a); }
 __DEVICE__ void sincos(double __a, double *__s, double *__c) {
   return __nv_sincos(__a, __s, __c);
@@ -1787,7 +1765,11 @@
 __DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); }
 __DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); }
 
-#undef __NOEXCEPT
 #pragma pop_macro("__DEVICE__")
 #pragma pop_macro("__FAST_OR_SLOW")
+
+#ifdef _OPENMP
+#pragma omp end declare variant
+#endif
+
 #endif // __CLANG_CUDA_DEVICE_FUNCTIONS_H__
Index: clang/lib/Headers/__clang_cuda_cmath.h
===================================================================
--- clang/lib/Headers/__clang_cuda_cmath.h
+++ clang/lib/Headers/__clang_cuda_cmath.h
@@ -32,30 +32,15 @@
 
 #ifdef _OPENMP
 #define __DEVICE__ static __attribute__((always_inline))
+#pragma omp begin declare variant match(device={arch(nvptx)})
 #else
 #define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
 #endif
 
-// For C++ 17 we need to include noexcept attribute to be compatible
-// with the header-defined version. This may be removed once
-// variant is supported.
-#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L
-#define __NOEXCEPT noexcept
-#else
-#define __NOEXCEPT
-#endif
-
-#if !(defined(_OPENMP) && defined(__cplusplus))
 __DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
 __DEVICE__ long abs(long __n) { return ::labs(__n); }
 __DEVICE__ float abs(float __x) { return ::fabsf(__x); }
 __DEVICE__ double abs(double __x) { return ::fabs(__x); }
-#endif
-// TODO: remove once variat is supported.
-#if defined(_OPENMP) && defined(__cplusplus)
-__DEVICE__ const float abs(const float __x) { return ::fabsf((float)__x); }
-__DEVICE__ const double abs(const double __x) { return ::fabs((double)__x); }
-#endif
 __DEVICE__ float acos(float __x) { return ::acosf(__x); }
 __DEVICE__ float asin(float __x) { return ::asinf(__x); }
 __DEVICE__ float atan(float __x) { return ::atanf(__x); }
@@ -64,11 +49,9 @@
 __DEVICE__ float cos(float __x) { return ::cosf(__x); }
 __DEVICE__ float cosh(float __x) { return ::coshf(__x); }
 __DEVICE__ float exp(float __x) { return ::expf(__x); }
-__DEVICE__ float fabs(float __x) __NOEXCEPT { return ::fabsf(__x); }
+__DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
 __DEVICE__ float floor(float __x) { return ::floorf(__x); }
 __DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
-// TODO: remove when variant is supported
-#ifndef _OPENMP
 __DEVICE__ int fpclassify(float __x) {
   return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                               FP_ZERO, __x);
@@ -77,7 +60,6 @@
   return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                               FP_ZERO, __x);
 }
-#endif
 __DEVICE__ float frexp(float __arg, int *__exp) {
   return ::frexpf(__arg, __exp);
 }
@@ -457,10 +439,7 @@
 using ::remquof;
 using ::rintf;
 using ::roundf;
-// TODO: remove once variant is supported
-#ifndef _OPENMP
 using ::scalblnf;
-#endif
 using ::scalbnf;
 using ::sinf;
 using ::sinhf;
@@ -479,7 +458,10 @@
 } // namespace std
 #endif
 
-#undef __NOEXCEPT
 #undef __DEVICE__
 
+#ifdef _OPENMP
+#pragma omp end declare variant
+#endif
+
 #endif
Index: clang/lib/Headers/CMakeLists.txt
===================================================================
--- clang/lib/Headers/CMakeLists.txt
+++ clang/lib/Headers/CMakeLists.txt
@@ -139,14 +139,6 @@
   ppc_wrappers/smmintrin.h
 )
 
-set(openmp_wrapper_files
-  openmp_wrappers/math.h
-  openmp_wrappers/cmath
-  openmp_wrappers/__clang_openmp_math.h
-  openmp_wrappers/__clang_openmp_math_declares.h
-  openmp_wrappers/new
-)
-
 set(output_dir ${LLVM_LIBRARY_OUTPUT_INTDIR}/clang/${CLANG_VERSION}/include)
 set(out_files)
 set(generated_files)
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -1210,13 +1210,13 @@
       // standard library headers.
       SmallString<128> P(D.ResourceDir);
       llvm::sys::path::append(P, "include");
-      llvm::sys::path::append(P, "openmp_wrappers");
+      llvm::sys::path::append(P, "cuda_wrappers");
       CmdArgs.push_back("-internal-isystem");
       CmdArgs.push_back(Args.MakeArgString(P));
     }
 
     CmdArgs.push_back("-include");
-    CmdArgs.push_back("__clang_openmp_math_declares.h");
+    CmdArgs.push_back("__clang_cuda_runtime_wrapper.h");
   }
 
   // Add -i* options, and automatically translate to

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75788: [WIP][OpenMP] Reuse CUDA wrappers in `nvptx` target regions.

Reply via email to