[PATCH] D75788: [WIP][OpenMP] Reuse CUDA wrappers in `nvptx` target regions.

Johannes Doerfert via Phabricator via cfe-commits Thu, 12 Mar 2020 13:06:12 -0700

jdoerfert updated this revision to Diff 250035.
jdoerfert marked an inline comment as done.
jdoerfert added a comment.


Adjust to new scheme, tested locally with some math functions, seems to work


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75788/new/

https://reviews.llvm.org/D75788

Files:
  clang/lib/AST/ASTContext.cpp
  clang/lib/AST/ItaniumMangle.cpp
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/__clang_cuda_cmath.h
  clang/lib/Headers/__clang_cuda_device_functions.h
  clang/lib/Headers/__clang_cuda_math_forward_declares.h
  clang/lib/Headers/openmp_wrappers/__clang_openmp_math.h
  clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h
  clang/lib/Headers/openmp_wrappers/cmath
  clang/lib/Headers/openmp_wrappers/math.h
  clang/test/Headers/nvptx_device_cmath_functions.c
  clang/test/OpenMP/target_nvptx_math_complex.c
  clang/test/OpenMP/target_nvptx_math_fp_macro.cpp
  clang/test/OpenMP/target_nvptx_math_sin.c

Index: clang/test/OpenMP/target_nvptx_math_sin.c
===================================================================
--- /dev/null
+++ clang/test/OpenMP/target_nvptx_math_sin.c
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -internal-isystem %S -verify -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -internal-isystem %S -include openmp_wrappers/__clang_openmp_math_declares.h -verify -fopenmp -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -aux-triple powerpc64le-unknown-unknown -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// expected-no-diagnostics
+
+#include <math.h>
+
+double math(float f, double d, long double ld) {
+  double r = 0;
+  r += sin(f);
+  r += sin(d);
+  return r;
+}
+
+long double foo(float f, double d, long double ld) {
+  double r = ld;
+  r += math(f, d, ld);
+#pragma omp target map(r)
+  { r += math(f, d, ld); }
+  return r;
+}
Index: clang/test/OpenMP/target_nvptx_math_fp_macro.cpp
===================================================================
--- /dev/null
+++ clang/test/OpenMP/target_nvptx_math_fp_macro.cpp
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// expected-no-diagnostics
+
+#include <cmath>
+// TODO: How to include a "mock systme cmath" here for testing?
+
+int main() {
+  double a(0);
+  return (std::fpclassify(a) != FP_ZERO);
+}
Index: clang/test/OpenMP/target_nvptx_math_complex.c
===================================================================
--- /dev/null
+++ clang/test/OpenMP/target_nvptx_math_complex.c
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// expected-no-diagnostics
+
+// CHECK-DAG: call { float, float } @__divsc3(
+// CHECK-DAG: call { float, float } @__mulsc3(
+void test_scmplx(float _Complex a) {
+#pragma omp target
+  {
+    (void)(a * (a / a));
+  }
+}
+
+
+// CHECK-DAG: call { double, double } @__divdc3(
+// CHECK-DAG: call { double, double } @__muldc3(
+void test_dcmplx(double _Complex a) {
+#pragma omp target
+  {
+    (void)(a * (a / a));
+  }
+}
Index: clang/test/Headers/nvptx_device_cmath_functions.c
===================================================================
--- clang/test/Headers/nvptx_device_cmath_functions.c
+++ clang/test/Headers/nvptx_device_cmath_functions.c
@@ -4,7 +4,7 @@
 // REQUIRES: nvptx-registered-target
 
 // RUN: %clang_cc1 -internal-isystem %S/Inputs/include -include cmath -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -include cmath -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s
+// RUN: %clang_cc1 -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s
 
 #include <cmath>
 
Index: clang/lib/Headers/openmp_wrappers/math.h
===================================================================
--- clang/lib/Headers/openmp_wrappers/math.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*===------------- math.h - Alternative math.h header ----------------------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===-----------------------------------------------------------------------===
- */
-
-#include <__clang_openmp_math.h>
-
-#ifndef __CLANG_NO_HOST_MATH__
-#include_next <math.h>
-#else
-#undef __CLANG_NO_HOST_MATH__
-#endif
-
Index: clang/lib/Headers/openmp_wrappers/cmath
===================================================================
--- clang/lib/Headers/openmp_wrappers/cmath
+++ clang/lib/Headers/openmp_wrappers/cmath
@@ -1,4 +1,4 @@
-/*===-------------- cmath - Alternative cmath header -----------------------===
+/*===---- __clang_openmp_math_declares.h - OpenMP math declares ------ c++ -===
  *
  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  * See https://llvm.org/LICENSE.txt for license information.
@@ -7,10 +7,34 @@
  *===-----------------------------------------------------------------------===
  */
 
-#include <__clang_openmp_math.h>
+#ifndef __CLANG_OPENMP_CMATH_H__
+#define __CLANG_OPENMP_CMATH_H__
+
+#ifndef _OPENMP
+#error "This file is for OpenMP compilation only."
+#endif
 
-#ifndef __CLANG_NO_HOST_MATH__
 #include_next <cmath>
-#else
-#undef __CLANG_NO_HOST_MATH__
+
+#pragma omp begin declare variant match(device={arch(nvptx64)})
+#define __CUDA__
+
+/*#include <__clang_cuda_math_forward_declares.h>*/
+#include <__clang_cuda_cmath.h>
+
+#undef __CUDA__
+// TODO: Hack until we support an extension to the match clause that allows "or".
+/*#undef __CLANG__CUDA_MATH_FORWARD_DECLARES_H__*/
+#undef __CLANG_CUDA_CMATH_H__
+#pragma omp end declare variant
+
+#pragma omp begin declare variant match(device={arch(nvptx)})
+#define __CUDA__
+
+/*#include <__clang_cuda_math_forward_declares.h>*/
+#include <__clang_cuda_cmath.h>
+
+#undef __CUDA__
+#pragma omp end declare variant
+
 #endif
Index: clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h
===================================================================
--- clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h
+++ clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h
@@ -14,20 +14,58 @@
 #error "This file is for OpenMP compilation only."
 #endif
 
-#if defined(__NVPTX__) && defined(_OPENMP)
+/**
+ * A positive float constant expression. HUGE_VALF evaluates
+ * to +infinity. Used as an error value returned by the built-in
+ * math functions.
+ */
+#define HUGE_VALF (__builtin_huge_valf())
 
-#define __CUDA__
+/**
+ * A positive double constant expression. HUGE_VAL evaluates
+ * to +infinity. Used as an error value returned by the built-in
+ * math functions.
+ */
+#define HUGE_VAL (__builtin_huge_val())
 
 #if defined(__cplusplus)
-  #include <__clang_cuda_math_forward_declares.h>
+  #include <limits>
+  #include <climits>
+#else
+  #include <limits.h>
 #endif
 
+#pragma omp begin declare variant match(device={arch(nvptx64)})
+#define __CUDA__
+
 /// Include declarations for libdevice functions.
 #include <__clang_cuda_libdevice_declares.h>
+
 /// Provide definitions for these functions.
 #include <__clang_cuda_device_functions.h>
 
 #undef __CUDA__
+// TODO: Hack until we support an extension to the match clause that allows "or".
+#undef __CLANG_CUDA_LIBDEVICE_DECLARES_H__
+#undef __CLANG_CUDA_DEVICE_FUNCTIONS_H__
+#pragma omp end declare variant
+
+#pragma omp begin declare variant match(device={arch(nvptx)})
+#define __CUDA__
+
+#include <limits.h>
+
+/// Include declarations for libdevice functions.
+#include <__clang_cuda_libdevice_declares.h>
+
+/// Provide definitions for these functions.
+#include <__clang_cuda_device_functions.h>
+
+#undef __CUDA__
+#pragma omp end declare variant
+
+
+#undef HUGE_VAL
+#undef HUGE_VALF
 
-#endif
 #endif
Index: clang/lib/Headers/openmp_wrappers/__clang_openmp_math.h
===================================================================
--- clang/lib/Headers/openmp_wrappers/__clang_openmp_math.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*===---- __clang_openmp_math.h - OpenMP target math support ---------------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===-----------------------------------------------------------------------===
- */
-
-#if defined(__NVPTX__) && defined(_OPENMP)
-/// TODO:
-/// We are currently reusing the functionality of the Clang-CUDA code path
-/// as an alternative to the host declarations provided by math.h and cmath.
-/// This is suboptimal.
-///
-/// We should instead declare the device functions in a similar way, e.g.,
-/// through OpenMP 5.0 variants, and afterwards populate the module with the
-/// host declarations by unconditionally including the host math.h or cmath,
-/// respectively. This is actually what the Clang-CUDA code path does, using
-/// __device__ instead of variants to avoid redeclarations and get the desired
-/// overload resolution.
-
-#define __CUDA__
-
-#if defined(__cplusplus)
-  #include <__clang_cuda_cmath.h>
-#endif
-
-#undef __CUDA__
-
-/// Magic macro for stopping the math.h/cmath host header from being included.
-#define __CLANG_NO_HOST_MATH__
-
-#endif
-
Index: clang/lib/Headers/__clang_cuda_math_forward_declares.h
===================================================================
--- clang/lib/Headers/__clang_cuda_math_forward_declares.h
+++ clang/lib/Headers/__clang_cuda_math_forward_declares.h
@@ -27,30 +27,11 @@
   static __inline__ __attribute__((always_inline)) __attribute__((device))
 #endif
 
-// For C++ 17 we need to include noexcept attribute to be compatible
-// with the header-defined version. This may be removed once
-// variant is supported.
-#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L
-#define __NOEXCEPT noexcept
-#else
-#define __NOEXCEPT
-#endif
-
-#if !(defined(_OPENMP) && defined(__cplusplus))
 __DEVICE__ long abs(long);
 __DEVICE__ long long abs(long long);
 __DEVICE__ double abs(double);
 __DEVICE__ float abs(float);
-#endif
-// While providing the CUDA declarations and definitions for math functions,
-// we may manually define additional functions.
-// TODO: Once variant is supported the additional functions will have
-// to be removed.
-#if defined(_OPENMP) && defined(__cplusplus)
-__DEVICE__ const double abs(const double);
-__DEVICE__ const float abs(const float);
-#endif
-__DEVICE__ int abs(int) __NOEXCEPT;
+__DEVICE__ int abs(int);
 __DEVICE__ double acos(double);
 __DEVICE__ float acos(float);
 __DEVICE__ double acosh(double);
@@ -85,8 +66,8 @@
 __DEVICE__ float exp(float);
 __DEVICE__ double expm1(double);
 __DEVICE__ float expm1(float);
-__DEVICE__ double fabs(double) __NOEXCEPT;
-__DEVICE__ float fabs(float) __NOEXCEPT;
+__DEVICE__ double fabs(double);
+__DEVICE__ float fabs(float);
 __DEVICE__ double fdim(double, double);
 __DEVICE__ float fdim(float, float);
 __DEVICE__ double floor(double);
@@ -136,12 +117,12 @@
 __DEVICE__ bool isnormal(float);
 __DEVICE__ bool isunordered(double, double);
 __DEVICE__ bool isunordered(float, float);
-__DEVICE__ long labs(long) __NOEXCEPT;
+__DEVICE__ long labs(long);
 __DEVICE__ double ldexp(double, int);
 __DEVICE__ float ldexp(float, int);
 __DEVICE__ double lgamma(double);
 __DEVICE__ float lgamma(float);
-__DEVICE__ long long llabs(long long) __NOEXCEPT;
+__DEVICE__ long long llabs(long long);
 __DEVICE__ long long llrint(double);
 __DEVICE__ long long llrint(float);
 __DEVICE__ double log10(double);
@@ -152,9 +133,6 @@
 __DEVICE__ float log2(float);
 __DEVICE__ double logb(double);
 __DEVICE__ float logb(float);
-#if defined(_OPENMP) && defined(__cplusplus)
-__DEVICE__ long double log(long double);
-#endif
 __DEVICE__ double log(double);
 __DEVICE__ float log(float);
 __DEVICE__ long lrint(double);
@@ -188,6 +166,7 @@
 __DEVICE__ float scalbn(float, int);
 __DEVICE__ bool signbit(double);
 __DEVICE__ bool signbit(float);
+__DEVICE__ long double sin(long double);
 __DEVICE__ double sin(double);
 __DEVICE__ float sin(float);
 __DEVICE__ double sinh(double);
@@ -302,7 +281,6 @@
 } // namespace std
 #endif
 
-#undef __NOEXCEPT
 #pragma pop_macro("__DEVICE__")
 
 #endif
Index: clang/lib/Headers/__clang_cuda_device_functions.h
===================================================================
--- clang/lib/Headers/__clang_cuda_device_functions.h
+++ clang/lib/Headers/__clang_cuda_device_functions.h
@@ -37,15 +37,6 @@
 #define __FAST_OR_SLOW(fast, slow) slow
 #endif
 
-// For C++ 17 we need to include noexcept attribute to be compatible
-// with the header-defined version. This may be removed once
-// variant is supported.
-#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L
-#define __NOEXCEPT noexcept
-#else
-#define __NOEXCEPT
-#endif
-
 __DEVICE__ int __all(int __a) { return __nvvm_vote_all(__a); }
 __DEVICE__ int __any(int __a) { return __nvvm_vote_any(__a); }
 __DEVICE__ unsigned int __ballot(int __a) { return __nvvm_vote_ballot(__a); }
@@ -1483,8 +1474,8 @@
   return r;
 }
 #endif // CUDA_VERSION >= 9020
-__DEVICE__ int abs(int __a) __NOEXCEPT { return __nv_abs(__a); }
-__DEVICE__ double fabs(double __a) __NOEXCEPT { return __nv_fabs(__a); }
+__DEVICE__ int abs(int __a) { return __nv_abs(__a); }
+__DEVICE__ double fabs(double __a) { return __nv_fabs(__a); }
 __DEVICE__ double acos(double __a) { return __nv_acos(__a); }
 __DEVICE__ float acosf(float __a) { return __nv_acosf(__a); }
 __DEVICE__ double acosh(double __a) { return __nv_acosh(__a); }
@@ -1503,10 +1494,8 @@
 __DEVICE__ float cbrtf(float __a) { return __nv_cbrtf(__a); }
 __DEVICE__ double ceil(double __a) { return __nv_ceil(__a); }
 __DEVICE__ float ceilf(float __a) { return __nv_ceilf(__a); }
-#ifndef _OPENMP
 __DEVICE__ int clock() { return __nvvm_read_ptx_sreg_clock(); }
 __DEVICE__ long long clock64() { return __nvvm_read_ptx_sreg_clock64(); }
-#endif
 __DEVICE__ double copysign(double __a, double __b) {
   return __nv_copysign(__a, __b);
 }
@@ -1581,15 +1570,15 @@
 __DEVICE__ double jn(int __n, double __a) { return __nv_jn(__n, __a); }
 __DEVICE__ float jnf(int __n, float __a) { return __nv_jnf(__n, __a); }
 #if defined(__LP64__) || defined(_WIN64)
-__DEVICE__ long labs(long __a) __NOEXCEPT { return __nv_llabs(__a); };
+__DEVICE__ long labs(long __a) { return __nv_llabs(__a); };
 #else
-__DEVICE__ long labs(long __a) __NOEXCEPT { return __nv_abs(__a); };
+__DEVICE__ long labs(long __a) { return __nv_abs(__a); };
 #endif
 __DEVICE__ double ldexp(double __a, int __b) { return __nv_ldexp(__a, __b); }
 __DEVICE__ float ldexpf(float __a, int __b) { return __nv_ldexpf(__a, __b); }
 __DEVICE__ double lgamma(double __a) { return __nv_lgamma(__a); }
 __DEVICE__ float lgammaf(float __a) { return __nv_lgammaf(__a); }
-__DEVICE__ long long llabs(long long __a) __NOEXCEPT { return __nv_llabs(__a); }
+__DEVICE__ long long llabs(long long __a) { return __nv_llabs(__a); }
 __DEVICE__ long long llmax(long long __a, long long __b) {
   return __nv_llmax(__a, __b);
 }
@@ -1719,8 +1708,6 @@
 __DEVICE__ float rsqrtf(float __a) { return __nv_rsqrtf(__a); }
 __DEVICE__ double scalbn(double __a, int __b) { return __nv_scalbn(__a, __b); }
 __DEVICE__ float scalbnf(float __a, int __b) { return __nv_scalbnf(__a, __b); }
-// TODO: remove once variant is supported
-#ifndef _OPENMP
 __DEVICE__ double scalbln(double __a, long __b) {
   if (__b > INT_MAX)
     return __a > 0 ? HUGE_VAL : -HUGE_VAL;
@@ -1735,7 +1722,6 @@
     return __a > 0 ? 0.f : -0.f;
   return scalbnf(__a, (int)__b);
 }
-#endif
 __DEVICE__ double sin(double __a) { return __nv_sin(__a); }
 __DEVICE__ void sincos(double __a, double *__s, double *__c) {
   return __nv_sincos(__a, __s, __c);
@@ -1787,7 +1773,6 @@
 __DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); }
 __DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); }
 
-#undef __NOEXCEPT
 #pragma pop_macro("__DEVICE__")
 #pragma pop_macro("__FAST_OR_SLOW")
 #endif // __CLANG_CUDA_DEVICE_FUNCTIONS_H__
Index: clang/lib/Headers/__clang_cuda_cmath.h
===================================================================
--- clang/lib/Headers/__clang_cuda_cmath.h
+++ clang/lib/Headers/__clang_cuda_cmath.h
@@ -36,26 +36,10 @@
 #define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
 #endif
 
-// For C++ 17 we need to include noexcept attribute to be compatible
-// with the header-defined version. This may be removed once
-// variant is supported.
-#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L
-#define __NOEXCEPT noexcept
-#else
-#define __NOEXCEPT
-#endif
-
-#if !(defined(_OPENMP) && defined(__cplusplus))
 __DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
 __DEVICE__ long abs(long __n) { return ::labs(__n); }
 __DEVICE__ float abs(float __x) { return ::fabsf(__x); }
 __DEVICE__ double abs(double __x) { return ::fabs(__x); }
-#endif
-// TODO: remove once variat is supported.
-#if defined(_OPENMP) && defined(__cplusplus)
-__DEVICE__ const float abs(const float __x) { return ::fabsf((float)__x); }
-__DEVICE__ const double abs(const double __x) { return ::fabs((double)__x); }
-#endif
 __DEVICE__ float acos(float __x) { return ::acosf(__x); }
 __DEVICE__ float asin(float __x) { return ::asinf(__x); }
 __DEVICE__ float atan(float __x) { return ::atanf(__x); }
@@ -64,11 +48,9 @@
 __DEVICE__ float cos(float __x) { return ::cosf(__x); }
 __DEVICE__ float cosh(float __x) { return ::coshf(__x); }
 __DEVICE__ float exp(float __x) { return ::expf(__x); }
-__DEVICE__ float fabs(float __x) __NOEXCEPT { return ::fabsf(__x); }
+__DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
 __DEVICE__ float floor(float __x) { return ::floorf(__x); }
 __DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
-// TODO: remove when variant is supported
-#ifndef _OPENMP
 __DEVICE__ int fpclassify(float __x) {
   return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                               FP_ZERO, __x);
@@ -77,7 +59,6 @@
   return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                               FP_ZERO, __x);
 }
-#endif
 __DEVICE__ float frexp(float __arg, int *__exp) {
   return ::frexpf(__arg, __exp);
 }
@@ -322,6 +303,7 @@
   return std::scalbn((double)__x, __exp);
 }
 
+#ifndef _OPENMP
 // We need to define these overloads in exactly the namespace our standard
 // library uses (including the right inline namespace), otherwise they won't be
 // picked up by other functions in the standard library (e.g. functions in
@@ -457,10 +439,7 @@
 using ::remquof;
 using ::rintf;
 using ::roundf;
-// TODO: remove once variant is supported
-#ifndef _OPENMP
 using ::scalblnf;
-#endif
 using ::scalbnf;
 using ::sinf;
 using ::sinhf;
@@ -478,8 +457,8 @@
 #endif
 } // namespace std
 #endif
+#endif
 
-#undef __NOEXCEPT
 #undef __DEVICE__
 
 #endif
Index: clang/lib/Headers/CMakeLists.txt
===================================================================
--- clang/lib/Headers/CMakeLists.txt
+++ clang/lib/Headers/CMakeLists.txt
@@ -140,10 +140,8 @@
 )
 
 set(openmp_wrapper_files
-  openmp_wrappers/math.h
-  openmp_wrappers/cmath
-  openmp_wrappers/__clang_openmp_math.h
   openmp_wrappers/__clang_openmp_math_declares.h
+  openmp_wrappers/cmath
   openmp_wrappers/new
 )
 
Index: clang/lib/AST/ItaniumMangle.cpp
===================================================================
--- clang/lib/AST/ItaniumMangle.cpp
+++ clang/lib/AST/ItaniumMangle.cpp
@@ -2654,18 +2654,22 @@
     Out << 'd';
     break;
   case BuiltinType::LongDouble: {
-    const TargetInfo *TI = getASTContext().getLangOpts().OpenMP &&
-                                   getASTContext().getLangOpts().OpenMPIsDevice
-                               ? getASTContext().getAuxTargetInfo()
-                               : &getASTContext().getTargetInfo();
+    const TargetInfo *TI =
+        getASTContext().getLangOpts().OpenMP &&
+                getASTContext().getLangOpts().OpenMPIsDevice &&
+                getASTContext().getAuxTargetInfo()
+            ? getASTContext().getAuxTargetInfo()
+            : &getASTContext().getTargetInfo();
     Out << TI->getLongDoubleMangling();
     break;
   }
   case BuiltinType::Float128: {
-    const TargetInfo *TI = getASTContext().getLangOpts().OpenMP &&
-                                   getASTContext().getLangOpts().OpenMPIsDevice
-                               ? getASTContext().getAuxTargetInfo()
-                               : &getASTContext().getTargetInfo();
+    const TargetInfo *TI =
+        getASTContext().getLangOpts().OpenMP &&
+                getASTContext().getLangOpts().OpenMPIsDevice &&
+                getASTContext().getAuxTargetInfo()
+            ? getASTContext().getAuxTargetInfo()
+            : &getASTContext().getTargetInfo();
     Out << TI->getFloat128Mangling();
     break;
   }
Index: clang/lib/AST/ASTContext.cpp
===================================================================
--- clang/lib/AST/ASTContext.cpp
+++ clang/lib/AST/ASTContext.cpp
@@ -1618,11 +1618,11 @@
   case BuiltinType::Float:      return Target->getFloatFormat();
   case BuiltinType::Double:     return Target->getDoubleFormat();
   case BuiltinType::LongDouble:
-    if (getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice)
+    if (getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice && AuxTarget)
       return AuxTarget->getLongDoubleFormat();
     return Target->getLongDoubleFormat();
   case BuiltinType::Float128:
-    if (getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice)
+    if (getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice && AuxTarget)
       return AuxTarget->getFloat128Format();
     return Target->getFloat128Format();
   }
@@ -1994,7 +1994,7 @@
     case BuiltinType::Float16:
     case BuiltinType::Half:
       if (Target->hasFloat16Type() || !getLangOpts().OpenMP ||
-          !getLangOpts().OpenMPIsDevice) {
+          !getLangOpts().OpenMPIsDevice || !AuxTarget) {
         Width = Target->getHalfWidth();
         Align = Target->getHalfAlign();
       } else {
@@ -2013,7 +2013,7 @@
       Align = Target->getDoubleAlign();
       break;
     case BuiltinType::LongDouble:
-      if (getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice &&
+      if (getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice && AuxTarget &&
           (Target->getLongDoubleWidth() != AuxTarget->getLongDoubleWidth() ||
            Target->getLongDoubleAlign() != AuxTarget->getLongDoubleAlign())) {
         Width = AuxTarget->getLongDoubleWidth();
@@ -2025,7 +2025,7 @@
       break;
     case BuiltinType::Float128:
       if (Target->hasFloat128Type() || !getLangOpts().OpenMP ||
-          !getLangOpts().OpenMPIsDevice) {
+          !getLangOpts().OpenMPIsDevice || !AuxTarget) {
         Width = Target->getFloat128Width();
         Align = Target->getFloat128Align();
       } else {

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75788: [WIP][OpenMP] Reuse CUDA wrappers in `nvptx` target regions.

Reply via email to