tianshilei1992 updated this revision to Diff 317255.
tianshilei1992 added a comment.

Fixed compilation in C mode


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D94884/new/

https://reviews.llvm.org/D94884

Files:
  clang/lib/Headers/__clang_cuda_builtin_vars.h
  clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
  clang/test/Headers/nvptx_device_cmath_functions.c
  clang/test/Headers/nvptx_device_cmath_functions.cpp
  clang/test/Headers/nvptx_device_cmath_functions_cxx17.cpp
  clang/test/Headers/nvptx_device_math_complex.c
  clang/test/Headers/nvptx_device_math_complex.cpp
  clang/test/Headers/nvptx_device_math_functions.c
  clang/test/Headers/nvptx_device_math_functions.cpp
  clang/test/Headers/nvptx_device_math_functions_cxx17.cpp
  clang/test/Headers/nvptx_device_math_macro.cpp
  clang/test/Headers/nvptx_device_math_modf.cpp
  clang/test/Headers/nvptx_device_math_sin.c
  clang/test/Headers/nvptx_device_math_sin.cpp
  clang/test/Headers/nvptx_device_math_sin_cos.cpp
  clang/test/Headers/nvptx_device_math_sincos.cpp
  clang/test/Headers/openmp_device_math_isnan.cpp
  clang/test/OpenMP/assumes_include_nvptx.cpp
  clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp

Index: clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp
===================================================================
--- clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp
+++ clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown  -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
-// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
-// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
-// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fexceptions -fcxx-exceptions -aux-triple powerpc64le-unknown-unknown -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fdeclspec -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fdeclspec -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown  -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fdeclspec -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fdeclspec -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fdeclspec -fexceptions -fcxx-exceptions -aux-triple powerpc64le-unknown-unknown -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
Index: clang/test/OpenMP/assumes_include_nvptx.cpp
===================================================================
--- clang/test/OpenMP/assumes_include_nvptx.cpp
+++ clang/test/OpenMP/assumes_include_nvptx.cpp
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown  -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
-// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
-// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
-// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fexceptions -fcxx-exceptions -aux-triple powerpc64le-unknown-unknown -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fdeclspec -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fdeclspec -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown  -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fdeclspec -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fdeclspec -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fdeclspec -fexceptions -fcxx-exceptions -aux-triple powerpc64le-unknown-unknown -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
Index: clang/test/Headers/openmp_device_math_isnan.cpp
===================================================================
--- clang/test/Headers/openmp_device_math_isnan.cpp
+++ clang/test/Headers/openmp_device_math_isnan.cpp
@@ -1,11 +1,11 @@
 // RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=BOOL_RETURN
+// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -fdeclspec -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=BOOL_RETURN
 // RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast
-// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast | FileCheck %s --check-prefix=BOOL_RETURN
+// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -fdeclspec -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast | FileCheck %s --check-prefix=BOOL_RETURN
 // RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -DUSE_ISNAN_WITH_INT_RETURN
-// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=INT_RETURN
+// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -fdeclspec -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=INT_RETURN
 // RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN
-// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=INT_RETURN
+// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -fdeclspec -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=INT_RETURN
 // expected-no-diagnostics
 
 #include <cmath>
Index: clang/test/Headers/nvptx_device_math_sincos.cpp
===================================================================
--- clang/test/Headers/nvptx_device_math_sincos.cpp
+++ clang/test/Headers/nvptx_device_math_sincos.cpp
@@ -1,7 +1,7 @@
 // REQUIRES: nvptx-registered-target
 // RUN: %clang_cc1 -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers         -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
-// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -DCMATH -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers         -include __clang_openmp_device_functions.h -fdeclspec -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -DCMATH -include __clang_openmp_device_functions.h -fdeclspec -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
 
 #ifdef CMATH
 #include <cmath>
Index: clang/test/Headers/nvptx_device_math_sin_cos.cpp
===================================================================
--- clang/test/Headers/nvptx_device_math_sin_cos.cpp
+++ clang/test/Headers/nvptx_device_math_sin_cos.cpp
@@ -1,6 +1,6 @@
 // REQUIRES: nvptx-registered-target
 // RUN: %clang_cc1 -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fdeclspec -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
 
 #include <cmath>
 
Index: clang/test/Headers/nvptx_device_math_sin.cpp
===================================================================
--- clang/test/Headers/nvptx_device_math_sin.cpp
+++ clang/test/Headers/nvptx_device_math_sin.cpp
@@ -1,8 +1,8 @@
 // REQUIRES: nvptx-registered-target
 // RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=SLOW
+// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -fdeclspec -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=SLOW
 // RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast
-// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast | FileCheck %s --check-prefix=FAST
+// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -fdeclspec -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast | FileCheck %s --check-prefix=FAST
 // expected-no-diagnostics
 
 #include <cmath>
Index: clang/test/Headers/nvptx_device_math_sin.c
===================================================================
--- clang/test/Headers/nvptx_device_math_sin.c
+++ clang/test/Headers/nvptx_device_math_sin.c
@@ -1,8 +1,8 @@
 // REQUIRES: nvptx-registered-target
 // RUN: %clang_cc1 -x c -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -x c -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=SLOW
+// RUN: %clang_cc1 -x c -include __clang_openmp_device_functions.h -fdeclspec -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=SLOW
 // RUN: %clang_cc1 -x c -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast
-// RUN: %clang_cc1 -x c -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast | FileCheck %s --check-prefix=FAST
+// RUN: %clang_cc1 -x c -include __clang_openmp_device_functions.h -fdeclspec -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast | FileCheck %s --check-prefix=FAST
 // expected-no-diagnostics
 
 #include <math.h>
Index: clang/test/Headers/nvptx_device_math_modf.cpp
===================================================================
--- clang/test/Headers/nvptx_device_math_modf.cpp
+++ clang/test/Headers/nvptx_device_math_modf.cpp
@@ -1,6 +1,6 @@
 // REQUIRES: nvptx-registered-target
 // RUN: %clang_cc1 -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fdeclspec -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
 
 #include <cmath>
 
Index: clang/test/Headers/nvptx_device_math_macro.cpp
===================================================================
--- clang/test/Headers/nvptx_device_math_macro.cpp
+++ clang/test/Headers/nvptx_device_math_macro.cpp
@@ -1,6 +1,6 @@
 // REQUIRES: nvptx-registered-target
 // RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -fdeclspec -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
 // expected-no-diagnostics
 
 #include <cmath>
Index: clang/test/Headers/nvptx_device_math_functions_cxx17.cpp
===================================================================
--- clang/test/Headers/nvptx_device_math_functions_cxx17.cpp
+++ clang/test/Headers/nvptx_device_math_functions_cxx17.cpp
@@ -4,7 +4,7 @@
 // REQUIRES: nvptx-registered-target
 
 // RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -std=c++17
-// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -std=c++17 -o - | FileCheck -check-prefix CHECK-YES %s
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fdeclspec -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -std=c++17 -o - | FileCheck -check-prefix CHECK-YES %s
 
 #include <cstdlib>
 #include <cmath>
Index: clang/test/Headers/nvptx_device_math_functions.cpp
===================================================================
--- clang/test/Headers/nvptx_device_math_functions.cpp
+++ clang/test/Headers/nvptx_device_math_functions.cpp
@@ -4,7 +4,7 @@
 // REQUIRES: nvptx-registered-target
 
 // RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fdeclspec -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s
 
 #include <cstdlib>
 #include <cmath>
Index: clang/test/Headers/nvptx_device_math_functions.c
===================================================================
--- clang/test/Headers/nvptx_device_math_functions.c
+++ clang/test/Headers/nvptx_device_math_functions.c
@@ -4,9 +4,9 @@
 // REQUIRES: nvptx-registered-target
 
 // RUN: %clang_cc1 -x c -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -x c -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -x c -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fdeclspec -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
 // RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -x c++ -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -x c++ -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fdeclspec -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
 
 #ifdef __cplusplus
 #include <cstdlib>
Index: clang/test/Headers/nvptx_device_math_complex.cpp
===================================================================
--- clang/test/Headers/nvptx_device_math_complex.cpp
+++ clang/test/Headers/nvptx_device_math_complex.cpp
@@ -1,6 +1,6 @@
 // REQUIRES: nvptx-registered-target
 // RUN: %clang_cc1 -verify -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -aux-triple powerpc64le-unknown-unknown -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fdeclspec -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -aux-triple powerpc64le-unknown-unknown -o - | FileCheck %s
 // expected-no-diagnostics
 
 #include <cmath>
Index: clang/test/Headers/nvptx_device_math_complex.c
===================================================================
--- clang/test/Headers/nvptx_device_math_complex.c
+++ clang/test/Headers/nvptx_device_math_complex.c
@@ -1,8 +1,8 @@
 // REQUIRES: nvptx-registered-target
 // RUN: %clang_cc1 -verify -internal-isystem %S/Inputs/include -fopenmp -x c -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -x c -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -aux-triple powerpc64le-unknown-unknown -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fdeclspec -internal-isystem %S/Inputs/include -fopenmp -x c -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -aux-triple powerpc64le-unknown-unknown -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -aux-triple powerpc64le-unknown-unknown -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fdeclspec -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -aux-triple powerpc64le-unknown-unknown -o - | FileCheck %s
 // expected-no-diagnostics
 
 #ifdef __cplusplus
Index: clang/test/Headers/nvptx_device_cmath_functions_cxx17.cpp
===================================================================
--- clang/test/Headers/nvptx_device_cmath_functions_cxx17.cpp
+++ clang/test/Headers/nvptx_device_cmath_functions_cxx17.cpp
@@ -4,7 +4,7 @@
 // REQUIRES: nvptx-registered-target
 
 // RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -std=c++17
-// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -std=c++17 -o - | FileCheck -check-prefix CHECK-YES %s
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fdeclspec -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -std=c++17 -o - | FileCheck -check-prefix CHECK-YES %s
 
 #include <cmath>
 #include <cstdlib>
Index: clang/test/Headers/nvptx_device_cmath_functions.cpp
===================================================================
--- clang/test/Headers/nvptx_device_cmath_functions.cpp
+++ clang/test/Headers/nvptx_device_cmath_functions.cpp
@@ -4,7 +4,7 @@
 // REQUIRES: nvptx-registered-target
 
 // RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fdeclspec -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s
 
 #include <cmath>
 #include <cstdlib>
Index: clang/test/Headers/nvptx_device_cmath_functions.c
===================================================================
--- clang/test/Headers/nvptx_device_cmath_functions.c
+++ clang/test/Headers/nvptx_device_cmath_functions.c
@@ -4,7 +4,7 @@
 // REQUIRES: nvptx-registered-target
 
 // RUN: %clang_cc1 -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fdeclspec -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s
 
 #include <stdlib.h>
 #include <math.h>
Index: clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
===================================================================
--- clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
+++ clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
@@ -30,6 +30,9 @@
 /// Provide definitions for these functions.
 #include <__clang_cuda_device_functions.h>
 
+/// Provide definition for CUDA builtin variables
+#include <__clang_cuda_builtin_vars.h>
+
 #undef __OPENMP_NVPTX__
 #undef __CUDA__
 
Index: clang/lib/Headers/__clang_cuda_builtin_vars.h
===================================================================
--- clang/lib/Headers/__clang_cuda_builtin_vars.h
+++ clang/lib/Headers/__clang_cuda_builtin_vars.h
@@ -27,12 +27,29 @@
 // PTX output:
 //  mov.u32     %r2, %tid.x;
 
+#ifndef __OPENMP_NVPTX__
+#define DEVICE __attribute__((device))
+#else
+#define DEVICE
+#endif
+
+#ifdef __cplusplus
 #define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC)                                \
   __declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD;      \
   static inline __attribute__((always_inline))                                 \
-      __attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) {     \
+      DEVICE unsigned int __fetch_builtin_##FIELD(void) {                      \
+    return INTRINSIC;                                                          \
+  }
+#else
+#define __CUDA_DEVICE_BUILTIN_GETTER(NAME, FIELD, INTRINSIC)                   \
+  static inline __attribute__((always_inline))                                 \
+      DEVICE unsigned int __fetch_builtin_##NAME##_##FIELD(void) {             \
     return INTRINSIC;                                                          \
   }
+#define __CUDA_DEVICE_BUILTIN(NAME, FIELD)                                     \
+  __declspec(                                                                  \
+      property(get = __fetch_builtin_##NAME##_##FIELD)) unsigned int FIELD;
+#endif
 
 #if __cplusplus >= 201103L
 #define __DELETE =delete
@@ -44,65 +61,111 @@
 // also disallows taking address of special variables, so we disable address-of
 // operator as well.
 #define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName)                            \
-  __attribute__((device)) TypeName() __DELETE;                                 \
-  __attribute__((device)) TypeName(const TypeName &) __DELETE;                 \
-  __attribute__((device)) void operator=(const TypeName &) const __DELETE;     \
-  __attribute__((device)) TypeName *operator&() const __DELETE
+  DEVICE TypeName() __DELETE;                                                  \
+  DEVICE TypeName(const TypeName &) __DELETE;                                  \
+  DEVICE void operator=(const TypeName &) const __DELETE;                      \
+  DEVICE TypeName *operator&() const __DELETE
+
+#ifndef __cplusplus
+// threadIdx
+__CUDA_DEVICE_BUILTIN_GETTER(threadIdx, x, __nvvm_read_ptx_sreg_tid_x());
+__CUDA_DEVICE_BUILTIN_GETTER(threadIdx, y, __nvvm_read_ptx_sreg_tid_y());
+__CUDA_DEVICE_BUILTIN_GETTER(threadIdx, z, __nvvm_read_ptx_sreg_tid_z());
+// blockIdx
+__CUDA_DEVICE_BUILTIN_GETTER(blockIdx, x, __nvvm_read_ptx_sreg_ctaid_x());
+__CUDA_DEVICE_BUILTIN_GETTER(blockIdx, y, __nvvm_read_ptx_sreg_ctaid_y());
+__CUDA_DEVICE_BUILTIN_GETTER(blockIdx, z, __nvvm_read_ptx_sreg_ctaid_z());
+// blockDim
+__CUDA_DEVICE_BUILTIN_GETTER(blockDim, x, __nvvm_read_ptx_sreg_ntid_x());
+__CUDA_DEVICE_BUILTIN_GETTER(blockDim, y, __nvvm_read_ptx_sreg_ntid_y());
+__CUDA_DEVICE_BUILTIN_GETTER(blockDim, z, __nvvm_read_ptx_sreg_ntid_z());
+// gridDim
+__CUDA_DEVICE_BUILTIN_GETTER(gridDim, x, __nvvm_read_ptx_sreg_nctaid_x());
+__CUDA_DEVICE_BUILTIN_GETTER(gridDim, y, __nvvm_read_ptx_sreg_nctaid_y());
+__CUDA_DEVICE_BUILTIN_GETTER(gridDim, z, __nvvm_read_ptx_sreg_nctaid_z());
+#endif
 
 struct __cuda_builtin_threadIdx_t {
+#ifdef __cplusplus
   __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x());
   __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y());
   __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z());
   // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
   // uint3).  This function is defined after we pull in vector_types.h.
-  __attribute__((device)) operator dim3() const;
-  __attribute__((device)) operator uint3() const;
+  DEVICE operator dim3() const;
+  DEVICE operator uint3() const;
 
 private:
   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);
+#else
+  __CUDA_DEVICE_BUILTIN(threadIdx, x);
+  __CUDA_DEVICE_BUILTIN(threadIdx, y);
+  __CUDA_DEVICE_BUILTIN(threadIdx, z);
+#endif
 };
 
 struct __cuda_builtin_blockIdx_t {
+#ifdef __cplusplus
   __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x());
   __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y());
   __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z());
   // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a
   // uint3).  This function is defined after we pull in vector_types.h.
-  __attribute__((device)) operator dim3() const;
-  __attribute__((device)) operator uint3() const;
+  DEVICE operator dim3() const;
+  DEVICE operator uint3() const;
 
 private:
   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);
+#else
+  __CUDA_DEVICE_BUILTIN(blockIdx, x);
+  __CUDA_DEVICE_BUILTIN(blockIdx, y);
+  __CUDA_DEVICE_BUILTIN(blockIdx, z);
+#endif
 };
 
 struct __cuda_builtin_blockDim_t {
+#ifdef __cplusplus
   __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x());
   __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y());
   __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z());
   // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a
   // dim3).  This function is defined after we pull in vector_types.h.
-  __attribute__((device)) operator dim3() const;
-  __attribute__((device)) operator uint3() const;
+  DEVICE operator dim3() const;
+  DEVICE operator uint3() const;
 
 private:
   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);
+#else
+  __CUDA_DEVICE_BUILTIN(blockDim, x);
+  __CUDA_DEVICE_BUILTIN(blockDim, y);
+  __CUDA_DEVICE_BUILTIN(blockDim, z);
+#endif
 };
 
 struct __cuda_builtin_gridDim_t {
+#ifdef __cplusplus
   __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x());
   __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y());
   __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z());
   // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a
   // dim3).  This function is defined after we pull in vector_types.h.
-  __attribute__((device)) operator dim3() const;
-  __attribute__((device)) operator uint3() const;
+  DEVICE operator dim3() const;
+  DEVICE operator uint3() const;
 
 private:
   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);
+#else
+  __CUDA_DEVICE_BUILTIN(gridDim, x);
+  __CUDA_DEVICE_BUILTIN(gridDim, y);
+  __CUDA_DEVICE_BUILTIN(gridDim, z);
+#endif
 };
 
-#define __CUDA_BUILTIN_VAR                                                     \
-  extern const __attribute__((device)) __attribute__((weak))
+#ifdef __cplusplus
+#define __CUDA_BUILTIN_VAR extern const DEVICE __attribute__((weak))
+#else
+#define __CUDA_BUILTIN_VAR extern const DEVICE __attribute__((weak)) struct
+#endif
 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
 __CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx;
 __CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim;
@@ -111,11 +174,15 @@
 // warpSize should translate to read of %WARP_SZ but there's currently no
 // builtin to do so. According to PTX v4.2 docs 'to date, all target
 // architectures have a WARP_SZ value of 32'.
-__attribute__((device)) const int warpSize = 32;
+DEVICE const int warpSize = 32;
 
 #undef __CUDA_DEVICE_BUILTIN
 #undef __CUDA_BUILTIN_VAR
 #undef __CUDA_DISALLOW_BUILTINVAR_ACCESS
 #undef __DELETE
+#undef DEVICE
+#ifndef __cplusplus
+#undef __CUDA_DEVICE_BUILTIN_GETTER
+#endif
 
 #endif /* __CUDA_BUILTIN_VARS_H */
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to