jlebar updated this revision to Diff 48260.
jlebar added a comment.
Move coded into SetLLVMFunctionAttributes (not ForDefinition).
http://reviews.llvm.org/D17056
Files:
lib/CodeGen/CodeGenModule.cpp
test/CodeGenCUDA/convergent.cu
Index: test/CodeGenCUDA/convergent.cu
===================================================================
--- /dev/null
+++ test/CodeGenCUDA/convergent.cu
@@ -0,0 +1,35 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -emit-llvm \
+// RUN: -disable-llvm-passes -o - %s | FileCheck -check-prefix DEVICE %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \
+// RUN: -disable-llvm-passes -o - %s | \
+// RUN: FileCheck -check-prefix HOST %s
+
+#include "Inputs/cuda.h"
+
+// DEVICE: Function Attrs:
+// DEVICE-SAME: convergent
+// DEVICE-NEXT: define void @_Z3foov
+__device__ void foo() {}
+
+// HOST: Function Attrs:
+// HOST-NOT: convergent
+// HOST-NEXT: define void @_Z3barv
+// DEVICE: Function Attrs:
+// DEVICE-SAME: convergent
+// DEVICE-NEXT: define void @_Z3barv
+__host__ __device__ void baz();
+__host__ __device__ void bar() { baz(); }
+
+// DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
+// DEVICE: attributes [[BAZ_ATTR]] = {
+// DEVICE-SAME: convergent
+// DEVICE-SAME: }
+
+// HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
+// HOST: attributes [[BAZ_ATTR]] = {
+// HOST-NOT: convergent
+// NOST-SAME: }
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -813,6 +813,14 @@
false);
F->setAttributes(llvm::AttributeSet::get(getLLVMContext(), AttributeList));
F->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
+
+ if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
+ // Conservatively, mark all functions in CUDA as convergent (meaning, they
+ // may call an intrinsically convergent op, such as __syncthreads(), and so
+ // can't have certain optimizations applied around them). LLVM will remove
+ // this attribute where it safely can.
+ F->addFnAttr(llvm::Attribute::Convergent);
+ }
}
/// Determines whether the language options require us to model
Index: test/CodeGenCUDA/convergent.cu
===================================================================
--- /dev/null
+++ test/CodeGenCUDA/convergent.cu
@@ -0,0 +1,35 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -emit-llvm \
+// RUN: -disable-llvm-passes -o - %s | FileCheck -check-prefix DEVICE %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \
+// RUN: -disable-llvm-passes -o - %s | \
+// RUN: FileCheck -check-prefix HOST %s
+
+#include "Inputs/cuda.h"
+
+// DEVICE: Function Attrs:
+// DEVICE-SAME: convergent
+// DEVICE-NEXT: define void @_Z3foov
+__device__ void foo() {}
+
+// HOST: Function Attrs:
+// HOST-NOT: convergent
+// HOST-NEXT: define void @_Z3barv
+// DEVICE: Function Attrs:
+// DEVICE-SAME: convergent
+// DEVICE-NEXT: define void @_Z3barv
+__host__ __device__ void baz();
+__host__ __device__ void bar() { baz(); }
+
+// DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
+// DEVICE: attributes [[BAZ_ATTR]] = {
+// DEVICE-SAME: convergent
+// DEVICE-SAME: }
+
+// HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
+// HOST: attributes [[BAZ_ATTR]] = {
+// HOST-NOT: convergent
+// NOST-SAME: }
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -813,6 +813,14 @@
false);
F->setAttributes(llvm::AttributeSet::get(getLLVMContext(), AttributeList));
F->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
+
+ if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
+ // Conservatively, mark all functions in CUDA as convergent (meaning, they
+ // may call an intrinsically convergent op, such as __syncthreads(), and so
+ // can't have certain optimizations applied around them). LLVM will remove
+ // this attribute where it safely can.
+ F->addFnAttr(llvm::Attribute::Convergent);
+ }
}
/// Determines whether the language options require us to model
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits