Author: darkbuck Date: 2024-07-31T11:30:48-04:00 New Revision: fa842970027b6d2f0160ad42fa82a872bf8d8600
URL: https://github.com/llvm/llvm-project/commit/fa842970027b6d2f0160ad42fa82a872bf8d8600 DIFF: https://github.com/llvm/llvm-project/commit/fa842970027b6d2f0160ad42fa82a872bf8d8600.diff LOG: [clang][CUDA] Add 'noconvergent' function and statement attribute - For languages following SPMD/SIMT programming model, functions and call sites are marked 'convergent' by default. 'noconvergent' is added in this patch to allow developers to remove that 'convergent' attribute when it's safe. Reviewers: nhaehnle, Sirraide, yxsamliu, Artem-B, ilovepi, jayfoad, ssahasra, arsenm Reviewed By: arsenm Pull Request: https://github.com/llvm/llvm-project/pull/100637 Added: clang/test/SemaCUDA/attr-noconvergent.cu Modified: clang/include/clang/Basic/Attr.td clang/include/clang/Basic/AttrDocs.td clang/lib/CodeGen/CGCall.cpp clang/lib/CodeGen/CGStmt.cpp clang/lib/CodeGen/CodeGenFunction.h clang/lib/Sema/SemaStmtAttr.cpp clang/test/CodeGenCUDA/convergent.cu clang/test/Misc/pragma-attribute-supported-attributes-list.test Removed: ################################################################################ diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 46d0a66d59c37..8ac2079099c85 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2050,6 +2050,17 @@ def Convergent : InheritableAttr { let SimpleHandler = 1; } +def NoConvergent : InheritableAttr { + let Spellings = [Clang<"noconvergent">, Declspec<"noconvergent">]; + let Subjects = SubjectList<[Function, Stmt], WarnDiag, + "functions and statements">; + let LangOpts = [CUDA]; + let Documentation = [NoConvergentDocs]; + let SimpleHandler = 1; +} + +def : MutualExclusions<[Convergent, NoConvergent]>; + def NoInline : DeclOrStmtAttr { let Spellings = [CustomKeyword<"__noinline__">, GCC<"noinline">, CXX11<"clang", "noinline">, C23<"clang", "noinline">, diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 4b8d520d73893..94c284fc73158 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -1382,6 +1382,34 @@ Sample usage: }]; } +def NoConvergentDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +This attribute prevents a function from being treated as convergent, which +means that optimizations can only move calls to that function to +control-equivalent blocks. If a statement is marked as ``noconvergent`` and +contains calls, it also prevents those calls from being treated as convergent. +In other words, those calls are not restricted to only being moved to +control-equivalent blocks. + +In languages following SPMD/SIMT programming model, e.g., CUDA/HIP, function +declarations and calls are treated as convergent by default for correctness. +This ``noconvergent`` attribute is helpful for developers to prevent them from +being treated as convergent when it's safe. + +.. code-block:: c + + __device__ float bar(float); + __device__ float foo(float) __attribute__((noconvergent)) {} + + __device__ int example(void) { + float x; + [[clang::noconvergent]] x = bar(x); + } + + }]; +} + def NoSplitStackDocs : Documentation { let Category = DocCatFunction; let Content = [{ diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 2f3dd5d01fa6c..ee6e8e0905723 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2522,6 +2522,9 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, } } } + // Remove 'convergent' if requested. + if (TargetDecl->hasAttr<NoConvergentAttr>()) + FuncAttrs.removeAttribute(llvm::Attribute::Convergent); } // Add "sample-profile-suffix-elision-policy" attribute for internal linkage @@ -5636,6 +5639,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline); + // Remove call-site convergent attribute if requested. + if (InNoConvergentAttributedStmt) + Attrs = + Attrs.removeFnAttribute(getLLVMContext(), llvm::Attribute::Convergent); + // Apply some call-site-specific attributes. // TODO: work this into building the attribute set. diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index e16aa3cdd5506..30b6fce5d016a 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -723,6 +723,7 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) { bool nomerge = false; bool noinline = false; bool alwaysinline = false; + bool noconvergent = false; const CallExpr *musttail = nullptr; for (const auto *A : S.getAttrs()) { @@ -738,6 +739,9 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) { case attr::AlwaysInline: alwaysinline = true; break; + case attr::NoConvergent: + noconvergent = true; + break; case attr::MustTail: { const Stmt *Sub = S.getSubStmt(); const ReturnStmt *R = cast<ReturnStmt>(Sub); @@ -756,6 +760,7 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) { SaveAndRestore save_nomerge(InNoMergeAttributedStmt, nomerge); SaveAndRestore save_noinline(InNoInlineAttributedStmt, noinline); SaveAndRestore save_alwaysinline(InAlwaysInlineAttributedStmt, alwaysinline); + SaveAndRestore save_noconvergent(InNoConvergentAttributedStmt, noconvergent); SaveAndRestore save_musttail(MustTailCall, musttail); EmitStmt(S.getSubStmt(), S.getAttrs()); } @@ -2465,7 +2470,8 @@ static llvm::MDNode *getAsmSrcLocInfo(const StringLiteral *Str, static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect, bool HasUnwindClobber, bool ReadOnly, - bool ReadNone, bool NoMerge, const AsmStmt &S, + bool ReadNone, bool NoMerge, bool NoConvergent, + const AsmStmt &S, const std::vector<llvm::Type *> &ResultRegTypes, const std::vector<llvm::Type *> &ArgElemTypes, CodeGenFunction &CGF, @@ -2506,11 +2512,11 @@ static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect, llvm::ConstantAsMetadata::get(Loc))); } - if (CGF.getLangOpts().assumeFunctionsAreConvergent()) + if (!NoConvergent && CGF.getLangOpts().assumeFunctionsAreConvergent()) // Conservatively, mark all inline asm blocks in CUDA or OpenCL as // convergent (meaning, they may call an intrinsically convergent op, such // as bar.sync, and so can't have certain optimizations applied around - // them). + // them) unless it's explicitly marked 'noconvergent'. Result.addFnAttr(llvm::Attribute::Convergent); // Extract all of the register value results from the asm. if (ResultRegTypes.size() == 1) { @@ -3040,9 +3046,10 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { if (IsGCCAsmGoto) { CBR = Builder.CreateCallBr(IA, Fallthrough, Transfer, Args); EmitBlock(Fallthrough); - UpdateAsmCallInst(*CBR, HasSideEffect, false, ReadOnly, ReadNone, - InNoMergeAttributedStmt, S, ResultRegTypes, ArgElemTypes, - *this, RegResults); + UpdateAsmCallInst(*CBR, HasSideEffect, /*HasUnwindClobber=*/false, ReadOnly, + ReadNone, InNoMergeAttributedStmt, + InNoConvergentAttributedStmt, S, ResultRegTypes, + ArgElemTypes, *this, RegResults); // Because we are emitting code top to bottom, we don't have enough // information at this point to know precisely whether we have a critical // edge. If we have outputs, split all indirect destinations. @@ -3070,15 +3077,17 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { } } else if (HasUnwindClobber) { llvm::CallBase *Result = EmitCallOrInvoke(IA, Args, ""); - UpdateAsmCallInst(*Result, HasSideEffect, true, ReadOnly, ReadNone, - InNoMergeAttributedStmt, S, ResultRegTypes, ArgElemTypes, - *this, RegResults); + UpdateAsmCallInst(*Result, HasSideEffect, /*HasUnwindClobber=*/true, + ReadOnly, ReadNone, InNoMergeAttributedStmt, + InNoConvergentAttributedStmt, S, ResultRegTypes, + ArgElemTypes, *this, RegResults); } else { llvm::CallInst *Result = Builder.CreateCall(IA, Args, getBundlesForFunclet(IA)); - UpdateAsmCallInst(*Result, HasSideEffect, false, ReadOnly, ReadNone, - InNoMergeAttributedStmt, S, ResultRegTypes, ArgElemTypes, - *this, RegResults); + UpdateAsmCallInst(*Result, HasSideEffect, /*HasUnwindClobber=*/false, + ReadOnly, ReadNone, InNoMergeAttributedStmt, + InNoConvergentAttributedStmt, S, ResultRegTypes, + ArgElemTypes, *this, RegResults); } EmitAsmStores(*this, S, RegResults, ResultRegTypes, ResultTruncRegTypes, diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 89cc819c43bb5..1911fbac100c5 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -612,6 +612,9 @@ class CodeGenFunction : public CodeGenTypeCache { /// True if the current statement has always_inline attribute. bool InAlwaysInlineAttributedStmt = false; + /// True if the current statement has noconvergent attribute. + bool InNoConvergentAttributedStmt = false; + // The CallExpr within the current statement that the musttail attribute // applies to. nullptr if there is no 'musttail' on the current statement. const CallExpr *MustTailCall = nullptr; diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp index 3cf742b6a672d..b9b3b4063bc38 100644 --- a/clang/lib/Sema/SemaStmtAttr.cpp +++ b/clang/lib/Sema/SemaStmtAttr.cpp @@ -229,6 +229,19 @@ static Attr *handleNoMergeAttr(Sema &S, Stmt *St, const ParsedAttr &A, return ::new (S.Context) NoMergeAttr(S.Context, A); } +static Attr *handleNoConvergentAttr(Sema &S, Stmt *St, const ParsedAttr &A, + SourceRange Range) { + CallExprFinder CEF(S, St); + + if (!CEF.foundCallExpr() && !CEF.foundAsmStmt()) { + S.Diag(St->getBeginLoc(), diag::warn_attribute_ignored_no_calls_in_stmt) + << A; + return nullptr; + } + + return ::new (S.Context) NoConvergentAttr(S.Context, A); +} + template <typename OtherAttr, int DiagIdx> static bool CheckStmtInlineAttr(Sema &SemaRef, const Stmt *OrigSt, const Stmt *CurSt, @@ -664,6 +677,8 @@ static Attr *ProcessStmtAttribute(Sema &S, Stmt *St, const ParsedAttr &A, return handleCodeAlignAttr(S, St, A); case ParsedAttr::AT_MSConstexpr: return handleMSConstexprAttr(S, St, A, Range); + case ParsedAttr::AT_NoConvergent: + return handleNoConvergentAttr(S, St, A, Range); default: // N.B., ClangAttrEmitter.cpp emits a diagnostic helper that ensures a // declaration attribute is not written on a statement, but this code is diff --git a/clang/test/CodeGenCUDA/convergent.cu b/clang/test/CodeGenCUDA/convergent.cu index 5d98d4ba69262..b187f3a8a32d6 100644 --- a/clang/test/CodeGenCUDA/convergent.cu +++ b/clang/test/CodeGenCUDA/convergent.cu @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5 // REQUIRES: x86-registered-target // REQUIRES: nvptx-registered-target @@ -10,36 +11,89 @@ #include "Inputs/cuda.h" -// DEVICE: Function Attrs: -// DEVICE-SAME: convergent -// DEVICE-NEXT: define{{.*}} void @_Z3foov +// DEVICE-LABEL: define dso_local void @_Z3foov( +// DEVICE-SAME: ) #[[ATTR0:[0-9]+]] { +// DEVICE-NEXT: [[ENTRY:.*:]] +// DEVICE-NEXT: ret void +// __device__ void foo() {} +// DEVICE-LABEL: define dso_local void @_Z3baxv( +// DEVICE-SAME: ) #[[ATTR1:[0-9]+]] { +// DEVICE-NEXT: [[ENTRY:.*:]] +// DEVICE-NEXT: ret void +// +[[clang::noconvergent]] __device__ void bax() {} -// HOST: Function Attrs: -// HOST-NOT: convergent -// HOST-NEXT: define{{.*}} void @_Z3barv -// DEVICE: Function Attrs: -// DEVICE-SAME: convergent -// DEVICE-NEXT: define{{.*}} void @_Z3barv __host__ __device__ void baz(); + +__host__ __device__ float aliasf0(int) asm("something"); +__host__ __device__ [[clang::noconvergent]] float aliasf1(int) asm("somethingelse"); + +// DEVICE-LABEL: define dso_local void @_Z3barv( +// DEVICE-SAME: ) #[[ATTR0]] { +// DEVICE-NEXT: [[ENTRY:.*:]] +// DEVICE-NEXT: [[X:%.*]] = alloca i32, align 4 +// DEVICE-NEXT: call void @_Z3bazv() #[[ATTR4:[0-9]+]] +// DEVICE-NEXT: [[TMP0:%.*]] = call i32 asm "trap", "=l"() #[[ATTR5:[0-9]+]], !srcloc [[META3:![0-9]+]] +// DEVICE-NEXT: store i32 [[TMP0]], ptr [[X]], align 4 +// DEVICE-NEXT: call void asm sideeffect "trap", ""() #[[ATTR4]], !srcloc [[META4:![0-9]+]] +// DEVICE-NEXT: call void asm sideeffect "nop", ""() #[[ATTR6:[0-9]+]], !srcloc [[META5:![0-9]+]] +// DEVICE-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4 +// DEVICE-NEXT: [[CALL:%.*]] = call contract noundef float @something(i32 noundef [[TMP1]]) #[[ATTR4]] +// DEVICE-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4 +// DEVICE-NEXT: [[CALL1:%.*]] = call contract noundef float @somethingelse(i32 noundef [[TMP2]]) #[[ATTR6]] +// DEVICE-NEXT: ret void +// +// HOST-LABEL: define dso_local void @_Z3barv( +// HOST-SAME: ) #[[ATTR0:[0-9]+]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: [[X:%.*]] = alloca i32, align 4 +// HOST-NEXT: call void @_Z3bazv() +// HOST-NEXT: [[TMP0:%.*]] = call i32 asm "trap", "=l,~{dirflag},~{fpsr},~{flags}"() #[[ATTR2:[0-9]+]], !srcloc [[META2:![0-9]+]] +// HOST-NEXT: store i32 [[TMP0]], ptr [[X]], align 4 +// HOST-NEXT: call void asm sideeffect "trap", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR3:[0-9]+]], !srcloc [[META3:![0-9]+]] +// HOST-NEXT: call void asm sideeffect "nop", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR3]], !srcloc [[META4:![0-9]+]] +// HOST-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4 +// HOST-NEXT: [[CALL:%.*]] = call contract noundef float @something(i32 noundef [[TMP1]]) +// HOST-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4 +// HOST-NEXT: [[CALL1:%.*]] = call contract noundef float @somethingelse(i32 noundef [[TMP2]]) +// HOST-NEXT: ret void +// __host__ __device__ void bar() { - // DEVICE: call void @_Z3bazv() [[CALL_ATTR:#[0-9]+]] baz(); - // DEVICE: call i32 asm "trap;", "=l"() [[ASM_ATTR:#[0-9]+]] int x; - asm ("trap;" : "=l"(x)); - // DEVICE: call void asm sideeffect "trap;", ""() [[ASM_ATTR:#[0-9]+]] - asm volatile ("trap;"); + asm ("trap" : "=l"(x)); + asm volatile ("trap"); + [[clang::noconvergent]] { asm volatile ("nop"); } + aliasf0(x); + aliasf1(x); } -// DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]] -// DEVICE: attributes [[BAZ_ATTR]] = { -// DEVICE-SAME: convergent -// DEVICE-SAME: } -// DEVICE-DAG: attributes [[CALL_ATTR]] = { convergent -// DEVICE-DAG: attributes [[ASM_ATTR]] = { convergent - -// HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]] -// HOST: attributes [[BAZ_ATTR]] = { -// HOST-NOT: convergent -// HOST-SAME: } + +//. +// DEVICE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32" } +// DEVICE: attributes #[[ATTR1]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32" } +// DEVICE: attributes #[[ATTR2:[0-9]+]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32" } +// DEVICE: attributes #[[ATTR3:[0-9]+]] = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32" } +// DEVICE: attributes #[[ATTR4]] = { convergent nounwind } +// DEVICE: attributes #[[ATTR5]] = { convergent nounwind memory(none) } +// DEVICE: attributes #[[ATTR6]] = { nounwind } +//. +// HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// HOST: attributes #[[ATTR1:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// HOST: attributes #[[ATTR2]] = { nounwind memory(none) } +// HOST: attributes #[[ATTR3]] = { nounwind } +//. +// DEVICE: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// DEVICE: [[META1:![0-9]+]] = !{i32 4, !"nvvm-reflect-ftz", i32 0} +// DEVICE: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +// DEVICE: [[META3]] = !{i64 3120} +// DEVICE: [[META4]] = !{i64 3155} +// DEVICE: [[META5]] = !{i64 3206} +//. +// HOST: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// HOST: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +// HOST: [[META2]] = !{i64 3120} +// HOST: [[META3]] = !{i64 3155} +// HOST: [[META4]] = !{i64 3206} +//. diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index e082db698ef0c..0f7dcab7c4248 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -109,6 +109,7 @@ // CHECK-NEXT: Naked (SubjectMatchRule_function) // CHECK-NEXT: NoBuiltin (SubjectMatchRule_function) // CHECK-NEXT: NoCommon (SubjectMatchRule_variable) +// CHECK-NEXT: NoConvergent (SubjectMatchRule_function) // CHECK-NEXT: NoDebug (SubjectMatchRule_type_alias, SubjectMatchRule_hasType_functionType, SubjectMatchRule_objc_method, SubjectMatchRule_variable_not_is_parameter) // CHECK-NEXT: NoDestroy (SubjectMatchRule_variable) // CHECK-NEXT: NoDuplicate (SubjectMatchRule_function) diff --git a/clang/test/SemaCUDA/attr-noconvergent.cu b/clang/test/SemaCUDA/attr-noconvergent.cu new file mode 100644 index 0000000000000..0c051fdde4379 --- /dev/null +++ b/clang/test/SemaCUDA/attr-noconvergent.cu @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s + +#include "Inputs/cuda.h" + +__device__ float f0(float) __attribute__((noconvergent)); +__device__ __attribute__((noconvergent)) float f1(float); +[[clang::noconvergent]] __device__ float f2(float); + +__device__ [[clang::noconvergent(1)]] float f3(float); +// expected-error@-1 {{'noconvergent' attribute takes no arguments}} + +__device__ [[clang::noconvergent]] float g0; +// expected-warning@-1 {{'noconvergent' attribute only applies to functions and statements}} + +__device__ __attribute__((convergent)) __attribute__((noconvergent)) float f4(float); +// expected-error@-1 {{'noconvergent' and 'convergent' attributes are not compatible}} +// expected-note@-2 {{conflicting attribute is here}} + +__device__ [[clang::noconvergent]] float f5(float); +__device__ [[clang::convergent]] float f5(float); +// expected-error@-1 {{'convergent' and 'noconvergent' attributes are not compatible}} +// expected-note@-3 {{conflicting attribute is here}} + +__device__ float f5(float x) { + [[clang::noconvergent]] float y; +// expected-warning@-1 {{'noconvergent' attribute only applies to functions and statements}} + + float z; + + [[clang::noconvergent]] z = 1; +// expected-warning@-1 {{'noconvergent' attribute is ignored because there exists no call expression inside the statement}} + + [[clang::noconvergent]] z = f0(x); +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits