https://github.com/darkbuck updated https://github.com/llvm/llvm-project/pull/100637
>From d9de73264bf4d555e7e09a2c2687eae72c1fa19e Mon Sep 17 00:00:00 2001 From: Michael Liao <michael.hl...@gmail.com> Date: Thu, 25 Jul 2024 15:19:15 -0400 Subject: [PATCH 1/8] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?= =?UTF-8?q?itial=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.4 --- clang/include/clang/Basic/Attr.td | 3 ++- clang/lib/CodeGen/CGCall.cpp | 5 +++++ clang/lib/CodeGen/CGStmt.cpp | 33 ++++++++++++++++++++--------- clang/lib/CodeGen/CodeGenFunction.h | 3 +++ clang/lib/Sema/SemaStmtAttr.cpp | 16 ++++++++++++++ clang/test/SemaOpenCL/convergent.cl | 4 ++-- 6 files changed, 51 insertions(+), 13 deletions(-) diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 4825979a974d2..c3bcaa5d5f235 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2042,7 +2042,8 @@ def NoDuplicate : InheritableAttr { def Convergent : InheritableAttr { let Spellings = [Clang<"convergent">]; - let Subjects = SubjectList<[Function]>; + let Subjects = SubjectList<[Function, Stmt], WarnDiag, + "functions and statements">; let Documentation = [ConvergentDocs]; let SimpleHandler = 1; } diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 2f3dd5d01fa6c..d73feb4382acd 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5636,6 +5636,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline); + // Add call-site convergent attribute if exists. + if (InConvergentAttributedStmt) + Attrs = + Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::Convergent); + // Apply some call-site-specific attributes. // TODO: work this into building the attribute set. diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index aa97f685ac7a9..99559dfe075fb 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -723,6 +723,7 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) { bool nomerge = false; bool noinline = false; bool alwaysinline = false; + bool convergent = false; const CallExpr *musttail = nullptr; for (const auto *A : S.getAttrs()) { @@ -738,6 +739,9 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) { case attr::AlwaysInline: alwaysinline = true; break; + case attr::Convergent: + convergent = true; + break; case attr::MustTail: { const Stmt *Sub = S.getSubStmt(); const ReturnStmt *R = cast<ReturnStmt>(Sub); @@ -756,6 +760,7 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) { SaveAndRestore save_nomerge(InNoMergeAttributedStmt, nomerge); SaveAndRestore save_noinline(InNoInlineAttributedStmt, noinline); SaveAndRestore save_alwaysinline(InAlwaysInlineAttributedStmt, alwaysinline); + SaveAndRestore save_convergent(InConvergentAttributedStmt, convergent); SaveAndRestore save_musttail(MustTailCall, musttail); EmitStmt(S.getSubStmt(), S.getAttrs()); } @@ -2465,7 +2470,8 @@ static llvm::MDNode *getAsmSrcLocInfo(const StringLiteral *Str, static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect, bool HasUnwindClobber, bool ReadOnly, - bool ReadNone, bool NoMerge, const AsmStmt &S, + bool ReadNone, bool NoMerge, bool Convergent, + const AsmStmt &S, const std::vector<llvm::Type *> &ResultRegTypes, const std::vector<llvm::Type *> &ArgElemTypes, CodeGenFunction &CGF, @@ -2475,6 +2481,10 @@ static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect, if (NoMerge) Result.addFnAttr(llvm::Attribute::NoMerge); + + if (Convergent) + Result.addFnAttr(llvm::Attribute::Convergent); + // Attach readnone and readonly attributes. if (!HasSideEffect) { if (ReadNone) @@ -3037,9 +3047,10 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { if (IsGCCAsmGoto) { CBR = Builder.CreateCallBr(IA, Fallthrough, Transfer, Args); EmitBlock(Fallthrough); - UpdateAsmCallInst(*CBR, HasSideEffect, false, ReadOnly, ReadNone, - InNoMergeAttributedStmt, S, ResultRegTypes, ArgElemTypes, - *this, RegResults); + UpdateAsmCallInst(*CBR, HasSideEffect, /*HasUnwindClobber=*/false, ReadOnly, + ReadNone, InNoMergeAttributedStmt, + InConvergentAttributedStmt, S, ResultRegTypes, + ArgElemTypes, *this, RegResults); // Because we are emitting code top to bottom, we don't have enough // information at this point to know precisely whether we have a critical // edge. If we have outputs, split all indirect destinations. @@ -3067,15 +3078,17 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { } } else if (HasUnwindClobber) { llvm::CallBase *Result = EmitCallOrInvoke(IA, Args, ""); - UpdateAsmCallInst(*Result, HasSideEffect, true, ReadOnly, ReadNone, - InNoMergeAttributedStmt, S, ResultRegTypes, ArgElemTypes, - *this, RegResults); + UpdateAsmCallInst(*Result, HasSideEffect, /*HasUnwindClobber=*/true, + ReadOnly, ReadNone, InNoMergeAttributedStmt, + InConvergentAttributedStmt, S, ResultRegTypes, + ArgElemTypes, *this, RegResults); } else { llvm::CallInst *Result = Builder.CreateCall(IA, Args, getBundlesForFunclet(IA)); - UpdateAsmCallInst(*Result, HasSideEffect, false, ReadOnly, ReadNone, - InNoMergeAttributedStmt, S, ResultRegTypes, ArgElemTypes, - *this, RegResults); + UpdateAsmCallInst(*Result, HasSideEffect, /*HasUnwindClobber=*/false, + ReadOnly, ReadNone, InNoMergeAttributedStmt, + InConvergentAttributedStmt, S, ResultRegTypes, + ArgElemTypes, *this, RegResults); } EmitAsmStores(*this, S, RegResults, ResultRegTypes, ResultTruncRegTypes, diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 67e3019565cd0..329120b70fd49 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -612,6 +612,9 @@ class CodeGenFunction : public CodeGenTypeCache { /// True if the current statement has always_inline attribute. bool InAlwaysInlineAttributedStmt = false; + /// True if the current statement has convergent attribute. + bool InConvergentAttributedStmt = false; + // The CallExpr within the current statement that the musttail attribute // applies to. nullptr if there is no 'musttail' on the current statement. const CallExpr *MustTailCall = nullptr; diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp index 7f452d177c16f..ff743d9f9df20 100644 --- a/clang/lib/Sema/SemaStmtAttr.cpp +++ b/clang/lib/Sema/SemaStmtAttr.cpp @@ -230,6 +230,20 @@ static Attr *handleNoMergeAttr(Sema &S, Stmt *St, const ParsedAttr &A, return ::new (S.Context) NoMergeAttr(S.Context, A); } +static Attr *handleConvergentAttr(Sema &S, Stmt *St, const ParsedAttr &A, + SourceRange Range) { + NoMergeAttr NMA(S.Context, A); + CallExprFinder CEF(S, St); + + if (!CEF.foundCallExpr() && !CEF.foundAsmStmt()) { + S.Diag(St->getBeginLoc(), diag::warn_attribute_ignored_no_calls_in_stmt) + << A; + return nullptr; + } + + return ::new (S.Context) ConvergentAttr(S.Context, A); +} + template <typename OtherAttr, int DiagIdx> static bool CheckStmtInlineAttr(Sema &SemaRef, const Stmt *OrigSt, const Stmt *CurSt, @@ -672,6 +686,8 @@ static Attr *ProcessStmtAttribute(Sema &S, Stmt *St, const ParsedAttr &A, return handleCodeAlignAttr(S, St, A); case ParsedAttr::AT_MSConstexpr: return handleMSConstexprAttr(S, St, A, Range); + case ParsedAttr::AT_Convergent: + return handleConvergentAttr(S, St, A, Range); default: // N.B., ClangAttrEmitter.cpp emits a diagnostic helper that ensures a // declaration attribute is not written on a statement, but this code is diff --git a/clang/test/SemaOpenCL/convergent.cl b/clang/test/SemaOpenCL/convergent.cl index 1b7fda41fc0c8..a00e65cea0176 100644 --- a/clang/test/SemaOpenCL/convergent.cl +++ b/clang/test/SemaOpenCL/convergent.cl @@ -4,9 +4,9 @@ void f1(void) __attribute__((convergent)); void f2(void) __attribute__((convergent(1))); // expected-error {{'convergent' attribute takes no arguments}} -void f3(int a __attribute__((convergent))); // expected-warning {{'convergent' attribute only applies to functions}} +void f3(int a __attribute__((convergent))); // expected-warning {{'convergent' attribute only applies to functions and statements}} void f4(void) { - int var1 __attribute__((convergent)); // expected-warning {{'convergent' attribute only applies to functions}} + int var1 __attribute__((convergent)); // expected-warning {{'convergent' attribute only applies to functions and statements}} } >From 72631243e58ce58daa2535675aa5a8893267e7a0 Mon Sep 17 00:00:00 2001 From: Michael Liao <michael.hl...@gmail.com> Date: Thu, 25 Jul 2024 15:34:11 -0400 Subject: [PATCH 2/8] Fix clang-formatting Created using spr 1.3.4 --- clang/lib/CodeGen/CGCall.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index d73feb4382acd..d11fe5795c52a 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5638,8 +5638,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Add call-site convergent attribute if exists. if (InConvergentAttributedStmt) - Attrs = - Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::Convergent); + Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::Convergent); // Apply some call-site-specific attributes. // TODO: work this into building the attribute set. >From ef8ebb9e00f6f281a07decd79021dbe48bf1e29f Mon Sep 17 00:00:00 2001 From: Michael Liao <michael.hl...@gmail.com> Date: Thu, 25 Jul 2024 18:54:31 -0400 Subject: [PATCH 3/8] cleanup Created using spr 1.3.4 --- clang/test/CodeGen/convergent.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/clang/test/CodeGen/convergent.cpp b/clang/test/CodeGen/convergent.cpp index ae782710f04fb..1d9c07d26bf71 100644 --- a/clang/test/CodeGen/convergent.cpp +++ b/clang/test/CodeGen/convergent.cpp @@ -16,14 +16,12 @@ class B : public A { bool bar(); [[clang::convergent]] void f(bool, bool); -//[[clang::convergent]] void (*fptr)(void); void foo(int i, A *ap, B *bp) { [[clang::convergent]] bar(); [[clang::convergent]] (i = 4, bar()); [[clang::convergent]] (void)(bar()); f(bar(), bar()); - //fptr(); [[clang::convergent]] [] { bar(); bar(); }(); // convergent only applies to the anonymous function call [[clang::convergent]] for (bar(); bar(); bar()) {} [[clang::convergent]] { asm("nop"); } @@ -68,8 +66,6 @@ void something_else_again() { // CHECK: call noundef zeroext i1 @_Z3barv(){{$}} // CHECK: call noundef zeroext i1 @_Z3barv(){{$}} // CHECK: call void @_Z1fbb({{.*}}) #[[ATTR0]] -// XXX: %[[FPTR:.*]] = load ptr, ptr @fptr -// XXX-NEXT: call void %[[FPTR]]() #[[ATTR0]] // CHECK: call void @"_ZZ3fooiP1AP1BENK3$_0clEv"{{.*}} #[[ATTR0]] // CHECK: call noundef zeroext i1 @_Z3barv() #[[ATTR0]] // CHECK-LABEL: for.cond: >From fa48a1230bb0a32aba5ee9229db6ccdf15a6e51b Mon Sep 17 00:00:00 2001 From: Michael Liao <michael.hl...@gmail.com> Date: Thu, 25 Jul 2024 19:13:21 -0400 Subject: [PATCH 4/8] update doc Created using spr 1.3.4 --- clang/include/clang/Basic/AttrDocs.td | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 99738812c8157..cda583a69fa77 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -1357,10 +1357,11 @@ of the condition. def ConvergentDocs : Documentation { let Category = DocCatFunction; let Content = [{ -The ``convergent`` attribute can be placed on a function declaration. It is -translated into the LLVM ``convergent`` attribute, which indicates that the call -instructions of a function with this attribute cannot be made control-dependent -on any additional values. +The ``convergent`` attribute can be placed on a function declaration or a +statement containing call expressions. It is translated into the LLVM +``convergent`` attribute, which indicates that the call instructions of a +function with this attribute cannot be made control-dependent on any additional +values. In languages designed for SPMD/SIMT programming model, e.g. OpenCL or CUDA, the call instructions of a function with this attribute must be executed by @@ -1379,6 +1380,10 @@ Sample usage: // Setting it as a C++11 attribute is also valid in a C++ program. // void convfunc(void) [[clang::convergent]]; + int f() { + [[clang::convergent]] foo(arg); + // The call to 'foo' has attribute 'convergent'. + } }]; } >From e6b536822c0d81bbcde5c3dacb8db3b9108b0a7b Mon Sep 17 00:00:00 2001 From: Michael Liao <michael.hl...@gmail.com> Date: Thu, 25 Jul 2024 19:22:34 -0400 Subject: [PATCH 5/8] autogen codegen test checks Created using spr 1.3.4 --- clang/test/CodeGen/convergent.cpp | 130 +++++++++++++++++------------- 1 file changed, 74 insertions(+), 56 deletions(-) diff --git a/clang/test/CodeGen/convergent.cpp b/clang/test/CodeGen/convergent.cpp index 1d9c07d26bf71..867cb145312ed 100644 --- a/clang/test/CodeGen/convergent.cpp +++ b/clang/test/CodeGen/convergent.cpp @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 // RUN: %clang_cc1 -emit-llvm %s -triple x86_64-unknown-linux-gnu -o - | FileCheck %s class A { @@ -17,6 +18,74 @@ class B : public A { bool bar(); [[clang::convergent]] void f(bool, bool); +// CHECK-LABEL: define dso_local void @_Z3fooiP1AP1B( +// CHECK-SAME: i32 noundef [[I:%.*]], ptr noundef [[AP:%.*]], ptr noundef [[BP:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[BP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 1 +// CHECK-NEXT: [[A:%.*]] = alloca [[CLASS_A:%.*]], align 8 +// CHECK-NEXT: [[B:%.*]] = alloca [[CLASS_B:%.*]], align 8 +// CHECK-NEXT: [[NEWA:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 +// CHECK-NEXT: store ptr [[AP]], ptr [[AP_ADDR]], align 8 +// CHECK-NEXT: store ptr [[BP]], ptr [[BP_ADDR]], align 8 +// CHECK-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @_Z3barv() #[[ATTR6:[0-9]+]] +// CHECK-NEXT: store i32 4, ptr [[I_ADDR]], align 4 +// CHECK-NEXT: [[CALL1:%.*]] = call noundef zeroext i1 @_Z3barv() #[[ATTR6]] +// CHECK-NEXT: [[CALL2:%.*]] = call noundef zeroext i1 @_Z3barv() #[[ATTR6]] +// CHECK-NEXT: [[CALL3:%.*]] = call noundef zeroext i1 @_Z3barv() +// CHECK-NEXT: [[CALL4:%.*]] = call noundef zeroext i1 @_Z3barv() +// CHECK-NEXT: call void @_Z1fbb(i1 noundef zeroext [[CALL3]], i1 noundef zeroext [[CALL4]]) #[[ATTR6]] +// CHECK-NEXT: call void @"_ZZ3fooiP1AP1BENK3$_0clEv"(ptr noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) #[[ATTR6]] +// CHECK-NEXT: [[CALL5:%.*]] = call noundef zeroext i1 @_Z3barv() #[[ATTR6]] +// CHECK-NEXT: br label %[[FOR_COND:.*]] +// CHECK: [[FOR_COND]]: +// CHECK-NEXT: [[CALL6:%.*]] = call noundef zeroext i1 @_Z3barv() #[[ATTR6]] +// CHECK-NEXT: br i1 [[CALL6]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK: [[FOR_BODY]]: +// CHECK-NEXT: br label %[[FOR_INC:.*]] +// CHECK: [[FOR_INC]]: +// CHECK-NEXT: [[CALL7:%.*]] = call noundef zeroext i1 @_Z3barv() #[[ATTR6]] +// CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] +// CHECK: [[FOR_END]]: +// CHECK-NEXT: call void asm sideeffect "nop", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR7:[0-9]+]], !srcloc [[META4:![0-9]+]] +// CHECK-NEXT: [[CALL8:%.*]] = call noundef zeroext i1 @_Z3barv() +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AP_ADDR]], align 8 +// CHECK-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CHECK-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 2 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VFN]], align 8 +// CHECK-NEXT: call void [[TMP1]](ptr noundef nonnull align 8 dereferenceable(8) [[TMP0]]) #[[ATTR6]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BP_ADDR]], align 8 +// CHECK-NEXT: [[VTABLE9:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK-NEXT: [[VFN10:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE9]], i64 2 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VFN10]], align 8 +// CHECK-NEXT: call void [[TMP3]](ptr noundef nonnull align 8 dereferenceable(8) [[TMP2]]) +// CHECK-NEXT: call void @_ZN1AC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[A]]) #[[ATTR6]] +// CHECK-NEXT: call void @_ZN1A1fEv(ptr noundef nonnull align 8 dereferenceable(8) [[A]]) #[[ATTR6]] +// CHECK-NEXT: call void @_ZN1A1gEv(ptr noundef nonnull align 8 dereferenceable(8) [[A]]) #[[ATTR6]] +// CHECK-NEXT: call void @_ZN1A2f1Ev() #[[ATTR6]] +// CHECK-NEXT: call void @_ZN1BC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[B]]) +// CHECK-NEXT: call void @_ZN1B1gEv(ptr noundef nonnull align 8 dereferenceable(8) [[B]]) +// CHECK-NEXT: [[CALL11:%.*]] = call noalias noundef nonnull ptr @_Znwm(i64 noundef 8) #[[ATTR8:[0-9]+]] +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[CALL11]], i8 0, i64 8, i1 false) +// CHECK-NEXT: call void @_ZN1BC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[CALL11]]) +// CHECK-NEXT: store ptr [[CALL11]], ptr [[NEWA]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[NEWA]], align 8 +// CHECK-NEXT: [[ISNULL:%.*]] = icmp eq ptr [[TMP4]], null +// CHECK-NEXT: br i1 [[ISNULL]], label %[[DELETE_END:.*]], label %[[DELETE_NOTNULL:.*]] +// CHECK: [[DELETE_NOTNULL]]: +// CHECK-NEXT: [[VTABLE12:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK-NEXT: [[VFN13:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE12]], i64 1 +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[VFN13]], align 8 +// CHECK-NEXT: call void [[TMP5]](ptr noundef nonnull align 8 dereferenceable(8) [[TMP4]]) #[[ATTR7]] +// CHECK-NEXT: br label %[[DELETE_END]] +// CHECK: [[DELETE_END]]: +// CHECK-NEXT: call void @_ZN1BD1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[B]]) #[[ATTR9:[0-9]+]] +// CHECK-NEXT: call void @_ZN1AD1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[A]]) #[[ATTR7]] +// CHECK-NEXT: ret void +// void foo(int i, A *ap, B *bp) { [[clang::convergent]] bar(); [[clang::convergent]] (i = 4, bar()); @@ -41,59 +110,8 @@ void foo(int i, A *ap, B *bp) { A *newA = new B(); delete newA; } - -int g(int i); - -void something() { - g(1); -} - -[[clang::convergent]] int g(int i); - -void something_else() { - g(1); -} - -int g(int i) { return i; } - -void something_else_again() { - g(1); -} - -// CHECK: call noundef zeroext i1 @_Z3barv() #[[ATTR0:[0-9]+]] -// CHECK: call noundef zeroext i1 @_Z3barv() #[[ATTR0]] -// CHECK: call noundef zeroext i1 @_Z3barv() #[[ATTR0]] -// CHECK: call noundef zeroext i1 @_Z3barv(){{$}} -// CHECK: call noundef zeroext i1 @_Z3barv(){{$}} -// CHECK: call void @_Z1fbb({{.*}}) #[[ATTR0]] -// CHECK: call void @"_ZZ3fooiP1AP1BENK3$_0clEv"{{.*}} #[[ATTR0]] -// CHECK: call noundef zeroext i1 @_Z3barv() #[[ATTR0]] -// CHECK-LABEL: for.cond: -// CHECK: call noundef zeroext i1 @_Z3barv() #[[ATTR0]] -// CHECK-LABEL: for.inc: -// CHECK: call noundef zeroext i1 @_Z3barv() #[[ATTR0]] -// CHECK: call void asm sideeffect "nop"{{.*}} #[[ATTR1:[0-9]+]] -// CHECK: call noundef zeroext i1 @_Z3barv(){{$}} -// CHECK: load ptr, ptr -// CHECK: load ptr, ptr -// CHECK: %[[AG:.*]] = load ptr, ptr -// CHECK-NEXT: call void %[[AG]](ptr {{.*}}) #[[ATTR0]] -// CHECK: load ptr, ptr -// CHECK: load ptr, ptr -// CHECK: %[[BG:.*]] = load ptr, ptr -// CHECK-NEXT: call void %[[BG]](ptr noundef{{.*}} -// CHECK: call void @_ZN1AC1Ev({{.*}}) #[[ATTR0]] -// CHECK: call void @_ZN1A1fEv({{.*}}) #[[ATTR0]] -// CHECK: call void @_ZN1A1gEv({{.*}}) #[[ATTR0]] -// CHECK: call void @_ZN1A2f1Ev() #[[ATTR0]] -// CHECK: call void @_ZN1BC1Ev({{.*}}){{$}} -// CHECK: call void @_ZN1B1gEv({{.*}}){{$}} -// CHECK: call void @_ZN1BC1Ev({{.*}}){{$}} -// CHECK: load ptr, ptr -// CHECK: load ptr, ptr -// CHECK: %[[AG:.*]] = load ptr, ptr -// CHECK-NEXT: call void %[[AG]](ptr {{.*}}) #[[ATTR1]] -// CHECK: call void @_ZN1AD1Ev(ptr {{.*}}) #[[ATTR1]] - -// CHECK-DAG: attributes #[[ATTR0]] = {{{.*}}convergent{{.*}}} -// CHECK-DAG: attributes #[[ATTR1]] = {{{.*}}convergent{{.*}}} +//. +// CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]} +// CHECK: [[META3]] = !{!"llvm.loop.mustprogress"} +// CHECK: [[META4]] = !{i64 5689} +//. >From 9bee8623e62edb3e4190abcad443039c2b42ec84 Mon Sep 17 00:00:00 2001 From: Michael Liao <michael.hl...@gmail.com> Date: Thu, 25 Jul 2024 19:44:24 -0400 Subject: [PATCH 6/8] hack to generate attribute def checks Created using spr 1.3.4 --- clang/test/CodeGen/convergent.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/clang/test/CodeGen/convergent.cpp b/clang/test/CodeGen/convergent.cpp index 867cb145312ed..9eb9155e79f39 100644 --- a/clang/test/CodeGen/convergent.cpp +++ b/clang/test/CodeGen/convergent.cpp @@ -111,6 +111,13 @@ void foo(int i, A *ap, B *bp) { delete newA; } //. +// CHECK: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #[[ATTR6]] = { convergent } +// CHECK: attributes #[[ATTR7]] = { convergent nounwind } +// CHECK: attributes #[[ATTR8]] = { builtin allocsize(0) } +// CHECK: attributes #[[ATTR9]] = { nounwind } +//. // CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]} // CHECK: [[META3]] = !{!"llvm.loop.mustprogress"} // CHECK: [[META4]] = !{i64 5689} >From b9223f5be05bf0ccae0b0447bf7a81f484af5e75 Mon Sep 17 00:00:00 2001 From: Michael Liao <michael.hl...@gmail.com> Date: Thu, 25 Jul 2024 22:41:30 -0400 Subject: [PATCH 7/8] update test, srcloc num is changed Created using spr 1.3.4 --- clang/test/CodeGen/convergent.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CodeGen/convergent.cpp b/clang/test/CodeGen/convergent.cpp index 4979380400774..abc6856fc4026 100644 --- a/clang/test/CodeGen/convergent.cpp +++ b/clang/test/CodeGen/convergent.cpp @@ -129,5 +129,5 @@ void foo(int i, A *ap, B *bp) { // CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} // CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]} // CHECK: [[META3]] = !{!"llvm.loop.mustprogress"} -// CHECK: [[META4]] = !{i64 5689} +// CHECK: [[META4]] = !{i64 5791} //. >From ee4cf7119f5c96801601d37087209374e200f084 Mon Sep 17 00:00:00 2001 From: Michael Liao <michael.hl...@gmail.com> Date: Mon, 29 Jul 2024 13:19:46 -0400 Subject: [PATCH 8/8] add new tests and refine doc Created using spr 1.3.4 --- clang/include/clang/Basic/AttrDocs.td | 19 ++++++----- clang/test/CodeGenCUDA/convergent.cu | 47 ++++++++++++++++++--------- 2 files changed, 42 insertions(+), 24 deletions(-) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 7db5d37ccc070..c743b68f70dfb 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -1385,14 +1385,17 @@ Sample usage: def NoConvergentDocs : Documentation { let Category = DocCatFunction; let Content = [{ -The ``noconvergent`` attribute removes the LLVM ``convergent`` attribute if -present. If a statement is marked ``noconvergent`` and contains calls, -``convergent`` attributes on those calls are removed as well. - -In languages following SPMD/SIMT programming model, e.g. CUDA, mark function -declarations and calls with ``convergent`` by default for the correctness. This -``noconvergent`` attribute could be used to remove that ``convergent`` -attribute when it's safe. +This attribute prevents a function from being treated as convergent, which +means that optimizations can only move calls to that function to +control-equivalent blocks. If a statement is marked as ``noconvergent`` and +contains calls, it also prevents those calls from being treated as convergent. +In other words, those calls are not restricted to only being moved to +control-equivalent blocks. + +In languages following SPMD/SIMT programming model, e.g., CUDA/HIP, function +declarations and calls are treated as convergent by default for correctness. +This ``noconvergent`` attribute is helpful for developers to prevent them from +being treated as convergent when it's safe. .. code-block:: c diff --git a/clang/test/CodeGenCUDA/convergent.cu b/clang/test/CodeGenCUDA/convergent.cu index 2c036c7d5470d..b187f3a8a32d6 100644 --- a/clang/test/CodeGenCUDA/convergent.cu +++ b/clang/test/CodeGenCUDA/convergent.cu @@ -25,15 +25,23 @@ __device__ void foo() {} [[clang::noconvergent]] __device__ void bax() {} __host__ __device__ void baz(); + +__host__ __device__ float aliasf0(int) asm("something"); +__host__ __device__ [[clang::noconvergent]] float aliasf1(int) asm("somethingelse"); + // DEVICE-LABEL: define dso_local void @_Z3barv( // DEVICE-SAME: ) #[[ATTR0]] { // DEVICE-NEXT: [[ENTRY:.*:]] // DEVICE-NEXT: [[X:%.*]] = alloca i32, align 4 -// DEVICE-NEXT: call void @_Z3bazv() #[[ATTR3:[0-9]+]] -// DEVICE-NEXT: [[TMP0:%.*]] = call i32 asm "trap +// DEVICE-NEXT: call void @_Z3bazv() #[[ATTR4:[0-9]+]] +// DEVICE-NEXT: [[TMP0:%.*]] = call i32 asm "trap", "=l"() #[[ATTR5:[0-9]+]], !srcloc [[META3:![0-9]+]] // DEVICE-NEXT: store i32 [[TMP0]], ptr [[X]], align 4 -// DEVICE-NEXT: call void asm sideeffect "trap", ""() #[[ATTR3]], !srcloc [[META4:![0-9]+]] -// DEVICE-NEXT: call void asm sideeffect "nop", ""() #[[ATTR5:[0-9]+]], !srcloc [[META5:![0-9]+]] +// DEVICE-NEXT: call void asm sideeffect "trap", ""() #[[ATTR4]], !srcloc [[META4:![0-9]+]] +// DEVICE-NEXT: call void asm sideeffect "nop", ""() #[[ATTR6:[0-9]+]], !srcloc [[META5:![0-9]+]] +// DEVICE-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4 +// DEVICE-NEXT: [[CALL:%.*]] = call contract noundef float @something(i32 noundef [[TMP1]]) #[[ATTR4]] +// DEVICE-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4 +// DEVICE-NEXT: [[CALL1:%.*]] = call contract noundef float @somethingelse(i32 noundef [[TMP2]]) #[[ATTR6]] // DEVICE-NEXT: ret void // // HOST-LABEL: define dso_local void @_Z3barv( @@ -41,18 +49,24 @@ __host__ __device__ void baz(); // HOST-NEXT: [[ENTRY:.*:]] // HOST-NEXT: [[X:%.*]] = alloca i32, align 4 // HOST-NEXT: call void @_Z3bazv() -// HOST-NEXT: [[TMP0:%.*]] = call i32 asm "trap +// HOST-NEXT: [[TMP0:%.*]] = call i32 asm "trap", "=l,~{dirflag},~{fpsr},~{flags}"() #[[ATTR2:[0-9]+]], !srcloc [[META2:![0-9]+]] // HOST-NEXT: store i32 [[TMP0]], ptr [[X]], align 4 // HOST-NEXT: call void asm sideeffect "trap", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR3:[0-9]+]], !srcloc [[META3:![0-9]+]] // HOST-NEXT: call void asm sideeffect "nop", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR3]], !srcloc [[META4:![0-9]+]] +// HOST-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4 +// HOST-NEXT: [[CALL:%.*]] = call contract noundef float @something(i32 noundef [[TMP1]]) +// HOST-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4 +// HOST-NEXT: [[CALL1:%.*]] = call contract noundef float @somethingelse(i32 noundef [[TMP2]]) // HOST-NEXT: ret void // __host__ __device__ void bar() { baz(); int x; - asm ("trap;" : "=l"(x)); + asm ("trap" : "=l"(x)); asm volatile ("trap"); [[clang::noconvergent]] { asm volatile ("nop"); } + aliasf0(x); + aliasf1(x); } @@ -60,25 +74,26 @@ __host__ __device__ void bar() { // DEVICE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32" } // DEVICE: attributes #[[ATTR1]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32" } // DEVICE: attributes #[[ATTR2:[0-9]+]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32" } -// DEVICE: attributes #[[ATTR3]] = { convergent nounwind } -// DEVICE: attributes #[[ATTR4:[0-9]+]] = { convergent nounwind memory(none) } -// DEVICE: attributes #[[ATTR5]] = { nounwind } +// DEVICE: attributes #[[ATTR3:[0-9]+]] = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32" } +// DEVICE: attributes #[[ATTR4]] = { convergent nounwind } +// DEVICE: attributes #[[ATTR5]] = { convergent nounwind memory(none) } +// DEVICE: attributes #[[ATTR6]] = { nounwind } //. // HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } // HOST: attributes #[[ATTR1:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } -// HOST: attributes #[[ATTR2:[0-9]+]] = { nounwind memory(none) } +// HOST: attributes #[[ATTR2]] = { nounwind memory(none) } // HOST: attributes #[[ATTR3]] = { nounwind } //. // DEVICE: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // DEVICE: [[META1:![0-9]+]] = !{i32 4, !"nvvm-reflect-ftz", i32 0} // DEVICE: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -// DEVICE: [[META3:![0-9]+]] = !{i64 2184} -// DEVICE: [[META4]] = !{i64 2220} -// DEVICE: [[META5]] = !{i64 2271} +// DEVICE: [[META3]] = !{i64 3120} +// DEVICE: [[META4]] = !{i64 3155} +// DEVICE: [[META5]] = !{i64 3206} //. // HOST: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // HOST: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -// HOST: [[META2:![0-9]+]] = !{i64 2184} -// HOST: [[META3]] = !{i64 2220} -// HOST: [[META4]] = !{i64 2271} +// HOST: [[META2]] = !{i64 3120} +// HOST: [[META3]] = !{i64 3155} +// HOST: [[META4]] = !{i64 3206} //. _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits