Prince781 updated this revision to Diff 215695.
Prince781 added a comment.
Herald added a subscriber: mgrang.
I've updated the patch to initialize, in the proper order, all foreign static
TLS variables and the variables they depend on for initialization. I've also
cleaned up the patch a bit.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D66122/new/
https://reviews.llvm.org/D66122
Files:
clang/lib/CodeGen/CGExpr.cpp
clang/lib/CodeGen/CodeGenFunction.cpp
clang/lib/CodeGen/CodeGenFunction.h
clang/test/CodeGenCXX/cxx11-thread-local.cpp
Index: clang/test/CodeGenCXX/cxx11-thread-local.cpp
===================================================================
--- clang/test/CodeGenCXX/cxx11-thread-local.cpp
+++ clang/test/CodeGenCXX/cxx11-thread-local.cpp
@@ -268,6 +268,33 @@
return this->n;
}
+namespace static_tls_in_lambda {
+ struct X {
+ X() {}
+ };
+
+
+ X (*f())() {
+ static thread_local X x;
+
+ return [] { return x; };
+ }
+
+ auto y = f();
+
+ void g() { y(); }
+
+ void bar(X**, X**, X**);
+ void baz(void());
+ void f2() {
+ thread_local X x;
+ thread_local X* p = &x;
+ thread_local X* q = p;
+ thread_local X* r = q;
+ baz([]{bar(&p, &q, &r);});
+ }
+}
+
namespace {
thread_local int anon_i{1};
}
@@ -303,6 +330,42 @@
// CHECK: store i64 1, i64* @_ZGVN1XIiE1mE
// CHECK: br label
+// CHECK: define internal void @"_ZZN20static_tls_in_lambda1fEvENK3$_1clEv"
+// CHECK: %[[static_tls_guard_val:.*]] = load i8, i8* @_ZGVZN20static_tls_in_lambda1fEvE1x
+// CHECK: %[[static_tls_guard_init:.*]] = icmp eq i8 %[[static_tls_guard_val]], 0
+// CHECK: br i1 %[[static_tls_guard_init]],
+// CHECK: call void @_ZN20static_tls_in_lambda1XC1Ev(%"struct.static_tls_in_lambda::X"* @_ZZN20static_tls_in_lambda1fEvE1x)
+// CHECK: store i8 1, i8* @_ZGVZN20static_tls_in_lambda1fEvE1x, align 1
+
+// CHECK: define internal void @"_ZZN20static_tls_in_lambda2f2EvENK3$_2clEv"
+// init x
+// CHECK: %[[static_tls_guard_val:.*]] = load i8, i8* @_ZGVZN20static_tls_in_lambda2f2EvE1x
+// CHECK: %[[static_tls_guard_init:.*]] = icmp eq i8 %[[static_tls_guard_val]], 0
+// CHECK: br i1 %[[static_tls_guard_init]],
+// CHECK: call void @_ZN20static_tls_in_lambda1XC1Ev(%"struct.static_tls_in_lambda::X"* @_ZZN20static_tls_in_lambda2f2EvE1x)
+// CHECK: store i8 1, i8* @_ZGVZN20static_tls_in_lambda2f2EvE1x, align 1
+// init p
+// CHECK: %[[static_tls_guard_val:.*]] = load i8, i8* @_ZGVZN20static_tls_in_lambda2f2EvE1p
+// CHECK: %[[static_tls_guard_init:.*]] = icmp eq i8 %[[static_tls_guard_val]], 0
+// CHECK: br i1 %[[static_tls_guard_init]],
+// CHECK: store %"struct.static_tls_in_lambda::X"* @_ZZN20static_tls_in_lambda2f2EvE1x, %"struct.static_tls_in_lambda::X"** @_ZZN20static_tls_in_lambda2f2EvE1p
+// CHECK: store i8 1, i8* @_ZGVZN20static_tls_in_lambda2f2EvE1p, align 1
+// init q
+// CHECK: %[[static_tls_guard_val:.*]] = load i8, i8* @_ZGVZN20static_tls_in_lambda2f2EvE1q
+// CHECK: %[[static_tls_guard_init:.*]] = icmp eq i8 %[[static_tls_guard_val]], 0
+// CHECK: br i1 %[[static_tls_guard_init]],
+// CHECK: %[[static_tls_var_prev:.*]] = load %"struct.static_tls_in_lambda::X"*, %"struct.static_tls_in_lambda::X"** @_ZZN20static_tls_in_lambda2f2EvE1p, align 8
+// CHECK: store %"struct.static_tls_in_lambda::X"* %[[static_tls_var_prev]], %"struct.static_tls_in_lambda::X"** @_ZZN20static_tls_in_lambda2f2EvE1q, align 8
+// CHECK: store i8 1, i8* @_ZGVZN20static_tls_in_lambda2f2EvE1q, align 1
+// init r
+// CHECK: %[[static_tls_guard_val:.*]] = load i8, i8* @_ZGVZN20static_tls_in_lambda2f2EvE1r
+// CHECK: %[[static_tls_guard_init:.*]] = icmp eq i8 %[[static_tls_guard_val]], 0
+// CHECK: br i1 %[[static_tls_guard_init]],
+// CHECK: %[[static_tls_var_prev:.*]] = load %"struct.static_tls_in_lambda::X"*, %"struct.static_tls_in_lambda::X"** @_ZZN20static_tls_in_lambda2f2EvE1q, align 8
+// CHECK: store %"struct.static_tls_in_lambda::X"* %[[static_tls_var_prev]], %"struct.static_tls_in_lambda::X"** @_ZZN20static_tls_in_lambda2f2EvE1r, align 8
+// CHECK: store i8 1, i8* @_ZGVZN20static_tls_in_lambda2f2EvE1r, align 1
+
+
// CHECK: define {{.*}}@[[GLOBAL_INIT:.*]]()
// CHECK: call void @[[C_INIT]]()
// CHECK: call void @[[E_INIT]]()
Index: clang/lib/CodeGen/CodeGenFunction.h
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -467,6 +467,10 @@
/// should emit cleanups.
bool CurFuncIsThunk = false;
+ /// Static thread-local variables referenced by this function that were
+ /// declared in a parent function.
+ llvm::SmallSet<const VarDecl *, 32> ForeignStaticTLSVars;
+
/// In ARC, whether we should autorelease the return value.
bool AutoreleaseResult = false;
Index: clang/lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.cpp
+++ clang/lib/CodeGen/CodeGenFunction.cpp
@@ -31,6 +31,7 @@
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/Frontend/FrontendDiagnostic.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Intrinsics.h"
@@ -295,6 +296,37 @@
delete BB;
}
+/// Collect every variable referenced, directly or transitively through other
+/// variables' initializers, by the initializer of \p VD (empty if none).
+static llvm::SmallSet<const VarDecl *, 32>
+enumerateVarInitDependencies(const VarDecl *VD) {
+ llvm::SmallSet<const VarDecl *, 32> deps;
+
+ if (const auto *InitExpr = VD->getInit()) {
+ std::deque<const Stmt *> frontier;
+
+ for (const auto *s : InitExpr->children())
+ frontier.emplace_back(s);
+
+ while (!frontier.empty()) {
+ auto x = frontier.front();
+ frontier.pop_front();
+ if (x->getStmtClass() == clang::Stmt::DeclRefExprClass) {
+ if (const auto *V = dyn_cast<VarDecl>(cast<DeclRefExpr>(x)->getDecl())) {
+ deps.insert(V);
+ auto V_Refs = enumerateVarInitDependencies(V);
+ deps.insert(V_Refs.begin(), V_Refs.end());
+ }
+ } else {
+ for (const auto *s : x->children())
+ frontier.emplace_back(s);
+ }
+ }
+ }
+
+ return deps;
+}
+
void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
assert(BreakContinueStack.empty() &&
"mismatched push/pop in break/continue stack!");
@@ -384,6 +416,97 @@
CGBuilderTy(*this, AllocaInsertPt).CreateCall(FrameEscapeFn, EscapeArgs);
}
+ // Emit initializers for static local variables that this function references
+ // but that are declared in another function. Such variables may still be
+ // uninitialized on entry if this function executes on a separate thread.
+ // For example, when we're emitting the lambda in the following code:
+ //
+ // class Object {
+ // int init;
+ // Object() : init(1) {}
+ // };
+ //
+ // main() {
+ // static thread_local Object var;
+ // std::thread([] {
+ // ...emit initializer for var here...
+ // });
+ // }
+ //
+ // or another example:
+ //
+ // main() {
+ // static Object var;
+ // #pragma omp threadprivate(var)
+ // #pragma omp parallel
+ // {
+ // ...emit initializer for var here...
+ // }
+ // }
+ llvm::SmallSet<const VarDecl *, 32> UniqueVarsToInit;
+ llvm::SmallVector<const VarDecl *, 32> OrderedVarInits;
+ llvm::DenseMap<const VarDecl *, llvm::SmallSet<const VarDecl *, 4>> VarInitDependencies;
+
+ for (const VarDecl *VD : ForeignStaticTLSVars) {
+ llvm::SmallVector<const VarDecl *, 4> Frontier = {VD};
+
+ // NOTE(review): skip the dependency walk for CUDA __shared__ variables, whose initializers are not emitted below -- confirm this is intended.
+ bool isCudaSharedVar = getLangOpts().CUDA && getLangOpts().CUDAIsDevice &&
+ VD->hasAttr<CUDASharedAttr>();
+ while (!isCudaSharedVar && !Frontier.empty()) {
+ auto *Child = Frontier.pop_back_val();
+
+ if (UniqueVarsToInit.find(Child) == UniqueVarsToInit.end()) {
+ for (auto *N : enumerateVarInitDependencies(Child)) {
+ Frontier.push_back(N);
+ VarInitDependencies[Child].insert(N);
+ }
+ UniqueVarsToInit.insert(Child);
+ }
+ }
+ }
+
+ for (const VarDecl *VD : UniqueVarsToInit)
+ OrderedVarInits.push_back(VD);
+
+ llvm::sort(OrderedVarInits.begin(), OrderedVarInits.end(),
+ [&VarInitDependencies](const VarDecl *a, const VarDecl *b) {
+ auto a_deps = VarInitDependencies[a];
+ // A < B iff B \in VarInitDependencies(A)
+ // the order is reversed because our codegen reverses the order of
+ // initializers
+ return a_deps.find(b) != a_deps.end();
+ });
+
+ for (const VarDecl *VD : OrderedVarInits) {
+ // CUDA's local and local static __shared__ variables should not
+ // have any non-empty initializers. This is ensured by Sema.
+ // Whatever initializer such variable may have when it gets here is
+ // a no-op and should not be emitted.
+ bool isCudaSharedVar = getLangOpts().CUDA && getLangOpts().CUDAIsDevice &&
+ VD->hasAttr<CUDASharedAttr>();
+ // Emit the initializer, if any, unless this is a CUDA __shared__ variable.
+ if (VD->getInit() && !isCudaSharedVar) {
+ auto *GV = dyn_cast<llvm::GlobalVariable>(CGM.getStaticLocalDeclAddress(VD));
+ auto IP = Builder.saveAndClearIP();
+ llvm::BasicBlock *BBParent = AllocaInsertPt->getParent();
+ llvm::Instruction *INext = AllocaInsertPt->getNextNonDebugInstruction();
+ llvm::BasicBlock *BBNext = BBParent->splitBasicBlock(INext, BBParent->getName() + ".next");
+
+ INext = AllocaInsertPt->getNextNonDebugInstruction();
+
+ Builder.SetInsertPoint(BBParent);
+ // the global variable shouldn't change, as this function should've
+ // been called first when generating the parent function
+ AddInitializerToStaticVarDecl(*VD, GV);
+ if (INext != BBParent->getTerminator()) {
+ INext->eraseFromParent();
+ Builder.CreateBr(BBNext);
+ }
+ Builder.restoreIP(IP);
+ }
+ }
+
// Remove the AllocaInsertPt instruction, which is just a convenience for us.
llvm::Instruction *Ptr = AllocaInsertPt;
AllocaInsertPt = nullptr;
Index: clang/lib/CodeGen/CGExpr.cpp
===================================================================
--- clang/lib/CodeGen/CGExpr.cpp
+++ clang/lib/CodeGen/CGExpr.cpp
@@ -2614,7 +2614,12 @@
*VD, CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false)),
getContext().getDeclAlign(VD));
- // No other cases for now.
+ // Add to ForeignStaticTLSVars if this is a TLS_Dynamic thread-local
+ // variable declared in a different function.
+ const Decl *DC = cast<Decl>(VD->getDeclContext());
+ if (DC != CurGD.getDecl() && VD->getTLSKind() == VarDecl::TLS_Dynamic)
+ ForeignStaticTLSVars.insert(VD);
+ // No other cases for now.
} else {
llvm_unreachable("DeclRefExpr for Decl not entered in LocalDeclMap?");
}
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits