lxfind updated this revision to Diff 310575.
lxfind added a comment.
Herald added subscribers: nikic, kerbowa, jvesely.
Fix all failing tests
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D92661/new/
https://reviews.llvm.org/D92661
Files:
clang/lib/CodeGen/CGExpr.cpp
clang/lib/CodeGen/ItaniumCXXABI.cpp
clang/test/CodeGen/lto-newpm-pipeline.c
clang/test/CodeGenCXX/cxx2a-thread-local-constinit.cpp
clang/test/CodeGenCoroutines/coro-tls.cpp
llvm/include/llvm/IR/IRBuilder.h
llvm/include/llvm/IR/Intrinsics.td
llvm/include/llvm/InitializePasses.h
llvm/include/llvm/Transforms/Scalar.h
llvm/include/llvm/Transforms/Scalar/LowerThreadLocalIntrinsic.h
llvm/lib/IR/IRBuilder.cpp
llvm/lib/Passes/PassBuilder.cpp
llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
llvm/lib/Transforms/Scalar/CMakeLists.txt
llvm/lib/Transforms/Scalar/LowerThreadLocalIntrinsic.cpp
llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
llvm/test/Other/new-pass-manager.ll
llvm/test/Other/new-pm-O0-defaults.ll
llvm/test/Other/new-pm-defaults.ll
llvm/test/Other/opt-O2-pipeline.ll
llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
llvm/test/Other/opt-O3-pipeline.ll
llvm/test/Other/opt-Os-pipeline.ll
llvm/test/Other/pass-pipelines.ll
Index: llvm/test/Other/pass-pipelines.ll
===================================================================
--- llvm/test/Other/pass-pipelines.ll
+++ llvm/test/Other/pass-pipelines.ll
@@ -72,6 +72,7 @@
; Next we break out of the main Function passes inside the CGSCC pipeline with
; a barrier pass.
; CHECK-O2: A No-Op Barrier Pass
+; CHECK-O2-NEXT: Lower ThreadLocal Intrinsics
; CHECK-O2-NEXT: Eliminate Available Externally
; Inferring function attribute should be right after the CGSCC pipeline, before
; any other optimizations/analyses.
Index: llvm/test/Other/opt-Os-pipeline.ll
===================================================================
--- llvm/test/Other/opt-Os-pipeline.ll
+++ llvm/test/Other/opt-Os-pipeline.ll
@@ -173,6 +173,7 @@
; CHECK-NEXT: Optimization Remark Emitter
; CHECK-NEXT: Combine redundant instructions
; CHECK-NEXT: A No-Op Barrier Pass
+; CHECK-NEXT: Lower ThreadLocal Intrinsics
; CHECK-NEXT: Eliminate Available Externally Globals
; CHECK-NEXT: CallGraph Construction
; CHECK-NEXT: Deduce function attributes in RPO
Index: llvm/test/Other/opt-O3-pipeline.ll
===================================================================
--- llvm/test/Other/opt-O3-pipeline.ll
+++ llvm/test/Other/opt-O3-pipeline.ll
@@ -192,6 +192,7 @@
; CHECK-NEXT: Optimization Remark Emitter
; CHECK-NEXT: Combine redundant instructions
; CHECK-NEXT: A No-Op Barrier Pass
+; CHECK-NEXT: Lower ThreadLocal Intrinsics
; CHECK-NEXT: Eliminate Available Externally Globals
; CHECK-NEXT: CallGraph Construction
; CHECK-NEXT: Deduce function attributes in RPO
Index: llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
===================================================================
--- llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
+++ llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
@@ -192,6 +192,7 @@
; CHECK-NEXT: Optimization Remark Emitter
; CHECK-NEXT: Combine redundant instructions
; CHECK-NEXT: A No-Op Barrier Pass
+; CHECK-NEXT: Lower ThreadLocal Intrinsics
; CHECK-NEXT: Eliminate Available Externally Globals
; CHECK-NEXT: CallGraph Construction
; CHECK-NEXT: Deduce function attributes in RPO
Index: llvm/test/Other/opt-O2-pipeline.ll
===================================================================
--- llvm/test/Other/opt-O2-pipeline.ll
+++ llvm/test/Other/opt-O2-pipeline.ll
@@ -187,6 +187,7 @@
; CHECK-NEXT: Optimization Remark Emitter
; CHECK-NEXT: Combine redundant instructions
; CHECK-NEXT: A No-Op Barrier Pass
+; CHECK-NEXT: Lower ThreadLocal Intrinsics
; CHECK-NEXT: Eliminate Available Externally Globals
; CHECK-NEXT: CallGraph Construction
; CHECK-NEXT: Deduce function attributes in RPO
Index: llvm/test/Other/new-pm-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-defaults.ll
+++ llvm/test/Other/new-pm-defaults.ll
@@ -209,6 +209,7 @@
; CHECK-EP-CGSCC-LATE-NEXT: Running pass: NoOpCGSCCPass
; CHECK-O-NEXT: Finished CGSCC pass manager run.
; CHECK-O-NEXT: Finished llvm::Module pass manager run.
+; CHECK-O-NEXT: Running pass: LowerThreadLocalIntrinsicPass
; CHECK-O-NEXT: Running pass: GlobalOptPass
; CHECK-O-NEXT: Running pass: GlobalDCEPass
; CHECK-DEFAULT-NEXT: Running pass: EliminateAvailableExternallyPass
Index: llvm/test/Other/new-pm-O0-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-O0-defaults.ll
+++ llvm/test/Other/new-pm-O0-defaults.ll
@@ -32,6 +32,7 @@
; CHECK-DEFAULT-NEXT: Running analysis: ProfileSummaryAnalysis
; CHECK-MATRIX-NEXT: Running pass: LowerMatrixIntrinsicsPass
; CHECK-MATRIX-NEXT: Running analysis: TargetIRAnalysis
+; CHECK-DEFAULT-NEXT: Running pass: LowerThreadLocalIntrinsicPass
; CHECK-PRE-LINK-NEXT: Running pass: CanonicalizeAliasesPass
; CHECK-PRE-LINK-NEXT: Running pass: NameAnonGlobalPass
; CHECK-THINLTO-NEXT: Running pass: Annotation2MetadataPass
Index: llvm/test/Other/new-pass-manager.ll
===================================================================
--- llvm/test/Other/new-pass-manager.ll
+++ llvm/test/Other/new-pass-manager.ll
@@ -366,6 +366,7 @@
; CHECK-EXT-NEXT: Starting llvm::Function pass manager run.
; CHECK-EXT-NEXT: Running pass: {{.*}}Bye
; CHECK-EXT-NEXT: Finished llvm::Function pass manager run.
+; CHECK-O0-NEXT: Running pass: LowerThreadLocalIntrinsicPass
; CHECK-O0-NEXT: Finished llvm::Module pass manager run
; RUN: opt -disable-output -disable-verify -debug-pass-manager \
Index: llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
+++ llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
@@ -192,6 +192,7 @@
; GCN-O1-NEXT: Optimization Remark Emitter
; GCN-O1-NEXT: Combine redundant instructions
; GCN-O1-NEXT: A No-Op Barrier Pass
+; GCN-O1-NEXT: Lower ThreadLocal Intrinsics
; GCN-O1-NEXT: CallGraph Construction
; GCN-O1-NEXT: Deduce function attributes in RPO
; GCN-O1-NEXT: Global Variable Optimizer
@@ -543,6 +544,7 @@
; GCN-O2-NEXT: Optimization Remark Emitter
; GCN-O2-NEXT: Combine redundant instructions
; GCN-O2-NEXT: A No-Op Barrier Pass
+; GCN-O2-NEXT: Lower ThreadLocal Intrinsics
; GCN-O2-NEXT: Eliminate Available Externally Globals
; GCN-O2-NEXT: CallGraph Construction
; GCN-O2-NEXT: Deduce function attributes in RPO
@@ -907,6 +909,7 @@
; GCN-O3-NEXT: Optimization Remark Emitter
; GCN-O3-NEXT: Combine redundant instructions
; GCN-O3-NEXT: A No-Op Barrier Pass
+; GCN-O3-NEXT: Lower ThreadLocal Intrinsics
; GCN-O3-NEXT: Eliminate Available Externally Globals
; GCN-O3-NEXT: CallGraph Construction
; GCN-O3-NEXT: Deduce function attributes in RPO
Index: llvm/lib/Transforms/Scalar/LowerThreadLocalIntrinsic.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Transforms/Scalar/LowerThreadLocalIntrinsic.cpp
@@ -0,0 +1,75 @@
+//===- LowerThreadLocalIntrinsic.cpp - Lower the threadlocal intrinsic
+//---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers the llvm.threadlocal intrinsic to a direct reference to the
+// thread local variable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LowerThreadLocalIntrinsic.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+static bool lowerThreadLocalIntrinsic(Module &M) {
+ // Cheaply bail out when there is no work to do: the intrinsic is either not
+ // declared in this module or has no users.
+ Function *ThreadLocalDecl =
+ M.getFunction(Intrinsic::getName(Intrinsic::threadlocal));
+ if (!ThreadLocalDecl || ThreadLocalDecl->use_empty())
+ return false;
+
+ for (auto Itr = ThreadLocalDecl->users().begin(),
+ E = ThreadLocalDecl->users().end();
+ Itr != E;) {
+ Instruction *I = cast<Instruction>(*Itr);
+ ++Itr;
+ I->replaceAllUsesWith(I->getOperand(0));
+ I->eraseFromParent();
+ }
+
+ ThreadLocalDecl->eraseFromParent();
+
+ return true;
+}
+
+PreservedAnalyses
+LowerThreadLocalIntrinsicPass::run(Module &M, ModuleAnalysisManager &AM) {
+ if (lowerThreadLocalIntrinsic(M))
+ return PreservedAnalyses::none();
+
+ return PreservedAnalyses::all();
+}
+
+namespace {
+struct LowerThreadLocalIntrinsicLegacyPass : public ModulePass {
+ static char ID;
+ LowerThreadLocalIntrinsicLegacyPass() : ModulePass(ID) {
+ initializeLowerThreadLocalIntrinsicLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override;
+};
+} // namespace
+
+bool LowerThreadLocalIntrinsicLegacyPass::runOnModule(Module &M) {
+ return lowerThreadLocalIntrinsic(M);
+}
+
+char LowerThreadLocalIntrinsicLegacyPass::ID = 0;
+INITIALIZE_PASS(LowerThreadLocalIntrinsicLegacyPass,
+ "lower-threadlocal-intrinsic", "Lower ThreadLocal Intrinsics",
+ false, false)
+
+Pass *llvm::createLowerThreadLocalIntrinsicPass() {
+ return new LowerThreadLocalIntrinsicLegacyPass();
+}
Index: llvm/lib/Transforms/Scalar/CMakeLists.txt
===================================================================
--- llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -50,6 +50,7 @@
LowerExpectIntrinsic.cpp
LowerGuardIntrinsic.cpp
LowerMatrixIntrinsics.cpp
+ LowerThreadLocalIntrinsic.cpp
LowerWidenableCondition.cpp
MakeGuardsExplicit.cpp
MemCpyOptimizer.cpp
Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -558,6 +558,7 @@
}
addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
+ MPM.add(createLowerThreadLocalIntrinsicPass());
if (PrepareForLTO || PrepareForThinLTO) {
MPM.add(createCanonicalizeAliasesPass());
@@ -669,6 +670,7 @@
// pass manager that we are specifically trying to avoid. To prevent this
// we must insert a no-op module pass to reset the pass manager.
MPM.add(createBarrierNoopPass());
+ MPM.add(createLowerThreadLocalIntrinsicPass());
if (RunPartialInlining)
MPM.add(createPartialInliningPass());
Index: llvm/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -181,6 +181,7 @@
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
+#include "llvm/Transforms/Scalar/LowerThreadLocalIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerWidenableCondition.h"
#include "llvm/Transforms/Scalar/MakeGuardsExplicit.h"
#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
@@ -1385,6 +1386,8 @@
// Add the core simplification pipeline.
MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::None));
+ MPM.addPass(LowerThreadLocalIntrinsicPass());
+
// Now add the optimization pipeline.
MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink));
@@ -1836,6 +1839,8 @@
MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
}
+ MPM.addPass(LowerThreadLocalIntrinsicPass());
+
for (auto &C : OptimizerLastEPCallbacks)
C(MPM, Level);
Index: llvm/lib/IR/IRBuilder.cpp
===================================================================
--- llvm/lib/IR/IRBuilder.cpp
+++ llvm/lib/IR/IRBuilder.cpp
@@ -452,6 +452,12 @@
return createCallHelper(TheFn, Ops, this);
}
+CallInst *IRBuilderBase::CreateThreadLocal(Value *Ptr) {
+ return CreateIntrinsic(
+ llvm::Intrinsic::threadlocal, llvm::None,
+ {CreatePointerBitCastOrAddrSpaceCast(Ptr, getInt8PtrTy())});
+}
+
CallInst *
IRBuilderBase::CreateAssumption(Value *Cond,
ArrayRef<OperandBundleDef> OpBundles) {
Index: llvm/include/llvm/Transforms/Scalar/LowerThreadLocalIntrinsic.h
===================================================================
--- /dev/null
+++ llvm/include/llvm/Transforms/Scalar/LowerThreadLocalIntrinsic.h
@@ -0,0 +1,29 @@
+//===--- LowerThreadLocalIntrinsic.h - Lower the threadlocal intrinsic
+//---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers the llvm.threadlocal intrinsic to a direct reference to the
+// thread local variable.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_SCALAR_LOWERTHREADLOCALINTRINSIC_H
+#define LLVM_TRANSFORMS_SCALAR_LOWERTHREADLOCALINTRINSIC_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct LowerThreadLocalIntrinsicPass
+ : PassInfoMixin<LowerThreadLocalIntrinsicPass> {
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ static bool isRequired() { return true; }
+};
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_LOWERTHREADLOCALINTRINSIC_H
Index: llvm/include/llvm/Transforms/Scalar.h
===================================================================
--- llvm/include/llvm/Transforms/Scalar.h
+++ llvm/include/llvm/Transforms/Scalar.h
@@ -383,6 +383,13 @@
//
Pass *createLowerMatrixIntrinsicsMinimalPass();
+//===----------------------------------------------------------------------===//
+//
+// createLowerThreadLocalIntrinsicPass - Lower llvm.threadlocal intrinsic calls
+// to direct references to the thread_local variable.
+//
+Pass *createLowerThreadLocalIntrinsicPass();
+
//===----------------------------------------------------------------------===//
//
// LowerWidenableCondition - Lower widenable condition to i1 true.
Index: llvm/include/llvm/InitializePasses.h
===================================================================
--- llvm/include/llvm/InitializePasses.h
+++ llvm/include/llvm/InitializePasses.h
@@ -268,6 +268,7 @@
void initializeLowerIntrinsicsPass(PassRegistry&);
void initializeLowerInvokeLegacyPassPass(PassRegistry&);
void initializeLowerSwitchLegacyPassPass(PassRegistry &);
+void initializeLowerThreadLocalIntrinsicLegacyPassPass(PassRegistry &);
void initializeLowerTypeTestsPass(PassRegistry&);
void initializeLowerMatrixIntrinsicsLegacyPassPass(PassRegistry &);
void initializeLowerMatrixIntrinsicsMinimalLegacyPassPass(PassRegistry &);
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -1306,6 +1306,10 @@
def int_ptrmask: DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_anyint_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
+
+// Intrinsic to obtain the address of a thread_local variable.
+def int_threadlocal : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty]>;
+
//===---------------- Vector Predication Intrinsics --------------===//
// Speculatable Binary operators
Index: llvm/include/llvm/IR/IRBuilder.h
===================================================================
--- llvm/include/llvm/IR/IRBuilder.h
+++ llvm/include/llvm/IR/IRBuilder.h
@@ -751,6 +751,9 @@
/// If the pointer isn't i8* it will be converted.
CallInst *CreateInvariantStart(Value *Ptr, ConstantInt *Size = nullptr);
+ /// Create a call to the llvm.threadlocal intrinsic on \p Ptr.
+ CallInst *CreateThreadLocal(Value *Ptr);
+
/// Create a call to Masked Load intrinsic
LLVM_ATTRIBUTE_DEPRECATED(
CallInst *CreateMaskedLoad(Value *Ptr, unsigned Alignment, Value *Mask,
Index: clang/test/CodeGenCoroutines/coro-tls.cpp
===================================================================
--- /dev/null
+++ clang/test/CodeGenCoroutines/coro-tls.cpp
@@ -0,0 +1,54 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++14 -O3 -emit-llvm %s -o - | FileCheck %s
+
+#include "Inputs/coroutine.h"
+
+namespace coro = std::experimental::coroutines_v1;
+
+struct awaitable {
+ bool await_ready() { return false; }
+ void await_suspend(coro::coroutine_handle<> h);
+ void await_resume() {}
+};
+awaitable switch_to_new_thread();
+
+struct task {
+ struct promise_type {
+ task get_return_object() { return {}; }
+ coro::suspend_never initial_suspend() { return {}; }
+ coro::suspend_never final_suspend() noexcept { return {}; }
+ void return_void() {}
+ void unhandled_exception() {}
+ };
+};
+
+void check(int *i, int *j);
+
+thread_local int tls_variable = 0;
+
+bool non_coroutine() {
+ auto *i = &tls_variable;
+ auto *j = &tls_variable;
+ return i == j;
+}
+
+// CHECK-LABEL: define zeroext i1 @_Z13non_coroutinev()
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i1 true
+
+
+task resuming_on_new_thread() {
+ auto *i = &tls_variable;
+ co_await switch_to_new_thread();
+ auto *j = &tls_variable;
+ check(i, j);
+}
+
+// This test checks that two arguments passed to "check" will be different.
+// The first one will be a value loaded from the frame, and the second is
+// the current address of tls_variable.
+
+// CHECK-LABEL: define internal fastcc void @_Z22resuming_on_new_threadv.resume
+// CHECK: %[[RELOAD_ADDR:.+reload.addr]] = getelementptr inbounds %_Z22resuming_on_new_threadv.Frame, %_Z22resuming_on_new_threadv.Frame* %FramePtr
+// CHECK: %[[TMP:.+]] = bitcast i8** %[[RELOAD_ADDR]] to i32**
+// CHECK: %[[RELOAD:.+]] = load i32*, i32** %[[TMP]]
+// CHECK: tail call void @_Z5checkPiS_(i32* %[[RELOAD]], i32* nonnull @tls_variable)
Index: clang/test/CodeGenCXX/cxx2a-thread-local-constinit.cpp
===================================================================
--- clang/test/CodeGenCXX/cxx2a-thread-local-constinit.cpp
+++ clang/test/CodeGenCXX/cxx2a-thread-local-constinit.cpp
@@ -31,7 +31,8 @@
// CHECK-LABEL: define i32 @_Z5get_bv()
// CHECK-NOT: call
-// CHECK: load i32, i32* @b
+// CHECK: %[[TMP:.+]] = bitcast i8* bitcast (i32* @b to i8*) to i32*
+// CHECK-NEXT: load i32, i32* %[[TMP]]
// CHECK-NOT: call
// CHECK: }
int get_b() { return b; }
@@ -52,7 +53,8 @@
// LINUX-LABEL: define weak_odr {{.*}} @_ZTW1c()
// CHECK-NOT: br i1
// CHECK-NOT: call
-// CHECK: ret i32* @c
+// CHECK: %[[TMP:.+]] = bitcast i8* bitcast (i32* @c to i8*) to i32*
+// CHECK: ret i32* %[[TMP]]
// CHECK: }
thread_local int c = 0;
Index: clang/test/CodeGen/lto-newpm-pipeline.c
===================================================================
--- clang/test/CodeGen/lto-newpm-pipeline.c
+++ clang/test/CodeGen/lto-newpm-pipeline.c
@@ -29,6 +29,7 @@
// CHECK-FULL-O0: Running pass: AlwaysInlinerPass
// CHECK-FULL-O0-NEXT: Running analysis: InnerAnalysisManagerProxy
// CHECK-FULL-O0-NEXT: Running analysis: ProfileSummaryAnalysis
+// CHECK-FULL-O0-NEXT: Running pass: LowerThreadLocalIntrinsicPass
// CHECK-FULL-O0-NEXT: Running pass: CanonicalizeAliasesPass
// CHECK-FULL-O0-NEXT: Running pass: NameAnonGlobalPass
// CHECK-FULL-O0-NEXT: Running pass: BitcodeWriterPass
@@ -38,6 +39,7 @@
// CHECK-THIN-O0: Running pass: AlwaysInlinerPass
// CHECK-THIN-O0-NEXT: Running analysis: InnerAnalysisManagerProxy
// CHECK-THIN-O0-NEXT: Running analysis: ProfileSummaryAnalysis
+// CHECK-THIN-O0-NEXT: Running pass: LowerThreadLocalIntrinsicPass
// CHECK-THIN-O0-NEXT: Running pass: CanonicalizeAliasesPass
// CHECK-THIN-O0-NEXT: Running pass: NameAnonGlobalPass
// CHECK-THIN-O0-NEXT: Running pass: ThinLTOBitcodeWriterPass
Index: clang/lib/CodeGen/ItaniumCXXABI.cpp
===================================================================
--- clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -2915,9 +2915,11 @@
Builder.SetInsertPoint(ExitBB);
}
+ llvm::Value *Val = Var;
+ if (CGM.getLangOpts().Coroutines)
+ Val = Builder.CreateThreadLocal(Val);
// For a reference, the result of the wrapper function is a pointer to
// the referenced object.
- llvm::Value *Val = Var;
if (VD->getType()->isReferenceType()) {
CharUnits Align = CGM.getContext().getDeclAlign(VD);
Val = Builder.CreateAlignedLoad(Val, Align);
Index: clang/lib/CodeGen/CGExpr.cpp
===================================================================
--- clang/lib/CodeGen/CGExpr.cpp
+++ clang/lib/CodeGen/CGExpr.cpp
@@ -2518,7 +2518,8 @@
const Expr *E, const VarDecl *VD) {
QualType T = E->getType();
- // If it's thread_local, emit a call to its wrapper function instead.
+ // If it's a dynamic thread_local, and the ABI requires a wrapper function,
+ // emit a call to its wrapper function instead.
if (VD->getTLSKind() == VarDecl::TLS_Dynamic &&
CGF.CGM.getCXXABI().usesThreadWrapperFunction(VD))
return CGF.CGM.getCXXABI().EmitThreadLocalVarDeclLValue(CGF, VD, T);
@@ -2530,15 +2531,20 @@
return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl);
}
+ bool ShouldEmitPrivateCopy = CGF.getLangOpts().OpenMP &&
+ !CGF.getLangOpts().OpenMPSimd &&
+ VD->hasAttr<OMPThreadPrivateDeclAttr>();
llvm::Value *V = CGF.CGM.GetAddrOfGlobalVar(VD);
+ if (VD->getTLSKind() != VarDecl::TLS_None && !ShouldEmitPrivateCopy &&
+ CGF.getLangOpts().Coroutines)
+ V = CGF.Builder.CreateThreadLocal(V);
llvm::Type *RealVarTy = CGF.getTypes().ConvertTypeForMem(VD->getType());
V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy);
CharUnits Alignment = CGF.getContext().getDeclAlign(VD);
Address Addr(V, Alignment);
// Emit reference to the private copy of the variable if it is an OpenMP
// threadprivate variable.
- if (CGF.getLangOpts().OpenMP && !CGF.getLangOpts().OpenMPSimd &&
- VD->hasAttr<OMPThreadPrivateDeclAttr>()) {
+ if (ShouldEmitPrivateCopy) {
return EmitThreadPrivateVarDeclLValue(CGF, VD, T, Addr, RealVarTy,
E->getExprLoc());
}
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits