lxfind updated this revision to Diff 310575.
lxfind added a comment.
Herald added subscribers: nikic, kerbowa, jvesely.

Fix all failing tests


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D92661/new/

https://reviews.llvm.org/D92661

Files:
  clang/lib/CodeGen/CGExpr.cpp
  clang/lib/CodeGen/ItaniumCXXABI.cpp
  clang/test/CodeGen/lto-newpm-pipeline.c
  clang/test/CodeGenCXX/cxx2a-thread-local-constinit.cpp
  clang/test/CodeGenCoroutines/coro-tls.cpp
  llvm/include/llvm/IR/IRBuilder.h
  llvm/include/llvm/IR/Intrinsics.td
  llvm/include/llvm/InitializePasses.h
  llvm/include/llvm/Transforms/Scalar.h
  llvm/include/llvm/Transforms/Scalar/LowerThreadLocalIntrinsic.h
  llvm/lib/IR/IRBuilder.cpp
  llvm/lib/Passes/PassBuilder.cpp
  llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
  llvm/lib/Transforms/Scalar/CMakeLists.txt
  llvm/lib/Transforms/Scalar/LowerThreadLocalIntrinsic.cpp
  llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
  llvm/test/Other/new-pass-manager.ll
  llvm/test/Other/new-pm-O0-defaults.ll
  llvm/test/Other/new-pm-defaults.ll
  llvm/test/Other/opt-O2-pipeline.ll
  llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
  llvm/test/Other/opt-O3-pipeline.ll
  llvm/test/Other/opt-Os-pipeline.ll
  llvm/test/Other/pass-pipelines.ll

Index: llvm/test/Other/pass-pipelines.ll
===================================================================
--- llvm/test/Other/pass-pipelines.ll
+++ llvm/test/Other/pass-pipelines.ll
@@ -72,6 +72,7 @@
 ; Next we break out of the main Function passes inside the CGSCC pipeline with
 ; a barrier pass.
 ; CHECK-O2: A No-Op Barrier Pass
+; CHECK-O2-NEXT: Lower ThreadLocal Intrinsics
 ; CHECK-O2-NEXT: Eliminate Available Externally
 ; Inferring function attribute should be right after the CGSCC pipeline, before
 ; any other optimizations/analyses.
Index: llvm/test/Other/opt-Os-pipeline.ll
===================================================================
--- llvm/test/Other/opt-Os-pipeline.ll
+++ llvm/test/Other/opt-Os-pipeline.ll
@@ -173,6 +173,7 @@
 ; CHECK-NEXT:         Optimization Remark Emitter
 ; CHECK-NEXT:         Combine redundant instructions
 ; CHECK-NEXT:     A No-Op Barrier Pass
+; CHECK-NEXT:     Lower ThreadLocal Intrinsics
 ; CHECK-NEXT:     Eliminate Available Externally Globals
 ; CHECK-NEXT:     CallGraph Construction
 ; CHECK-NEXT:     Deduce function attributes in RPO
Index: llvm/test/Other/opt-O3-pipeline.ll
===================================================================
--- llvm/test/Other/opt-O3-pipeline.ll
+++ llvm/test/Other/opt-O3-pipeline.ll
@@ -192,6 +192,7 @@
 ; CHECK-NEXT:         Optimization Remark Emitter
 ; CHECK-NEXT:         Combine redundant instructions
 ; CHECK-NEXT:     A No-Op Barrier Pass
+; CHECK-NEXT:     Lower ThreadLocal Intrinsics
 ; CHECK-NEXT:     Eliminate Available Externally Globals
 ; CHECK-NEXT:     CallGraph Construction
 ; CHECK-NEXT:     Deduce function attributes in RPO
Index: llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
===================================================================
--- llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
+++ llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
@@ -192,6 +192,7 @@
 ; CHECK-NEXT:         Optimization Remark Emitter
 ; CHECK-NEXT:         Combine redundant instructions
 ; CHECK-NEXT:     A No-Op Barrier Pass
+; CHECK-NEXT:     Lower ThreadLocal Intrinsics
 ; CHECK-NEXT:     Eliminate Available Externally Globals
 ; CHECK-NEXT:     CallGraph Construction
 ; CHECK-NEXT:     Deduce function attributes in RPO
Index: llvm/test/Other/opt-O2-pipeline.ll
===================================================================
--- llvm/test/Other/opt-O2-pipeline.ll
+++ llvm/test/Other/opt-O2-pipeline.ll
@@ -187,6 +187,7 @@
 ; CHECK-NEXT:         Optimization Remark Emitter
 ; CHECK-NEXT:         Combine redundant instructions
 ; CHECK-NEXT:     A No-Op Barrier Pass
+; CHECK-NEXT:     Lower ThreadLocal Intrinsics
 ; CHECK-NEXT:     Eliminate Available Externally Globals
 ; CHECK-NEXT:     CallGraph Construction
 ; CHECK-NEXT:     Deduce function attributes in RPO
Index: llvm/test/Other/new-pm-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-defaults.ll
+++ llvm/test/Other/new-pm-defaults.ll
@@ -209,6 +209,7 @@
 ; CHECK-EP-CGSCC-LATE-NEXT: Running pass: NoOpCGSCCPass
 ; CHECK-O-NEXT: Finished CGSCC pass manager run.
 ; CHECK-O-NEXT: Finished llvm::Module pass manager run.
+; CHECK-O-NEXT: Running pass: LowerThreadLocalIntrinsicPass
 ; CHECK-O-NEXT: Running pass: GlobalOptPass
 ; CHECK-O-NEXT: Running pass: GlobalDCEPass
 ; CHECK-DEFAULT-NEXT: Running pass: EliminateAvailableExternallyPass
Index: llvm/test/Other/new-pm-O0-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-O0-defaults.ll
+++ llvm/test/Other/new-pm-O0-defaults.ll
@@ -32,6 +32,7 @@
 ; CHECK-DEFAULT-NEXT: Running analysis: ProfileSummaryAnalysis
 ; CHECK-MATRIX-NEXT: Running pass: LowerMatrixIntrinsicsPass
 ; CHECK-MATRIX-NEXT: Running analysis: TargetIRAnalysis
+; CHECK-DEFAULT-NEXT: Running pass: LowerThreadLocalIntrinsicPass
 ; CHECK-PRE-LINK-NEXT: Running pass: CanonicalizeAliasesPass
 ; CHECK-PRE-LINK-NEXT: Running pass: NameAnonGlobalPass
 ; CHECK-THINLTO-NEXT: Running pass: Annotation2MetadataPass
Index: llvm/test/Other/new-pass-manager.ll
===================================================================
--- llvm/test/Other/new-pass-manager.ll
+++ llvm/test/Other/new-pass-manager.ll
@@ -366,6 +366,7 @@
 ; CHECK-EXT-NEXT: Starting llvm::Function pass manager run.
 ; CHECK-EXT-NEXT: Running pass: {{.*}}Bye
 ; CHECK-EXT-NEXT: Finished llvm::Function pass manager run.
+; CHECK-O0-NEXT: Running pass: LowerThreadLocalIntrinsicPass
 ; CHECK-O0-NEXT: Finished llvm::Module pass manager run
 
 ; RUN: opt -disable-output -disable-verify -debug-pass-manager \
Index: llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
+++ llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
@@ -192,6 +192,7 @@
 ; GCN-O1-NEXT:       Optimization Remark Emitter
 ; GCN-O1-NEXT:       Combine redundant instructions
 ; GCN-O1-NEXT:     A No-Op Barrier Pass
+; GCN-O1-NEXT:     Lower ThreadLocal Intrinsics
 ; GCN-O1-NEXT:     CallGraph Construction
 ; GCN-O1-NEXT:     Deduce function attributes in RPO
 ; GCN-O1-NEXT:     Global Variable Optimizer
@@ -543,6 +544,7 @@
 ; GCN-O2-NEXT:       Optimization Remark Emitter
 ; GCN-O2-NEXT:       Combine redundant instructions
 ; GCN-O2-NEXT:     A No-Op Barrier Pass
+; GCN-O2-NEXT:     Lower ThreadLocal Intrinsics
 ; GCN-O2-NEXT:     Eliminate Available Externally Globals
 ; GCN-O2-NEXT:     CallGraph Construction
 ; GCN-O2-NEXT:     Deduce function attributes in RPO
@@ -907,6 +909,7 @@
 ; GCN-O3-NEXT:       Optimization Remark Emitter
 ; GCN-O3-NEXT:       Combine redundant instructions
 ; GCN-O3-NEXT:     A No-Op Barrier Pass
+; GCN-O3-NEXT:     Lower ThreadLocal Intrinsics
 ; GCN-O3-NEXT:     Eliminate Available Externally Globals
 ; GCN-O3-NEXT:     CallGraph Construction
 ; GCN-O3-NEXT:     Deduce function attributes in RPO
Index: llvm/lib/Transforms/Scalar/LowerThreadLocalIntrinsic.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Transforms/Scalar/LowerThreadLocalIntrinsic.cpp
@@ -0,0 +1,75 @@
+//===- LowerThreadLocalIntrinsic.cpp - Lower the threadlocal intrinsic
+//---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers the llvm.threadlocal intrinsic to a direct reference to the
+// thread local variable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LowerThreadLocalIntrinsic.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+static bool lowerThreadLocalIntrinsic(Module &M) {
+  // Cheaply bail out when there is no work to do: the intrinsic is either not
+  // declared in this module or it has no uses.
+  Function *ThreadLocalDecl =
+      M.getFunction(Intrinsic::getName(Intrinsic::threadlocal));
+  if (!ThreadLocalDecl || ThreadLocalDecl->use_empty())
+    return false;
+
+  for (auto Itr = ThreadLocalDecl->users().begin(),
+            E = ThreadLocalDecl->users().end();
+       Itr != E;) {
+    Instruction *I = cast<Instruction>(*Itr);
+    ++Itr;
+    I->replaceAllUsesWith(I->getOperand(0));
+    I->eraseFromParent();
+  }
+
+  ThreadLocalDecl->eraseFromParent();
+
+  return true;
+}
+
+PreservedAnalyses
+LowerThreadLocalIntrinsicPass::run(Module &M, ModuleAnalysisManager &AM) {
+  if (lowerThreadLocalIntrinsic(M))
+    return PreservedAnalyses::none();
+
+  return PreservedAnalyses::all();
+}
+
+namespace {
+struct LowerThreadLocalIntrinsicLegacyPass : public ModulePass {
+  static char ID;
+  LowerThreadLocalIntrinsicLegacyPass() : ModulePass(ID) {
+    initializeLowerThreadLocalIntrinsicLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  bool runOnModule(Module &M) override;
+};
+} // namespace
+
+bool LowerThreadLocalIntrinsicLegacyPass::runOnModule(Module &M) {
+  return lowerThreadLocalIntrinsic(M);
+}
+
+char LowerThreadLocalIntrinsicLegacyPass::ID = 0;
+INITIALIZE_PASS(LowerThreadLocalIntrinsicLegacyPass,
+                "lower-threadlocal-intrinsic", "Lower ThreadLocal Intrinsics",
+                false, false)
+
+Pass *llvm::createLowerThreadLocalIntrinsicPass() {
+  return new LowerThreadLocalIntrinsicLegacyPass();
+}
Index: llvm/lib/Transforms/Scalar/CMakeLists.txt
===================================================================
--- llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -50,6 +50,7 @@
   LowerExpectIntrinsic.cpp
   LowerGuardIntrinsic.cpp
   LowerMatrixIntrinsics.cpp
+  LowerThreadLocalIntrinsic.cpp
   LowerWidenableCondition.cpp
   MakeGuardsExplicit.cpp
   MemCpyOptimizer.cpp
Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -558,6 +558,7 @@
     }
 
     addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
+    MPM.add(createLowerThreadLocalIntrinsicPass());
 
     if (PrepareForLTO || PrepareForThinLTO) {
       MPM.add(createCanonicalizeAliasesPass());
@@ -669,6 +670,7 @@
   // pass manager that we are specifically trying to avoid. To prevent this
   // we must insert a no-op module pass to reset the pass manager.
   MPM.add(createBarrierNoopPass());
+  MPM.add(createLowerThreadLocalIntrinsicPass());
 
   if (RunPartialInlining)
     MPM.add(createPartialInliningPass());
Index: llvm/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -181,6 +181,7 @@
 #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
 #include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
 #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
+#include "llvm/Transforms/Scalar/LowerThreadLocalIntrinsic.h"
 #include "llvm/Transforms/Scalar/LowerWidenableCondition.h"
 #include "llvm/Transforms/Scalar/MakeGuardsExplicit.h"
 #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
@@ -1385,6 +1386,8 @@
   // Add the core simplification pipeline.
   MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::None));
 
+  MPM.addPass(LowerThreadLocalIntrinsicPass());
+
   // Now add the optimization pipeline.
   MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink));
 
@@ -1836,6 +1839,8 @@
     MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
   }
 
+  MPM.addPass(LowerThreadLocalIntrinsicPass());
+
   for (auto &C : OptimizerLastEPCallbacks)
     C(MPM, Level);
 
Index: llvm/lib/IR/IRBuilder.cpp
===================================================================
--- llvm/lib/IR/IRBuilder.cpp
+++ llvm/lib/IR/IRBuilder.cpp
@@ -452,6 +452,12 @@
   return createCallHelper(TheFn, Ops, this);
 }
 
+CallInst *IRBuilderBase::CreateThreadLocal(Value *Ptr) {
+  return CreateIntrinsic(
+      llvm::Intrinsic::threadlocal, llvm::None,
+      {CreatePointerBitCastOrAddrSpaceCast(Ptr, getInt8PtrTy())});
+}
+
 CallInst *
 IRBuilderBase::CreateAssumption(Value *Cond,
                                 ArrayRef<OperandBundleDef> OpBundles) {
Index: llvm/include/llvm/Transforms/Scalar/LowerThreadLocalIntrinsic.h
===================================================================
--- /dev/null
+++ llvm/include/llvm/Transforms/Scalar/LowerThreadLocalIntrinsic.h
@@ -0,0 +1,29 @@
+//===--- LowerThreadLocalIntrinsic.h - Lower the threadlocal intrinsic
+//---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers the llvm.threadlocal intrinsic to a direct reference to the
+// thread local variable.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_SCALAR_LOWERTHREADLOCALINTRINSIC_H
+#define LLVM_TRANSFORMS_SCALAR_LOWERTHREADLOCALINTRINSIC_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct LowerThreadLocalIntrinsicPass
+    : PassInfoMixin<LowerThreadLocalIntrinsicPass> {
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+  static bool isRequired() { return true; }
+};
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_LOWERTHREADLOCALINTRINSIC_H
Index: llvm/include/llvm/Transforms/Scalar.h
===================================================================
--- llvm/include/llvm/Transforms/Scalar.h
+++ llvm/include/llvm/Transforms/Scalar.h
@@ -383,6 +383,13 @@
 //
 Pass *createLowerMatrixIntrinsicsMinimalPass();
 
+//===----------------------------------------------------------------------===//
+//
+// createLowerThreadLocalIntrinsic - Lower threadlocal intrinsics to a direct
+//                                   reference to the thread_local variable.
+//
+Pass *createLowerThreadLocalIntrinsicPass();
+
 //===----------------------------------------------------------------------===//
 //
 // LowerWidenableCondition - Lower widenable condition to i1 true.
Index: llvm/include/llvm/InitializePasses.h
===================================================================
--- llvm/include/llvm/InitializePasses.h
+++ llvm/include/llvm/InitializePasses.h
@@ -268,6 +268,7 @@
 void initializeLowerIntrinsicsPass(PassRegistry&);
 void initializeLowerInvokeLegacyPassPass(PassRegistry&);
 void initializeLowerSwitchLegacyPassPass(PassRegistry &);
+void initializeLowerThreadLocalIntrinsicLegacyPassPass(PassRegistry &);
 void initializeLowerTypeTestsPass(PassRegistry&);
 void initializeLowerMatrixIntrinsicsLegacyPassPass(PassRegistry &);
 void initializeLowerMatrixIntrinsicsMinimalLegacyPassPass(PassRegistry &);
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -1306,6 +1306,10 @@
 def int_ptrmask: DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_anyint_ty],
                            [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
 
+
+// Intrinsic to obtain the address of a thread_local variable.
+def int_threadlocal : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty]>;
+
 //===---------------- Vector Predication Intrinsics --------------===//
 
 // Speculatable Binary operators
Index: llvm/include/llvm/IR/IRBuilder.h
===================================================================
--- llvm/include/llvm/IR/IRBuilder.h
+++ llvm/include/llvm/IR/IRBuilder.h
@@ -751,6 +751,9 @@
   /// If the pointer isn't i8* it will be converted.
   CallInst *CreateInvariantStart(Value *Ptr, ConstantInt *Size = nullptr);
 
+  /// Create a threadlocal intrinsic call; Ptr is cast to i8* if needed.
+  CallInst *CreateThreadLocal(Value *Ptr);
+
   /// Create a call to Masked Load intrinsic
   LLVM_ATTRIBUTE_DEPRECATED(
       CallInst *CreateMaskedLoad(Value *Ptr, unsigned Alignment, Value *Mask,
Index: clang/test/CodeGenCoroutines/coro-tls.cpp
===================================================================
--- /dev/null
+++ clang/test/CodeGenCoroutines/coro-tls.cpp
@@ -0,0 +1,54 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++14 -O3 -emit-llvm %s -o - | FileCheck %s
+
+#include "Inputs/coroutine.h"
+
+namespace coro = std::experimental::coroutines_v1;
+
+struct awaitable {
+  bool await_ready() { return false; }
+  void await_suspend(coro::coroutine_handle<> h);
+  void await_resume() {}
+};
+awaitable switch_to_new_thread();
+
+struct task {
+  struct promise_type {
+    task get_return_object() { return {}; }
+    coro::suspend_never initial_suspend() { return {}; }
+    coro::suspend_never final_suspend() noexcept { return {}; }
+    void return_void() {}
+    void unhandled_exception() {}
+  };
+};
+
+void check(int *i, int *j);
+
+thread_local int tls_variable = 0;
+
+bool non_coroutine() {
+  auto *i = &tls_variable;
+  auto *j = &tls_variable;
+  return i == j;
+}
+
+// CHECK-LABEL: define zeroext i1 @_Z13non_coroutinev()
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret i1 true
+
+
+task resuming_on_new_thread() {
+  auto *i = &tls_variable;
+  co_await switch_to_new_thread();
+  auto *j = &tls_variable;
+  check(i, j);
+}
+
+// This test checks that two arguments passed to "check" will be different.
+// The first one will be a value loaded from the frame, and the second is
+// the current address of tls_variable.
+
+// CHECK-LABEL: define internal fastcc void @_Z22resuming_on_new_threadv.resume
+// CHECK:         %[[RELOAD_ADDR:.+reload.addr]] = getelementptr inbounds %_Z22resuming_on_new_threadv.Frame, %_Z22resuming_on_new_threadv.Frame* %FramePtr
+// CHECK:         %[[TMP:.+]] = bitcast i8** %[[RELOAD_ADDR]] to i32**
+// CHECK:         %[[RELOAD:.+]] = load i32*, i32** %[[TMP]]
+// CHECK:         tail call void @_Z5checkPiS_(i32* %[[RELOAD]], i32* nonnull @tls_variable)
Index: clang/test/CodeGenCXX/cxx2a-thread-local-constinit.cpp
===================================================================
--- clang/test/CodeGenCXX/cxx2a-thread-local-constinit.cpp
+++ clang/test/CodeGenCXX/cxx2a-thread-local-constinit.cpp
@@ -31,7 +31,8 @@
 
 // CHECK-LABEL: define i32 @_Z5get_bv()
 // CHECK-NOT: call
-// CHECK: load i32, i32* @b
+// CHECK: %[[TMP:.+]] = bitcast i8* bitcast (i32* @b to i8*) to i32*
+// CHECK-NEXT: load i32, i32* %[[TMP]]
 // CHECK-NOT: call
 // CHECK: }
 int get_b() { return b; }
@@ -52,7 +53,8 @@
 // LINUX-LABEL: define weak_odr {{.*}} @_ZTW1c()
 // CHECK-NOT: br i1
 // CHECK-NOT: call
-// CHECK: ret i32* @c
+// CHECK: %[[TMP:.+]] = bitcast i8* bitcast (i32* @c to i8*) to i32*
+// CHECK: ret i32* %[[TMP]]
 // CHECK: }
 
 thread_local int c = 0;
Index: clang/test/CodeGen/lto-newpm-pipeline.c
===================================================================
--- clang/test/CodeGen/lto-newpm-pipeline.c
+++ clang/test/CodeGen/lto-newpm-pipeline.c
@@ -29,6 +29,7 @@
 // CHECK-FULL-O0: Running pass: AlwaysInlinerPass
 // CHECK-FULL-O0-NEXT: Running analysis: InnerAnalysisManagerProxy
 // CHECK-FULL-O0-NEXT: Running analysis: ProfileSummaryAnalysis
+// CHECK-FULL-O0-NEXT: Running pass: LowerThreadLocalIntrinsicPass
 // CHECK-FULL-O0-NEXT: Running pass: CanonicalizeAliasesPass
 // CHECK-FULL-O0-NEXT: Running pass: NameAnonGlobalPass
 // CHECK-FULL-O0-NEXT: Running pass: BitcodeWriterPass
@@ -38,6 +39,7 @@
 // CHECK-THIN-O0: Running pass: AlwaysInlinerPass
 // CHECK-THIN-O0-NEXT: Running analysis: InnerAnalysisManagerProxy
 // CHECK-THIN-O0-NEXT: Running analysis: ProfileSummaryAnalysis
+// CHECK-THIN-O0-NEXT: Running pass: LowerThreadLocalIntrinsicPass
 // CHECK-THIN-O0-NEXT: Running pass: CanonicalizeAliasesPass
 // CHECK-THIN-O0-NEXT: Running pass: NameAnonGlobalPass
 // CHECK-THIN-O0-NEXT: Running pass: ThinLTOBitcodeWriterPass
Index: clang/lib/CodeGen/ItaniumCXXABI.cpp
===================================================================
--- clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -2915,9 +2915,11 @@
       Builder.SetInsertPoint(ExitBB);
     }
 
+    llvm::Value *Val = Var;
+    if (CGM.getLangOpts().Coroutines)
+      Val = Builder.CreateThreadLocal(Val);
     // For a reference, the result of the wrapper function is a pointer to
     // the referenced object.
-    llvm::Value *Val = Var;
     if (VD->getType()->isReferenceType()) {
       CharUnits Align = CGM.getContext().getDeclAlign(VD);
       Val = Builder.CreateAlignedLoad(Val, Align);
Index: clang/lib/CodeGen/CGExpr.cpp
===================================================================
--- clang/lib/CodeGen/CGExpr.cpp
+++ clang/lib/CodeGen/CGExpr.cpp
@@ -2518,7 +2518,8 @@
                                       const Expr *E, const VarDecl *VD) {
   QualType T = E->getType();
 
-  // If it's thread_local, emit a call to its wrapper function instead.
+  // If it's a dynamic thread_local, and the ABI requires a wrapper function,
+  // emit a call to its wrapper function instead.
   if (VD->getTLSKind() == VarDecl::TLS_Dynamic &&
       CGF.CGM.getCXXABI().usesThreadWrapperFunction(VD))
     return CGF.CGM.getCXXABI().EmitThreadLocalVarDeclLValue(CGF, VD, T);
@@ -2530,15 +2531,20 @@
       return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl);
   }
 
+  bool ShouldEmitPrivateCopy = CGF.getLangOpts().OpenMP &&
+                               !CGF.getLangOpts().OpenMPSimd &&
+                               VD->hasAttr<OMPThreadPrivateDeclAttr>();
   llvm::Value *V = CGF.CGM.GetAddrOfGlobalVar(VD);
+  if (VD->getTLSKind() != VarDecl::TLS_None && !ShouldEmitPrivateCopy &&
+      CGF.getLangOpts().Coroutines)
+    V = CGF.Builder.CreateThreadLocal(V);
   llvm::Type *RealVarTy = CGF.getTypes().ConvertTypeForMem(VD->getType());
   V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy);
   CharUnits Alignment = CGF.getContext().getDeclAlign(VD);
   Address Addr(V, Alignment);
   // Emit reference to the private copy of the variable if it is an OpenMP
   // threadprivate variable.
-  if (CGF.getLangOpts().OpenMP && !CGF.getLangOpts().OpenMPSimd &&
-      VD->hasAttr<OMPThreadPrivateDeclAttr>()) {
+  if (ShouldEmitPrivateCopy) {
     return EmitThreadPrivateVarDeclLValue(CGF, VD, T, Addr, RealVarTy,
                                           E->getExprLoc());
   }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to