jdoerfert created this revision. jdoerfert added reviewers: anchu-rajendran, kiranktp, fghanim. Herald added subscribers: llvm-commits, cfe-commits, sstefan1, guansong, bollu, hiraditya, yaxunl. Herald added projects: clang, LLVM.
During code generation we might change or add basic blocks, so keeping a precomputed list of the blocks that belong to a region is fairly easy to break; nested parallel regions were enough to trigger this. The new scheme instead remembers only the entry and exit block of each region and recomputes the list of blocks to be outlined when it is needed. We also need to keep track of the alloca insertion point (which we already communicate to the user via the callback), because the builder places allocas itself as well. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D82470 Files: clang/test/OpenMP/cancel_codegen.cpp clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
Index: llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp =================================================================== --- llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -6,13 +6,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/IR/Verifier.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "gtest/gtest.h" @@ -402,6 +403,201 @@ EXPECT_EQ(ForkCI->getArgOperand(3), F->arg_begin()); } +TEST_F(OpenMPIRBuilderTest, ParallelNested) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> Builder(BB); + + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + + unsigned NumInnerBodiesGenerated = 0; + unsigned NumOuterBodiesGenerated = 0; + unsigned NumFinalizationPoints = 0; + + auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &ContinuationIP) { + ++NumInnerBodiesGenerated; + }; + + auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + Value &VPtr, Value *&ReplacementValue) -> InsertPointTy { + // Trivial copy (=firstprivate). 
+ Builder.restoreIP(AllocaIP); + Type *VTy = VPtr.getType()->getPointerElementType(); + Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload"); + ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy"); + Builder.restoreIP(CodeGenIP); + Builder.CreateStore(V, ReplacementValue); + return CodeGenIP; + }; + + auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; + + auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &ContinuationIP) { + ++NumOuterBodiesGenerated; + Builder.restoreIP(CodeGenIP); + BasicBlock *CGBB = CodeGenIP.getBlock(); + BasicBlock *NewBB = SplitBlock(CGBB, &*CodeGenIP.getPoint()); + CGBB->getTerminator()->eraseFromParent(); + ; + + IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel( + InsertPointTy(CGBB, CGBB->end()), InnerBodyGenCB, PrivCB, FiniCB, + nullptr, nullptr, OMP_PROC_BIND_default, false); + + Builder.restoreIP(AfterIP); + Builder.CreateBr(NewBB); + }; + + IRBuilder<>::InsertPoint AfterIP = + OMPBuilder.CreateParallel(Loc, OuterBodyGenCB, PrivCB, FiniCB, nullptr, + nullptr, OMP_PROC_BIND_default, false); + + EXPECT_EQ(NumInnerBodiesGenerated, 1U); + EXPECT_EQ(NumOuterBodiesGenerated, 1U); + EXPECT_EQ(NumFinalizationPoints, 2U); + + Builder.restoreIP(AfterIP); + Builder.CreateRetVoid(); + + OMPBuilder.finalize(); + + EXPECT_EQ(M->size(), 5U); + for (Function &OutlinedFn : *M) { + if (F == &OutlinedFn || OutlinedFn.isDeclaration()) + continue; + EXPECT_FALSE(verifyModule(*M, &errs())); + EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind)); + EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoRecurse)); + EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias)); + EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias)); + + EXPECT_TRUE(OutlinedFn.hasInternalLinkage()); + EXPECT_EQ(OutlinedFn.arg_size(), 2U); + + EXPECT_EQ(OutlinedFn.getNumUses(), 1U); + User *Usr = OutlinedFn.user_back(); + 
ASSERT_TRUE(isa<ConstantExpr>(Usr)); + CallInst *ForkCI = dyn_cast<CallInst>(Usr->user_back()); + ASSERT_NE(ForkCI, nullptr); + + EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call"); + EXPECT_EQ(ForkCI->getNumArgOperands(), 3U); + EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0))); + EXPECT_EQ(ForkCI->getArgOperand(1), + ConstantInt::get(Type::getInt32Ty(Ctx), 0U)); + EXPECT_EQ(ForkCI->getArgOperand(2), Usr); + } +} + +TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> Builder(BB); + + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + + unsigned NumInnerBodiesGenerated = 0; + unsigned NumOuterBodiesGenerated = 0; + unsigned NumFinalizationPoints = 0; + + auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &ContinuationIP) { + ++NumInnerBodiesGenerated; + }; + + auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + Value &VPtr, Value *&ReplacementValue) -> InsertPointTy { + // Trivial copy (=firstprivate). 
+ Builder.restoreIP(AllocaIP); + Type *VTy = VPtr.getType()->getPointerElementType(); + Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload"); + ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy"); + Builder.restoreIP(CodeGenIP); + Builder.CreateStore(V, ReplacementValue); + return CodeGenIP; + }; + + auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; + + auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &ContinuationIP) { + ++NumOuterBodiesGenerated; + Builder.restoreIP(CodeGenIP); + BasicBlock *CGBB = CodeGenIP.getBlock(); + BasicBlock *NewBB1 = SplitBlock(CGBB, &*CodeGenIP.getPoint()); + BasicBlock *NewBB2 = SplitBlock(NewBB1, &*NewBB1->getFirstInsertionPt()); + CGBB->getTerminator()->eraseFromParent(); + ; + NewBB1->getTerminator()->eraseFromParent(); + ; + + IRBuilder<>::InsertPoint AfterIP1 = OMPBuilder.CreateParallel( + InsertPointTy(CGBB, CGBB->end()), InnerBodyGenCB, PrivCB, FiniCB, + nullptr, nullptr, OMP_PROC_BIND_default, false); + + Builder.restoreIP(AfterIP1); + Builder.CreateBr(NewBB1); + + IRBuilder<>::InsertPoint AfterIP2 = OMPBuilder.CreateParallel( + InsertPointTy(NewBB1, NewBB1->end()), InnerBodyGenCB, PrivCB, FiniCB, + nullptr, nullptr, OMP_PROC_BIND_default, false); + + Builder.restoreIP(AfterIP2); + Builder.CreateBr(NewBB2); + }; + + IRBuilder<>::InsertPoint AfterIP = + OMPBuilder.CreateParallel(Loc, OuterBodyGenCB, PrivCB, FiniCB, nullptr, + nullptr, OMP_PROC_BIND_default, false); + + EXPECT_EQ(NumInnerBodiesGenerated, 2U); + EXPECT_EQ(NumOuterBodiesGenerated, 1U); + EXPECT_EQ(NumFinalizationPoints, 3U); + + Builder.restoreIP(AfterIP); + Builder.CreateRetVoid(); + + OMPBuilder.finalize(); + + EXPECT_EQ(M->size(), 6U); + for (Function &OutlinedFn : *M) { + if (F == &OutlinedFn || OutlinedFn.isDeclaration()) + continue; + EXPECT_FALSE(verifyModule(*M, &errs())); + EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind)); + 
EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoRecurse)); + EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias)); + EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias)); + + EXPECT_TRUE(OutlinedFn.hasInternalLinkage()); + EXPECT_EQ(OutlinedFn.arg_size(), 2U); + + unsigned NumAllocas = 0; + for (Instruction &I : instructions(OutlinedFn)) + NumAllocas += isa<AllocaInst>(I); + EXPECT_EQ(NumAllocas, 1U); + + EXPECT_EQ(OutlinedFn.getNumUses(), 1U); + User *Usr = OutlinedFn.user_back(); + ASSERT_TRUE(isa<ConstantExpr>(Usr)); + CallInst *ForkCI = dyn_cast<CallInst>(Usr->user_back()); + ASSERT_NE(ForkCI, nullptr); + + EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call"); + EXPECT_EQ(ForkCI->getNumArgOperands(), 3U); + EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0))); + EXPECT_EQ(ForkCI->getArgOperand(1), + ConstantInt::get(Type::getInt32Ty(Ctx), 0U)); + EXPECT_EQ(ForkCI->getArgOperand(2), Usr); + } +} + TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp =================================================================== --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -128,13 +128,16 @@ void OpenMPIRBuilder::initialize() { initializeTypes(M); } void OpenMPIRBuilder::finalize() { + SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet; + SmallVector<BasicBlock *, 32> Blocks; for (OutlineInfo &OI : OutlineInfos) { - assert(!OI.Blocks.empty() && - "Outlined regions should have at least a single block!"); - BasicBlock *RegEntryBB = OI.Blocks.front(); - Function *OuterFn = RegEntryBB->getParent(); + ParallelRegionBlockSet.clear(); + Blocks.clear(); + OI.collectBlocks(ParallelRegionBlockSet, Blocks); + + Function *OuterFn = OI.EntryBB->getParent(); CodeExtractorAnalysisCache CEAC(*OuterFn); - CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ 
nullptr, + CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr, /* AggregateArgs */ false, /* BlockFrequencyInfo */ nullptr, /* BranchProbabilityInfo */ nullptr, @@ -144,6 +147,8 @@ /* Suffix */ ".omp_par"); LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n"); + LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName() + << " Exit: " << OI.ExitBB->getName() << "\n"); assert(Extractor.isEligible() && "Expected OpenMP outlining to be possible!"); @@ -163,12 +168,12 @@ // made our own entry block after all. { BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock(); - assert(ArtificialEntry.getUniqueSuccessor() == RegEntryBB); - assert(RegEntryBB->getUniquePredecessor() == &ArtificialEntry); - RegEntryBB->moveBefore(&ArtificialEntry); + assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB); + assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry); + OI.EntryBB->moveBefore(&ArtificialEntry); ArtificialEntry.eraseFromParent(); } - assert(&OutlinedFn->getEntryBlock() == RegEntryBB); + assert(&OutlinedFn->getEntryBlock() == OI.EntryBB); assert(OutlinedFn && OutlinedFn->getNumUses() == 1); // Run a user callback, e.g. to add attributes. @@ -425,17 +430,32 @@ // we want to delete at the end. SmallVector<Instruction *, 4> ToBeDeleted; - Builder.SetInsertPoint(OuterFn->getEntryBlock().getFirstNonPHI()); - AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr"); - AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr"); + // The alloca builder is managed internally basically like a stack. The + // insertion point guards keep the old top value alive while we update it for + // the body. + // + // TODO: We now have an internal AllocaBuilder and the AllocaIP in the + // callback, one might suffice. + IRBuilder<>::InsertPointGuard AIPG(AllocaBuilder); + + // For the first outermost region we need to initialize the alloca builder. 
+ if (!AllocaBuilder.GetInsertBlock()) + AllocaBuilder.SetInsertPoint(OuterFn->getEntryBlock().getFirstNonPHI()); + + // Use the debug location of the pragma for alloca related code as well. + AllocaBuilder.SetCurrentDebugLocation(Loc.DL); + + AllocaInst *TIDAddr = AllocaBuilder.CreateAlloca(Int32, nullptr, "tid.addr"); + AllocaInst *ZeroAddr = + AllocaBuilder.CreateAlloca(Int32, nullptr, "zero.addr"); // If there is an if condition we actually use the TIDAddr and ZeroAddr in the // program, otherwise we only need them for modeling purposes to get the // associated arguments in the outlined function. In the former case, // initialize the allocas properly, in the latter case, delete them later. if (IfCondition) { - Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr); - Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr); + AllocaBuilder.CreateStore(Constant::getNullValue(Int32), TIDAddr); + AllocaBuilder.CreateStore(Constant::getNullValue(Int32), ZeroAddr); } else { ToBeDeleted.push_back(TIDAddr); ToBeDeleted.push_back(ZeroAddr); @@ -479,14 +499,14 @@ // of the outlined function. InsertPointTy AllocaIP(PRegEntryBB, PRegEntryBB->getTerminator()->getIterator()); - Builder.restoreIP(AllocaIP); + AllocaBuilder.restoreIP(AllocaIP); AllocaInst *PrivTIDAddr = - Builder.CreateAlloca(Int32, nullptr, "tid.addr.local"); - Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid"); + AllocaBuilder.CreateAlloca(Int32, nullptr, "tid.addr.local"); + Instruction *PrivTID = AllocaBuilder.CreateLoad(PrivTIDAddr, "tid"); // Add some fake uses for OpenMP provided arguments. 
- ToBeDeleted.push_back(Builder.CreateLoad(TIDAddr, "tid.addr.use")); - ToBeDeleted.push_back(Builder.CreateLoad(ZeroAddr, "zero.addr.use")); + ToBeDeleted.push_back(AllocaBuilder.CreateLoad(TIDAddr, "tid.addr.use")); + ToBeDeleted.push_back(AllocaBuilder.CreateLoad(ZeroAddr, "zero.addr.use")); // ThenBB // | @@ -618,20 +638,12 @@ InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); FiniCB(PreFiniIP); - SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet; - SmallVector<BasicBlock *, 32> Worklist; - ParallelRegionBlockSet.insert(PRegEntryBB); - ParallelRegionBlockSet.insert(PRegExitBB); + OI.EntryBB = PRegEntryBB; + OI.ExitBB = PRegExitBB; - // Collect all blocks in-between PRegEntryBB and PRegExitBB. - Worklist.push_back(PRegEntryBB); - while (!Worklist.empty()) { - BasicBlock *BB = Worklist.pop_back_val(); - OI.Blocks.push_back(BB); - for (BasicBlock *SuccBB : successors(BB)) - if (ParallelRegionBlockSet.insert(SuccBB).second) - Worklist.push_back(SuccBB); - } + SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet; + SmallVector<BasicBlock *, 32> Blocks; + OI.collectBlocks(ParallelRegionBlockSet, Blocks); // Ensure a single exit node for the outlined region by creating one. 
// We might have multiple incoming edges to the exit now due to finalizations, @@ -639,10 +651,10 @@ BasicBlock *PRegOutlinedExitBB = PRegExitBB; PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt()); PRegOutlinedExitBB->setName("omp.par.outlined.exit"); - OI.Blocks.push_back(PRegOutlinedExitBB); + Blocks.push_back(PRegOutlinedExitBB); CodeExtractorAnalysisCache CEAC(*OuterFn); - CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr, + CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr, /* AggregateArgs */ false, /* BlockFrequencyInfo */ nullptr, /* BranchProbabilityInfo */ nullptr, @@ -693,12 +705,16 @@ LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n"); PrivHelper(*Input); } + LLVM_DEBUG({ + for (Value *Output : Outputs) + LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n"); + }); assert(Outputs.empty() && "OpenMP outlining should not produce live-out values!"); LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n"); LLVM_DEBUG({ - for (auto *BB : OI.Blocks) + for (auto *BB : Blocks) dbgs() << " PBR: " << BB->getName() << "\n"; }); @@ -996,3 +1012,20 @@ std::string Name = getNameWithSeparators({Prefix, "var"}, ".", "."); return getOrCreateOMPInternalVariable(KmpCriticalNameTy, Name); } + +void OpenMPIRBuilder::OutlineInfo::collectBlocks( + SmallPtrSetImpl<BasicBlock *> &BlockSet, + SmallVectorImpl<BasicBlock *> &BlockVector) { + SmallVector<BasicBlock *, 32> Worklist; + BlockSet.insert(EntryBB); + BlockSet.insert(ExitBB); + + Worklist.push_back(EntryBB); + while (!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + BlockVector.push_back(BB); + for (BasicBlock *SuccBB : successors(BB)) + if (BlockSet.insert(SuccBB).second) + Worklist.push_back(SuccBB); + } +} Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h =================================================================== --- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h 
@@ -28,7 +28,8 @@ public: /// Create a new OpenMPIRBuilder operating on the given module \p M. This will /// not have an effect on \p M (see initialize). - OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {} + OpenMPIRBuilder(Module &M) + : M(M), Builder(M.getContext()), AllocaBuilder(M.getContext()) {} /// Initialize the internal state, this will put structures types and /// potentially other helpers into the underlying module. Must be called @@ -276,6 +277,9 @@ /// The LLVM-IR Builder used to create IR. IRBuilder<> Builder; + /// The LLVM-IR Builder used to create alloca instructions. + IRBuilder<> AllocaBuilder; + /// Map to remember source location strings StringMap<Constant *> SrcLocStrMap; @@ -285,9 +289,14 @@ /// Helper that contains information about regions we need to outline /// during finalization. struct OutlineInfo { - SmallVector<BasicBlock *, 32> Blocks; using PostOutlineCBTy = std::function<void(Function &)>; PostOutlineCBTy PostOutlineCB; + BasicBlock *EntryBB, *ExitBB; + + /// Collect all blocks in between EntryBB and ExitBB in both the given + /// vector and set. + void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet, + SmallVectorImpl<BasicBlock *> &BlockVector); }; /// Collection of regions that need to be outlined during finalization. 
Index: clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c =================================================================== --- /dev/null +++ clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c @@ -0,0 +1,110 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=ALL,IRBUILDER +// %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o /tmp/t1 %s +// %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch /tmp/t1 -verify %s -emit-llvm -o - | FileCheck --check-prefixes=ALL-DEBUG,IRBUILDER-DEBUG %s + +// expected-no-diagnostics + +// TODO: Teach the update script to check new functions too. + +#ifndef HEADER +#define HEADER + +// ALL-LABEL: @_Z17nested_parallel_0v( +// ALL-NEXT: entry: +// ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// ALL-NEXT: br label [[OMP_PARALLEL:%.*]] +// ALL: omp_parallel: +// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @1, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z17nested_parallel_0v..omp_par.1 to void (i32*, i32*, ...)*)) +// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT12:%.*]] +// ALL: omp.par.outlined.exit12: +// ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +// ALL: omp.par.exit.split: +// ALL-NEXT: ret void +// +void nested_parallel_0(void) { +#pragma omp parallel + { +#pragma omp parallel + { + } + } +} + +// ALL-LABEL: @_Z17nested_parallel_1Pfid( +// ALL-NEXT: entry: +// ALL-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 +// ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// ALL-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 +// ALL-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8 +// ALL-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +// ALL-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 +// ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// ALL-NEXT: br label [[OMP_PARALLEL:%.*]] +// ALL: omp_parallel: +// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @1, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z17nested_parallel_1Pfid..omp_par.2 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]) +// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT13:%.*]] +// ALL: omp.par.outlined.exit13: +// ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +// ALL: omp.par.exit.split: +// ALL-NEXT: ret void +// +void nested_parallel_1(float *r, int a, double b) { +#pragma omp parallel + { +#pragma omp parallel + { + *r = a + b; + } + } +} + +// ALL-LABEL: @_Z17nested_parallel_2Pfid( +// ALL-NEXT: entry: +// ALL-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 +// ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// ALL-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 +// ALL-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8 +// ALL-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +// ALL-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 +// ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// ALL-NEXT: br label [[OMP_PARALLEL:%.*]] +// ALL: omp_parallel: +// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @1, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z17nested_parallel_2Pfid..omp_par.5 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]) +// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT55:%.*]] +// ALL: omp.par.outlined.exit55: +// ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +// ALL: omp.par.exit.split: +// ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// ALL-NEXT: [[CONV56:%.*]] = sitofp i32 [[TMP0]] to double +// ALL-NEXT: [[TMP1:%.*]] = load double, double* [[B_ADDR]], align 8 +// ALL-NEXT: [[ADD57:%.*]] = fadd double [[CONV56]], [[TMP1]] +// ALL-NEXT: [[CONV58:%.*]] = fptrunc double [[ADD57]] to float +// ALL-NEXT: [[TMP2:%.*]] = load float*, float** [[R_ADDR]], align 8 +// ALL-NEXT: store float [[CONV58]], float* [[TMP2]], align 4 +// ALL-NEXT: ret void +// +void nested_parallel_2(float *r, int a, double b) { +#pragma omp parallel + { + *r = a + b; +#pragma omp parallel + { + *r = a + b; +#pragma omp parallel + { + *r = a + b; + } + *r = a + b; +#pragma omp parallel + { + *r = a + b; + } + *r = a + b; + } + *r = a + b; + } + *r = a + b; +} + +#endif Index: clang/test/OpenMP/cancel_codegen.cpp =================================================================== --- clang/test/OpenMP/cancel_codegen.cpp +++ clang/test/OpenMP/cancel_codegen.cpp @@ -175,7 +175,7 @@ // IRBUILDER: define internal void @main -// IRBUILDER: [[RETURN:omp.par.exit[^:]*]] +// IRBUILDER: [[RETURN:omp.par.outlined.exit[^:]*]] // IRBUILDER-NEXT: ret void // IRBUILDER: [[FLAG:%.+]] = load float, float* @{{.+}}, @@ -192,10 +192,8 @@ // IRBUILDER: [[CMP:%.+]] = icmp eq i32 [[RES]], 0 // IRBUILDER: br i1 [[CMP]], label %[[CONTINUE:[^,].+]], label %[[EXIT:.+]] // IRBUILDER: [[EXIT]] -// IRBUILDER: br label %[[EXIT2:.+]] -// IRBUILDER: [[CONTINUE]] -// IRBUILDER: br label %[[ELSE:.+]] -// IRBUILDER: [[EXIT2]] // IRBUILDER: br label %[[RETURN]] +// IRBUILDER: [[CONTINUE]] +// 
IRBUILDER: br label %[[ELSE2:.+]] #endif
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits