jdoerfert created this revision. jdoerfert added reviewers: rogfer01, ABataev, JonChesterfield, kiranchandramohan, fghanim. Herald added subscribers: cfe-commits, guansong, bollu, hiraditya. Herald added projects: clang, LLVM.
In order to fix PR44560 and to prepare for loop transformations we now finalize a function late, which will also do the outlining late. The logic is as before but the actual outlining step happens now after the function was fully constructed. Once we have loop transformations we can apply them in the finalize step before the outlining. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D74372 Files: clang/lib/CodeGen/CodeGenFunction.cpp llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
Index: llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp =================================================================== --- llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -96,7 +96,7 @@ EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID); Builder.CreateUnreachable(); - EXPECT_FALSE(verifyModule(*M)); + EXPECT_FALSE(verifyModule(*M, &errs())); } TEST_F(OpenMPIRBuilderTest, CreateCancel) { @@ -151,7 +151,7 @@ OMPBuilder.popFinalizationCB(); Builder.CreateUnreachable(); - EXPECT_FALSE(verifyModule(*M)); + EXPECT_FALSE(verifyModule(*M, &errs())); } TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { @@ -212,7 +212,7 @@ OMPBuilder.popFinalizationCB(); Builder.CreateUnreachable(); - EXPECT_FALSE(verifyModule(*M)); + EXPECT_FALSE(verifyModule(*M, &errs())); } TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) { @@ -267,7 +267,7 @@ OMPBuilder.popFinalizationCB(); Builder.CreateUnreachable(); - EXPECT_FALSE(verifyModule(*M)); + EXPECT_FALSE(verifyModule(*M, &errs())); } TEST_F(OpenMPIRBuilderTest, DbgLoc) { @@ -362,9 +362,9 @@ auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; - IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel( - Loc, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false); - + IRBuilder<>::InsertPoint AfterIP = + OMPBuilder.CreateParallel(Loc, BodyGenCB, PrivCB, FiniCB, nullptr, + nullptr, OMP_PROC_BIND_default, false); EXPECT_EQ(NumBodiesGenerated, 1U); EXPECT_EQ(NumPrivatizedVars, 1U); EXPECT_EQ(NumFinalizationPoints, 1U); @@ -372,10 +372,12 @@ Builder.restoreIP(AfterIP); Builder.CreateRetVoid(); + OMPBuilder.finalize(); + EXPECT_NE(PrivAI, nullptr); Function *OutlinedFn = PrivAI->getFunction(); EXPECT_NE(F, OutlinedFn); - EXPECT_FALSE(verifyModule(*M)); + EXPECT_FALSE(verifyModule(*M, &errs())); EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind)); EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoRecurse)); EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias)); @@ -470,6 +472,7 @@ Builder.restoreIP(AfterIP); Builder.CreateRetVoid(); + OMPBuilder.finalize(); EXPECT_NE(PrivAI, nullptr); Function *OutlinedFn = PrivAI->getFunction(); @@ -595,6 +598,7 @@ Builder.restoreIP(AfterIP); Builder.CreateRetVoid(); + OMPBuilder.finalize(); EXPECT_FALSE(verifyModule(*M, &errs())); Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp =================================================================== --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -93,6 +93,56 @@ void OpenMPIRBuilder::initialize() { initializeTypes(M); } +void OpenMPIRBuilder::finalize() { + for (OutlineInfo &OI : OutlineInfos) { + assert(!OI.Blocks.empty() && + "Outlined regions should have at least a single block!"); + BasicBlock *RegEntryBB = OI.Blocks.front(); + Function *OuterFn = RegEntryBB->getParent(); + CodeExtractorAnalysisCache CEAC(*OuterFn); + CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr, + /* AggregateArgs */ false, + /* BlockFrequencyInfo */ nullptr, + /* BranchProbabilityInfo */ nullptr, + /* AssumptionCache */ nullptr, + /* AllowVarArgs */ true, + /* AllowAlloca */ true, + /* Suffix */ ".omp_par"); + + LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n"); + + // Add some known attributes to the outlined function. + Function *OutlinedFn = Extractor.extractCodeRegion(CEAC); + + LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n"); + LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n"); + + // For compability with the clang CG we move the outlined function after the + // one with the parallel region. + OutlinedFn->removeFromParent(); + M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn); + + // Remove the artificial entry introduced by the extractor right away, we + // made our own entry block after all. + { + BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock(); + assert(ArtificialEntry.getUniqueSuccessor() == RegEntryBB); + assert(RegEntryBB->getUniquePredecessor() == &ArtificialEntry); + RegEntryBB->moveBefore(&ArtificialEntry); + ArtificialEntry.eraseFromParent(); + } + assert(&OutlinedFn->getEntryBlock() == RegEntryBB); + assert(OutlinedFn && OutlinedFn->getNumUses() == 1); + + // Run a user callback, e.g. to add attributes. + if (OI.PostOutlineCB) + OI.PostOutlineCB(*OutlinedFn); + } + + // Allow finalize to be called multiple times. + OutlineInfos.clear(); +} + Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr, IdentFlag LocFlags) { // Enable "C-mode". @@ -415,17 +465,18 @@ // PRegionExitBB <- A common exit to simplify block collection. // - LLVM_DEBUG(dbgs() << "Before body codegen: " << *UI->getFunction() << "\n"); + LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n"); // Let the caller create the body. assert(BodyGenCB && "Expected body generation callback!"); InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin()); BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB); - LLVM_DEBUG(dbgs() << "After body codegen: " << *UI->getFunction() << "\n"); + LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n"); + OutlineInfo &OI = getNewOutlineInfo(); SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet; - SmallVector<BasicBlock *, 32> ParallelRegionBlocks, Worklist; + SmallVector<BasicBlock *, 32> Worklist; ParallelRegionBlockSet.insert(PRegEntryBB); ParallelRegionBlockSet.insert(PRegExitBB); @@ -433,14 +484,14 @@ Worklist.push_back(PRegEntryBB); while (!Worklist.empty()) { BasicBlock *BB = Worklist.pop_back_val(); - ParallelRegionBlocks.push_back(BB); + OI.Blocks.push_back(BB); for (BasicBlock *SuccBB : successors(BB)) if (ParallelRegionBlockSet.insert(SuccBB).second) Worklist.push_back(SuccBB); } CodeExtractorAnalysisCache CEAC(*OuterFn); - CodeExtractor Extractor(ParallelRegionBlocks, /* DominatorTree */ nullptr, + CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr, /* AggregateArgs */ false, /* BlockFrequencyInfo */ nullptr, /* BranchProbabilityInfo */ nullptr, @@ -455,7 +506,7 @@ Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands); - LLVM_DEBUG(dbgs() << "Before privatization: " << *UI->getFunction() << "\n"); + LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n"); FunctionCallee TIDRTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num); @@ -496,139 +547,122 @@ PrivHelper(*Output); } - LLVM_DEBUG(dbgs() << "After privatization: " << *UI->getFunction() << "\n"); + LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n"); LLVM_DEBUG({ - for (auto *BB : ParallelRegionBlocks) + for (auto *BB : OI.Blocks) dbgs() << " PBR: " << BB->getName() << "\n"; }); - // Add some known attributes to the outlined function. - Function *OutlinedFn = Extractor.extractCodeRegion(CEAC); - OutlinedFn->addParamAttr(0, Attribute::NoAlias); - OutlinedFn->addParamAttr(1, Attribute::NoAlias); - OutlinedFn->addFnAttr(Attribute::NoUnwind); - OutlinedFn->addFnAttr(Attribute::NoRecurse); - - LLVM_DEBUG(dbgs() << "After outlining: " << *UI->getFunction() << "\n"); - LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n"); - - // For compability with the clang CG we move the outlined function after the - // one with the parallel region. - OutlinedFn->removeFromParent(); - M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn); - - // Remove the artificial entry introduced by the extractor right away, we - // made our own entry block after all. - { - BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock(); - assert(ArtificialEntry.getUniqueSuccessor() == PRegEntryBB); - assert(PRegEntryBB->getUniquePredecessor() == &ArtificialEntry); - PRegEntryBB->moveBefore(&ArtificialEntry); - ArtificialEntry.eraseFromParent(); - } - LLVM_DEBUG(dbgs() << "PP Outlined function: " << *OutlinedFn << "\n"); - assert(&OutlinedFn->getEntryBlock() == PRegEntryBB); - - assert(OutlinedFn && OutlinedFn->getNumUses() == 1); - assert(OutlinedFn->arg_size() >= 2 && - "Expected at least tid and bounded tid as arguments"); - unsigned NumCapturedVars = OutlinedFn->arg_size() - /* tid & bounded tid */ 2; - - CallInst *CI = cast<CallInst>(OutlinedFn->user_back()); - CI->getParent()->setName("omp_parallel"); - Builder.SetInsertPoint(CI); - - // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn); - Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars), - Builder.CreateBitCast(OutlinedFn, ParallelTaskPtr)}; - - SmallVector<Value *, 16> RealArgs; - RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs)); - RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end()); - - FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call); - if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { - if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { - llvm::LLVMContext &Ctx = F->getContext(); - MDBuilder MDB(Ctx); - // Annotate the callback behavior of the __kmpc_fork_call: - // - The callback callee is argument number 2 (microtask). - // - The first two arguments of the callback callee are unknown (-1). - // - All variadic arguments to the __kmpc_fork_call are passed to the - // callback callee. - F->addMetadata( - llvm::LLVMContext::MD_callback, - *llvm::MDNode::get( - Ctx, {MDB.createCallbackEncoding(2, {-1, -1}, - /* VarArgsArePassed */ true)})); + FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call); + if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { + if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { + llvm::LLVMContext &Ctx = F->getContext(); + MDBuilder MDB(Ctx); + // Annotate the callback behavior of the __kmpc_fork_call: + // - The callback callee is argument number 2 (microtask). + // - The first two arguments of the callback callee are unknown (-1). + // - All variadic arguments to the __kmpc_fork_call are passed to the + // callback callee. + F->addMetadata( + llvm::LLVMContext::MD_callback, + *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( + 2, {-1, -1}, + /* VarArgsArePassed */ true)})); + } } - } - - Builder.CreateCall(RTLFn, RealArgs); - - LLVM_DEBUG(dbgs() << "With fork_call placed: " - << *Builder.GetInsertBlock()->getParent() << "\n"); - - InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end()); - InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end()); - UI->eraseFromParent(); - - // Initialize the local TID stack location with the argument value. - Builder.SetInsertPoint(PrivTID); - Function::arg_iterator OutlinedAI = OutlinedFn->arg_begin(); - Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr); - // If no "if" clause was present we do not need the call created during - // outlining, otherwise we reuse it in the serialized parallel region. - if (!ElseTI) { - CI->eraseFromParent(); - } else { - - // If an "if" clause was present we are now generating the serialized - // version into the "else" branch. - Builder.SetInsertPoint(ElseTI); - - // Build calls __kmpc_serialized_parallel(&Ident, GTid); - Value *SerializedParallelCallArgs[] = {Ident, ThreadID}; - Builder.CreateCall( - getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel), - SerializedParallelCallArgs); - - // OutlinedFn(>id, &zero, CapturedStruct); - CI->removeFromParent(); - Builder.Insert(CI); - - // __kmpc_end_serialized_parallel(&Ident, GTid); - Value *EndArgs[] = {Ident, ThreadID}; - Builder.CreateCall( - getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel), - EndArgs); - - LLVM_DEBUG(dbgs() << "With serialized parallel region: " - << *Builder.GetInsertBlock()->getParent() << "\n"); - } - - // Adjust the finalization stack, verify the adjustment, and call the - // finalize function a last time to finalize values between the pre-fini block - // and the exit block if we left the parallel "the normal way". - auto FiniInfo = FinalizationStack.pop_back_val(); - (void)FiniInfo; - assert(FiniInfo.DK == OMPD_parallel && - "Unexpected finalization stack state!"); - - Instruction *PreFiniTI = PRegPreFiniBB->getTerminator(); - assert(PreFiniTI->getNumSuccessors() == 1 && - PreFiniTI->getSuccessor(0)->size() == 1 && - isa<ReturnInst>(PreFiniTI->getSuccessor(0)->getTerminator()) && - "Unexpected CFG structure!"); + OI.PostOutlineCB = [=](Function &OutlinedFn) { + // Add some known attributes. + OutlinedFn.addParamAttr(0, Attribute::NoAlias); + OutlinedFn.addParamAttr(1, Attribute::NoAlias); + OutlinedFn.addFnAttr(Attribute::NoUnwind); + OutlinedFn.addFnAttr(Attribute::NoRecurse); + + assert(OutlinedFn.arg_size() >= 2 && + "Expected at least tid and bounded tid as arguments"); + unsigned NumCapturedVars = + OutlinedFn.arg_size() - /* tid & bounded tid */ 2; + + CallInst *CI = cast<CallInst>(OutlinedFn.user_back()); + CI->getParent()->setName("omp_parallel"); + Builder.SetInsertPoint(CI); + + // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn); + Value *ForkCallArgs[] = { + Ident, Builder.getInt32(NumCapturedVars), + Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)}; + + SmallVector<Value *, 16> RealArgs; + RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs)); + RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end()); + + Builder.CreateCall(RTLFn, RealArgs); + + LLVM_DEBUG(dbgs() << "With fork_call placed: " + << *Builder.GetInsertBlock()->getParent() << "\n"); + + InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end()); + + // Initialize the local TID stack location with the argument value. + Builder.SetInsertPoint(PrivTID); + Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin(); + Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr); + + // If no "if" clause was present we do not need the call created during + // outlining, otherwise we reuse it in the serialized parallel region. + if (!ElseTI) { + CI->eraseFromParent(); + } else { + + // If an "if" clause was present we are now generating the serialized + // version into the "else" branch. + Builder.SetInsertPoint(ElseTI); + + // Build calls __kmpc_serialized_parallel(&Ident, GTid); + Value *SerializedParallelCallArgs[] = {Ident, ThreadID}; + Builder.CreateCall( + getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel), + SerializedParallelCallArgs); + + // OutlinedFn(>id, &zero, CapturedStruct); + CI->removeFromParent(); + Builder.Insert(CI); + + // __kmpc_end_serialized_parallel(&Ident, GTid); + Value *EndArgs[] = {Ident, ThreadID}; + Builder.CreateCall( + getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel), + EndArgs); + + LLVM_DEBUG(dbgs() << "With serialized parallel region: " + << *Builder.GetInsertBlock()->getParent() << "\n"); + } + + for (Instruction *I : ToBeDeleted) + I->eraseFromParent(); + }; + + // Adjust the finalization stack, verify the adjustment, and call the + // finalize function a last time to finalize values between the pre-fini + // block and the exit block if we left the parallel "the normal way". + auto FiniInfo = FinalizationStack.pop_back_val(); + (void)FiniInfo; + assert(FiniInfo.DK == OMPD_parallel && + "Unexpected finalization stack state!"); - InsertPointTy PreFiniIP(PRegPreFiniBB, PreFiniTI->getIterator()); - FiniCB(PreFiniIP); + Instruction *PreFiniTI = PRegPreFiniBB->getTerminator(); + OuterFn->dump(); + PreFiniTI->dump(); + assert(PreFiniTI->getNumSuccessors() == 1 && + PreFiniTI->getSuccessor(0) == PRegExitBB && + "Unexpected CFG structure!"); - for (Instruction *I : ToBeDeleted) - I->eraseFromParent(); + InsertPointTy PreFiniIP(PRegPreFiniBB, PreFiniTI->getIterator()); + FiniCB(PreFiniIP); - return AfterIP; + InsertPointTy ExitIP(UI->getParent(), UI->getParent()->end()); + UI->eraseFromParent(); + return ExitIP; } void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) { Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h =================================================================== --- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -29,12 +29,16 @@ /// Create a new OpenMPIRBuilder operating on the given module \p M. This will /// not have an effect on \p M (see initialize). OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {} + ~OpenMPIRBuilder() { finalize(); } /// Initialize the internal state, this will put structures types and /// potentially other helpers into the underlying module. Must be called /// before any other method and only once! void initialize(); + /// Finalize the underlying module, e.g., by outlining regions. + void finalize(); + /// Add attributes known for \p FnID to \p Fn. void addAttributes(omp::RuntimeFunction FnID, Function &Fn); @@ -256,6 +260,23 @@ /// Map to remember existing ident_t*. DenseMap<std::pair<Constant *, uint64_t>, GlobalVariable *> IdentMap; + /// Helper that contains information about regions we need to outline + /// during finalization. + struct OutlineInfo { + SmallVector<BasicBlock *, 32> Blocks; + using PostOutlineCBTy = std::function<void(Function &)>; + PostOutlineCBTy PostOutlineCB; + }; + + /// Collection of regions that need to be outlined during finalization. + SmallVector<OutlineInfo, 16> OutlineInfos; + + /// Get a handle for a new region that will be outlined later. + OutlineInfo &getNewOutlineInfo() { + OutlineInfos.push_back({}); + return OutlineInfos.back(); + } + /// An ordered map of auto-generated variables to their unique names. /// It stores variables with the following names: 1) ".gomp_critical_user_" + /// <critical_section_name> + ".var" for "omp critical" directives; 2) Index: clang/lib/CodeGen/CodeGenFunction.cpp =================================================================== --- clang/lib/CodeGen/CodeGenFunction.cpp +++ clang/lib/CodeGen/CodeGenFunction.cpp @@ -32,6 +32,7 @@ #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/Frontend/FrontendDiagnostic.h" +#include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/FPEnv.h" @@ -104,6 +105,12 @@ if (getLangOpts().OpenMP && CurFn) CGM.getOpenMPRuntime().functionFinished(*this); + + // If we have an OpenMPIRBuilder we want to finalize functions (incl. + // outlining etc) at some point. Doing it once the function codegen is done + // seems to be a reasonable spot. + if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) + OMPBuilder->finalize(); } // Map the LangOption for rounding mode into
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits