================
@@ -0,0 +1,446 @@
+//===- LowerWorkshare.cpp - lower the workshare construct ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the lowering of omp.workshare to other omp constructs.
+//
+// This pass is tasked with parallelizing the loops nested in
+// workshare.loop_wrapper, while the Fortran to MLIR lowering and the HLFIR to
+// FIR lowering pipelines are responsible for emitting the
+// workshare.loop_wrapper ops where appropriate, according to the
+// `shouldUseWorkshareLowering` function.
+//
+//===----------------------------------------------------------------------===//
+
+#include <flang/Optimizer/Builder/FIRBuilder.h>
+#include <flang/Optimizer/Dialect/FIROps.h>
+#include <flang/Optimizer/Dialect/FIRType.h>
+#include <flang/Optimizer/HLFIR/HLFIROps.h>
+#include <flang/Optimizer/OpenMP/Passes.h>
+#include <llvm/ADT/BreadthFirstIterator.h>
+#include <llvm/ADT/STLExtras.h>
+#include <llvm/ADT/SmallVectorExtras.h>
+#include <llvm/ADT/iterator_range.h>
+#include <llvm/Support/ErrorHandling.h>
+#include <mlir/Dialect/Arith/IR/Arith.h>
+#include <mlir/Dialect/LLVMIR/LLVMTypes.h>
+#include <mlir/Dialect/OpenMP/OpenMPClauseOperands.h>
+#include <mlir/Dialect/OpenMP/OpenMPDialect.h>
+#include <mlir/Dialect/SCF/IR/SCF.h>
+#include <mlir/IR/BuiltinOps.h>
+#include <mlir/IR/IRMapping.h>
+#include <mlir/IR/OpDefinition.h>
+#include <mlir/IR/PatternMatch.h>
+#include <mlir/IR/Visitors.h>
+#include <mlir/Interfaces/SideEffectInterfaces.h>
+#include <mlir/Support/LLVM.h>
+#include <mlir/Transforms/GreedyPatternRewriteDriver.h>
+
+#include <variant>
+
+namespace flangomp {
+#define GEN_PASS_DEF_LOWERWORKSHARE
+#include "flang/Optimizer/OpenMP/Passes.h.inc"
+} // namespace flangomp
+
+#define DEBUG_TYPE "lower-workshare"
+
+using namespace mlir;
+
+namespace flangomp {
+
+// Checks for the nesting pattern below, as we need to avoid sharing the work
+// of statements that are nested in constructs such as omp.critical or
+// another omp.parallel.
+//
+// omp.workshare { // `wsOp`
+//   ...
+//     omp.T { // `parent`
+//       ...
+//         `op`
+//
+template <typename T>
+static bool isNestedIn(omp::WorkshareOp wsOp, Operation *op) {
+  T parent = op->getParentOfType<T>();
+  if (!parent)
+    return false;
+  return wsOp->isProperAncestor(parent);
+}
+
+bool shouldUseWorkshareLowering(Operation *op) {
+  auto parentWorkshare = op->getParentOfType<omp::WorkshareOp>();
+
+  if (!parentWorkshare)
+    return false;
+
+  if (isNestedIn<omp::CriticalOp>(parentWorkshare, op))
+    return false;
+
+  // 2.8.3 workshare Construct
+  // For a parallel construct, the construct is a unit of work with respect to
+  // the workshare construct. The statements contained in the parallel
+  // construct are executed by a new thread team.
+  if (isNestedIn<omp::ParallelOp>(parentWorkshare, op))
+    return false;
+
+  // 2.8.2 single Construct
+  // Binding: The binding thread set for a single region is the current team.
+  // A single region binds to the innermost enclosing parallel region.
+  // Description: Only one of the encountering threads will execute the
+  // structured block associated with the single construct.
+  if (isNestedIn<omp::SingleOp>(parentWorkshare, op))
+    return false;
+
+  return true;
+}
+
+} // namespace flangomp
+
+namespace {
+
+struct SingleRegion {
+  Block::iterator begin, end;
+};
+
+static bool mustParallelizeOp(Operation *op) {
+  return op
+      ->walk([&](Operation *nested) {
+        // We need to be careful not to pick up workshare.loop_wrapper in nested
+        // omp.parallel{omp.workshare} regions, i.e. make sure that `nested`
+        // binds to the workshare region we are currently handling.
+        //
+        // For example:
+        //
+        // omp.parallel {
+        //   omp.workshare { // currently handling this
+        //     omp.parallel {
+        //       omp.workshare { // nested workshare
+        //         omp.workshare.loop_wrapper {}
+        //
+        // Therefore, we skip if we encounter a nested omp.workshare.
+        if (isa<omp::WorkshareOp>(nested))
+          return WalkResult::skip();
+        if (isa<omp::WorkshareLoopWrapperOp>(nested))
+          return WalkResult::interrupt();
+        return WalkResult::advance();
+      })
+      .wasInterrupted();
+}
+
+static bool isSafeToParallelize(Operation *op) {
+  return isa<hlfir::DeclareOp>(op) || isa<fir::DeclareOp>(op) ||
+         isMemoryEffectFree(op);
+}
+
+/// Simple shallow copies suffice for our purposes in this pass, so we
+/// implement this simpler alternative to the full-fledged `createCopyFunc` in
+/// the frontend.
+static mlir::func::FuncOp createCopyFunc(mlir::Location loc, mlir::Type varType,
+                                         fir::FirOpBuilder builder) {
+  mlir::ModuleOp module = builder.getModule();
+  auto rt = cast<fir::ReferenceType>(varType);
+  mlir::Type eleTy = rt.getEleTy();
+  std::string copyFuncName =
+      fir::getTypeAsString(eleTy, builder.getKindMap(), "_workshare_copy");
+
+  if (auto decl = module.lookupSymbol<mlir::func::FuncOp>(copyFuncName))
+    return decl;
+  // Create the function.
+  mlir::OpBuilder::InsertionGuard guard(builder);
+  mlir::OpBuilder modBuilder(module.getBodyRegion());
+  llvm::SmallVector<mlir::Type> argsTy = {varType, varType};
+  auto funcType = mlir::FunctionType::get(builder.getContext(), argsTy, {});
+  mlir::func::FuncOp funcOp =
+      modBuilder.create<mlir::func::FuncOp>(loc, copyFuncName, funcType);
+  funcOp.setVisibility(mlir::SymbolTable::Visibility::Private);
+  builder.createBlock(&funcOp.getRegion(), funcOp.getRegion().end(), argsTy,
+                      {loc, loc});
+  builder.setInsertionPointToStart(&funcOp.getRegion().back());
+
+  Value loaded = builder.create<fir::LoadOp>(loc, funcOp.getArgument(0));
+  builder.create<fir::StoreOp>(loc, loaded, funcOp.getArgument(1));
+
+  builder.create<mlir::func::ReturnOp>(loc);
+  return funcOp;
+}
+
+static bool isUserOutsideSR(Operation *user, Operation *parentOp,
+                            SingleRegion sr) {
+  while (user->getParentOp() != parentOp)
+    user = user->getParentOp();
+  return sr.begin->getBlock() != user->getBlock() ||
+         !(user->isBeforeInBlock(&*sr.end) && sr.begin->isBeforeInBlock(user));
+}
+
+static bool isTransitivelyUsedOutside(Value v, SingleRegion sr) {
+  Block *srBlock = sr.begin->getBlock();
+  Operation *parentOp = srBlock->getParentOp();
+
+  for (auto &use : v.getUses()) {
+    Operation *user = use.getOwner();
+    if (isUserOutsideSR(user, parentOp, sr))
+      return true;
+
+    // Results of nested users cannot be used outside of the SR.
+    if (user->getBlock() != srBlock)
+      continue;
+
+    // An operation that is not safe to parallelize will be handled separately.
+    if (!isSafeToParallelize(user))
+      continue;
+
+    for (auto res : user->getResults())
+      if (isTransitivelyUsedOutside(res, sr))
+        return true;
+  }
+  return false;
+}
+
+/// We clone pure operations in both the parallel and single blocks. This
+/// function cleans them up if they end up with no uses.
+static void cleanupBlock(Block *block) {
+  for (Operation &op : llvm::make_early_inc_range(
+           llvm::make_range(block->rbegin(), block->rend())))
+    if (isOpTriviallyDead(&op))
+      op.erase();
+}

----------------
tblah wrote:
We don't tend to do this in individual passes - instead leaving it for
whenever we next get around to running canonicalization. Is there something
particularly dangerous about keeping around these dead operations after the
end of the pass?

If I remember correctly, `isOpTriviallyDead` shouldn't be expensive because
MLIR already maintains the list of operation uses - so this probably isn't a
big deal.

https://github.com/llvm/llvm-project/pull/101446
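[Editor's note] For illustration only, the alternative described in the comment above, deferring the cleanup of trivially dead clones to a later canonicalization run rather than erasing them inside LowerWorkshare, would roughly amount to scheduling MLIR's generic canonicalizer after this pass. A minimal sketch follows; it assumes the standard mlir::createCanonicalizerPass() entry point and a hypothetical pipeline helper named addWorkshareCleanup, and it is not part of the patch:

  // Hypothetical sketch, not part of the patch: rely on a later
  // canonicalization run to sweep up trivially dead clones instead of
  // erasing them inside this pass.
  #include "mlir/Pass/PassManager.h"
  #include "mlir/Transforms/Passes.h"

  static void addWorkshareCleanup(mlir::OpPassManager &pm) {
    // The generic canonicalizer folds and dead-code-eliminates trivially
    // dead operations, which covers the pure ops duplicated into the
    // parallel and single blocks.
    pm.addPass(mlir::createCanonicalizerPass());
  }

Whether scheduling that extra run is preferable to the in-pass cleanupBlock is exactly the trade-off the review comment raises.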