llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-mlir-llvm Author: Michael Kruse (Meinersbur) <details> <summary>Changes</summary> Add the `omp.tile` loop transformation for the OpenMP dialect. Used for lowering a standalone `!$omp tile` in Flang. The pretty operation printing format is currently inconsistent between `omp.canonical_loop`, `omp.unroll_heuristic`, and `omp.tile`. Open for suggestions on what the consistent format should be. PR Stack: * #<!-- -->160283 * #<!-- -->159773 * #<!-- -->160292 (this PR) * #<!-- -->160298 --- Patch is 45.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160292.diff 13 Files Affected: - (modified) mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td (+29) - (modified) mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td (+67-2) - (modified) mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td (+25-38) - (modified) mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp (+154) - (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+42) - (added) mlir/test/Dialect/OpenMP/cli-tile.mlir (+138) - (added) mlir/test/Dialect/OpenMP/invalid-tile.mlir (+119) - (added) mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir (+101) - (added) mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir (+190) - (modified) mlir/test/mlir-tblgen/op-format-invalid.td (+1-1) - (modified) mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp (+1) - (modified) mlir/tools/mlir-tblgen/FormatGen.cpp (+1-1) - (modified) mlir/tools/mlir-tblgen/OpFormatGen.cpp (+1) ``````````diff diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td index 1eda5e4bc1618..8e43c4284d078 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td @@ -995,6 +995,35 @@ class OpenMP_NumTeamsClauseSkip< def OpenMP_NumTeamsClause : OpenMP_NumTeamsClauseSkip<>; +//===----------------------------------------------------------------------===// 
+// V5.1: [10.1.2] `sizes` clause +//===----------------------------------------------------------------------===// + +class OpenMP_SizesClauseSkip< + bit traits = false, bit arguments = false, bit assemblyFormat = false, + bit description = false, bit extraClassDeclaration = false + > : OpenMP_Clause<traits, arguments, assemblyFormat, description, + extraClassDeclaration> { + let arguments = (ins + Variadic<IntLikeType>:$sizes + ); + + let optAssemblyFormat = [{ + `sizes` `(` $sizes `:` type($sizes) `)` + }]; + + let description = [{ + The `sizes` clauses defines the size of a grid over a multi-dimensional + logical iteration space. This grid is used for loop transformations such as + `tile` and `strip`. The size per dimension can be a variable, but only + values that are not at least 2 make sense. It is not specified what happens + when smaller values are used, but should still result in a loop nest that + executes each logical iteration once. + }]; +} + +def OpenMP_SizesClause : OpenMP_SizesClauseSkip<>; + //===----------------------------------------------------------------------===// // V5.2: [10.1.2] `num_threads` clause //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td index bbcfb87fa03c6..5ad4e4b5b61d1 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td @@ -38,6 +38,44 @@ def OpenMP_MapBoundsType : OpenMP_Type<"MapBounds", "map_bounds_ty"> { let summary = "Type for representing omp map clause bounds information"; } +//===---------------------------------------------------------------------===// +// OpenMP Canonical Loop Info Type +//===---------------------------------------------------------------------===// + +def CanonicalLoopInfoType : OpenMP_Type<"CanonicalLoopInfo", "cli"> { + let summary = "Type for representing a reference to a canonical loop"; 
+ let description = [{ + A variable of type CanonicalLoopInfo refers to an OpenMP-compatible + canonical loop in the same function. Values of this type are not + available at runtime and therefore cannot be used by the program itself, + i.e. an opaque type. It is similar to the transform dialect's + `!transform.interface` type, but instead of implementing an interface + for each transformation, the OpenMP dialect itself defines possible + operations on this type. + + A value of type CanonicalLoopInfoType (in the following: CLI) value can be + + 1. created by omp.new_cli. + 2. passed to omp.canonical_loop to associate the loop to that CLI. A CLI + can only be associated once. + 3. passed to an omp loop transformation operation that modifies the loop + associated with the CLI. The CLI is the "applyee" and the operation is + the consumer. A CLI can only be consumed once. + 4. passed to an omp loop transformation operation to associate the cli with + a result of that transformation. The CLI is the "generatee" and the + operation is the generator. + + A CLI cannot + + 1. be returned from a function. + 2. be passed to operations that are not specifically designed to take a + CanonicalLoopInfoType, including AnyType. + + A CLI directly corresponds to an object of + OpenMPIRBuilder's CanonicalLoopInfo struct when lowering to LLVM-IR. + }]; +} + //===----------------------------------------------------------------------===// // Base classes for OpenMP dialect operations. //===----------------------------------------------------------------------===// @@ -211,8 +249,35 @@ class OpenMP_Op<string mnemonic, list<Trait> traits = [], // Doesn't actually create a C++ base class (only defines default values for // tablegen classes that derive from this). Use LoopTransformationInterface // instead for common operations. 
-class OpenMPTransform_Op<string mnemonic, list<Trait> traits = []> : - OpenMP_Op<mnemonic, !listconcat([DeclareOpInterfaceMethods<LoopTransformationInterface>], traits) > { +class OpenMPTransform_Op<string mnemonic, + list<Trait> traits = [], + list<OpenMP_Clause> clauses = []> : + OpenMP_Op<mnemonic, + traits = !listconcat([DeclareOpInterfaceMethods<LoopTransformationInterface>], traits), + clauses = clauses> { +} + +// Base clause for loop transformations using the standard syntax. +// +// omp.opname ($generatees) <- ($applyees) clause(...) clause(...) ... <attr-dicr> +// omp.opname ($applyees) clause(...) clause(...) ... <attr-dict> +// +// $generatees is optional and is assumed to be empty if omitted +class OpenMPTransformBase_Op<string mnemonic, + list<Trait> traits = [], + list<OpenMP_Clause> clauses = []> : + OpenMPTransform_Op<mnemonic, + traits = !listconcat(traits, [AttrSizedOperandSegments]), + clauses = clauses> { + + let arguments = !con( + (ins Variadic<CanonicalLoopInfoType>:$generatees, + Variadic<CanonicalLoopInfoType>:$applyees + ), clausesArgs); + + let assemblyFormat = [{ custom<LoopTransformClis>($generatees, $applyees) }] + # clausesAssemblyFormat + # [{ attr-dict }]; } #endif // OPENMP_OP_BASE diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 5c77e215467e4..b73091ea0ca53 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -357,44 +357,6 @@ def SingleOp : OpenMP_Op<"single", traits = [ let hasVerifier = 1; } -//===---------------------------------------------------------------------===// -// OpenMP Canonical Loop Info Type -//===---------------------------------------------------------------------===// - -def CanonicalLoopInfoType : OpenMP_Type<"CanonicalLoopInfo", "cli"> { - let summary = "Type for representing a reference to a canonical loop"; - let description = [{ - A variable of type CanonicalLoopInfo refers to an 
OpenMP-compatible - canonical loop in the same function. Values of this type are not - available at runtime and therefore cannot be used by the program itself, - i.e. an opaque type. It is similar to the transform dialect's - `!transform.interface` type, but instead of implementing an interface - for each transformation, the OpenMP dialect itself defines possible - operations on this type. - - A value of type CanonicalLoopInfoType (in the following: CLI) value can be - - 1. created by omp.new_cli. - 2. passed to omp.canonical_loop to associate the loop to that CLI. A CLI - can only be associated once. - 3. passed to an omp loop transformation operation that modifies the loop - associated with the CLI. The CLI is the "applyee" and the operation is - the consumer. A CLI can only be consumed once. - 4. passed to an omp loop transformation operation to associate the cli with - a result of that transformation. The CLI is the "generatee" and the - operation is the generator. - - A CLI cannot - - 1. be returned from a function. - 2. be passed to operations that are not specifically designed to take a - CanonicalLoopInfoType, including AnyType. - - A CLI directly corresponds to an object of - OpenMPIRBuilder's CanonicalLoopInfo struct when lowering to LLVM-IR. 
- }]; -} - //===---------------------------------------------------------------------===// // OpenMP Canonical Loop Info Creation //===---------------------------------------------------------------------===// @@ -563,6 +525,31 @@ def UnrollHeuristicOp : OpenMPTransform_Op<"unroll_heuristic", []> { let hasCustomAssemblyFormat = 1; } +//===----------------------------------------------------------------------===// +// OpenMP tile operation +//===----------------------------------------------------------------------===// + +def TileOp : OpenMPTransformBase_Op<"tile", + clauses = [OpenMP_SizesClause]> { + let summary = "OpenMP tile operation"; + let description = [{ + Represents the OpenMP tile directive introduced in OpenMP 5.1. + + The construct partitions the logical iteration space of the affected loops + into equally-sized tiles, then creates two sets of nested loops. The outer + loops, called the grid loops, iterate over all tiles. The inner loops, + called the intratile loops, iterate over the logical iterations of a tile. + The sizes clause determines the size of a tile. + + Currently, the affected loops must be rectangular (the tripcount of the + inner loop must not depend on any iv of an surrounding affected loop) and + perfectly nested (except for the innermost affected loop, no operations + other than the nested loop and the terminator in the loop body). 
+ }] # clausesDescription; + + let hasVerifier = 1; +} + //===----------------------------------------------------------------------===// // 2.8.3 Workshare Construct //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 1674891410194..f681b0346f489 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -33,6 +33,7 @@ #include "llvm/ADT/TypeSwitch.h" #include "llvm/ADT/bit.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" +#include "llvm/Support/InterleavedRange.h" #include <cstddef> #include <iterator> #include <optional> @@ -3299,6 +3300,9 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) { Value result = getResult(); auto [newCli, gen, cons] = decodeCli(result); + // Structured binding `gen` cannot be captured in lambdas before C++20 + OpOperand *generator = gen; + // Derive the CLI variable name from its generator: // * "canonloop" for omp.canonical_loop // * custom name for loop transformation generatees @@ -3317,6 +3321,24 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) { .Case([&](UnrollHeuristicOp op) -> std::string { llvm_unreachable("heuristic unrolling does not generate a loop"); }) + .Case([&](TileOp op) -> std::string { + auto [generateesFirst, generateesCount] = + op.getGenerateesODSOperandIndexAndLength(); + unsigned firstGrid = generateesFirst; + unsigned firstIntratile = generateesFirst + generateesCount / 2; + unsigned end = generateesFirst + generateesCount; + unsigned opnum = generator->getOperandNumber(); + // In the OpenMP apply and looprange clauses, indices are 1-based + if (firstGrid <= opnum && opnum < firstIntratile) { + unsigned gridnum = opnum - firstGrid + 1; + return ("grid" + Twine(gridnum)).str(); + } + if (firstIntratile <= opnum && opnum < end) { + unsigned intratilenum = opnum - firstIntratile + 1; + 
return ("intratile" + Twine(intratilenum)).str(); + } + llvm_unreachable("Unexpected generatee argument"); + }) .Default([&](Operation *op) { assert(false && "TODO: Custom name for this operation"); return "transformed"; @@ -3545,6 +3567,138 @@ UnrollHeuristicOp::getGenerateesODSOperandIndexAndLength() { return {0, 0}; } +//===----------------------------------------------------------------------===// +// TileOp +//===----------------------------------------------------------------------===// + +static void printLoopTransformClis(OpAsmPrinter &p, TileOp op, + OperandRange generatees, + OperandRange applyees) { + if (!generatees.empty()) + p << '(' << llvm::interleaved(generatees) << ')'; + + if (!applyees.empty()) + p << " <- (" << llvm::interleaved(applyees) << ')'; +} + +static ParseResult parseLoopTransformClis( + OpAsmParser &parser, + SmallVectorImpl<OpAsmParser::UnresolvedOperand> &generateesOperands, + SmallVectorImpl<OpAsmParser::UnresolvedOperand> &applyeesOperands) { + if (parser.parseOptionalLess()) { + // Syntax 1: generatees present + + if (parser.parseOperandList(generateesOperands, + mlir::OpAsmParser::Delimiter::Paren)) + return failure(); + + if (parser.parseLess()) + return failure(); + } else { + // Syntax 2: generatees omitted + } + + // Parse `<-` (`<` has already been parsed) + if (parser.parseMinus()) + return failure(); + + if (parser.parseOperandList(applyeesOperands, + mlir::OpAsmParser::Delimiter::Paren)) + return failure(); + + return success(); +} + +LogicalResult TileOp::verify() { + if (getApplyees().empty()) + return emitOpError() << "must apply to at least one loop"; + + if (getSizes().size() != getApplyees().size()) + return emitOpError() << "there must be one tile size for each applyee"; + + if (!getGeneratees().empty() && + 2 * getSizes().size() != getGeneratees().size()) + return emitOpError() + << "expecting two times the number of generatees than applyees"; + + DenseSet<Value> parentIVs; + + Value parent = 
getApplyees().front(); + for (auto &&applyee : llvm::drop_begin(getApplyees())) { + auto [parentCreate, parentGen, parentCons] = decodeCli(parent); + auto [create, gen, cons] = decodeCli(applyee); + + if (!parentGen) + return emitOpError() << "applyee CLI has no generator"; + + auto parentLoop = dyn_cast_or_null<CanonicalLoopOp>(parentGen->getOwner()); + if (!parentGen) + return emitOpError() + << "currently only supports omp.canonical_loop as applyee"; + + parentIVs.insert(parentLoop.getInductionVar()); + + if (!gen) + return emitOpError() << "applyee CLI has no generator"; + auto loop = dyn_cast_or_null<CanonicalLoopOp>(gen->getOwner()); + if (!loop) + return emitOpError() + << "currently only supports omp.canonical_loop as applyee"; + + // Canonical loop must be perfectly nested, i.e. the body of the parent must + // only contain the omp.canonical_loop of the nested loops, and + // omp.terminator + bool isPerfectlyNested = [&]() { + auto &parentBody = parentLoop.getRegion(); + if (!parentBody.hasOneBlock()) + return false; + auto &parentBlock = parentBody.getBlocks().front(); + + auto nestedLoopIt = parentBlock.begin(); + if (nestedLoopIt == parentBlock.end() || + (&*nestedLoopIt != loop.getOperation())) + return false; + + auto termIt = std::next(nestedLoopIt); + if (termIt == parentBlock.end() || !isa<TerminatorOp>(termIt)) + return false; + + if (std::next(termIt) != parentBlock.end()) + return false; + + return true; + }(); + if (!isPerfectlyNested) + return emitOpError() << "tiled loop nest must be perfectly nested"; + + if (parentIVs.contains(loop.getTripCount())) + return emitOpError() << "tiled loop nest must be rectangular"; + + parent = applyee; + } + + // TODO: The tile sizes must be computed before the loop, but checking this + // requires dominance analysis. 
For instance: + // + // %canonloop = omp.new_cli + // omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) { + // // write to %x + // omp.terminator + // } + // %ts = llvm.load %x + // omp.tile <- (%canonloop) sizes(%ts : i32) + + return success(); +} + +std::pair<unsigned, unsigned> TileOp ::getApplyeesODSOperandIndexAndLength() { + return getODSOperandIndexAndLength(odsIndex_applyees); +} + +std::pair<unsigned, unsigned> TileOp::getGenerateesODSOperandIndexAndLength() { + return getODSOperandIndexAndLength(odsIndex_generatees); +} + //===----------------------------------------------------------------------===// // Critical construct (2.17.1) //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 4921a1990b6e8..171ac61dd66fe 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3154,6 +3154,45 @@ applyUnrollHeuristic(omp::UnrollHeuristicOp op, llvm::IRBuilderBase &builder, return success(); } +/// Apply a `#pragma omp tile` / `!$omp tile` transformation using the +/// OpenMPIRBuilder. 
+static LogicalResult applyTile(omp::TileOp op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + llvm::OpenMPIRBuilder::LocationDescription loc(builder); + + SmallVector<llvm::CanonicalLoopInfo *> translatedLoops; + SmallVector<llvm::Value *> translatedSizes; + + for (Value size : op.getSizes()) { + llvm::Value *translatedSize = moduleTranslation.lookupValue(size); + assert(translatedSize && + "sizes clause arguments must already be translated"); + translatedSizes.push_back(translatedSize); + } + + for (Value applyee : op.getApplyees()) { + llvm::CanonicalLoopInfo *consBuilderCLI = + moduleTranslation.lookupOMPLoop(applyee); + assert(applyee && "Canonical loop must already been translated"); + translatedLoops.push_back(consBuilderCLI); + } + + auto generatedLoops = + ompBuilder->tileLoops(loc.DL, translatedLoops, translatedSizes); + if (!op.getGeneratees().empty()) { + for (auto [mlirLoop, genLoop] : + zip_equal(op.getGeneratees(), generatedLoops)) + moduleTranslation.mapOmpLoop(mlirLoop, genLoop); + } + + // CLIs can only be consumed once + for (Value applyee : op.getApplyees()) + moduleTranslation.invalidateOmpLoop(applyee); + + return success(); +} + /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering. static llvm::AtomicOrdering convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) { @@ -6196,6 +6235,9 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder, // the omp.canonical_loop. 
return applyUnrollHeuristic(op, builder, moduleTranslation); }) + .Case([&](omp::TileOp op) { + return applyTile(op, builder, moduleTranslation); + }) .Case([&](omp::TargetAllocMemOp) { return convertTargetAllocMemOp(*op, builder, moduleTranslation); }) diff --git a/mlir/test/Dialect/OpenMP/cli-tile.mlir b/mlir/test/Dialect/OpenMP/cli-tile.mlir new file mode 100644 index 0000000000000..73d54784c52b7 --- /dev/null +++ b/mlir/test/Dialect/OpenMP/cli-tile.mlir @@ -0,0 +1,138 @@ +// RUN: mlir-opt %s | FileCheck %s --enable-var-scope +// RUN: mlir-opt %s | mlir-opt | FileCheck %s --enable-var-scope + + +// Raw syntax check (MLIR output is always pretty-printed) +// CHECK-LABEL: @omp_tile_raw( +// CHECK-SAME: %[[tc:.+]]: i32, %[[ts:.+]]: i32) { +func.func @omp_tile_raw(%tc : i32, %ts : i32) -> () { + // CHECK-NEXT: %canonloop = omp.new_cli + %canonloop = "omp.new_cli" () : () -> (!omp.cli) + // CHECK-NEXT: %grid1 = omp.new_cli + %grid = "omp.new_cli" () : () -> (!omp.cli) + // CHECK-NEXT: %intratile1 = omp.new_cli + %intratile = "omp.new_cli" () : () -> (!omp.cli) + // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) { + "omp.can... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/160292 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits