https://github.com/nicolasvasilache updated https://github.com/llvm/llvm-project/pull/145257
From 2f1558ae8c1c90a6091dbc821fd5438f5136b8ae Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache <nico.vasila...@amd.com>
Date: Wed, 18 Jun 2025 19:14:31 +0200
Subject: [PATCH 1/2] [mlir][transform] Plumb a simplified form of AffineMin folding into transform.pad-tiling-interface

This revision introduces a simple variant of AffineMin folding in makeComposedFoldedAffineApply and makes use of it in transform.pad-tiling-interface.

Since this version explicitly calls the ValueBoundsOpInterface, it may be too expensive and is therefore only activated behind a flag. It results in better foldings when mixing tiling and padding, including with dynamic shapes.

This should be further composed with #145068 to provide full simplification and address the remaining TODO in the test.
---
 .../mlir/Dialect/Affine/IR/AffineOps.h        |  18 ++-
 .../mlir/Interfaces/ValueBoundsOpInterface.h  |   2 +-
 mlir/lib/Dialect/Affine/IR/AffineOps.cpp      | 134 ++++++++++++++----
 .../Linalg/Transforms/PadTilingInterface.cpp  |   5 +-
 .../lib/Interfaces/ValueBoundsOpInterface.cpp |   2 +-
 ...m-op-pad-tiling-interface-multiple-of.mlir | 131 +++++++++++++++++
 6 files changed, 251 insertions(+), 41 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
index 6fdb72c370e6d..2091faa6b0b02 100644
--- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
+++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
@@ -410,9 +410,11 @@ void canonicalizeSetAndOperands(IntegerSet *set, /// other AffineApplyOps supplying those operands. The operands of the resulting /// AffineApplyOp do not change the length of AffineApplyOp chains. AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, - ArrayRef<OpFoldResult> operands); + ArrayRef<OpFoldResult> operands, + bool composeAffineMin = false); AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineExpr e, - ArrayRef<OpFoldResult> operands); + ArrayRef<OpFoldResult> operands, + bool composeAffineMin = false); /// Constructs an AffineApplyOp that applies `map` to `operands` after composing /// the map with the maps of any other AffineApplyOp supplying the operands,
@@ -421,16 +423,19 @@ AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineExpr e, /// map. OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, - ArrayRef<OpFoldResult> operands); + ArrayRef<OpFoldResult> operands, + bool composeAffineMin = false); /// Variant of `makeComposedFoldedAffineApply` that applies to an expression. OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineExpr expr, - ArrayRef<OpFoldResult> operands); + ArrayRef<OpFoldResult> operands, + bool composeAffineMin = false); /// Variant of `makeComposedFoldedAffineApply` suitable for multi-result maps. /// Note that this may create as many affine.apply operations as the map has /// results given that affine.apply must be single-result. SmallVector<OpFoldResult> makeComposedFoldedMultiResultAffineApply( - OpBuilder &b, Location loc, AffineMap map, ArrayRef<OpFoldResult> operands); + OpBuilder &b, Location loc, AffineMap map, ArrayRef<OpFoldResult> operands, + bool composeAffineMin = false); /// Returns an AffineMinOp obtained by composing `map` and `operands` with /// AffineApplyOps supplying those operands.
@@ -459,7 +464,8 @@ OpFoldResult makeComposedFoldedAffineMax(OpBuilder &b, Location loc, /// terminal symbol, i.e., a symbol defined at the top level or a block/function /// argument.
void fullyComposeAffineMapAndOperands(AffineMap *map, - SmallVectorImpl<Value> *operands); + SmallVectorImpl<Value> *operands, + bool composeAffineMin = false); } // namespace affine } // namespace mlir diff --git a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h index 337314143c80c..523df173093fa 100644 --- a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h +++ b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h @@ -135,7 +135,7 @@ class ValueBoundsConstraintSet /// Construct a variable for a map and its operands. Variable(AffineMap map, ArrayRef<Variable> mapOperands); - Variable(AffineMap map, ArrayRef<Value> mapOperands); + Variable(AffineMap map, ValueRange mapOperands); MLIRContext *getContext() const { return map.getContext(); } diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp index 3d09c6a9b2c24..06b7910736727 100644 --- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp +++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp @@ -11,12 +11,14 @@ #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/UB/IR/UBOps.h" #include "mlir/Dialect/Utils/StaticValueUtils.h" +#include "mlir/IR/AffineExpr.h" #include "mlir/IR/AffineExprVisitor.h" #include "mlir/IR/IRMapping.h" #include "mlir/IR/IntegerSet.h" #include "mlir/IR/Matchers.h" #include "mlir/IR/OpDefinition.h" #include "mlir/IR/PatternMatch.h" +#include "mlir/IR/Value.h" #include "mlir/Interfaces/ShapedOpInterfaces.h" #include "mlir/Interfaces/ValueBoundsOpInterface.h" #include "mlir/Transforms/InliningUtils.h" @@ -26,7 +28,9 @@ #include "llvm/ADT/SmallVectorExtras.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/LogicalResult.h" #include "llvm/Support/MathExtras.h" +#include <limits> #include <numeric> #include <optional> @@ -1042,6 +1046,59 @@ simplifyMapWithOperands(AffineMap &map, ArrayRef<Value> operands) { map.getContext()); } +/// Assuming `dimOrSym` is a quantity in `map` that is defined by `minOp`, +/// replaces the patterns: +/// ``` +/// dimOrSym.ceildiv(cst) * cst +/// (dimOrSym + cst - 1).floordiv(cst) * cst +/// ``` +/// by `cst` in `map`. +/// This simplification is valid iff `minOp` is guaranteed to be nonnegative. +/// Additionally, allows the caller to pass `affineMinKnownToBeNonNegative` to +/// inject static information that may not be statically discoverable. +/// Warning: ValueBoundsConstraintSet::computeConstantBound is needed to check +/// for the nonnegative case, if `affineMinKnownToBeNonNegative` is false. +static LogicalResult replaceAffineMinBoundingBoxExpression( + AffineMinOp minOp, AffineExpr dimOrSym, AffineMap *map, + bool affineMinKnownToBeNonNegative = false) { + auto affineMinMap = minOp.getAffineMap(); + if (!affineMinKnownToBeNonNegative) { + ValueRange values = minOp->getOperands(); + for (unsigned i = 0, e = affineMinMap.getNumResults(); i < e; ++i) { + AffineMap row = affineMinMap.getSubMap(ArrayRef<unsigned>{i}); + FailureOr<int64_t> lowerBound = + ValueBoundsConstraintSet::computeConstantBound( + presburger::BoundType::LB, {row, values}, + /*stopCondition=*/nullptr, + /*closedUB=*/true); + if (failed(lowerBound) || lowerBound.value() < 0) + return failure(); + } + } + + AffineMap initialMap = *map; + for (unsigned i = 0, e = affineMinMap.getNumResults(); i != e; ++i) { + auto m = affineMinMap.getSubMap(ArrayRef<unsigned>{i}); + // TODO: this should also work with nonnegative symbolic divisors. 
+ if (!m.isSingleConstant()) + continue; + + auto cst = m.getSingleConstantResult(); + DenseMap<AffineExpr, AffineExpr> repl; + // dimOrSym.ceilDiv(cst) * cst -> cst + repl[dimOrSym.ceilDiv(cst) * cst] = + getAffineConstantExpr(cst, minOp.getContext()); + // (dimOrSym + cst - 1).floorDiv(cst) * cst -> cst + repl[(dimOrSym + cst - 1).floorDiv(cst) * cst] = + getAffineConstantExpr(cst, minOp.getContext()); + auto newMap = map->replace(repl); + if (newMap == *map) + continue; + *map = newMap; + } + return success(*map != initialMap); +} + /// Replace all occurrences of AffineExpr at position `pos` in `map` by the /// defining AffineApplyOp expression and operands. /// When `dimOrSymbolPosition < dims.size()`, AffineDimExpr@[pos] is replaced. @@ -1052,10 +1109,13 @@ simplifyMapWithOperands(AffineMap &map, ArrayRef<Value> operands) { /// 2. `map` dim and symbols are gradually shifted to higher positions. /// 3. Old `dim` and `sym` entries are replaced by nullptr /// This avoids the need for any bookkeeping. +/// If `replaceAffineMin` is set to true, additionally triggers more expensive +/// replacements involving affine_min operations. static LogicalResult replaceDimOrSym(AffineMap *map, unsigned dimOrSymbolPosition, SmallVectorImpl<Value> &dims, - SmallVectorImpl<Value> &syms) { + SmallVectorImpl<Value> &syms, + bool replaceAffineMin) { MLIRContext *ctx = map->getContext(); bool isDimReplacement = (dimOrSymbolPosition < dims.size()); unsigned pos = isDimReplacement ? dimOrSymbolPosition @@ -1064,6 +1124,13 @@ static LogicalResult replaceDimOrSym(AffineMap *map, if (!v) return failure(); + auto minOp = v.getDefiningOp<AffineMinOp>(); + if (minOp && replaceAffineMin) { + AffineExpr dimOrSym = isDimReplacement ? getAffineDimExpr(pos, ctx) + : getAffineSymbolExpr(pos, ctx); + return replaceAffineMinBoundingBoxExpression(minOp, dimOrSym, map); + } + auto affineApply = v.getDefiningOp<AffineApplyOp>(); if (!affineApply) return failure(); @@ -1101,7 +1168,8 @@ static LogicalResult replaceDimOrSym(AffineMap *map, /// iteratively. Perform canonicalization of map and operands as well as /// AffineMap simplification. `map` and `operands` are mutated in place. 
static void composeAffineMapAndOperands(AffineMap *map, - SmallVectorImpl<Value> *operands) { + SmallVectorImpl<Value> *operands, + bool composeAffineMin = false) { if (map->getNumResults() == 0) { canonicalizeMapAndOperands(map, operands); *map = simplifyAffineMap(*map); @@ -1122,7 +1190,8 @@ static void composeAffineMapAndOperands(AffineMap *map, while (true) { bool changed = false; for (unsigned pos = 0; pos != dims.size() + syms.size(); ++pos) - if ((changed |= succeeded(replaceDimOrSym(map, pos, dims, syms)))) + if ((changed |= + succeeded(replaceDimOrSym(map, pos, dims, syms, composeAffineMin)))) break; if (!changed) break; @@ -1163,38 +1232,41 @@ static void composeAffineMapAndOperands(AffineMap *map, } void mlir::affine::fullyComposeAffineMapAndOperands( - AffineMap *map, SmallVectorImpl<Value> *operands) { + AffineMap *map, SmallVectorImpl<Value> *operands, bool composeAffineMin) { while (llvm::any_of(*operands, [](Value v) { return isa_and_nonnull<AffineApplyOp>(v.getDefiningOp()); })) { - composeAffineMapAndOperands(map, operands); + composeAffineMapAndOperands(map, operands, composeAffineMin); } } AffineApplyOp mlir::affine::makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, - ArrayRef<OpFoldResult> operands) { + ArrayRef<OpFoldResult> operands, + bool composeAffineMin) { SmallVector<Value> valueOperands; map = foldAttributesIntoMap(b, map, operands, valueOperands); - composeAffineMapAndOperands(&map, &valueOperands); + composeAffineMapAndOperands(&map, &valueOperands, composeAffineMin); assert(map); return b.create<AffineApplyOp>(loc, map, valueOperands); } AffineApplyOp mlir::affine::makeComposedAffineApply(OpBuilder &b, Location loc, AffineExpr e, - ArrayRef<OpFoldResult> operands) { + ArrayRef<OpFoldResult> operands, + bool composeAffineMin) { return makeComposedAffineApply( b, loc, AffineMap::inferFromExprList(ArrayRef<AffineExpr>{e}, b.getContext()) .front(), - operands); + operands, composeAffineMin); } /// Composes the given affine map with the given list of operands, pulling in /// the maps from any affine.apply operations that supply the operands. static void composeMultiResultAffineMap(AffineMap &map, - SmallVectorImpl<Value> &operands) { + SmallVectorImpl<Value> &operands, + bool composeAffineMin = false) { // Compose and canonicalize each expression in the map individually because // composition only applies to single-result maps, collecting potentially // duplicate operands in a single list with shifted dimensions and symbols. 
@@ -1203,7 +1275,8 @@ static void composeMultiResultAffineMap(AffineMap &map, for (unsigned i : llvm::seq<unsigned>(0, map.getNumResults())) { SmallVector<Value> submapOperands(operands.begin(), operands.end()); AffineMap submap = map.getSubMap({i}); - fullyComposeAffineMapAndOperands(&submap, &submapOperands); + fullyComposeAffineMapAndOperands(&submap, &submapOperands, + composeAffineMin); canonicalizeMapAndOperands(&submap, &submapOperands); unsigned numNewDims = submap.getNumDims(); submap = submap.shiftDims(dims.size()).shiftSymbols(symbols.size()); @@ -1221,10 +1294,9 @@ static void composeMultiResultAffineMap(AffineMap &map, canonicalizeMapAndOperands(&map, &operands); } -OpFoldResult -mlir::affine::makeComposedFoldedAffineApply(OpBuilder &b, Location loc, - AffineMap map, - ArrayRef<OpFoldResult> operands) { +OpFoldResult mlir::affine::makeComposedFoldedAffineApply( + OpBuilder &b, Location loc, AffineMap map, ArrayRef<OpFoldResult> operands, + bool composeAffineMin) { assert(map.getNumResults() == 1 && "building affine.apply with !=1 result"); // Create new builder without a listener, so that no notification is @@ -1236,7 +1308,7 @@ mlir::affine::makeComposedFoldedAffineApply(OpBuilder &b, Location loc, // Create op. AffineApplyOp applyOp = - makeComposedAffineApply(newBuilder, loc, map, operands); + makeComposedAffineApply(newBuilder, loc, map, operands, composeAffineMin); // Get constant operands. SmallVector<Attribute> constOperands(applyOp->getNumOperands()); @@ -1256,26 +1328,25 @@ mlir::affine::makeComposedFoldedAffineApply(OpBuilder &b, Location loc, return llvm::getSingleElement(foldResults); } -OpFoldResult -mlir::affine::makeComposedFoldedAffineApply(OpBuilder &b, Location loc, - AffineExpr expr, - ArrayRef<OpFoldResult> operands) { +OpFoldResult mlir::affine::makeComposedFoldedAffineApply( + OpBuilder &b, Location loc, AffineExpr expr, + ArrayRef<OpFoldResult> operands, bool composeAffineMin) { return makeComposedFoldedAffineApply( b, loc, AffineMap::inferFromExprList(ArrayRef<AffineExpr>{expr}, b.getContext()) .front(), - operands); + operands, composeAffineMin); } SmallVector<OpFoldResult> mlir::affine::makeComposedFoldedMultiResultAffineApply( - OpBuilder &b, Location loc, AffineMap map, - ArrayRef<OpFoldResult> operands) { - return llvm::map_to_vector(llvm::seq<unsigned>(0, map.getNumResults()), - [&](unsigned i) { - return makeComposedFoldedAffineApply( - b, loc, map.getSubMap({i}), operands); - }); + OpBuilder &b, Location loc, AffineMap map, ArrayRef<OpFoldResult> operands, + bool composeAffineMin) { + return llvm::map_to_vector( + llvm::seq<unsigned>(0, map.getNumResults()), [&](unsigned i) { + return makeComposedFoldedAffineApply(b, loc, map.getSubMap({i}), + operands, composeAffineMin); + }); } template <typename OpTy> @@ -3024,7 +3095,8 @@ void AffineIfOp::build(OpBuilder &builder, OperationState &result, /// `set` by composing the maps of such affine.apply ops with the integer /// set constraints. static void composeSetAndOperands(IntegerSet &set, - SmallVectorImpl<Value> &operands) { + SmallVectorImpl<Value> &operands, + bool composeAffineMin) { // We will simply reuse the API of the map composition by viewing the LHSs of // the equalities and inequalities of `set` as the affine exprs of an affine // map. Convert to equivalent map, compose, and convert back to set. 
@@ -3035,7 +3107,7 @@ static void composeSetAndOperands(IntegerSet &set, [](Value v) { return v.getDefiningOp<AffineApplyOp>(); })) return; - composeAffineMapAndOperands(&map, &operands); + composeAffineMapAndOperands(&map, &operands, composeAffineMin); set = IntegerSet::get(map.getNumDims(), map.getNumSymbols(), map.getResults(), set.getEqFlags()); } @@ -3044,7 +3116,7 @@ static void composeSetAndOperands(IntegerSet &set, LogicalResult AffineIfOp::fold(FoldAdaptor, SmallVectorImpl<OpFoldResult> &) { auto set = getIntegerSet(); SmallVector<Value, 4> operands(getOperands()); - composeSetAndOperands(set, operands); + composeSetAndOperands(set, operands, /*composeAffineMin=*/false); canonicalizeSetAndOperands(&set, &operands); // Check if the canonicalization or composition led to any change. diff --git a/mlir/lib/Dialect/Linalg/Transforms/PadTilingInterface.cpp b/mlir/lib/Dialect/Linalg/Transforms/PadTilingInterface.cpp index 5383ae48aeb3a..42dac0776bace 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/PadTilingInterface.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/PadTilingInterface.cpp @@ -84,7 +84,7 @@ SmallVector<OpFoldResult> linalg::computePaddedShape( getDimsToSize(rewriter, indexingSizes, options); // For each dimension in the operand's shape, iterate over indexingSizes and - // add + // add the various term contributions. for (const auto &enResults : enumerate(indexingMap.getResults())) { int64_t resultIndex = enResults.index(); AffineMap partialIndexingMap = indexingMap.getSubMap( @@ -122,7 +122,8 @@ SmallVector<OpFoldResult> linalg::computePaddedShape( AffineMap composedMap = projectedMap.compose(ceilMap); OpFoldResult paddingDimOfr = affine::makeComposedFoldedAffineApply( rewriter, loc, composedMap, - {indexingSizes[paddingDim], paddingSize}); + {indexingSizes[paddingDim], paddingSize}, + /*composeAffineMin=*/true); terms.push_back(paddingDimOfr); } else { // Otherwise just set to paddingSize. 
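For reviewers who want to try the new flag outside of PadTilingInterface.cpp, here is a minimal sketch of a caller opting into the affine_min-aware composition. The `buildPaddedSize` helper and its surrounding setup are hypothetical; the `makeComposedFoldedAffineApply` overload with the trailing `composeAffineMin` parameter is the one added by this patch.

```cpp
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/IR/Builders.h"

using namespace mlir;

// Hypothetical helper: pads `size` up to the next multiple of `multiple` by
// building ceildiv(size, multiple) * multiple as a composed, folded
// affine.apply. With composeAffineMin = true, if `size` is defined by an
// affine.min that is provably nonnegative and has `multiple` among its
// constant results (e.g. affine.min(-d0 + s0, 16) with multiple = 16), the
// whole expression folds to the constant `multiple`.
static OpFoldResult buildPaddedSize(OpBuilder &b, Location loc,
                                    OpFoldResult size, int64_t multiple) {
  AffineExpr s0 = getAffineSymbolExpr(0, b.getContext());
  return affine::makeComposedFoldedAffineApply(
      b, loc, s0.ceilDiv(multiple) * multiple, {size},
      /*composeAffineMin=*/true);
}
```

The extra ValueBoundsConstraintSet::computeConstantBound queries only run when the flag is set, which is why the default stays `false` everywhere else.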
diff --git a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp index 87f883c2e6485..d858ab3a6406a 100644 --- a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp +++ b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp @@ -146,7 +146,7 @@ ValueBoundsConstraintSet::Variable::Variable(AffineMap map, } ValueBoundsConstraintSet::Variable::Variable(AffineMap map, - ArrayRef<Value> mapOperands) + ValueRange mapOperands) : Variable(map, llvm::map_to_vector(mapOperands, [](Value v) { return Variable(v); })) {} diff --git a/mlir/test/Dialect/Linalg/transform-op-pad-tiling-interface-multiple-of.mlir b/mlir/test/Dialect/Linalg/transform-op-pad-tiling-interface-multiple-of.mlir index 5ac35c14be3fb..4fcbcbb2a18e3 100644 --- a/mlir/test/Dialect/Linalg/transform-op-pad-tiling-interface-multiple-of.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-pad-tiling-interface-multiple-of.mlir @@ -136,3 +136,134 @@ module { } } } + +// ----- + +// CHECK-DAG: #[[$MAP0:.*]] = affine_map<()[s0, s1] -> (-s1 + (s0 ceildiv 16) * 16)> +// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 16)> + +// CHECK-LABEL: pad_lhs +func.func @pad_lhs( + %arg0: tensor<24x?xf32>, %arg1: tensor<?x25xf32>, %arg2: tensor<24x25xf32>) + -> tensor<24x25xf32> +{ + // CHECK: %[[D0_0:.*]] = tensor.dim + // CHECK: %[[D0_1:.*]] = tensor.dim + // CHECK: %[[H0:.*]] = affine.apply #[[$MAP0]]()[%[[D0_0]], %[[D0_1]]] + // CHECK: tensor.pad %{{.*}} low[0, 0] high[0, %[[H0]]] + // CHECK: : tensor<24x?xf32> to tensor<24x?xf32> + + // CHECK: %[[D0_2:.*]] = tensor.dim + // CHECK: %[[H1:.*]] = affine.apply #[[$MAP0]]()[%[[D0_0]], %[[D0_2]]] + // CHECK: tensor.pad %{{.*}} low[0, 0] high[%[[H1]], 0] + // CHECK: : tensor<?x25xf32> to tensor<?x25xf32> + // CHECK: scf.for %{{.*}} -> (tensor<24x25xf32>) + + // TODO: Not yet simplified enough.. + // CHECK: linalg.matmul ins(%{{.*}}, %{{.*}}: tensor<8x?xf32>, tensor<?x25xf32>) outs(%extracted_slice_5 : tensor<8x25xf32>) -> tensor<8x25xf32> + + // CHECK: tensor.insert_slice %{{.*}} into %{{.*}}[%{{.*}}, 0] [8, 25] [1, 1] + // CHECK-SAME: : tensor<8x25xf32> into tensor<24x25xf32> + %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x?xf32>, tensor<?x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> + func.return %0 : tensor<24x25xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %matmul = transform.structured.match ops{["linalg.matmul"]} in %arg1 + : (!transform.any_op) -> !transform.any_op + + // Pad then tile should produce static shapes. 
+ %matmul_padded, %_ = transform.structured.pad_tiling_interface %matmul to padding_sizes [8, 16] pad_to_multiple_of { + padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32], + padding_dimensions=[0, 2] + } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + + %m, %l0, %l1 = transform.structured.tile_using_for %matmul_padded tile_sizes [8, 0, 16] + : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + + transform.yield + } +} + +// ----- + +// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 16)> +// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0) -> (-d0 + 16)> + +// CHECK-LABEL: pad_lhs +func.func @pad_lhs( + %arg0: tensor<24x?xf32>, %arg1: tensor<?x25xf32>, %arg2: tensor<24x25xf32>) + -> tensor<24x25xf32> +{ + // CHECK: scf.for %{{.*}} -> (tensor<24x25xf32>) + // CHECK: %[[MIN:.*]] = affine.min #[[$MAP0]](%{{.*}}) + // CHECK: %[[H0:.*]] = affine.apply #[[$MAP1]](%[[MIN]]) + // CHECK: tensor.pad %{{.*}} low[0, 0] high[0, %[[H0]]] + // CHECK: : tensor<8x?xf32> to tensor<8x16xf32> + + // CHECK: %[[H1:.*]] = affine.apply #[[$MAP1]](%[[MIN]]) + // CHECK: tensor.pad %{{.*}} low[0, 0] high[%[[H1]], 0] + // CHECK: : tensor<?x25xf32> to tensor<16x25xf32> + + // CHECK: linalg.matmul ins(%{{.*}}, %{{.*}} : tensor<8x16xf32>, tensor<16x25xf32>) outs(%{{.*}} : tensor<8x25xf32>) -> tensor<8x25xf32> + + // CHECK: tensor.insert_slice %{{.*}} into %{{.*}}[%{{.*}}, 0] [8, 25] [1, 1] + // CHECK-SAME: : tensor<8x25xf32> into tensor<24x25xf32> + %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x?xf32>, tensor<?x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> + func.return %0 : tensor<24x25xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %matmul = transform.structured.match ops{["linalg.matmul"]} in %arg1 + : (!transform.any_op) -> !transform.any_op + + // Tile then pad should produce static shapes. + %m, %l0, %l1 = transform.structured.tile_using_for %matmul tile_sizes [8, 0, 16] + : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + + %matmul_padded, %_ = transform.structured.pad_tiling_interface %m to padding_sizes [8, 16] pad_to_multiple_of { + padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32], + padding_dimensions=[0, 2] + } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + + transform.yield + } +} + +// ----- + +// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0) -> (-d0 + 20, 8)> +// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 16)> +// CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0) -> (-d0 + 8)> +// CHECK-DAG: #[[$MAP3:.*]] = affine_map<(d0) -> (-d0 + 16)> + +// CHECK-LABEL: pad_lhs +func.func @pad_lhs( + %arg0: tensor<20x?xf32>, %arg1: tensor<?x25xf32>, %arg2: tensor<20x25xf32>) + -> tensor<20x25xf32> +{ + // CHECK: linalg.matmul ins(%{{.*}}, %{{.*}} : tensor<8x16xf32>, tensor<16x25xf32>) outs(%{{.*}} : tensor<8x25xf32>) -> tensor<8x25xf32> + %0 = linalg.matmul ins(%arg0, %arg1 : tensor<20x?xf32>, tensor<?x25xf32>) outs(%arg2 : tensor<20x25xf32>) -> tensor<20x25xf32> + func.return %0 : tensor<20x25xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %matmul = transform.structured.match ops{["linalg.matmul"]} in %arg1 + : (!transform.any_op) -> !transform.any_op + + // Tile then pad should produce static shapes. 
+ %m, %l0, %l1 = transform.structured.tile_using_for %matmul tile_sizes [8, 0, 16] + : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + + %matmul_padded, %_ = transform.structured.pad_tiling_interface %m to padding_sizes [8, 16] pad_to_multiple_of { + padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32], + padding_dimensions=[0, 2] + } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + + transform.yield + } } +

From 512be3adf3bda6baeb41da93a94fd235c1eb39b9 Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache <nico.vasila...@amd.com>
Date: Sun, 22 Jun 2025 14:24:48 +0200
Subject: [PATCH 2/2] [mlir][transform] Drop redundant padding_dimensions spec from pad_tiling_interface

This revision aligns the padding specification of pad_tiling_interface with the tiling specification. Dimensions that should be skipped are specified by "padding by 0". Trailing dimensions that are left unspecified are automatically completed to "pad by 0".
---
 .../Linalg/TransformOps/LinalgTransformOps.td | 25 +++++++----
 .../TransformOps/LinalgTransformOps.cpp       | 21 +--------
 .../Linalg/Transforms/PadTilingInterface.cpp  | 45 +++++++++----------
 ...m-op-pad-tiling-interface-multiple-of.mlir | 28 +++++-------
 .../transform-op-pad-tiling-interface.mlir    | 12 ++---
 5 files changed, 54 insertions(+), 77 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
index cf3f2b70580da..c5650470fdc8d 100644
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -1195,17 +1195,29 @@ def PadTilingInterfaceOp : Op<Transform_Dialect, "structured.pad_tiling_interfac TransformOpInterface, ReportTrackingListenerFailuresOpTrait]> { let description = [{ - Pads the operations pointed to by the target handle using the options - provided as operation attributes. The operation returns a handle to the - padded operation and to the padding operation ("tensor.pad"). + Pads the **iteration domain** of the operations pointed to by the target + handle using the options provided as operation attributes. Padding the + iteration domain induces a padding of the operands that is consistent + across the op semantics and, unlike for simple elementwise ops, may not be + trivially deducible or specifiable on operands only (e.g. convolutions). + + The specification of `padding_sizes` follows that of `tile_sizes` during + tiling: the value "0" on a particular iterator encodes "no padding". Like in + the case of tiling, an automatic completion by 0 to the operation rank + occurs. + + This transformation returns a handle to the padded operation and to the + padding operation ("tensor.pad"). TODO: in the future this should be moved out of a specific Linalg implementation file and into a more general "Structured" file. #### Return modes - This operation ignores non-Linalg ops and drops them in the return. - In the future, this operation will support all TilingInterfaceOps. + This operation ignores non-IndexingMapOpInterface ops and drops them in the + return. In the future, this operation will support all TilingInterfaceOps + for which the contract between iteration domain and operands can be + reified. This operation may produce a definite failure if the padding fails for any reason.
@@ -1219,7 +1231,6 @@ def PadTilingInterfaceOp : Op<Transform_Dialect, "structured.pad_tiling_interfac let arguments = (ins TransformHandleTypeInterface:$target, DefaultValuedAttr<ArrayAttr, "{}">:$padding_values, - DefaultValuedAttr<I64ArrayAttr, "{}">:$padding_dimensions, Variadic<TransformAnyParamTypeOrAnyHandle>:$padding_sizes, DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">: $static_padding_sizes, @@ -1245,11 +1256,9 @@ def PadTilingInterfaceOp : Op<Transform_Dialect, "structured.pad_tiling_interfac // add/mul ring at the moment. // TODO: support other operations (e.g. min, max etc). OpBuilder<(ins "Value":$target, - "ArrayRef<int64_t>":$paddingDimensions, CArg<"ArrayRef<int64_t>", "{}">:$staticPaddingSizes, CArg<"bool", "false">:$padToMultipleOf)>, OpBuilder<(ins "Value":$target, - "ArrayRef<int64_t>":$paddingDimensions, "ArrayRef<OpFoldResult>":$mixedPadPaddingSizes, CArg<"bool", "false">:$usePrescribedTensorShapes)> ]; diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index 5d55adbf46f36..d9a0ba02f4fe4 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -2163,7 +2163,6 @@ LogicalResult transform::PadOp::verify() { void transform::PadTilingInterfaceOp::build(OpBuilder &b, OperationState &result, Value target, - ArrayRef<int64_t> paddingDimensions, ArrayRef<int64_t> paddingSizes, bool padToMultipleOf) { auto resultType = transform::AnyOpType::get(b.getContext()); @@ -2172,7 +2171,6 @@ void transform::PadTilingInterfaceOp::build(OpBuilder &b, /*types=*/TypeRange{resultType, resultType}, /*target=*/target, /*paddingValues=*/ArrayAttr(), // let inference handle this - /*paddingDimensions=*/b.getI64ArrayAttr(paddingDimensions), /*paddingSizes=*/ValueRange{}, /*paddingSizes=*/ (paddingSizes.empty() ? 
DenseI64ArrayAttr() @@ -2183,7 +2181,6 @@ void transform::PadTilingInterfaceOp::build(OpBuilder &b, void transform::PadTilingInterfaceOp::build( OpBuilder &b, OperationState &result, Value target, - ArrayRef<int64_t> paddingDimensions, ArrayRef<OpFoldResult> mixedPaddingSizes, bool padToMultipleOf) { auto resultType = transform::AnyOpType::get(b.getContext()); SmallVector<int64_t> staticPaddingSizes; @@ -2195,7 +2192,6 @@ void transform::PadTilingInterfaceOp::build( /*types=*/TypeRange{resultType, resultType}, /*target=*/target, /*paddingValues=*/ArrayAttr(), // let inference handle this - /*paddingDimensions=*/b.getI64ArrayAttr(paddingDimensions), /*paddingSizes=*/dynamicPaddingSizes, /*paddingSizes=*/staticPaddingSizes, /*usePrescribedTensorShapes=*/padToMultipleOf); @@ -2277,8 +2273,6 @@ transform::PadTilingInterfaceOp::apply(transform::TransformRewriter &rewriter, TilingInterface paddedOp; PadTilingInterfaceOptions options; options.setPaddingValues(paddingValues) - .setPaddingDimensions( - extractFromIntegerArrayAttr<int64_t>(getPaddingDimensions())) .setPaddingSizes(getMixedPaddingSizes()) .setPadToMultipleOf(getPadToMultipleOf()); @@ -2303,20 +2297,7 @@ transform::PadTilingInterfaceOp::apply(transform::TransformRewriter &rewriter, return DiagnosedSilenceableFailure::success(); } -LogicalResult transform::PadTilingInterfaceOp::verify() { - SmallVector<int64_t> paddingDimensions = - extractFromIntegerArrayAttr<int64_t>(getPaddingDimensions()); - if (any_of(paddingDimensions, - [](int64_t paddingDimension) { return paddingDimension < 0; })) { - return emitOpError() << "expects padding_dimensions to contain positive " - "integers, found " - << getPaddingDimensions(); - } - if (getMixedPaddingSizes().size() != paddingDimensions.size()) { - return emitOpError() << "expects as many multiples as padding_dimensions"; - } - return success(); -} +LogicalResult transform::PadTilingInterfaceOp::verify() { return success(); } //===---------------------------------------------------------------------===// // HoistPadOp diff --git a/mlir/lib/Dialect/Linalg/Transforms/PadTilingInterface.cpp b/mlir/lib/Dialect/Linalg/Transforms/PadTilingInterface.cpp index 42dac0776bace..eda3373b4d639 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/PadTilingInterface.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/PadTilingInterface.cpp @@ -32,29 +32,27 @@ using namespace mlir::tensor; #define DBGSNL() (llvm::dbgs() << "\n") /// Form a "full-rank" padding specification so that the application is easy. -static llvm::SmallDenseMap<int64_t, OpFoldResult> -getDimsToSize(Builder &b, ArrayRef<OpFoldResult> indexingSizes, - const PadTilingInterfaceOptions &options) { - llvm::SmallDenseMap<int64_t, OpFoldResult> dimsToSize; - for (const auto &[paddingDim, paddingSize] : - llvm::zip_equal(options.paddingDimensions, options.paddingSizes)) { - dimsToSize[paddingDim] = paddingSize; - } +static SmallVector<OpFoldResult> +getFullRankPaddingSizes(Builder &b, ArrayRef<OpFoldResult> indexingSizes, + const PadTilingInterfaceOptions &options) { + SmallVector<OpFoldResult> paddingSizes; // Complete the padding specification to specify all dimensions. - for (int64_t idx = 0, e = indexingSizes.size(); idx != e; ++idx) { - if (dimsToSize.find(idx) != dimsToSize.end()) - continue; - // If a dimension is not specified, either complete with: + for (size_t idx = 0, e = indexingSizes.size(); idx != e; ++idx) { + // Complete to zero if needed. + paddingSizes.push_back(options.paddingSizes.size() > idx + ? 
options.paddingSizes[idx] + : b.getIndexAttr(0)); + // If a dimension is zero (either specified or completed), replace by: // - 1 if we are padding to the next multiple of. // - indexingSizes[idx] otherwise - dimsToSize[idx] = - options.padToMultipleOf ? b.getIndexAttr(1) : indexingSizes[idx]; - } - for (int64_t idx = 0, e = indexingSizes.size(); idx != e; ++idx) { - LLVM_DEBUG(DBGS() << "----idx: " << idx << " : " << dimsToSize[idx] + if (isZeroInteger(paddingSizes[idx])) { + paddingSizes[idx] = + options.padToMultipleOf ? b.getIndexAttr(1) : indexingSizes[idx]; + } + LLVM_DEBUG(DBGS() << "----idx: " << idx << " : " << paddingSizes[idx] << "\n"); } - return dimsToSize; + return paddingSizes; } /// Compute the padded shape of the given value `v` of `RankedTensorType` given @@ -80,8 +78,8 @@ SmallVector<OpFoldResult> linalg::computePaddedShape( "rank"); // "Full-rank" padding specification. - llvm::SmallDenseMap<int64_t, OpFoldResult> dimsToSize = - getDimsToSize(rewriter, indexingSizes, options); + SmallVector<OpFoldResult> paddingSizes = + getFullRankPaddingSizes(rewriter, indexingSizes, options); // For each dimension in the operand's shape, iterate over indexingSizes and // add the various term contributions. @@ -97,7 +95,9 @@ SmallVector<OpFoldResult> linalg::computePaddedShape( // Find all padding dimensions that contribute to this operand dimension // and compute the padded term contribution to the final padded shape. SmallVector<OpFoldResult> terms; - for (const auto &[paddingDim, paddingSize] : dimsToSize) { + for (size_t paddingDim = 0, e = paddingSizes.size(); paddingDim != e; + ++paddingDim) { + OpFoldResult paddingSize = paddingSizes[paddingDim]; LLVM_DEBUG(DBGS() << "------try apply padding of dim: " << paddingDim << " to: " << paddingSize << "\n"); if (!enResults.value().isFunctionOfDim(paddingDim)) @@ -224,9 +224,6 @@ linalg::rewriteAsPaddedOp(RewriterBase &rewriter, TilingInterface opToPad, SmallVector<tensor::PadOp> &padOps, PadSizeComputationFunction computePaddingSizeFun) { LLVM_DEBUG(DBGS() << "Start rewriteAsPaddedOp : " << opToPad << "\n"); - assert(constOptions.paddingSizes.size() == - constOptions.paddingDimensions.size() && - "invalid number of elements in padToMultipleOf"); Location loc = opToPad.getLoc(); PadTilingInterfaceOptions options(constOptions); diff --git a/mlir/test/Dialect/Linalg/transform-op-pad-tiling-interface-multiple-of.mlir b/mlir/test/Dialect/Linalg/transform-op-pad-tiling-interface-multiple-of.mlir index 4fcbcbb2a18e3..2bba309953570 100644 --- a/mlir/test/Dialect/Linalg/transform-op-pad-tiling-interface-multiple-of.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-pad-tiling-interface-multiple-of.mlir @@ -36,8 +36,7 @@ module attributes {transform.with_named_sequence} { // Tile to 5 then pad to 8 (supposedly to better hit vector ops). 
%matmul_l1, %loops_l1 = transform.structured.tile_using_for %matmul tile_sizes [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) %matmul_padded, %_ = transform.structured.pad_tiling_interface %matmul_l1 to padding_sizes [8] pad_to_multiple_of { - padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32], - padding_dimensions=[0] + padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32] } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) transform.yield @@ -73,9 +72,8 @@ module { module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["linalg.generic"]} in %arg0 : (!transform.any_op) -> !transform.any_op - %padded, %pad = transform.structured.pad_tiling_interface %0 to padding_sizes [3, 5] pad_to_multiple_of { - padding_dimensions = [0, 2], - padding_values = [0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32] + %padded, %pad = transform.structured.pad_tiling_interface %0 to padding_sizes [3, 0, 5] pad_to_multiple_of { + padding_values = [0.0 : f32, 0.0 : f32, 0.0 : f32] } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) transform.yield } @@ -128,9 +126,8 @@ module { module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["linalg.generic"]} in %arg0 : (!transform.any_op) -> !transform.any_op - %padded, %pad = transform.structured.pad_tiling_interface %0 to padding_sizes [3, 5] pad_to_multiple_of { - padding_dimensions = [0, 2], - padding_values = [0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32] + %padded, %pad = transform.structured.pad_tiling_interface %0 to padding_sizes [3, 0, 5] pad_to_multiple_of { + padding_values = [0.0 : f32, 0.0 : f32, 0.0 : f32] } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) transform.yield } @@ -174,9 +171,8 @@ module attributes {transform.with_named_sequence} { : (!transform.any_op) -> !transform.any_op // Pad then tile should produce static shapes. 
- %matmul_padded, %_ = transform.structured.pad_tiling_interface %matmul to padding_sizes [8, 16] pad_to_multiple_of { - padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32], - padding_dimensions=[0, 2] + %matmul_padded, %_ = transform.structured.pad_tiling_interface %matmul to padding_sizes [8, 0, 16] pad_to_multiple_of { + padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32] } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) %m, %l0, %l1 = transform.structured.tile_using_for %matmul_padded tile_sizes [8, 0, 16] @@ -223,9 +219,8 @@ module attributes {transform.with_named_sequence} { %m, %l0, %l1 = transform.structured.tile_using_for %matmul tile_sizes [8, 0, 16] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - %matmul_padded, %_ = transform.structured.pad_tiling_interface %m to padding_sizes [8, 16] pad_to_multiple_of { - padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32], - padding_dimensions=[0, 2] + %matmul_padded, %_ = transform.structured.pad_tiling_interface %m to padding_sizes [8, 0, 16] pad_to_multiple_of { + padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32] } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) transform.yield @@ -258,9 +253,8 @@ module attributes {transform.with_named_sequence} { %m, %l0, %l1 = transform.structured.tile_using_for %matmul tile_sizes [8, 0, 16] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - %matmul_padded, %_ = transform.structured.pad_tiling_interface %m to padding_sizes [8, 16] pad_to_multiple_of { - padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32], - padding_dimensions=[0, 2] + %matmul_padded, %_ = transform.structured.pad_tiling_interface %m to padding_sizes [8, 0, 16] pad_to_multiple_of { + padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32] } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) transform.yield diff --git a/mlir/test/Dialect/Linalg/transform-op-pad-tiling-interface.mlir b/mlir/test/Dialect/Linalg/transform-op-pad-tiling-interface.mlir index f0a410fa4015f..26c03ed309c05 100644 --- a/mlir/test/Dialect/Linalg/transform-op-pad-tiling-interface.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-pad-tiling-interface.mlir @@ -18,8 +18,7 @@ module attributes {transform.with_named_sequence} { : (!transform.any_op) -> (!transform.any_op, !transform.any_op) %fill_padded, %_ = transform.structured.pad_tiling_interface %fill_l1 to padding_sizes [8] { - padding_values=[0.0 : f32, 0.0 : f32], - padding_dimensions=[0] + padding_values=[0.0 : f32, 0.0 : f32] } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) transform.yield @@ -55,8 +54,7 @@ module attributes {transform.with_named_sequence} { // Tile to 5 then pad to 8 (supposedly to better hit vector ops). 
%matmul_l1, %loops_l1 = transform.structured.tile_using_for %matmul tile_sizes [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) %matmul_padded, %_ = transform.structured.pad_tiling_interface %matmul_l1 to padding_sizes [8] { - padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32], - padding_dimensions=[0] + padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32] } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) transform.yield @@ -91,8 +89,7 @@ module { module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["linalg.generic"]} in %arg0 : (!transform.any_op) -> !transform.any_op - %padded, %pad = transform.structured.pad_tiling_interface %0 to padding_sizes [8, 14] { - padding_dimensions = [0, 2], + %padded, %pad = transform.structured.pad_tiling_interface %0 to padding_sizes [8, 0, 14] { padding_values = [0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32] } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) transform.yield @@ -147,8 +144,7 @@ module { module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["linalg.generic"]} in %arg0 : (!transform.any_op) -> !transform.any_op - %padded, %pad = transform.structured.pad_tiling_interface %0 to padding_sizes [8, 14] { - padding_dimensions = [0, 2], + %padded, %pad = transform.structured.pad_tiling_interface %0 to padding_sizes [8, 0, 14] { padding_values = [0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32] } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) transform.yield
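To illustrate the revised contract from the C++ side, here is a minimal sketch that builds options equivalent to `transform.structured.pad_tiling_interface %op to padding_sizes [8, 0, 16] pad_to_multiple_of`. The fluent setters mirror the usage in LinalgTransformOps.cpp above; the include path, the namespace, and the element type of `paddingValues` are assumptions.

```cpp
#include "mlir/Dialect/Linalg/Transforms/Transforms.h" // assumed location
#include "mlir/Dialect/Utils/StaticValueUtils.h"

using namespace mlir;

// Positional padding sizes, aligned with tile sizes: the 0 in position 1
// means "do not pad this iteration-domain dimension"; any trailing
// dimensions beyond the three listed would be implicitly completed with 0.
static linalg::PadTilingInterfaceOptions
makePadOptions(MLIRContext *ctx, ArrayRef<Attribute> paddingValues) {
  linalg::PadTilingInterfaceOptions options;
  options.setPaddingValues(paddingValues)
      .setPaddingSizes(getAsIndexOpFoldResult(ctx, {8, 0, 16}))
      .setPadToMultipleOf(true);
  return options;
}
```

Note that when pad_to_multiple_of is set, getFullRankPaddingSizes promotes a zero entry to 1 before the multiple-of computation, so "pad to a multiple of 1" keeps that dimension unpadded.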