Author: Alex Zinenko Date: 2020-12-08T10:43:35+01:00 New Revision: 2fe30a3534dad9f982a3d840b4bfa4870b2ba5bc
URL: https://github.com/llvm/llvm-project/commit/2fe30a3534dad9f982a3d840b4bfa4870b2ba5bc DIFF: https://github.com/llvm/llvm-project/commit/2fe30a3534dad9f982a3d840b4bfa4870b2ba5bc.diff LOG: [mlir] properly support min/max in affine parallelization The existing implementation of the affine parallelization silently copies over the lower and upper bound maps from affine.for to affine.parallel. However, the semantics of these maps differ between these two ops: in affine.for, a max (min) of results is taken for the lower (upper) bound; in affine.parallel, multiple induction variables can be defined and each result corresponds to one induction variable. Thus the existing implementation could generate invalid IR or IR that passes the verifier but has different semantics than the original code. Fix the parallelization utility to emit dedicated min/max operations before the affine.parallel in such cases. Disallow parallelization if min/max would have been in an operation without the AffineScope trait, e.g., in another loop, since the result of these operations is not considered a valid affine dimension identifier and may not be properly handled by the affine analyses. 
Reviewed By: wsmoses Differential Revision: https://reviews.llvm.org/D92763 Added: Modified: mlir/lib/Dialect/Affine/Utils/Utils.cpp mlir/test/Dialect/Affine/parallelize.mlir Removed: ################################################################################ diff --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp index 7892dfbc7a48..e5f5a6d8998f 100644 --- a/mlir/lib/Dialect/Affine/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/Utils.cpp @@ -134,11 +134,43 @@ static AffineIfOp hoistAffineIfOp(AffineIfOp ifOp, Operation *hoistOverOp) { void mlir::affineParallelize(AffineForOp forOp) { Location loc = forOp.getLoc(); OpBuilder outsideBuilder(forOp); + + // If a loop has a 'max' in the lower bound, emit it outside the parallel loop + // as it does not have implicit 'max' behavior. + AffineMap lowerBoundMap = forOp.getLowerBoundMap(); + ValueRange lowerBoundOperands = forOp.getLowerBoundOperands(); + AffineMap upperBoundMap = forOp.getUpperBoundMap(); + ValueRange upperBoundOperands = forOp.getUpperBoundOperands(); + + bool needsMax = lowerBoundMap.getNumResults() > 1; + bool needsMin = upperBoundMap.getNumResults() > 1; + AffineMap identityMap; + if (needsMax || needsMin) { + if (forOp->getParentOp() && + !forOp->getParentOp()->hasTrait<OpTrait::AffineScope>()) + return; + + identityMap = AffineMap::getMultiDimIdentityMap(1, loc->getContext()); + } + if (needsMax) { + auto maxOp = outsideBuilder.create<AffineMaxOp>(loc, lowerBoundMap, + lowerBoundOperands); + lowerBoundMap = identityMap; + lowerBoundOperands = maxOp->getResults(); + } + + // Same for the upper bound. + if (needsMin) { + auto minOp = outsideBuilder.create<AffineMinOp>(loc, upperBoundMap, + upperBoundOperands); + upperBoundMap = identityMap; + upperBoundOperands = minOp->getResults(); + } + // Creating empty 1-D affine.parallel op. 
AffineParallelOp newPloop = outsideBuilder.create<AffineParallelOp>( - loc, llvm::None, llvm::None, forOp.getLowerBoundMap(), - forOp.getLowerBoundOperands(), forOp.getUpperBoundMap(), - forOp.getUpperBoundOperands()); + loc, llvm::None, llvm::None, lowerBoundMap, lowerBoundOperands, + upperBoundMap, upperBoundOperands); // Steal the body of the old affine for op and erase it. newPloop.region().takeBody(forOp.region()); forOp.erase(); diff --git a/mlir/test/Dialect/Affine/parallelize.mlir b/mlir/test/Dialect/Affine/parallelize.mlir index 8e6cb05f46a0..cbc80a092e76 100644 --- a/mlir/test/Dialect/Affine/parallelize.mlir +++ b/mlir/test/Dialect/Affine/parallelize.mlir @@ -114,3 +114,33 @@ func @non_affine_load() { } return } + +// CHECK-LABEL: for_with_minmax +func @for_with_minmax(%m: memref<?xf32>, %lb0: index, %lb1: index, + %ub0: index, %ub1: index) { + // CHECK: %[[lb:.*]] = affine.max + // CHECK: %[[ub:.*]] = affine.min + // CHECK: affine.parallel (%{{.*}}) = (%[[lb]]) to (%[[ub]]) + affine.for %i = max affine_map<(d0, d1) -> (d0, d1)>(%lb0, %lb1) + to min affine_map<(d0, d1) -> (d0, d1)>(%ub0, %ub1) { + affine.load %m[%i] : memref<?xf32> + } + return +} + +// CHECK-LABEL: nested_for_with_minmax +func @nested_for_with_minmax(%m: memref<?xf32>, %lb0: index, + %ub0: index, %ub1: index) { + // CHECK: affine.parallel + affine.for %j = 0 to 10 { + // Cannot parallelize the inner loop because we would need to compute + // affine.max for its lower bound inside the loop, and that is not (yet) + // considered as a valid affine dimension. + // CHECK: affine.for + affine.for %i = max affine_map<(d0, d1) -> (d0, d1)>(%lb0, %j) + to min affine_map<(d0, d1) -> (d0, d1)>(%ub0, %ub1) { + affine.load %m[%i] : memref<?xf32> + } + } + return +} _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits