================ @@ -105,6 +106,167 @@ static void specializeForLoopForUnrolling(ForOp op) { op.erase(); } +/// Create a new for loop for the remaining iterations (partiaIteration) +/// after a for loop has been peeled. This is followed by correcting the +/// loop bounds for both loops given the index (splitBound) where the +/// iteration space is to be split up. +static LogicalResult splitLoopHelper(RewriterBase &b, scf::ForOp forOp, + scf::ForOp &partialIteration, + Value &splitBound) { + RewriterBase::InsertionGuard guard(b); + auto lbInt = getConstantIntValue(forOp.getLowerBound()); + auto ubInt = getConstantIntValue(forOp.getUpperBound()); + auto stepInt = getConstantIntValue(forOp.getStep()); + + // No specialization necessary if step already divides upper bound evenly. + if (lbInt && ubInt && stepInt && (*ubInt - *lbInt) % *stepInt == 0) + return failure(); + // No specialization necessary if step size is 1. + if (stepInt == static_cast<int64_t>(1)) + return failure(); + + // Create ForOp for partial iteration. + b.setInsertionPointAfter(forOp); + partialIteration = cast<scf::ForOp>(b.clone(*forOp.getOperation())); + partialIteration.getLowerBoundMutable().assign(splitBound); + forOp.replaceAllUsesWith(partialIteration->getResults()); + partialIteration.getInitArgsMutable().assign(forOp->getResults()); + + // Set new upper loop bound. + b.updateRootInPlace( + forOp, [&]() { forOp.getUpperBoundMutable().assign(splitBound); }); + + return success(); +} + +/// Convert single-iteration for loop to if-else block. 
+///
+/// Emits an `lb < ub` (signed) comparison and an scf.if right before
+/// `forOp`, moves the loop body into the then-branch with the induction
+/// variable replaced by the lower bound and the iter_args replaced by the
+/// init values, yields the init values from the else-branch when the loop
+/// has results, and finally replaces `forOp` with the results of the new
+/// scf.if, which is returned.
+/// NOTE(review): presumably the caller only passes loops that execute at
+/// most once; nothing in this function checks that.
+static scf::IfOp convertSingleIterFor(RewriterBase &b, scf::ForOp &forOp) {
+  Location loc = forOp->getLoc();
+  Value lowerBound = forOp.getLowerBound();
+
+  // The single iteration only runs when the iteration space is non-empty.
+  b.setInsertionPoint(forOp);
+  auto cond = b.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt,
+                                      lowerBound, forOp.getUpperBound());
+  auto ifOp = b.create<scf::IfOp>(loc, forOp->getResultTypes(), cond,
+                                  /*withElseRegion=*/true);
+
+  // then branch: the loop body with its block arguments (induction variable
+  // followed by the iter_args) replaced by the lower bound and init values.
+  SmallVector<Value> bbArgReplacements;
+  bbArgReplacements.push_back(lowerBound);
+  llvm::append_range(bbArgReplacements, forOp.getInitArgs());
+
+  b.inlineBlockBefore(forOp.getBody(), ifOp.thenBlock(),
+                      ifOp.thenBlock()->begin(), bbArgReplacements);
+  // else branch: the loop did not run, so forward the init values.
+  // NOTE(review): for loops without results no yield is created here and the
+  // inlined body brings its own terminator into the then-branch; confirm
+  // that the scf.if builder used above leaves both blocks well-formed in
+  // that case.
+  b.setInsertionPointToStart(ifOp.elseBlock());
+  if (!forOp->getResultTypes().empty()) {
+    b.create<scf::YieldOp>(loc, forOp.getInits());
+  }
+  b.replaceOp(forOp, ifOp->getResults());
+  return ifOp;
+}
+
+/// Rewrite a for loop with bounds/step that potentially do not divide the
+/// iteration space evenly into a chain of for loops where the step is a
+/// power of 2 and decreases exponentially across subsequent loops. Helps
+/// divide the iteration space across all resulting peeled loops evenly.
+///
+/// Optionally, convert all single iteration for loops to if-else
+/// blocks when convert_single_iter_loops_to_if attribute is set to true or
+/// alternatively with the convert-single-iter-loops-to-if option for the
+/// scf-for-loop-continuous-peeling pass.
+static LogicalResult continuousPeelForLoop(RewriterBase &b, ForOp forOp,
+                                           ForOp &partialIteration,
+                                           bool convertSingleIterLoopsToIf) {
+
+  scf::ForOp currentLoop;
+  auto lbInt = getConstantIntValue(forOp.getLowerBound());
+  auto stepInt = getConstantIntValue(forOp.getStep());
+
+  // Step size must be a known positive constant greater than 1.
+ if (!stepInt || stepInt <= static_cast<int64_t>(1)) + return failure(); + + Value initialUb = forOp.getUpperBound(); + Value initialStep = forOp.getStep(); + uint64_t loopStep = *stepInt; + currentLoop = forOp; + AffineExpr sym0, sym1, sym2; + bindSymbols(b.getContext(), sym0, sym1, sym2); + AffineMap defaultSplitMap = + AffineMap::get(0, 3, {sym1 - ((sym1 - sym0) % sym2)}); + AffineMap powerSplitMap = AffineMap::get(0, 3, {sym1 - (sym1 % sym2)}); + bool usePowerSplit = (lbInt.has_value()) && + (*lbInt % *stepInt == static_cast<int64_t>(0)) && + (loopStep == llvm::bit_floor(loopStep)); + AffineMap splitMap = usePowerSplit ? powerSplitMap : defaultSplitMap; + SmallVector<scf::ForOp> loops; + while (loopStep) { + b.setInsertionPoint(currentLoop); + auto constStepOp = + b.create<arith::ConstantIndexOp>(currentLoop.getLoc(), loopStep); + b.updateRootInPlace(currentLoop, [&]() { + currentLoop.getStepMutable().assign(constStepOp); + }); + b.setInsertionPoint(currentLoop); + Value splitBound = b.createOrFold<affine::AffineApplyOp>( + currentLoop.getLoc(), splitMap, + ValueRange{currentLoop.getLowerBound(), currentLoop.getUpperBound(), + currentLoop.getStep()}); + LogicalResult status = + splitLoopHelper(b, currentLoop, partialIteration, splitBound); + + // Canonicalize min/max affine operations + // It uses scf::rewritePeeledMinMaxOp to identify operations to be replaced, + // they are then replaced by the current step size. + // TODO: Alternative method - update affine map to reflect the loop step + // Example: min(ub - iv, 8) -> min(ub - iv, 4) + currentLoop.walk([&](Operation *affineOp) { + if (isa<AffineMinOp, AffineMaxOp>(affineOp)) { + FailureOr<AffineApplyOp> result = scf::rewritePeeledMinMaxOp( + b, affineOp, currentLoop.getInductionVar(), initialUb, initialStep, + /*insideLoop=*/true); + // correct the step of the newly created affine op + if (!failed(result)) + b.replaceOp(result.value(), currentLoop.getStep()); ---------------- matthias-springer wrote:
I don't understand this part. There is no guarantee that `affineOp` looks like `min(ub - iv, 8)`. It could be an arbitrary affine.min/affine.max op. https://github.com/llvm/llvm-project/pull/71555 _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits