================ @@ -283,13 +316,166 @@ mlir::Value ReductionProcessor::createScalarCombiner( return reductionOp; } +/// Create reduction combiner region for reduction variables which are boxed +/// arrays +static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, + ReductionProcessor::ReductionIdentifier redId, + fir::BaseBoxType boxTy, mlir::Value lhs, + mlir::Value rhs) { + fir::SequenceType seqTy = + mlir::dyn_cast_or_null<fir::SequenceType>(boxTy.getEleTy()); + // TODO: support allocatable arrays: !fir.box<!fir.heap<!fir.array<...>>> + if (!seqTy || seqTy.hasUnknownShape()) + TODO(loc, "Unsupported boxed type in OpenMP reduction"); + + // load fir.ref<fir.box<...>> + mlir::Value lhsAddr = lhs; + lhs = builder.create<fir::LoadOp>(loc, lhs); + rhs = builder.create<fir::LoadOp>(loc, rhs); + + const unsigned rank = seqTy.getDimension(); + llvm::SmallVector<mlir::Value> extents; + extents.reserve(rank); + llvm::SmallVector<mlir::Value> lbAndExtents; + lbAndExtents.reserve(rank * 2); + + // Get box lowerbounds and extents: + mlir::Type idxTy = builder.getIndexType(); + for (unsigned i = 0; i < rank; ++i) { + // TODO: ideally we want to hoist box reads out of the critical section. + // We could do this by having box dimensions in block arguments like + // OpenACC does + mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i); + auto dimInfo = + builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, lhs, dim); + extents.push_back(dimInfo.getExtent()); + lbAndExtents.push_back(dimInfo.getLowerBound()); + lbAndExtents.push_back(dimInfo.getExtent()); + } + + auto shapeShiftTy = fir::ShapeShiftType::get(builder.getContext(), rank); + auto shapeShift = + builder.create<fir::ShapeShiftOp>(loc, shapeShiftTy, lbAndExtents); + + // Iterate over array elements, applying the equivalent scalar reduction: + + // A hlfir::elemental here gets inlined with a temporary so create the + // loop nest directly. 
+ // This function already controls all of the code in this region so we
+ know this won't miss any opportunities for clever elemental inlining
+ hlfir::LoopNest nest =
+ hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
+ builder.setInsertionPointToStart(nest.innerLoop.getBody());
+ mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
+ auto lhsEleAddr = builder.create<fir::ArrayCoorOp>(
+ loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{},
+ nest.oneBasedIndices, /*typeparms=*/mlir::ValueRange{});
+ auto rhsEleAddr = builder.create<fir::ArrayCoorOp>(
+ loc, refTy, rhs, shapeShift, /*slice=*/mlir::Value{},
+ nest.oneBasedIndices, /*typeparms=*/mlir::ValueRange{});
+ auto lhsEle = builder.create<fir::LoadOp>(loc, lhsEleAddr);
+ auto rhsEle = builder.create<fir::LoadOp>(loc, rhsEleAddr);
+ mlir::Value scalarReduction = ReductionProcessor::createScalarCombiner(
+ builder, loc, redId, refTy, lhsEle, rhsEle);
+ builder.create<fir::StoreOp>(loc, scalarReduction, lhsEleAddr);
+
+ builder.setInsertionPointAfter(nest.outerLoop);
+ builder.create<mlir::omp::YieldOp>(loc, lhsAddr);
+}
+
+// generate combiner region for reduction operations
+static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
+ ReductionProcessor::ReductionIdentifier redId,
+ mlir::Type ty, mlir::Value lhs, mlir::Value rhs,
+ bool isByRef) {
+ ty = fir::unwrapRefType(ty);
+
+ if (fir::isa_trivial(ty)) {
+ mlir::Value lhsLoaded = builder.loadIfRef(loc, lhs);
+ mlir::Value rhsLoaded = builder.loadIfRef(loc, rhs);
+
+ mlir::Value result = ReductionProcessor::createScalarCombiner(
+ builder, loc, redId, ty, lhsLoaded, rhsLoaded);
+ if (isByRef) {
+ builder.create<fir::StoreOp>(loc, result, lhs);
+ builder.create<mlir::omp::YieldOp>(loc, lhs);
+ } else {
+ builder.create<mlir::omp::YieldOp>(loc, result);
+ }
+ return;
+ }
+ // all arrays should have been boxed
+ if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(ty)) {
+ genBoxCombiner(builder, loc, redId, boxTy, 
lhs, rhs); + return; + } + + TODO(loc, "OpenMP genCombiner for unsupported reduction variable type"); +} + +static mlir::Value +createReductionInitRegion(fir::FirOpBuilder &builder, mlir::Location loc, + const ReductionProcessor::ReductionIdentifier redId, + mlir::Type type, bool isByRef) { + mlir::Type ty = fir::unwrapRefType(type); + mlir::Value initValue = ReductionProcessor::getReductionInitValue( + loc, fir::unwrapSeqOrBoxedSeqType(ty), redId, builder); + + if (fir::isa_trivial(ty)) { + if (isByRef) { + mlir::Value alloca = builder.create<fir::AllocaOp>(loc, ty); + builder.createStoreWithConvert(loc, initValue, alloca); + return alloca; + } + // by val + return initValue; + } + + // all arrays are boxed + if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(ty)) { + assert(isByRef && "passing arrays by value is unsupported"); + // TODO: support allocatable arrays: !fir.box<!fir.heap<!fir.array<...>>> + mlir::Type innerTy = fir::extractSequenceType(boxTy); + if (!mlir::isa<fir::SequenceType>(innerTy)) + TODO(loc, "Unsupported boxed type for reduction"); + // Create the private copy from the initial fir.box: + hlfir::Entity source = hlfir::Entity{builder.getBlock()->getArgument(0)}; + + // from hlfir::createTempFromMold() but with the allocation changed to + // use alloca so that we don't have to free it ---------------- kiranchandramohan wrote:
That should come as part of support for delayed privatization. The privatization region will have alloc and dealloc regions. So you can skip this for now. https://github.com/llvm/llvm-project/pull/84958 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits