https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/155987
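For context before the quoted patch: with this change, a serial `do concurrent` loop can be mapped to the device as the combined OpenMP construct `target teams distribute parallel do` (this is what the new basic_device.f90 test below checks). A minimal Fortran sketch of that equivalence follows; the program is illustrative only and mirrors the test rather than being taken verbatim from the patch:

    program do_concurrent_device_sketch
      implicit none
      integer :: a(10), i

      ! The form the pass starts from: a data-parallel `do concurrent` loop.
      do concurrent (i = 1:10)
        a(i) = i
      end do

      ! Roughly what the loop is rewritten into when device mapping is
      ! requested (e.g. via -fdo-concurrent-to-openmp=device): `a` is
      ! implicitly mapped tofrom and the loop bounds are host-evaluated.
      !$omp target teams distribute parallel do
      do i = 1, 10
        a(i) = i
      end do

      print *, a
    end program do_concurrent_device_sketch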
>From 0373863b919e59130dcf57593f4283ece0dff12a Mon Sep 17 00:00:00 2001 From: ergawy <kareem.erg...@amd.com> Date: Fri, 29 Aug 2025 02:04:49 -0500 Subject: [PATCH] [flang][OpenMP] Extend `do concurrent` mapping to device Upstreams further parts of `do concurrent` to OpenMP conversion pass from AMD's fork. This PR extends the pass by adding support for mapping to the device. --- flang/lib/Optimizer/OpenMP/CMakeLists.txt | 1 + .../OpenMP/DoConcurrentConversion.cpp | 400 +++++++++++++++++- .../Transforms/DoConcurrent/basic_device.f90 | 83 ++++ .../Transforms/DoConcurrent/basic_device.mlir | 10 +- 4 files changed, 476 insertions(+), 18 deletions(-) create mode 100644 flang/test/Transforms/DoConcurrent/basic_device.f90 diff --git a/flang/lib/Optimizer/OpenMP/CMakeLists.txt b/flang/lib/Optimizer/OpenMP/CMakeLists.txt index e0aebd0714c8f..b85ee7e861a4f 100644 --- a/flang/lib/Optimizer/OpenMP/CMakeLists.txt +++ b/flang/lib/Optimizer/OpenMP/CMakeLists.txt @@ -26,6 +26,7 @@ add_flang_library(FlangOpenMPTransforms FIRSupport FortranSupport HLFIRDialect + FortranUtils MLIR_DEPS ${dialect_libs} diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp index c928b76065ade..e975b86a6ba0d 100644 --- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp +++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp @@ -6,17 +6,22 @@ // //===----------------------------------------------------------------------===// +#include "flang/Optimizer/Builder/DirectivesCommon.h" #include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/HLFIRTools.h" #include "flang/Optimizer/Builder/Todo.h" #include "flang/Optimizer/Dialect/FIROps.h" +#include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/OpenMP/Passes.h" #include "flang/Optimizer/OpenMP/Utils.h" #include "flang/Support/OpenMP-utils.h" +#include "flang/Utils/OpenMP.h" #include "mlir/Analysis/SliceAnalysis.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/IRMapping.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/RegionUtils.h" +#include "llvm/Frontend/OpenMP/OMPConstants.h" namespace flangomp { #define GEN_PASS_DEF_DOCONCURRENTCONVERSIONPASS @@ -107,6 +112,33 @@ struct InductionVariableInfo { using InductionVariableInfos = llvm::SmallVector<InductionVariableInfo>; +/// Collect the list of values used inside the loop but defined outside of it. +void collectLoopLiveIns(fir::DoConcurrentLoopOp loop, + llvm::SmallVectorImpl<mlir::Value> &liveIns) { + llvm::SmallDenseSet<mlir::Value> seenValues; + llvm::SmallDenseSet<mlir::Operation *> seenOps; + + for (auto [lb, ub, st] : llvm::zip_equal( + loop.getLowerBound(), loop.getUpperBound(), loop.getStep())) { + liveIns.push_back(lb); + liveIns.push_back(ub); + liveIns.push_back(st); + } + + mlir::visitUsedValuesDefinedAbove( + loop.getRegion(), [&](mlir::OpOperand *operand) { + if (!seenValues.insert(operand->get()).second) + return; + + mlir::Operation *definingOp = operand->get().getDefiningOp(); + // We want to collect ops corresponding to live-ins only once. + if (definingOp && !seenOps.insert(definingOp).second) + return; + + liveIns.push_back(operand->get()); + }); +} + /// Collects values that are local to a loop: "loop-local values". A loop-local /// value is one that is used exclusively inside the loop but allocated outside /// of it. 
This usually corresponds to temporary values that are used inside the @@ -182,10 +214,6 @@ class DoConcurrentConversion mlir::LogicalResult matchAndRewrite(fir::DoConcurrentOp doLoop, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const override { - if (mapToDevice) - return doLoop.emitError( - "not yet implemented: Mapping `do concurrent` loops to device"); - looputils::InductionVariableInfos ivInfos; auto loop = mlir::cast<fir::DoConcurrentLoopOp>( doLoop.getRegion().back().getTerminator()); @@ -196,20 +224,72 @@ class DoConcurrentConversion for (mlir::Value indVar : *indVars) ivInfos.emplace_back(loop, indVar); + llvm::SmallVector<mlir::Value> loopNestLiveIns; + looputils::collectLoopLiveIns(loop, loopNestLiveIns); + assert(!loopNestLiveIns.empty()); + llvm::SetVector<mlir::Value> locals; looputils::collectLoopLocalValues(loop, locals); + // We do not want to map "loop-local" values to the device through + // `omp.map.info` ops. Therefore, we remove them from the list of live-ins. + loopNestLiveIns.erase(llvm::remove_if(loopNestLiveIns, + [&](mlir::Value liveIn) { + return locals.contains(liveIn); + }), + loopNestLiveIns.end()); + + mlir::omp::TargetOp targetOp; + mlir::omp::LoopNestOperands loopNestClauseOps; + mlir::IRMapping mapper; + + if (mapToDevice) { + mlir::ModuleOp module = doLoop->getParentOfType<mlir::ModuleOp>(); + bool isTargetDevice = + llvm::cast<mlir::omp::OffloadModuleInterface>(*module) + .getIsTargetDevice(); + + mlir::omp::TargetOperands targetClauseOps; + genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, mapper, + loopNestClauseOps, + isTargetDevice ? nullptr : &targetClauseOps); + + LiveInShapeInfoMap liveInShapeInfoMap; + fir::FirOpBuilder builder( + rewriter, + fir::getKindMapping(doLoop->getParentOfType<mlir::ModuleOp>())); + + for (mlir::Value liveIn : loopNestLiveIns) { + targetClauseOps.mapVars.push_back( + genMapInfoOpForLiveIn(builder, liveIn)); + liveInShapeInfoMap.insert( + {liveIn, TargetDeclareShapeCreationInfo(liveIn)}); + } + + targetOp = + genTargetOp(doLoop.getLoc(), rewriter, mapper, loopNestLiveIns, + targetClauseOps, loopNestClauseOps, liveInShapeInfoMap); + genTeamsOp(doLoop.getLoc(), rewriter); + } + mlir::omp::ParallelOp parallelOp = genParallelOp(doLoop.getLoc(), rewriter, ivInfos, mapper); - mlir::omp::LoopNestOperands loopNestClauseOps; - genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, mapper, - loopNestClauseOps); + + // Only set as composite when part of `distribute parallel do`. + parallelOp.setComposite(mapToDevice); + + if (!mapToDevice) + genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, mapper, + loopNestClauseOps); for (mlir::Value local : locals) looputils::localizeLoopLocalValue(local, parallelOp.getRegion(), rewriter); + if (mapToDevice) + genDistributeOp(doLoop.getLoc(), rewriter).setComposite(/*val=*/true); + mlir::omp::LoopNestOp ompLoopNest = genWsLoopOp(rewriter, loop, mapper, loopNestClauseOps, /*isComposite=*/mapToDevice); @@ -244,6 +324,51 @@ class DoConcurrentConversion } private: + struct TargetDeclareShapeCreationInfo { + // Note: We use `std::vector` (rather than `llvm::SmallVector` as usual) to + // interface more easily with `ShapeShiftOp::getOrigins()`, which returns + // `std::vector`.
+ std::vector<mlir::Value> startIndices{}; + std::vector<mlir::Value> extents{}; + + TargetDeclareShapeCreationInfo(mlir::Value liveIn) { + mlir::Value shape = nullptr; + mlir::Operation *liveInDefiningOp = liveIn.getDefiningOp(); + auto declareOp = + mlir::dyn_cast_if_present<hlfir::DeclareOp>(liveInDefiningOp); + + if (declareOp != nullptr) + shape = declareOp.getShape(); + + if (shape == nullptr) + return; + + auto shapeOp = + mlir::dyn_cast_if_present<fir::ShapeOp>(shape.getDefiningOp()); + auto shapeShiftOp = + mlir::dyn_cast_if_present<fir::ShapeShiftOp>(shape.getDefiningOp()); + + if (shapeOp == nullptr && shapeShiftOp == nullptr) + TODO(liveIn.getLoc(), + "Shapes not defined by `fir.shape` or `fir.shape_shift` ops are " + "not supported yet."); + + if (shapeShiftOp != nullptr) + startIndices = shapeShiftOp.getOrigins(); + + extents = shapeOp != nullptr + ? std::vector<mlir::Value>(shapeOp.getExtents().begin(), + shapeOp.getExtents().end()) + : shapeShiftOp.getExtents(); + } + + bool isShapedValue() const { return !extents.empty(); } + bool isShapeShiftedValue() const { return !startIndices.empty(); } + }; + + using LiveInShapeInfoMap = + llvm::DenseMap<mlir::Value, TargetDeclareShapeCreationInfo>; + mlir::omp::ParallelOp genParallelOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, looputils::InductionVariableInfos &ivInfos, @@ -284,11 +409,11 @@ class DoConcurrentConversion return result; } - void - genLoopNestClauseOps(mlir::Location loc, - mlir::ConversionPatternRewriter &rewriter, - fir::DoConcurrentLoopOp loop, mlir::IRMapping &mapper, - mlir::omp::LoopNestOperands &loopNestClauseOps) const { + void genLoopNestClauseOps( + mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, + fir::DoConcurrentLoopOp loop, mlir::IRMapping &mapper, + mlir::omp::LoopNestOperands &loopNestClauseOps, + mlir::omp::TargetOperands *targetClauseOps = nullptr) const { assert(loopNestClauseOps.loopLowerBounds.empty() && "Loop nest bounds were already emitted!"); @@ -297,11 +422,19 @@ class DoConcurrentConversion bounds.push_back(var.getDefiningOp()->getResult(0)); }; + auto hostEvalCapture = [&](mlir::Value var, + llvm::SmallVectorImpl<mlir::Value> &bounds) { + populateBounds(var, bounds); + + if (targetClauseOps) + targetClauseOps->hostEvalVars.push_back(var); + }; + for (auto [lb, ub, st] : llvm::zip_equal( loop.getLowerBound(), loop.getUpperBound(), loop.getStep())) { - populateBounds(lb, loopNestClauseOps.loopLowerBounds); - populateBounds(ub, loopNestClauseOps.loopUpperBounds); - populateBounds(st, loopNestClauseOps.loopSteps); + hostEvalCapture(lb, loopNestClauseOps.loopLowerBounds); + hostEvalCapture(ub, loopNestClauseOps.loopUpperBounds); + hostEvalCapture(st, loopNestClauseOps.loopSteps); } loopNestClauseOps.loopInclusive = rewriter.getUnitAttr(); @@ -439,6 +572,243 @@ class DoConcurrentConversion return loopNestOp; } + void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value liveIn, + mlir::Value rawAddr, + llvm::SmallVectorImpl<mlir::Value> &boundsOps) const { + fir::ExtendedValue extVal = + hlfir::translateToExtendedValue(rawAddr.getLoc(), builder, + hlfir::Entity{liveIn}, + /*contiguousHint=*/ + true) + .first; + fir::factory::AddrAndBoundsInfo info = fir::factory::getDataOperandBaseAddr( + builder, rawAddr, /*isOptional=*/false, rawAddr.getLoc()); + boundsOps = fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp, + mlir::omp::MapBoundsType>( + builder, info, extVal, + /*dataExvIsAssumedSize=*/false, rawAddr.getLoc()); + } + + mlir::omp::MapInfoOp
genMapInfoOpForLiveIn(fir::FirOpBuilder &builder, + mlir::Value liveIn) const { + mlir::Value rawAddr = liveIn; + llvm::StringRef name; + + mlir::Operation *liveInDefiningOp = liveIn.getDefiningOp(); + auto declareOp = + mlir::dyn_cast_if_present<hlfir::DeclareOp>(liveInDefiningOp); + + if (declareOp != nullptr) { + // Use the raw address to avoid unboxing `fir.box` values whenever + // possible. Put differently, if we have access to the direct value memory + // reference/address, we use it. + rawAddr = declareOp.getOriginalBase(); + name = declareOp.getUniqName(); + } + + if (!llvm::isa<mlir::omp::PointerLikeType>(rawAddr.getType())) { + builder.setInsertionPointAfter(liveInDefiningOp); + auto copyVal = builder.createTemporary(liveIn.getLoc(), liveIn.getType()); + builder.createStoreWithConvert(copyVal.getLoc(), liveIn, copyVal); + rawAddr = copyVal; + } + + mlir::Type liveInType = liveIn.getType(); + mlir::Type eleType = liveInType; + if (auto refType = mlir::dyn_cast<fir::ReferenceType>(liveInType)) + eleType = refType.getElementType(); + + llvm::omp::OpenMPOffloadMappingFlags mapFlag = + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; + mlir::omp::VariableCaptureKind captureKind = + mlir::omp::VariableCaptureKind::ByRef; + + if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) { + captureKind = mlir::omp::VariableCaptureKind::ByCopy; + } else if (!fir::isa_builtin_cptr_type(eleType)) { + mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; + mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; + } + + llvm::SmallVector<mlir::Value> boundsOps; + genBoundsOps(builder, liveIn, rawAddr, boundsOps); + + return Fortran::utils::openmp::createMapInfoOp( + builder, liveIn.getLoc(), rawAddr, + /*varPtrPtr=*/{}, name.str(), boundsOps, + /*members=*/{}, + /*membersIndex=*/mlir::ArrayAttr{}, + static_cast< + std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>( + mapFlag), + captureKind, rawAddr.getType()); + } + + mlir::omp::TargetOp + genTargetOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, + mlir::IRMapping &mapper, llvm::ArrayRef<mlir::Value> mappedVars, + mlir::omp::TargetOperands &clauseOps, + mlir::omp::LoopNestOperands &loopNestClauseOps, + const LiveInShapeInfoMap &liveInShapeInfoMap) const { + auto targetOp = rewriter.create<mlir::omp::TargetOp>(loc, clauseOps); + auto argIface = llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*targetOp); + + mlir::Region &region = targetOp.getRegion(); + + llvm::SmallVector<mlir::Type> regionArgTypes; + llvm::SmallVector<mlir::Location> regionArgLocs; + + for (auto var : llvm::concat<const mlir::Value>(clauseOps.hostEvalVars, + clauseOps.mapVars)) { + regionArgTypes.push_back(var.getType()); + regionArgLocs.push_back(var.getLoc()); + } + + rewriter.createBlock(&region, {}, regionArgTypes, regionArgLocs); + fir::FirOpBuilder builder( + rewriter, + fir::getKindMapping(targetOp->getParentOfType<mlir::ModuleOp>())); + + // Within the loop, it is possible that we discover other values that need + // to be mapped to the target region (the shape info values for arrays, for + // example). Therefore, the map block args might be extended and resized. + // Hence, we invoke `argIface.getMapBlockArgs()` every iteration to make + // sure we access the proper vector of data.
+ int idx = 0; + for (auto [mapInfoOp, mappedVar] : + llvm::zip_equal(clauseOps.mapVars, mappedVars)) { + auto miOp = mlir::cast<mlir::omp::MapInfoOp>(mapInfoOp.getDefiningOp()); + hlfir::DeclareOp liveInDeclare = + genLiveInDeclare(builder, targetOp, argIface.getMapBlockArgs()[idx], + miOp, liveInShapeInfoMap.at(mappedVar)); + ++idx; + + // TODO If `mappedVar.getDefiningOp()` is a `fir::BoxAddrOp`, we probably + // need to "unpack" the box by getting the defining op of its value. + // However, we have not hit this case in practice yet, so we leave it as a + // TODO for now. + + auto mapHostValueToDevice = [&](mlir::Value hostValue, + mlir::Value deviceValue) { + if (!llvm::isa<mlir::omp::PointerLikeType>(hostValue.getType())) + mapper.map(hostValue, + builder.loadIfRef(hostValue.getLoc(), deviceValue)); + else + mapper.map(hostValue, deviceValue); + }; + + mapHostValueToDevice(mappedVar, liveInDeclare.getOriginalBase()); + + if (auto origDeclareOp = mlir::dyn_cast_if_present<hlfir::DeclareOp>( + mappedVar.getDefiningOp())) + mapHostValueToDevice(origDeclareOp.getBase(), liveInDeclare.getBase()); + } + + for (auto [arg, hostEval] : llvm::zip_equal(argIface.getHostEvalBlockArgs(), + clauseOps.hostEvalVars)) + mapper.map(hostEval, arg); + + for (unsigned i = 0; i < loopNestClauseOps.loopLowerBounds.size(); ++i) { + loopNestClauseOps.loopLowerBounds[i] = + mapper.lookup(loopNestClauseOps.loopLowerBounds[i]); + loopNestClauseOps.loopUpperBounds[i] = + mapper.lookup(loopNestClauseOps.loopUpperBounds[i]); + loopNestClauseOps.loopSteps[i] = + mapper.lookup(loopNestClauseOps.loopSteps[i]); + } + + // Check if cloning the bounds introduced any dependency on the outer + // region. If so, then either clone them as well if they are + // MemoryEffectFree, or else copy them to a new temporary and add them to + // the map and block_argument lists and replace their uses with the new + // temporary. + Fortran::utils::openmp::cloneOrMapRegionOutsiders(builder, targetOp); + rewriter.setInsertionPoint( + rewriter.create<mlir::omp::TerminatorOp>(targetOp.getLoc())); + + return targetOp; + } + + hlfir::DeclareOp genLiveInDeclare( + fir::FirOpBuilder &builder, mlir::omp::TargetOp targetOp, + mlir::Value liveInArg, mlir::omp::MapInfoOp liveInMapInfoOp, + const TargetDeclareShapeCreationInfo &targetShapeCreationInfo) const { + mlir::Type liveInType = liveInArg.getType(); + std::string liveInName = liveInMapInfoOp.getName().has_value() + ?
liveInMapInfoOp.getName().value().str() + : std::string(""); + if (fir::isa_ref_type(liveInType)) + liveInType = fir::unwrapRefType(liveInType); + + mlir::Value shape = [&]() -> mlir::Value { + if (!targetShapeCreationInfo.isShapedValue()) + return {}; + + llvm::SmallVector<mlir::Value> extentOperands; + llvm::SmallVector<mlir::Value> startIndexOperands; + + if (targetShapeCreationInfo.isShapeShiftedValue()) { + llvm::SmallVector<mlir::Value> shapeShiftOperands; + + size_t shapeIdx = 0; + for (auto [startIndex, extent] : + llvm::zip_equal(targetShapeCreationInfo.startIndices, + targetShapeCreationInfo.extents)) { + shapeShiftOperands.push_back( + Fortran::utils::openmp::mapTemporaryValue( + builder, targetOp, startIndex, + liveInName + ".start_idx.dim" + std::to_string(shapeIdx))); + shapeShiftOperands.push_back( + Fortran::utils::openmp::mapTemporaryValue( + builder, targetOp, extent, + liveInName + ".extent.dim" + std::to_string(shapeIdx))); + ++shapeIdx; + } + + auto shapeShiftType = fir::ShapeShiftType::get( + builder.getContext(), shapeShiftOperands.size() / 2); + return builder.create<fir::ShapeShiftOp>( + liveInArg.getLoc(), shapeShiftType, shapeShiftOperands); + } + + llvm::SmallVector<mlir::Value> shapeOperands; + size_t shapeIdx = 0; + for (auto extent : targetShapeCreationInfo.extents) { + shapeOperands.push_back(Fortran::utils::openmp::mapTemporaryValue( + builder, targetOp, extent, + liveInName + ".extent.dim" + std::to_string(shapeIdx))); + ++shapeIdx; + } + + return builder.create<fir::ShapeOp>(liveInArg.getLoc(), shapeOperands); + }(); + + return builder.create<hlfir::DeclareOp>(liveInArg.getLoc(), liveInArg, + liveInName, shape); + } + + mlir::omp::TeamsOp + genTeamsOp(mlir::Location loc, + mlir::ConversionPatternRewriter &rewriter) const { + auto teamsOp = rewriter.create<mlir::omp::TeamsOp>( + loc, /*clauses=*/mlir::omp::TeamsOperands{}); + + rewriter.createBlock(&teamsOp.getRegion()); + rewriter.setInsertionPoint(rewriter.create<mlir::omp::TerminatorOp>(loc)); + + return teamsOp; + } + + mlir::omp::DistributeOp + genDistributeOp(mlir::Location loc, + mlir::ConversionPatternRewriter &rewriter) const { + auto distOp = rewriter.create<mlir::omp::DistributeOp>( + loc, /*clauses=*/mlir::omp::DistributeOperands{}); + + rewriter.createBlock(&distOp.getRegion()); + return distOp; + } + bool mapToDevice; llvm::DenseSet<fir::DoConcurrentOp> &concurrentLoopsToSkip; mlir::SymbolTable &moduleSymbolTable; diff --git a/flang/test/Transforms/DoConcurrent/basic_device.f90 b/flang/test/Transforms/DoConcurrent/basic_device.f90 new file mode 100644 index 0000000000000..7bce696387646 --- /dev/null +++ b/flang/test/Transforms/DoConcurrent/basic_device.f90 @@ -0,0 +1,83 @@ +! Tests mapping of a basic `do concurrent` loop to +! `!$omp target teams distribute parallel do`. + +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \ +! RUN: | FileCheck %s +! RUN: bbc -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \ +! RUN: | FileCheck %s + +program do_concurrent_basic + implicit none + integer :: a(10) + integer :: i + + ! CHECK-DAG: %[[I_ORIG_ALLOC:.*]] = fir.alloca i32 {bindc_name = "i"} + ! CHECK: %[[I_ORIG_DECL:.*]]:2 = hlfir.declare %[[I_ORIG_ALLOC]] + + ! CHECK-DAG: %[[A_ADDR:.*]] = fir.address_of(@_QFEa) + ! CHECK: %[[A_SHAPE:.*]] = fir.shape %[[A_EXTENT:.*]] : (index) -> !fir.shape<1> + ! CHECK: %[[A_ORIG_DECL:.*]]:2 = hlfir.declare %[[A_ADDR]](%[[A_SHAPE]]) + + ! CHECK-NOT: fir.do_loop + + ! CHECK: %[[C1:.*]] = arith.constant 1 : i32 + ! 
CHECK: %[[HOST_LB:.*]] = fir.convert %[[C1]] : (i32) -> index + ! CHECK: %[[C10:.*]] = arith.constant 10 : i32 + ! CHECK: %[[HOST_UB:.*]] = fir.convert %[[C10]] : (i32) -> index + ! CHECK: %[[HOST_STEP:.*]] = arith.constant 1 : index + + ! CHECK-DAG: %[[I_MAP_INFO:.*]] = omp.map.info var_ptr(%[[I_ORIG_DECL]]#1 + ! CHECK: %[[C0:.*]] = arith.constant 0 : index + ! CHECK: %[[UPPER_BOUND:.*]] = arith.subi %[[A_EXTENT]], %{{c1.*}} : index + + ! CHECK: %[[A_BOUNDS:.*]] = omp.map.bounds lower_bound(%[[C0]] : index) + ! CHECK-SAME: upper_bound(%[[UPPER_BOUND]] : index) + ! CHECK-SAME: extent(%[[A_EXTENT]] : index) + + ! CHECK-DAG: %[[A_MAP_INFO:.*]] = omp.map.info var_ptr(%[[A_ORIG_DECL]]#1 : {{[^(]+}}) + ! CHECK-SAME: map_clauses(implicit, tofrom) capture(ByRef) bounds(%[[A_BOUNDS]]) + + ! CHECK: omp.target + ! CHECK-SAME: host_eval(%[[HOST_LB]] -> %[[LB:[[:alnum:]]+]], %[[HOST_UB]] -> %[[UB:[[:alnum:]]+]], %[[HOST_STEP]] -> %[[STEP:[[:alnum:]]+]] : index, index, index) + ! CHECK-SAME: map_entries( + ! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}}, + ! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}}, + ! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}}, + ! CHECK-SAME: %[[I_MAP_INFO]] -> %[[I_ARG:[[:alnum:]]+]], + ! CHECK-SAME: %[[A_MAP_INFO]] -> %[[A_ARG:.[[:alnum:]]+]] + + ! CHECK: %[[A_DEV_DECL:.*]]:2 = hlfir.declare %[[A_ARG]] + ! CHECK: omp.teams { + ! CHECK-NEXT: omp.parallel { + + ! CHECK-NEXT: %[[ITER_VAR:.*]] = fir.alloca i32 {bindc_name = "i"} + ! CHECK-NEXT: %[[BINDING:.*]]:2 = hlfir.declare %[[ITER_VAR]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) + + ! CHECK-NEXT: omp.distribute { + ! CHECK-NEXT: omp.wsloop { + + ! CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { + ! CHECK-NEXT: %[[IV_IDX:.*]] = fir.convert %[[ARG0]] : (index) -> i32 + ! CHECK-NEXT: fir.store %[[IV_IDX]] to %[[BINDING]]#0 : !fir.ref<i32> + ! CHECK-NEXT: %[[IV_VAL1:.*]] = fir.load %[[BINDING]]#0 : !fir.ref<i32> + ! CHECK-NEXT: %[[IV_VAL2:.*]] = fir.load %[[BINDING]]#0 : !fir.ref<i32> + ! CHECK-NEXT: %[[IV_VAL_I64:.*]] = fir.convert %[[IV_VAL2]] : (i32) -> i64 + ! CHECK-NEXT: %[[ARR_ACCESS:.*]] = hlfir.designate %[[A_DEV_DECL]]#0 (%[[IV_VAL_I64]]) : (!fir.ref<!fir.array<10xi32>>, i64) -> !fir.ref<i32> + ! CHECK-NEXT: hlfir.assign %[[IV_VAL1]] to %[[ARR_ACCESS]] : i32, !fir.ref<i32> + ! CHECK-NEXT: omp.yield + ! CHECK-NEXT: } + + ! CHECK-NEXT: } {omp.composite} + ! CHECK-NEXT: } {omp.composite} + ! CHECK-NEXT: omp.terminator + ! CHECK-NEXT: } {omp.composite} + ! CHECK-NEXT: omp.terminator + ! CHECK-NEXT: } + ! CHECK-NEXT: omp.terminator + ! CHECK-NEXT: } + do concurrent (i=1:10) + a(i) = i + end do + + ! 
CHECK-NOT: fir.do_loop +end program do_concurrent_basic diff --git a/flang/test/Transforms/DoConcurrent/basic_device.mlir b/flang/test/Transforms/DoConcurrent/basic_device.mlir index 0ca48943864c8..fa511c3d46d58 100644 --- a/flang/test/Transforms/DoConcurrent/basic_device.mlir +++ b/flang/test/Transforms/DoConcurrent/basic_device.mlir @@ -1,4 +1,4 @@ -// RUN: fir-opt --omp-do-concurrent-conversion="map-to=device" -verify-diagnostics %s +// RUN: fir-opt --omp-do-concurrent-conversion="map-to=device" %s -o - | FileCheck %s func.func @do_concurrent_basic() attributes {fir.bindc_name = "do_concurrent_basic"} { %2 = fir.address_of(@_QFEa) : !fir.ref<!fir.array<10xi32>> @@ -11,8 +11,12 @@ func.func @do_concurrent_bas %8 = fir.convert %c10_i32 : (i32) -> index %c1 = arith.constant 1 : index - // expected-error@+2 {{not yet implemented: Mapping `do concurrent` loops to device}} - // expected-error@below {{failed to legalize operation 'fir.do_concurrent'}} + // CHECK: omp.target + // CHECK: omp.teams + // CHECK: omp.parallel + // CHECK: omp.distribute + // CHECK: omp.wsloop + // CHECK: omp.loop_nest fir.do_concurrent { %0 = fir.alloca i32 {bindc_name = "i"} %1:2 = hlfir.declare %0 {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)