[llvm-branch-commits] [flang] [llvm] [openmp] [Flang] Add standalone tile support (PR #160298)

Michael Kruse via llvm-branch-commits Wed, 24 Sep 2025 04:20:03 -0700

https://github.com/Meinersbur updated 
https://github.com/llvm/llvm-project/pull/160298


>From bfe9c6b642ebc01f113dbf0a574e424e83f7162a Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-proj...@meinersbur.de>
Date: Tue, 23 Sep 2025 15:33:52 +0200
Subject: [PATCH 1/3] [flang] Add standalone tile support

---
 flang/lib/Lower/OpenMP/ClauseProcessor.cpp    |  13 +
 flang/lib/Lower/OpenMP/ClauseProcessor.h      |   2 +
 flang/lib/Lower/OpenMP/OpenMP.cpp             | 360 ++++++++++++------
 flang/lib/Lower/OpenMP/Utils.cpp              |  23 +-
 flang/lib/Lower/OpenMP/Utils.h                |   7 +
 .../lib/Semantics/check-directive-structure.h |   7 +-
 flang/lib/Semantics/check-omp-structure.cpp   |   8 +-
 flang/lib/Semantics/resolve-directives.cpp    |  16 +-
 flang/test/Lower/OpenMP/tile01.f90            |  58 +++
 flang/test/Lower/OpenMP/tile02.f90            |  88 +++++
 .../loop-transformation-construct02.f90       |   5 +-
 flang/test/Parser/OpenMP/tile-fail.f90        |  32 ++
 flang/test/Parser/OpenMP/tile.f90             |  15 +-
 flang/test/Semantics/OpenMP/tile01.f90        |  26 ++
 flang/test/Semantics/OpenMP/tile02.f90        |  15 +
 flang/test/Semantics/OpenMP/tile03.f90        |  15 +
 flang/test/Semantics/OpenMP/tile04.f90        |  38 ++
 flang/test/Semantics/OpenMP/tile05.f90        |  14 +
 flang/test/Semantics/OpenMP/tile06.f90        |  44 +++
 flang/test/Semantics/OpenMP/tile07.f90        |  35 ++
 flang/test/Semantics/OpenMP/tile08.f90        |  15 +
 llvm/include/llvm/Frontend/OpenMP/OMP.td      |   3 +
 openmp/runtime/test/transform/tile/intfor.f90 |  31 ++
 .../runtime/test/transform/tile/intfor_2d.f90 |  53 +++
 .../transform/tile/intfor_2d_varsizes.F90     |  60 +++
 25 files changed, 841 insertions(+), 142 deletions(-)
 create mode 100644 flang/test/Lower/OpenMP/tile01.f90
 create mode 100644 flang/test/Lower/OpenMP/tile02.f90
 create mode 100644 flang/test/Parser/OpenMP/tile-fail.f90
 create mode 100644 flang/test/Semantics/OpenMP/tile01.f90
 create mode 100644 flang/test/Semantics/OpenMP/tile02.f90
 create mode 100644 flang/test/Semantics/OpenMP/tile03.f90
 create mode 100644 flang/test/Semantics/OpenMP/tile04.f90
 create mode 100644 flang/test/Semantics/OpenMP/tile05.f90
 create mode 100644 flang/test/Semantics/OpenMP/tile06.f90
 create mode 100644 flang/test/Semantics/OpenMP/tile07.f90
 create mode 100644 flang/test/Semantics/OpenMP/tile08.f90
 create mode 100644 openmp/runtime/test/transform/tile/intfor.f90
 create mode 100644 openmp/runtime/test/transform/tile/intfor_2d.f90
 create mode 100644 openmp/runtime/test/transform/tile/intfor_2d_varsizes.F90

diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp 
b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index a96884f5680ba..55eda7e3404c1 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -431,6 +431,19 @@ bool ClauseProcessor::processNumTasks(
   return false;
 }
 
+bool ClauseProcessor::processSizes(StatementContext &stmtCtx,
+                                   mlir::omp::SizesClauseOps &result) const {
+  if (auto *clause = findUniqueClause<omp::clause::Sizes>()) {
+    result.sizes.reserve(clause->v.size());
+    for (const ExprTy &vv : clause->v)
+      result.sizes.push_back(fir::getBase(converter.genExprValue(vv, 
stmtCtx)));
+
+    return true;
+  }
+
+  return false;
+}
+
 bool ClauseProcessor::processNumTeams(
     lower::StatementContext &stmtCtx,
     mlir::omp::NumTeamsClauseOps &result) const {
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h 
b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index 324ea3c1047a5..9e352fa574a97 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -66,6 +66,8 @@ class ClauseProcessor {
                   mlir::omp::LoopRelatedClauseOps &loopResult,
                   mlir::omp::CollapseClauseOps &collapseResult,
                   llvm::SmallVectorImpl<const semantics::Symbol *> &iv) const;
+  bool processSizes(StatementContext &stmtCtx,
+                    mlir::omp::SizesClauseOps &result) const;
   bool processDevice(lower::StatementContext &stmtCtx,
                      mlir::omp::DeviceClauseOps &result) const;
   bool processDeviceType(mlir::omp::DeviceTypeClauseOps &result) const;
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 5681be664d450..7812d9fe00be2 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1984,125 +1984,241 @@ genLoopOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   return loopOp;
 }
 
-static mlir::omp::CanonicalLoopOp
-genCanonicalLoopOp(lower::AbstractConverter &converter, lower::SymMap 
&symTable,
-                   semantics::SemanticsContext &semaCtx,
-                   lower::pft::Evaluation &eval, mlir::Location loc,
-                   const ConstructQueue &queue,
-                   ConstructQueue::const_iterator item,
-                   llvm::ArrayRef<const semantics::Symbol *> ivs,
-                   llvm::omp::Directive directive) {
+static void genCanonicalLoopNest(
+    lower::AbstractConverter &converter, lower::SymMap &symTable,
+    semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+    mlir::Location loc, const ConstructQueue &queue,
+    ConstructQueue::const_iterator item, size_t numLoops,
+    llvm::SmallVectorImpl<mlir::omp::CanonicalLoopOp> &loops) {
+  assert(loops.empty() && "Expecting empty list to fill");
+  assert(numLoops >= 1 && "Expecting at least one loop");
+
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
 
-  assert(ivs.size() == 1 && "Nested loops not yet implemented");
-  const semantics::Symbol *iv = ivs[0];
+  mlir::omp::LoopRelatedClauseOps loopInfo;
+  llvm::SmallVector<const semantics::Symbol *, 3> ivs;
+  collectLoopRelatedInfo(converter, loc, eval, numLoops, loopInfo, ivs);
+  assert(ivs.size() == numLoops &&
+         "Expected to parse as many loop variables as there are loops");
+
+  // Steps that follow:
+  // 1. Emit all of the loop's prologues (compute the tripcount)
+  // 2. Emit omp.canonical_loop nested inside each other (iteratively)
+  // 2.1. In the innermost omp.canonical_loop, emit the loop body prologue (in
+  // the body callback)
+  //
+  // Since emitting prologues and body code is split, remember prologue values
+  // for use when emitting the same loop's epilogues.
+  llvm::SmallVector<mlir::Value> tripcounts;
+  llvm::SmallVector<mlir::Value> clis;
+  llvm::SmallVector<lower::pft::Evaluation *> evals;
+  llvm::SmallVector<mlir::Type> loopVarTypes;
+  llvm::SmallVector<mlir::Value> loopStepVars;
+  llvm::SmallVector<mlir::Value> loopLBVars;
+  llvm::SmallVector<mlir::Value> blockArgs;
+
+  // Step 1: Loop prologues
+  // Computing the trip count must happen before entering the outermost loop
+  lower::pft::Evaluation *innermostEval = &eval.getFirstNestedEvaluation();
+  for ([[maybe_unused]] auto iv : ivs) {
+    if (innermostEval->getIf<parser::DoConstruct>()->IsDoConcurrent()) {
+      // OpenMP specifies DO CONCURRENT only with the `!omp loop` construct.
+      // Will need to add special cases for this combination.
+      TODO(loc, "DO CONCURRENT as canonical loop not supported");
+    }
+
+    auto &doLoopEval = innermostEval->getFirstNestedEvaluation();
+    evals.push_back(innermostEval);
+
+    // Get the loop bounds (and increment)
+    // auto &doLoopEval = nestedEval.getFirstNestedEvaluation();
+    auto *doStmt = doLoopEval.getIf<parser::NonLabelDoStmt>();
+    assert(doStmt && "Expected do loop to be in the nested evaluation");
+    auto &loopControl = 
std::get<std::optional<parser::LoopControl>>(doStmt->t);
+    assert(loopControl.has_value());
+    auto *bounds = std::get_if<parser::LoopControl::Bounds>(&loopControl->u);
+    assert(bounds && "Expected bounds for canonical loop");
+    lower::StatementContext stmtCtx;
+    mlir::Value loopLBVar = fir::getBase(
+        converter.genExprValue(*semantics::GetExpr(bounds->lower), stmtCtx));
+    mlir::Value loopUBVar = fir::getBase(
+        converter.genExprValue(*semantics::GetExpr(bounds->upper), stmtCtx));
+    mlir::Value loopStepVar = [&]() {
+      if (bounds->step) {
+        return fir::getBase(
+            converter.genExprValue(*semantics::GetExpr(bounds->step), 
stmtCtx));
+      }
 
-  auto &nestedEval = eval.getFirstNestedEvaluation();
-  if (nestedEval.getIf<parser::DoConstruct>()->IsDoConcurrent()) {
-    // OpenMP specifies DO CONCURRENT only with the `!omp loop` construct. Will
-    // need to add special cases for this combination.
-    TODO(loc, "DO CONCURRENT as canonical loop not supported");
+      // If `step` is not present, assume it is `1`.
+      auto intTy = firOpBuilder.getI32Type();
+      return firOpBuilder.createIntegerConstant(loc, intTy, 1);
+    }();
+
+    // Get the integer kind for the loop variable and cast the loop bounds
+    size_t loopVarTypeSize = bounds->name.thing.symbol->GetUltimate().size();
+    mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize);
+    loopVarTypes.push_back(loopVarType);
+    loopLBVar = firOpBuilder.createConvert(loc, loopVarType, loopLBVar);
+    loopUBVar = firOpBuilder.createConvert(loc, loopVarType, loopUBVar);
+    loopStepVar = firOpBuilder.createConvert(loc, loopVarType, loopStepVar);
+    loopLBVars.push_back(loopLBVar);
+    loopStepVars.push_back(loopStepVar);
+
+    // Start lowering
+    mlir::Value zero = firOpBuilder.createIntegerConstant(loc, loopVarType, 0);
+    mlir::Value one = firOpBuilder.createIntegerConstant(loc, loopVarType, 1);
+    mlir::Value isDownwards = firOpBuilder.create<mlir::arith::CmpIOp>(
+        loc, mlir::arith::CmpIPredicate::slt, loopStepVar, zero);
+
+    // Ensure we are counting upwards. If not, negate step and swap lb and ub.
+    mlir::Value negStep =
+        firOpBuilder.create<mlir::arith::SubIOp>(loc, zero, loopStepVar);
+    mlir::Value incr = firOpBuilder.create<mlir::arith::SelectOp>(
+        loc, isDownwards, negStep, loopStepVar);
+    mlir::Value lb = firOpBuilder.create<mlir::arith::SelectOp>(
+        loc, isDownwards, loopUBVar, loopLBVar);
+    mlir::Value ub = firOpBuilder.create<mlir::arith::SelectOp>(
+        loc, isDownwards, loopLBVar, loopUBVar);
+
+    // Compute the trip count assuming lb <= ub. This guarantees that the 
result
+    // is non-negative and we can use unsigned arithmetic.
+    mlir::Value span = firOpBuilder.create<mlir::arith::SubIOp>(
+        loc, ub, lb, ::mlir::arith::IntegerOverflowFlags::nuw);
+    mlir::Value tcMinusOne =
+        firOpBuilder.create<mlir::arith::DivUIOp>(loc, span, incr);
+    mlir::Value tcIfLooping = firOpBuilder.create<mlir::arith::AddIOp>(
+        loc, tcMinusOne, one, ::mlir::arith::IntegerOverflowFlags::nuw);
+
+    // Fall back to 0 if lb > ub
+    mlir::Value isZeroTC = firOpBuilder.create<mlir::arith::CmpIOp>(
+        loc, mlir::arith::CmpIPredicate::slt, ub, lb);
+    mlir::Value tripcount = firOpBuilder.create<mlir::arith::SelectOp>(
+        loc, isZeroTC, zero, tcIfLooping);
+    tripcounts.push_back(tripcount);
+
+    // Create the CLI handle.
+    auto newcli = firOpBuilder.create<mlir::omp::NewCliOp>(loc);
+    mlir::Value cli = newcli.getResult();
+    clis.push_back(cli);
+
+    innermostEval = &*std::next(innermostEval->getNestedEvaluations().begin());
   }
 
-  // Get the loop bounds (and increment)
-  auto &doLoopEval = nestedEval.getFirstNestedEvaluation();
-  auto *doStmt = doLoopEval.getIf<parser::NonLabelDoStmt>();
-  assert(doStmt && "Expected do loop to be in the nested evaluation");
-  auto &loopControl = std::get<std::optional<parser::LoopControl>>(doStmt->t);
-  assert(loopControl.has_value());
-  auto *bounds = std::get_if<parser::LoopControl::Bounds>(&loopControl->u);
-  assert(bounds && "Expected bounds for canonical loop");
-  lower::StatementContext stmtCtx;
-  mlir::Value loopLBVar = fir::getBase(
-      converter.genExprValue(*semantics::GetExpr(bounds->lower), stmtCtx));
-  mlir::Value loopUBVar = fir::getBase(
-      converter.genExprValue(*semantics::GetExpr(bounds->upper), stmtCtx));
-  mlir::Value loopStepVar = [&]() {
-    if (bounds->step) {
-      return fir::getBase(
-          converter.genExprValue(*semantics::GetExpr(bounds->step), stmtCtx));
-    }
+  // Step 2: Create nested canoncial loops
+  for (auto i : llvm::seq<size_t>(numLoops)) {
+    bool isInnermost = (i == numLoops - 1);
+    mlir::Type loopVarType = loopVarTypes[i];
+    mlir::Value tripcount = tripcounts[i];
+    mlir::Value cli = clis[i];
+    auto &&eval = evals[i];
+
+    auto ivCallback = [&, i, isInnermost](mlir::Operation *op)
+        -> llvm::SmallVector<const Fortran::semantics::Symbol *> {
+      mlir::Region &region = op->getRegion(0);
+
+      // Create the op's region skeleton (BB taking the iv as argument)
+      firOpBuilder.createBlock(&region, {}, {loopVarType}, {loc});
+      blockArgs.push_back(region.front().getArgument(0));
+
+      // Step 2.1: Emit body prologue code
+      // Compute the translation from logical iteration number to the value of
+      // the loop's iteration variable only in the innermost body. Currently,
+      // loop transformations do not allow any instruction between loops, but
+      // this will change with
+      if (isInnermost) {
+        assert(blockArgs.size() == numLoops &&
+               "Expecting all block args to have been collected by now");
+        for (auto j : llvm::seq<size_t>(numLoops)) {
+          mlir::Value natIterNum = fir::getBase(blockArgs[j]);
+          mlir::Value scaled = firOpBuilder.create<mlir::arith::MulIOp>(
+              loc, natIterNum, loopStepVars[j]);
+          mlir::Value userVal = firOpBuilder.create<mlir::arith::AddIOp>(
+              loc, loopLBVars[j], scaled);
+
+          mlir::OpBuilder::InsertPoint insPt =
+              firOpBuilder.saveInsertionPoint();
+          firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
+          mlir::Type tempTy = converter.genType(*ivs[j]);
+          firOpBuilder.restoreInsertionPoint(insPt);
+
+          // Write the loop value into loop variable
+          mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, 
userVal);
+          hlfir::Entity lhs{converter.getSymbolAddress(*ivs[j])};
+          lhs = hlfir::derefPointersAndAllocatables(loc, firOpBuilder, lhs);
+          mlir::Operation *storeOp =
+              hlfir::AssignOp::create(firOpBuilder, loc, cvtVal, lhs);
+          firOpBuilder.setInsertionPointAfter(storeOp);
+        }
+      }
 
-    // If `step` is not present, assume it is `1`.
-    return firOpBuilder.createIntegerConstant(loc, firOpBuilder.getI32Type(),
-                                              1);
-  }();
+      return {ivs[i]};
+    };
 
-  // Get the integer kind for the loop variable and cast the loop bounds
-  size_t loopVarTypeSize = bounds->name.thing.symbol->GetUltimate().size();
-  mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize);
-  loopLBVar = firOpBuilder.createConvert(loc, loopVarType, loopLBVar);
-  loopUBVar = firOpBuilder.createConvert(loc, loopVarType, loopUBVar);
-  loopStepVar = firOpBuilder.createConvert(loc, loopVarType, loopStepVar);
-
-  // Start lowering
-  mlir::Value zero = firOpBuilder.createIntegerConstant(loc, loopVarType, 0);
-  mlir::Value one = firOpBuilder.createIntegerConstant(loc, loopVarType, 1);
-  mlir::Value isDownwards = mlir::arith::CmpIOp::create(
-      firOpBuilder, loc, mlir::arith::CmpIPredicate::slt, loopStepVar, zero);
-
-  // Ensure we are counting upwards. If not, negate step and swap lb and ub.
-  mlir::Value negStep =
-      mlir::arith::SubIOp::create(firOpBuilder, loc, zero, loopStepVar);
-  mlir::Value incr = mlir::arith::SelectOp::create(
-      firOpBuilder, loc, isDownwards, negStep, loopStepVar);
-  mlir::Value lb = mlir::arith::SelectOp::create(firOpBuilder, loc, 
isDownwards,
-                                                 loopUBVar, loopLBVar);
-  mlir::Value ub = mlir::arith::SelectOp::create(firOpBuilder, loc, 
isDownwards,
-                                                 loopLBVar, loopUBVar);
-
-  // Compute the trip count assuming lb <= ub. This guarantees that the result
-  // is non-negative and we can use unsigned arithmetic.
-  mlir::Value span = mlir::arith::SubIOp::create(
-      firOpBuilder, loc, ub, lb, ::mlir::arith::IntegerOverflowFlags::nuw);
-  mlir::Value tcMinusOne =
-      mlir::arith::DivUIOp::create(firOpBuilder, loc, span, incr);
-  mlir::Value tcIfLooping =
-      mlir::arith::AddIOp::create(firOpBuilder, loc, tcMinusOne, one,
-                                  ::mlir::arith::IntegerOverflowFlags::nuw);
-
-  // Fall back to 0 if lb > ub
-  mlir::Value isZeroTC = mlir::arith::CmpIOp::create(
-      firOpBuilder, loc, mlir::arith::CmpIPredicate::slt, ub, lb);
-  mlir::Value tripcount = mlir::arith::SelectOp::create(
-      firOpBuilder, loc, isZeroTC, zero, tcIfLooping);
-
-  // Create the CLI handle.
-  auto newcli = mlir::omp::NewCliOp::create(firOpBuilder, loc);
-  mlir::Value cli = newcli.getResult();
-
-  auto ivCallback = [&](mlir::Operation *op)
-      -> llvm::SmallVector<const Fortran::semantics::Symbol *> {
-    mlir::Region &region = op->getRegion(0);
-
-    // Create the op's region skeleton (BB taking the iv as argument)
-    firOpBuilder.createBlock(&region, {}, {loopVarType}, {loc});
-
-    // Compute the value of the loop variable from the logical iteration 
number.
-    mlir::Value natIterNum = fir::getBase(region.front().getArgument(0));
-    mlir::Value scaled =
-        mlir::arith::MulIOp::create(firOpBuilder, loc, natIterNum, 
loopStepVar);
-    mlir::Value userVal =
-        mlir::arith::AddIOp::create(firOpBuilder, loc, loopLBVar, scaled);
-
-    // Write loop value to loop variable
-    mlir::Operation *storeOp = setLoopVar(converter, loc, userVal, iv);
-
-    firOpBuilder.setInsertionPointAfter(storeOp);
-    return {iv};
-  };
+    // Create the omp.canonical_loop operation
+    auto opGenInfo = OpWithBodyGenInfo(converter, symTable, semaCtx, loc, 
*eval,
+                                       llvm::omp::Directive::OMPD_unknown)
+                         .setGenSkeletonOnly(!isInnermost)
+                         .setClauses(&item->clauses)
+                         .setPrivatize(false)
+                         .setGenRegionEntryCb(ivCallback);
+    auto canonLoop = genOpWithBody<mlir::omp::CanonicalLoopOp>(
+        std::move(opGenInfo), queue, item, tripcount, cli);
+    loops.push_back(canonLoop);
+
+    // Insert next loop nested inside last loop
+    firOpBuilder.setInsertionPoint(
+        canonLoop.getRegion().back().getTerminator());
+  }
 
-  // Create the omp.canonical_loop operation
-  auto canonLoop = genOpWithBody<mlir::omp::CanonicalLoopOp>(
-      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, nestedEval,
-                        directive)
-          .setClauses(&item->clauses)
-          .setPrivatize(false)
-          .setGenRegionEntryCb(ivCallback),
-      queue, item, tripcount, cli);
+  firOpBuilder.setInsertionPointAfter(loops.front());
+}
+
+static void genTileOp(Fortran::lower::AbstractConverter &converter,
+                      Fortran::lower::SymMap &symTable,
+                      lower::StatementContext &stmtCtx,
+                      Fortran::semantics::SemanticsContext &semaCtx,
+                      Fortran::lower::pft::Evaluation &eval, mlir::Location 
loc,
+                      const ConstructQueue &queue,
+                      ConstructQueue::const_iterator item) {
+  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
 
-  firOpBuilder.setInsertionPointAfter(canonLoop);
-  return canonLoop;
+  mlir::omp::SizesClauseOps sizesClause;
+  ClauseProcessor cp(converter, semaCtx, item->clauses);
+  cp.processSizes(stmtCtx, sizesClause);
+
+  size_t numLoops = sizesClause.sizes.size();
+  llvm::SmallVector<mlir::omp::CanonicalLoopOp, 3> canonLoops;
+  canonLoops.reserve(numLoops);
+
+  genCanonicalLoopNest(converter, symTable, semaCtx, eval, loc, queue, item,
+                       numLoops, canonLoops);
+  assert((canonLoops.size() == numLoops) &&
+         "Expecting the predetermined number of loops");
+
+  llvm::SmallVector<mlir::Value, 3> applyees;
+  applyees.reserve(numLoops);
+  for (mlir::omp::CanonicalLoopOp l : canonLoops)
+    applyees.push_back(l.getCli());
+
+  // Emit the associated loops and create a CLI for each affected loop
+  llvm::SmallVector<mlir::Value, 3> gridGeneratees;
+  llvm::SmallVector<mlir::Value, 3> intratileGeneratees;
+  gridGeneratees.reserve(numLoops);
+  intratileGeneratees.reserve(numLoops);
+  for ([[maybe_unused]] auto i : llvm::seq<int>(0, sizesClause.sizes.size())) {
+    auto gridCLI = firOpBuilder.create<mlir::omp::NewCliOp>(loc);
+    gridGeneratees.push_back(gridCLI.getResult());
+    auto intratileCLI = firOpBuilder.create<mlir::omp::NewCliOp>(loc);
+    intratileGeneratees.push_back(intratileCLI.getResult());
+  }
+
+  llvm::SmallVector<mlir::Value, 6> generatees;
+  generatees.reserve(2 * numLoops);
+  generatees.append(gridGeneratees);
+  generatees.append(intratileGeneratees);
+
+  firOpBuilder.create<mlir::omp::TileOp>(loc, generatees, applyees,
+                                         sizesClause.sizes);
 }
 
 static void genUnrollOp(Fortran::lower::AbstractConverter &converter,
@@ -2114,22 +2230,22 @@ static void 
genUnrollOp(Fortran::lower::AbstractConverter &converter,
                         ConstructQueue::const_iterator item) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
 
-  mlir::omp::LoopRelatedClauseOps loopInfo;
-  llvm::SmallVector<const semantics::Symbol *> iv;
-  collectLoopRelatedInfo(converter, loc, eval, item->clauses, loopInfo, iv);
-
   // Clauses for unrolling not yet implemnted
   ClauseProcessor cp(converter, semaCtx, item->clauses);
   cp.processTODO<clause::Partial, clause::Full>(
       loc, llvm::omp::Directive::OMPD_unroll);
 
   // Emit the associated loop
-  auto canonLoop =
-      genCanonicalLoopOp(converter, symTable, semaCtx, eval, loc, queue, item,
-                         iv, llvm::omp::Directive::OMPD_unroll);
+  llvm::SmallVector<mlir::omp::CanonicalLoopOp, 1> canonLoops;
+  genCanonicalLoopNest(converter, symTable, semaCtx, eval, loc, queue, item, 1,
+                       canonLoops);
+
+  llvm::SmallVector<mlir::Value, 1> applyees;
+  for (auto &&canonLoop : canonLoops)
+    applyees.push_back(canonLoop.getCli());
 
   // Apply unrolling to it
-  auto cli = canonLoop.getCli();
+  auto cli = llvm::getSingleElement(canonLoops).getCli();
   mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli);
 }
 
@@ -3362,13 +3478,9 @@ static void genOMPDispatch(lower::AbstractConverter 
&converter,
     newOp = genTeamsOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue,
                        item);
     break;
-  case llvm::omp::Directive::OMPD_tile: {
-    unsigned version = semaCtx.langOptions().OpenMPVersion;
-    if (!semaCtx.langOptions().OpenMPSimd)
-      TODO(loc, "Unhandled loop directive (" +
-                    llvm::omp::getOpenMPDirectiveName(dir, version) + ")");
+  case llvm::omp::Directive::OMPD_tile:
+    genTileOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
     break;
-  }
   case llvm::omp::Directive::OMPD_unroll:
     genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
     break;
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 83b7ccb1ce0ee..4a392d4944fc8 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -667,6 +667,25 @@ int64_t collectLoopRelatedInfo(
     numCollapse = collapseValue;
   }
 
+  collectLoopRelatedInfo(converter, currentLocation, eval, numCollapse, result,
+                         iv);
+  return numCollapse;
+}
+
+void collectLoopRelatedInfo(
+    lower::AbstractConverter &converter, mlir::Location currentLocation,
+    lower::pft::Evaluation &eval, int64_t numCollapse,
+    mlir::omp::LoopRelatedClauseOps &result,
+    llvm::SmallVectorImpl<const semantics::Symbol *> &iv) {
+
+  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+
+  // Collect the loops to collapse.
+  lower::pft::Evaluation *doConstructEval = &eval.getFirstNestedEvaluation();
+  if (doConstructEval->getIf<parser::DoConstruct>()->IsDoConcurrent()) {
+    TODO(currentLocation, "Do Concurrent in Worksharing loop construct");
+  }
+
   // Collect sizes from tile directive if present.
   std::int64_t sizesLengthValue = 0l;
   if (auto *ompCons{eval.getIf<parser::OpenMPConstruct>()}) {
@@ -676,7 +695,7 @@ int64_t collectLoopRelatedInfo(
         });
   }
 
-  collapseValue = std::max(collapseValue, sizesLengthValue);
+  std::int64_t collapseValue = std::max(numCollapse, sizesLengthValue);
   std::size_t loopVarTypeSize = 0;
   do {
     lower::pft::Evaluation *doLoop =
@@ -709,8 +728,6 @@ int64_t collectLoopRelatedInfo(
   } while (collapseValue > 0);
 
   convertLoopBounds(converter, currentLocation, result, loopVarTypeSize);
-
-  return numCollapse;
 }
 
 } // namespace omp
diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h
index 5f191d89ae205..69499f9c7b621 100644
--- a/flang/lib/Lower/OpenMP/Utils.h
+++ b/flang/lib/Lower/OpenMP/Utils.h
@@ -165,6 +165,13 @@ int64_t collectLoopRelatedInfo(
     mlir::omp::LoopRelatedClauseOps &result,
     llvm::SmallVectorImpl<const semantics::Symbol *> &iv);
 
+void collectLoopRelatedInfo(
+    lower::AbstractConverter &converter, mlir::Location currentLocation,
+    lower::pft::Evaluation &eval, std::int64_t collapseValue,
+    // const omp::List<omp::Clause> &clauses,
+    mlir::omp::LoopRelatedClauseOps &result,
+    llvm::SmallVectorImpl<const semantics::Symbol *> &iv);
+
 void collectTileSizesFromOpenMPConstruct(
     const parser::OpenMPConstruct *ompCons,
     llvm::SmallVectorImpl<int64_t> &tileSizes,
diff --git a/flang/lib/Semantics/check-directive-structure.h 
b/flang/lib/Semantics/check-directive-structure.h
index b1bf3e550aebc..bd78d3cfe91e7 100644
--- a/flang/lib/Semantics/check-directive-structure.h
+++ b/flang/lib/Semantics/check-directive-structure.h
@@ -383,7 +383,8 @@ class DirectiveStructureChecker : public virtual 
BaseChecker {
       const C &clause, const parser::ScalarIntConstantExpr &i);
 
   void RequiresPositiveParameter(const C &clause,
-      const parser::ScalarIntExpr &i, llvm::StringRef paramName = "parameter");
+      const parser::ScalarIntExpr &i, llvm::StringRef paramName = "parameter",
+      bool allowZero = true);
 
   void OptionalConstantPositiveParameter(
       const C &clause, const std::optional<parser::ScalarIntConstantExpr> &o);
@@ -657,9 +658,9 @@ void DirectiveStructureChecker<D, C, PC, 
ClauseEnumSize>::SayNotMatching(
 template <typename D, typename C, typename PC, std::size_t ClauseEnumSize>
 void DirectiveStructureChecker<D, C, PC,
     ClauseEnumSize>::RequiresPositiveParameter(const C &clause,
-    const parser::ScalarIntExpr &i, llvm::StringRef paramName) {
+    const parser::ScalarIntExpr &i, llvm::StringRef paramName, bool allowZero) 
{
   if (const auto v{GetIntValue(i)}) {
-    if (*v < 0) {
+    if (*v < (allowZero ? 0 : 1)) {
       context_.Say(GetContext().clauseSource,
           "The %s of the %s clause must be "
           "a positive integer expression"_err_en_US,
diff --git a/flang/lib/Semantics/check-omp-structure.cpp 
b/flang/lib/Semantics/check-omp-structure.cpp
index c39daef6b0ea9..ab182c7674062 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -2749,6 +2749,13 @@ void OmpStructureChecker::Enter(const parser::OmpClause 
&x) {
   }
 }
 
+void OmpStructureChecker::Enter(const parser::OmpClause::Sizes &c) {
+  CheckAllowedClause(llvm::omp::Clause::OMPC_sizes);
+  for (const parser::Cosubscript &v : c.v)
+    RequiresPositiveParameter(llvm::omp::Clause::OMPC_sizes, v,
+        /*paramName=*/"parameter", /*allowZero=*/false);
+}
+
 // Following clauses do not have a separate node in parse-tree.h.
 CHECK_SIMPLE_CLAUSE(Absent, OMPC_absent)
 CHECK_SIMPLE_CLAUSE(Affinity, OMPC_affinity)
@@ -2790,7 +2797,6 @@ CHECK_SIMPLE_CLAUSE(Notinbranch, OMPC_notinbranch)
 CHECK_SIMPLE_CLAUSE(Partial, OMPC_partial)
 CHECK_SIMPLE_CLAUSE(ProcBind, OMPC_proc_bind)
 CHECK_SIMPLE_CLAUSE(Simd, OMPC_simd)
-CHECK_SIMPLE_CLAUSE(Sizes, OMPC_sizes)
 CHECK_SIMPLE_CLAUSE(Permutation, OMPC_permutation)
 CHECK_SIMPLE_CLAUSE(Uniform, OMPC_uniform)
 CHECK_SIMPLE_CLAUSE(Unknown, OMPC_unknown)
diff --git a/flang/lib/Semantics/resolve-directives.cpp 
b/flang/lib/Semantics/resolve-directives.cpp
index 5f2c9f676099c..2fa6cbc5d8167 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -2392,10 +2392,18 @@ void 
OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel(
 void OmpAttributeVisitor::CheckAssocLoopLevel(
     std::int64_t level, const parser::OmpClause *clause) {
   if (clause && level != 0) {
-    context_.Say(clause->source,
-        "The value of the parameter in the COLLAPSE or ORDERED clause must"
-        " not be larger than the number of nested loops"
-        " following the construct."_err_en_US);
+    switch (clause->Id()) {
+    case llvm::omp::OMPC_sizes:
+      context_.Say(clause->source,
+          "The SIZES clause has more entries than there are nested canonical 
loops."_err_en_US);
+      break;
+    default:
+      context_.Say(clause->source,
+          "The value of the parameter in the COLLAPSE or ORDERED clause must"
+          " not be larger than the number of nested loops"
+          " following the construct."_err_en_US);
+      break;
+    }
   }
 }
 
diff --git a/flang/test/Lower/OpenMP/tile01.f90 
b/flang/test/Lower/OpenMP/tile01.f90
new file mode 100644
index 0000000000000..7603eee4b18d8
--- /dev/null
+++ b/flang/test/Lower/OpenMP/tile01.f90
@@ -0,0 +1,58 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s | FileCheck 
%s
+
+
+subroutine omp_tile01(lb, ub, inc)
+  integer res, i, lb, ub, inc
+
+  !$omp tile sizes(4)
+  do i = lb, ub, inc
+    res = i
+  end do
+  !$omp end tile
+
+end subroutine omp_tile01
+
+
+! CHECK: func.func @_QPomp_tile01(
+! CHECK:    %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "lb"},
+! CHECK:    %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "ub"},
+! CHECK:    %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "inc"}) {
+! CHECK:         %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK:         %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = 
"_QFomp_tile01Ei"}
+! CHECK:         %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = 
"_QFomp_tile01Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:         %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope 
%[[VAL_0]] {uniq_name = "_QFomp_tile01Einc"} : (!fir.ref<i32>, !fir.dscope) -> 
(!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:         %[[VAL_4:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope 
%[[VAL_0]] {uniq_name = "_QFomp_tile01Elb"} : (!fir.ref<i32>, !fir.dscope) -> 
(!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:         %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "res", uniq_name 
= "_QFomp_tile01Eres"}
+! CHECK:         %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = 
"_QFomp_tile01Eres"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:         %[[VAL_7:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope 
%[[VAL_0]] {uniq_name = "_QFomp_tile01Eub"} : (!fir.ref<i32>, !fir.dscope) -> 
(!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:         %[[VAL_8:.*]] = arith.constant 4 : i32
+! CHECK:         %[[VAL_9:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref<i32>
+! CHECK:         %[[VAL_10:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
+! CHECK:         %[[VAL_11:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i32>
+! CHECK:         %[[VAL_12:.*]] = arith.constant 0 : i32
+! CHECK:         %[[VAL_13:.*]] = arith.constant 1 : i32
+! CHECK:         %[[VAL_14:.*]] = arith.cmpi slt, %[[VAL_11]], %[[VAL_12]] : 
i32
+! CHECK:         %[[VAL_15:.*]] = arith.subi %[[VAL_12]], %[[VAL_11]] : i32
+! CHECK:         %[[VAL_16:.*]] = arith.select %[[VAL_14]], %[[VAL_15]], 
%[[VAL_11]] : i32
+! CHECK:         %[[VAL_17:.*]] = arith.select %[[VAL_14]], %[[VAL_10]], 
%[[VAL_9]] : i32
+! CHECK:         %[[VAL_18:.*]] = arith.select %[[VAL_14]], %[[VAL_9]], 
%[[VAL_10]] : i32
+! CHECK:         %[[VAL_19:.*]] = arith.subi %[[VAL_18]], %[[VAL_17]] 
overflow<nuw> : i32
+! CHECK:         %[[VAL_20:.*]] = arith.divui %[[VAL_19]], %[[VAL_16]] : i32
+! CHECK:         %[[VAL_21:.*]] = arith.addi %[[VAL_20]], %[[VAL_13]] 
overflow<nuw> : i32
+! CHECK:         %[[VAL_22:.*]] = arith.cmpi slt, %[[VAL_18]], %[[VAL_17]] : 
i32
+! CHECK:         %[[VAL_23:.*]] = arith.select %[[VAL_22]], %[[VAL_12]], 
%[[VAL_21]] : i32
+! CHECK:         %[[VAL_24:.*]] = omp.new_cli
+! CHECK:         omp.canonical_loop(%[[VAL_24]]) %[[VAL_25:.*]] : i32 in 
range(%[[VAL_23]]) {
+! CHECK:           %[[VAL_26:.*]] = arith.muli %[[VAL_25]], %[[VAL_11]] : i32
+! CHECK:           %[[VAL_27:.*]] = arith.addi %[[VAL_9]], %[[VAL_26]] : i32
+! CHECK:           hlfir.assign %[[VAL_27]] to %[[VAL_2]]#0 : i32, 
!fir.ref<i32>
+! CHECK:           %[[VAL_28:.*]] = fir.load %[[VAL_2]]#0 : !fir.ref<i32>
+! CHECK:           hlfir.assign %[[VAL_28]] to %[[VAL_6]]#0 : i32, 
!fir.ref<i32>
+! CHECK:           omp.terminator
+! CHECK:         }
+! CHECK:         %[[VAL_29:.*]] = omp.new_cli
+! CHECK:         %[[VAL_30:.*]] = omp.new_cli
+! CHECK:         omp.tile (%[[VAL_29]], %[[VAL_30]]) <- (%[[VAL_24]]) 
sizes(%[[VAL_8]] : i32)
+! CHECK:         return
+! CHECK:       }
+
diff --git a/flang/test/Lower/OpenMP/tile02.f90 
b/flang/test/Lower/OpenMP/tile02.f90
new file mode 100644
index 0000000000000..5df506d17ed05
--- /dev/null
+++ b/flang/test/Lower/OpenMP/tile02.f90
@@ -0,0 +1,88 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s | FileCheck 
%s
+
+
+subroutine omp_tile02(lb, ub, inc)
+  integer res, i, lb, ub, inc
+
+  !$omp tile sizes(3,7)
+  do i = lb, ub, inc
+    do j = lb, ub, inc
+      res = i + j
+    end do
+  end do
+  !$omp end tile
+
+end subroutine omp_tile02
+
+
+! CHECK: func.func @_QPomp_tile02(
+! CHECK:    %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "lb"},
+! CHECK:    %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "ub"},
+! CHECK:    %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "inc"}) {
+! CHECK:         %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK:         %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = 
"_QFomp_tile02Ei"}
+! CHECK:         %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = 
"_QFomp_tile02Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:         %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope 
%[[VAL_0]] {uniq_name = "_QFomp_tile02Einc"} : (!fir.ref<i32>, !fir.dscope) -> 
(!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:         %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = 
"_QFomp_tile02Ej"}
+! CHECK:         %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = 
"_QFomp_tile02Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:         %[[VAL_6:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope 
%[[VAL_0]] {uniq_name = "_QFomp_tile02Elb"} : (!fir.ref<i32>, !fir.dscope) -> 
(!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:         %[[VAL_7:.*]] = fir.alloca i32 {bindc_name = "res", uniq_name 
= "_QFomp_tile02Eres"}
+! CHECK:         %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = 
"_QFomp_tile02Eres"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:         %[[VAL_9:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope 
%[[VAL_0]] {uniq_name = "_QFomp_tile02Eub"} : (!fir.ref<i32>, !fir.dscope) -> 
(!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:         %[[VAL_10:.*]] = arith.constant 3 : i32
+! CHECK:         %[[VAL_11:.*]] = arith.constant 7 : i32
+! CHECK:         %[[VAL_12:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:         %[[VAL_13:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32>
+! CHECK:         %[[VAL_14:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i32>
+! CHECK:         %[[VAL_15:.*]] = arith.constant 0 : i32
+! CHECK:         %[[VAL_16:.*]] = arith.constant 1 : i32
+! CHECK:         %[[VAL_17:.*]] = arith.cmpi slt, %[[VAL_14]], %[[VAL_15]] : 
i32
+! CHECK:         %[[VAL_18:.*]] = arith.subi %[[VAL_15]], %[[VAL_14]] : i32
+! CHECK:         %[[VAL_19:.*]] = arith.select %[[VAL_17]], %[[VAL_18]], 
%[[VAL_14]] : i32
+! CHECK:         %[[VAL_20:.*]] = arith.select %[[VAL_17]], %[[VAL_13]], 
%[[VAL_12]] : i32
+! CHECK:         %[[VAL_21:.*]] = arith.select %[[VAL_17]], %[[VAL_12]], 
%[[VAL_13]] : i32
+! CHECK:         %[[VAL_22:.*]] = arith.subi %[[VAL_21]], %[[VAL_20]] 
overflow<nuw> : i32
+! CHECK:         %[[VAL_23:.*]] = arith.divui %[[VAL_22]], %[[VAL_19]] : i32
+! CHECK:         %[[VAL_24:.*]] = arith.addi %[[VAL_23]], %[[VAL_16]] 
overflow<nuw> : i32
+! CHECK:         %[[VAL_25:.*]] = arith.cmpi slt, %[[VAL_21]], %[[VAL_20]] : 
i32
+! CHECK:         %[[VAL_26:.*]] = arith.select %[[VAL_25]], %[[VAL_15]], 
%[[VAL_24]] : i32
+! CHECK:         %[[VAL_27:.*]] = omp.new_cli
+! CHECK:         %[[VAL_28:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK:         %[[VAL_29:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32>
+! CHECK:         %[[VAL_30:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i32>
+! CHECK:         %[[VAL_31:.*]] = arith.constant 0 : i32
+! CHECK:         %[[VAL_32:.*]] = arith.constant 1 : i32
+! CHECK:         %[[VAL_33:.*]] = arith.cmpi slt, %[[VAL_30]], %[[VAL_31]] : 
i32
+! CHECK:         %[[VAL_34:.*]] = arith.subi %[[VAL_31]], %[[VAL_30]] : i32
+! CHECK:         %[[VAL_35:.*]] = arith.select %[[VAL_33]], %[[VAL_34]], 
%[[VAL_30]] : i32
+! CHECK:         %[[VAL_36:.*]] = arith.select %[[VAL_33]], %[[VAL_29]], 
%[[VAL_28]] : i32
+! CHECK:         %[[VAL_37:.*]] = arith.select %[[VAL_33]], %[[VAL_28]], 
%[[VAL_29]] : i32
+! CHECK:         %[[VAL_38:.*]] = arith.subi %[[VAL_37]], %[[VAL_36]] 
overflow<nuw> : i32
+! CHECK:         %[[VAL_39:.*]] = arith.divui %[[VAL_38]], %[[VAL_35]] : i32
+! CHECK:         %[[VAL_40:.*]] = arith.addi %[[VAL_39]], %[[VAL_32]] 
overflow<nuw> : i32
+! CHECK:         %[[VAL_41:.*]] = arith.cmpi slt, %[[VAL_37]], %[[VAL_36]] : 
i32
+! CHECK:         %[[VAL_42:.*]] = arith.select %[[VAL_41]], %[[VAL_31]], 
%[[VAL_40]] : i32
+! CHECK:         %[[VAL_43:.*]] = omp.new_cli
+! CHECK:         omp.canonical_loop(%[[VAL_27]]) %[[VAL_44:.*]] : i32 in 
range(%[[VAL_26]]) {
+! CHECK:           omp.canonical_loop(%[[VAL_43]]) %[[VAL_45:.*]] : i32 in 
range(%[[VAL_42]]) {
+! CHECK:             %[[VAL_46:.*]] = arith.muli %[[VAL_44]], %[[VAL_14]] : i32
+! CHECK:             %[[VAL_47:.*]] = arith.addi %[[VAL_12]], %[[VAL_46]] : i32
+! CHECK:             hlfir.assign %[[VAL_47]] to %[[VAL_2]]#0 : i32, 
!fir.ref<i32>
+! CHECK:             %[[VAL_48:.*]] = arith.muli %[[VAL_45]], %[[VAL_30]] : i32
+! CHECK:             %[[VAL_49:.*]] = arith.addi %[[VAL_28]], %[[VAL_48]] : i32
+! CHECK:             hlfir.assign %[[VAL_49]] to %[[VAL_5]]#0 : i32, 
!fir.ref<i32>
+! CHECK:             %[[VAL_50:.*]] = fir.load %[[VAL_2]]#0 : !fir.ref<i32>
+! CHECK:             %[[VAL_51:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+! CHECK:             %[[VAL_52:.*]] = arith.addi %[[VAL_50]], %[[VAL_51]] : i32
+! CHECK:             hlfir.assign %[[VAL_52]] to %[[VAL_8]]#0 : i32, 
!fir.ref<i32>
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           omp.terminator
+! CHECK:         }
+! CHECK:         %[[VAL_53:.*]] = omp.new_cli
+! CHECK:         %[[VAL_54:.*]] = omp.new_cli
+! CHECK:         %[[VAL_55:.*]] = omp.new_cli
+! CHECK:         %[[VAL_56:.*]] = omp.new_cli
+! CHECK:         omp.tile (%[[VAL_53]], %[[VAL_55]], %[[VAL_54]], %[[VAL_56]]) 
<- (%[[VAL_27]], %[[VAL_43]]) sizes(%[[VAL_10]], %[[VAL_11]] : i32, i32)
+! CHECK:         return
+! CHECK:       }
diff --git a/flang/test/Parser/OpenMP/loop-transformation-construct02.f90 
b/flang/test/Parser/OpenMP/loop-transformation-construct02.f90
index a6af35a0111a3..a876c77a274b5 100644
--- a/flang/test/Parser/OpenMP/loop-transformation-construct02.f90
+++ b/flang/test/Parser/OpenMP/loop-transformation-construct02.f90
@@ -11,7 +11,7 @@ subroutine loop_transformation_construct
 
   !$omp do
   !$omp unroll
-  !$omp tile
+  !$omp tile sizes(2)
   do i = 1, I
     y(i) = y(i) * 5
   end do
@@ -34,7 +34,8 @@ subroutine loop_transformation_construct
 !CHECK-PARSE-NEXT: | | | | OpenMPLoopConstruct
 !CHECK-PARSE-NEXT: | | | | | OmpBeginLoopDirective
 !CHECK-PARSE-NEXT: | | | | | | OmpDirectiveName -> llvm::omp::Directive = tile
-!CHECK-PARSE-NEXT: | | | | | | OmpClauseList ->
+!CHECK-PARSE-NEXT: | | | | | | OmpClauseList -> OmpClause -> Sizes -> Scalar 
-> Integer -> Expr = '2_4'
+!CHECK-PARSE-NEXT: | | | | | | | LiteralConstant -> IntLiteralConstant = '2'
 !CHECK-PARSE-NEXT: | | | | | | Flags = None
 !CHECK-PARSE-NEXT: | | | | | DoConstruct
 !CHECK-PARSE-NEXT: | | | | | | NonLabelDoStmt
diff --git a/flang/test/Parser/OpenMP/tile-fail.f90 
b/flang/test/Parser/OpenMP/tile-fail.f90
new file mode 100644
index 0000000000000..267ed0ad48437
--- /dev/null
+++ b/flang/test/Parser/OpenMP/tile-fail.f90
@@ -0,0 +1,32 @@
+! RUN: split-file %s %t
+! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_end1.f90 2>&1 | 
FileCheck %t/stray_end1.f90
+! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_end2.f90 2>&1 | 
FileCheck %t/stray_end2.f90
+! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_begin.f90 2>&1 | 
FileCheck %t/stray_begin.f90
+
+
+!--- stray_end1.f90
+! Parser error
+
+subroutine stray_end1
+  !CHECK: error: expected OpenMP construct
+  !$omp end tile
+end subroutine
+
+
+!--- stray_end2.f90
+! Semantic error
+
+subroutine stray_end2
+  print *
+  !CHECK: error: The END TILE directive must follow the DO loop associated 
with the loop construct
+  !$omp end tile
+end subroutine
+
+
+!--- stray_begin.f90
+
+subroutine stray_begin
+  !CHECK: error: A DO loop must follow the TILE directive
+  !$omp tile sizes(2)
+end subroutine
+
diff --git a/flang/test/Parser/OpenMP/tile.f90 
b/flang/test/Parser/OpenMP/tile.f90
index 2ea17471866a4..82004fd37a0f2 100644
--- a/flang/test/Parser/OpenMP/tile.f90
+++ b/flang/test/Parser/OpenMP/tile.f90
@@ -1,12 +1,12 @@
-! RUN: %flang_fc1 -fdebug-unparse -fopenmp %s | FileCheck --ignore-case %s
-! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp %s | FileCheck 
--check-prefix="PARSE-TREE" %s
+! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck 
--ignore-case %s
+! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | 
FileCheck --check-prefix="PARSE-TREE" %s
 
 subroutine openmp_tiles(x)
 
   integer, intent(inout)::x
 
-!CHECK: !$omp tile
-!$omp  tile
+!CHECK: !$omp tile sizes(2_4)
+!$omp tile sizes(2)
 !CHECK: do
   do x = 1, 100
        call F1()
@@ -17,7 +17,12 @@ subroutine openmp_tiles(x)
 
 !PARSE-TREE: OpenMPConstruct -> OpenMPLoopConstruct
 !PARSE-TREE: OmpBeginLoopDirective
+!PARSE-TREE:   OmpClauseList -> OmpClause -> Sizes -> Scalar -> Integer -> 
Expr = '2_4'
+!PARSE-TREE:     LiteralConstant -> IntLiteralConstant = '2'
+!PARSE-TREE:     Flags = None
+!PARSE-TREE:   DoConstruct
+!PARSE-TREE:   EndDoStmt
+!PARSE-TREE: OmpEndLoopDirective
 !PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = tile
 
 END subroutine openmp_tiles
-
diff --git a/flang/test/Semantics/OpenMP/tile01.f90 
b/flang/test/Semantics/OpenMP/tile01.f90
new file mode 100644
index 0000000000000..3d7b3f4f42e92
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/tile01.f90
@@ -0,0 +1,26 @@
+! Testing the Semantics of tile
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51
+
+
+subroutine missing_sizes
+  implicit none
+  integer i
+
+  !ERROR: At least one of SIZES clause must appear on the TILE directive
+  !$omp tile
+  do i = 1, 42
+    print *, i
+  end do
+end subroutine
+
+
+subroutine double_sizes
+  implicit none
+  integer i
+
+  !ERROR: At most one SIZES clause can appear on the TILE directive
+  !$omp tile sizes(2) sizes(2)
+  do i = 1, 5
+    print *, i
+  end do
+end subroutine
diff --git a/flang/test/Semantics/OpenMP/tile02.f90 
b/flang/test/Semantics/OpenMP/tile02.f90
new file mode 100644
index 0000000000000..676796375353f
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/tile02.f90
@@ -0,0 +1,15 @@
+! Testing the Semantics of tile
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51
+
+
+subroutine on_unroll
+  implicit none
+  integer i
+
+  !ERROR: If a loop construct has been fully unrolled, it cannot then be tiled
+  !$omp tile sizes(2)
+  !$omp unroll
+  do i = 1, 5
+    print *, i
+  end do
+end subroutine
diff --git a/flang/test/Semantics/OpenMP/tile03.f90 
b/flang/test/Semantics/OpenMP/tile03.f90
new file mode 100644
index 0000000000000..e5c134638ac8d
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/tile03.f90
@@ -0,0 +1,15 @@
+! Testing the Semantics of tile
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51
+
+
+subroutine loop_assoc
+  implicit none
+  integer :: i = 0
+
+  !$omp tile sizes(2)
+  !ERROR: The associated loop of a loop-associated directive cannot be a DO 
WHILE.
+  do while (i <= 10)
+    i = i + 1
+    print *, i
+  end do
+end subroutine
diff --git a/flang/test/Semantics/OpenMP/tile04.f90 
b/flang/test/Semantics/OpenMP/tile04.f90
new file mode 100644
index 0000000000000..2b503efbcf52b
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/tile04.f90
@@ -0,0 +1,38 @@
+! Testing the Semantics of tile
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51
+
+
+subroutine threads_zero
+  implicit none
+  integer i
+
+  !ERROR: The parameter of the NUM_THREADS clause must be a positive integer 
expression
+  !$omp parallel do num_threads(-1)
+  do i = 1, 5
+    print *, i
+  end do
+end subroutine
+
+
+subroutine sizes_zero
+  implicit none
+  integer i
+
+  !ERROR: The parameter of the SIZES clause must be a positive integer 
expression
+  !$omp tile sizes(0)
+  do i = 1, 5
+    print *, i
+  end do
+end subroutine
+
+
+subroutine sizes_negative
+  implicit none
+  integer i
+
+  !ERROR: The parameter of the SIZES clause must be a positive integer 
expression
+  !$omp tile sizes(-1)
+  do i = 1, 5
+    print *, i
+  end do
+end subroutine
diff --git a/flang/test/Semantics/OpenMP/tile05.f90 
b/flang/test/Semantics/OpenMP/tile05.f90
new file mode 100644
index 0000000000000..70c43811a5832
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/tile05.f90
@@ -0,0 +1,14 @@
+! Testing the Semantics of tile
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51
+
+
+subroutine insufficient_loops
+  implicit none
+  integer i
+
+  !ERROR: The SIZES clause has more entries than there are nested canonical 
loops.
+  !$omp tile sizes(2, 2)
+  do i = 1, 5
+    print *, i
+  end do
+end subroutine
diff --git a/flang/test/Semantics/OpenMP/tile06.f90 
b/flang/test/Semantics/OpenMP/tile06.f90
new file mode 100644
index 0000000000000..52518d43f0554
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/tile06.f90
@@ -0,0 +1,44 @@
+! Testing the Semantics of tile
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51
+
+
+subroutine nonrectangular_loop_lb
+  implicit none
+  integer i, j
+
+  !ERROR: Trip count must be computable and invariant
+  !$omp tile sizes(2,2)
+  do i = 1, 5
+    do j = 1, i
+      print *, i, j
+    end do
+  end do
+end subroutine
+
+
+subroutine nonrectangular_loop_ub
+  implicit none
+  integer i, j
+
+  !ERROR: Trip count must be computable and invariant
+  !$omp tile sizes(2,2)
+  do i = 1, 5
+    do j = 1, i
+      print *, i, j
+    end do
+  end do
+end subroutine
+
+
+subroutine nonrectangular_loop_step
+  implicit none
+  integer i, j
+
+  !ERROR: Trip count must be computable and invariant
+  !$omp tile sizes(2,2)
+  do i = 1, 5
+    do j = 1, 42, i
+      print *, i, j
+    end do
+  end do
+end subroutine
diff --git a/flang/test/Semantics/OpenMP/tile07.f90 
b/flang/test/Semantics/OpenMP/tile07.f90
new file mode 100644
index 0000000000000..70a6f5fc529a4
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/tile07.f90
@@ -0,0 +1,35 @@
+! Testing the Semantics of tile
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51
+
+
+subroutine non_perfectly_nested_loop_behind
+  implicit none
+  integer i, j
+
+  !ERROR: Canonical loop nest must be perfectly nested.
+  !$omp tile sizes(2,2)
+  do i = 1, 5
+    do j = 1, 42
+      print *, j
+    end do
+    print *, i
+  end do
+end subroutine
+
+
+subroutine non_perfectly_nested_loop_before
+  implicit none
+  integer i, j
+
+  !ERROR: The SIZES clause has more entries than there are nested canonical 
loops.
+  !$omp tile sizes(2,2)
+  do i = 1, 5
+    print *, i
+    do j = 1, 42
+      print *, j
+    end do
+  end do
+end subroutine
+
+
+
diff --git a/flang/test/Semantics/OpenMP/tile08.f90 
b/flang/test/Semantics/OpenMP/tile08.f90
new file mode 100644
index 0000000000000..f42805cb81b7d
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/tile08.f90
@@ -0,0 +1,15 @@
+! Testing the Semantics of tile
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51
+
+
+subroutine do_concurrent
+  implicit none
+  integer i, j
+
+
+  !$omp tile sizes(2,2)
+  !ERROR: DO CONCURRENT loops cannot form part of a loop nest.
+  do concurrent (i = 1:42, j = 1:42)
+    print *, i, j
+  end do
+end subroutine
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td 
b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index 4d9b8f8a6c51e..2911b4c8df1b1 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -1324,6 +1324,9 @@ def OMP_Tile : Directive<[Spelling<"tile">]> {
   let allowedOnceClauses = [
     VersionedClause<OMPC_Sizes, 51>,
   ];
+  let requiredClauses = [
+    VersionedClause<OMPC_Sizes, 51>,
+  ];
   let association = AS_Loop;
   let category = CA_Executable;
 }
diff --git a/openmp/runtime/test/transform/tile/intfor.f90 
b/openmp/runtime/test/transform/tile/intfor.f90
new file mode 100644
index 0000000000000..dac0de6a99021
--- /dev/null
+++ b/openmp/runtime/test/transform/tile/intfor.f90
@@ -0,0 +1,31 @@
+! This test checks lowering of the OpenMP tile directive
+! It is done 3 times corresponding to every possible fraction of the last
+! iteration before passing beyond UB.
+
+! RUN: %flang %flags %openmp_flags -fopenmp-version=51 -DUB=16 %s -o 
%t-ub16.exe
+! RUN: %flang %flags %openmp_flags -fopenmp-version=51 -DUB=17 %s -o 
%t-ub17.exe
+! RUN: %flang %flags %openmp_flags -fopenmp-version=51 -DUB=18 %s -o 
%t-ub18.exe
+! RUN: %t-ub16.exe | FileCheck %s --match-full-lines
+! RUN: %t-ub17.exe | FileCheck %s --match-full-lines
+! RUN: %t-ub18.exe | FileCheck %s --match-full-lines
+
+program tile_intfor_1d
+  integer i
+  print *, 'do'
+
+  !$OMP TILE SIZES(2)
+  do i=7, UB, 3
+    print '("i=", I0)', i
+  end do
+  !$OMP END TILE
+
+  print *, 'done'
+end program
+
+
+! CHECK:      do
+! CHECK-NEXT: i=7
+! CHECK-NEXT: i=10
+! CHECK-NEXT: i=13
+! CHECK-NEXT: i=16
+! CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/tile/intfor_2d.f90 
b/openmp/runtime/test/transform/tile/intfor_2d.f90
new file mode 100644
index 0000000000000..6bc90c768b8d3
--- /dev/null
+++ b/openmp/runtime/test/transform/tile/intfor_2d.f90
@@ -0,0 +1,53 @@
+! This test checks lowering of OpenMP tile directive
+
+! RUN: %flang %flags %openmp_flags -fopenmp-version=51 %s -o %t.exe
+! RUN: %t.exe | FileCheck %s --match-full-lines
+
+
+program tile_intfor_2d
+  integer i, j
+  print *, 'do'
+
+  !$OMP TILE SIZES(2,3)
+  do i = 7, 16, 3
+    do j = 0, 4
+      print '("i=", I0," j=", I0)', i, j
+    end do
+  end do
+  !$OMP END TILE
+
+  print *, 'done'
+end program
+
+
+! CHECK:      do
+
+! complete tile
+! CHECK-NEXT: i=7 j=0
+! CHECK-NEXT: i=7 j=1
+! CHECK-NEXT: i=7 j=2
+! CHECK-NEXT: i=10 j=0
+! CHECK-NEXT: i=10 j=1
+! CHECK-NEXT: i=10 j=2
+
+! partial tile
+! CHECK-NEXT: i=7 j=3
+! CHECK-NEXT: i=7 j=4
+! CHECK-NEXT: i=10 j=3
+! CHECK-NEXT: i=10 j=4
+
+! complete tile
+! CHECK-NEXT: i=13 j=0
+! CHECK-NEXT: i=13 j=1
+! CHECK-NEXT: i=13 j=2
+! CHECK-NEXT: i=16 j=0
+! CHECK-NEXT: i=16 j=1
+! CHECK-NEXT: i=16 j=2
+
+! partial tile
+! CHECK-NEXT: i=13 j=3
+! CHECK-NEXT: i=13 j=4
+! CHECK-NEXT: i=16 j=3
+! CHECK-NEXT: i=16 j=4
+
+! CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/tile/intfor_2d_varsizes.F90 
b/openmp/runtime/test/transform/tile/intfor_2d_varsizes.F90
new file mode 100644
index 0000000000000..4cb5adf606dd2
--- /dev/null
+++ b/openmp/runtime/test/transform/tile/intfor_2d_varsizes.F90
@@ -0,0 +1,60 @@
+! This test checks lowering of OpenMP tile directive
+
+! RUN: %flang %flags %openmp_flags -fopenmp-version=51 %s -o %t.exe
+! RUN: %t.exe | FileCheck %s --match-full-lines
+
+program tile_intfor_varsizes
+  integer i
+
+  call kernel(7,17,3,2)
+  call kernel(7,17,3,3)
+
+end program
+
+
+subroutine kernel(lb, ub, step, ts)
+  integer i, j, lb, ub, step, ts
+
+  print *, 'do'
+
+  !$OMP TILE SIZES(ts,ts)
+  do i = lb, ub, step
+    do j = 0, 2
+      print '("i=", I0," j=", I0)', i, j
+    end do
+  end do
+  !$OMP END TILE
+
+  print *, 'done'
+
+end subroutine
+
+! CHECK:      do
+! CHECK-NEXT: i=7 j=0
+! CHECK-NEXT: i=7 j=1
+! CHECK-NEXT: i=10 j=0
+! CHECK-NEXT: i=10 j=1
+! CHECK-NEXT: i=7 j=2
+! CHECK-NEXT: i=10 j=2
+! CHECK-NEXT: i=13 j=0
+! CHECK-NEXT: i=13 j=1
+! CHECK-NEXT: i=16 j=0
+! CHECK-NEXT: i=16 j=1
+! CHECK-NEXT: i=13 j=2
+! CHECK-NEXT: i=16 j=2
+! CHECK-NEXT: done
+
+! CHECK:      do
+! CHECK-NEXT: i=7 j=0
+! CHECK-NEXT: i=7 j=1
+! CHECK-NEXT: i=7 j=2
+! CHECK-NEXT: i=10 j=0
+! CHECK-NEXT: i=10 j=1
+! CHECK-NEXT: i=10 j=2
+! CHECK-NEXT: i=13 j=0
+! CHECK-NEXT: i=13 j=1
+! CHECK-NEXT: i=13 j=2
+! CHECK-NEXT: i=16 j=0
+! CHECK-NEXT: i=16 j=1
+! CHECK-NEXT: i=16 j=2
+! CHECK-NEXT: done

>From e7c0c5a1d64797acaddbbca50927545868f7256c Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-proj...@meinersbur.de>
Date: Tue, 23 Sep 2025 16:03:43 +0200
Subject: [PATCH 2/3] dos2unix

---
 flang/test/Parser/OpenMP/tile-fail.f90 | 64 +++++++++++++-------------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/flang/test/Parser/OpenMP/tile-fail.f90 
b/flang/test/Parser/OpenMP/tile-fail.f90
index 267ed0ad48437..0a92e5bcb6570 100644
--- a/flang/test/Parser/OpenMP/tile-fail.f90
+++ b/flang/test/Parser/OpenMP/tile-fail.f90
@@ -1,32 +1,32 @@
-! RUN: split-file %s %t
-! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_end1.f90 2>&1 | 
FileCheck %t/stray_end1.f90
-! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_end2.f90 2>&1 | 
FileCheck %t/stray_end2.f90
-! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_begin.f90 2>&1 | 
FileCheck %t/stray_begin.f90
-
-
-!--- stray_end1.f90
-! Parser error
-
-subroutine stray_end1
-  !CHECK: error: expected OpenMP construct
-  !$omp end tile
-end subroutine
-
-
-!--- stray_end2.f90
-! Semantic error
-
-subroutine stray_end2
-  print *
-  !CHECK: error: The END TILE directive must follow the DO loop associated 
with the loop construct
-  !$omp end tile
-end subroutine
-
-
-!--- stray_begin.f90
-
-subroutine stray_begin
-  !CHECK: error: A DO loop must follow the TILE directive
-  !$omp tile sizes(2)
-end subroutine
-
+! RUN: split-file %s %t
+! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_end1.f90 2>&1 | 
FileCheck %t/stray_end1.f90
+! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_end2.f90 2>&1 | 
FileCheck %t/stray_end2.f90
+! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_begin.f90 2>&1 | 
FileCheck %t/stray_begin.f90
+
+
+!--- stray_end1.f90
+! Parser error
+
+subroutine stray_end1
+  !CHECK: error: expected OpenMP construct
+  !$omp end tile
+end subroutine
+
+
+!--- stray_end2.f90
+! Semantic error
+
+subroutine stray_end2
+  print *
+  !CHECK: error: The END TILE directive must follow the DO loop associated 
with the loop construct
+  !$omp end tile
+end subroutine
+
+
+!--- stray_begin.f90
+
+subroutine stray_begin
+  !CHECK: error: A DO loop must follow the TILE directive
+  !$omp tile sizes(2)
+end subroutine
+

>From 01a056f711c139ee0f1e26b19cf7513701f52992 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-proj...@meinersbur.de>
Date: Wed, 24 Sep 2025 12:01:39 +0200
Subject: [PATCH 3/3] avoid compiler warning

---
 flang/lib/Lower/OpenMP/Utils.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 4a392d4944fc8..29cccbd1bfe5a 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -652,7 +652,6 @@ int64_t collectLoopRelatedInfo(
     mlir::omp::LoopRelatedClauseOps &result,
     llvm::SmallVectorImpl<const semantics::Symbol *> &iv) {
   int64_t numCollapse = 1;
-  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
 
   // Collect the loops to collapse.
   lower::pft::Evaluation *doConstructEval = &eval.getFirstNestedEvaluation();

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [llvm] [openmp] [Flang] Add standalone tile support (PR #160298)

Reply via email to