https://github.com/Meinersbur created https://github.com/llvm/llvm-project/pull/91325
One of the constraints of an AST is that every node object must appear at most once, hence we define lambdas that create a new AST node at every use. >From 1c1910b6885cd5be18cb15e364569f2a2f662955 Mon Sep 17 00:00:00 2001 From: Michael Kruse <llvm-proj...@meinersbur.de> Date: Tue, 7 May 2024 14:47:45 +0200 Subject: [PATCH] Make unique instances --- clang/lib/Sema/SemaOpenMP.cpp | 77 ++-- clang/test/OpenMP/tile_codegen.cpp | 58 +-- .../OpenMP/tile_codegen_for_dependent.cpp | 326 +++++++------ clang/test/OpenMP/tile_codegen_tile_for.cpp | 435 +++++++++--------- 4 files changed, 447 insertions(+), 449 deletions(-) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index cf5447f223d45..ba86bd4e62786 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -15109,6 +15109,8 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses, SourceLocation StartLoc, SourceLocation EndLoc) { ASTContext &Context = getASTContext(); + Scope *CurScope = SemaRef.getCurScope(); + auto SizesClauses = OMPExecutableDirective::getClausesOfKind<OMPSizesClause>(Clauses); if (SizesClauses.empty()) { @@ -15137,6 +15139,7 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses, NumLoops, AStmt, nullptr, nullptr); SmallVector<Decl *, 4> PreInits; + CaptureVars CopyTransformer(SemaRef); // Create iteration variables for the generated loops. SmallVector<VarDecl *, 4> FloorIndVars; @@ -15194,25 +15197,37 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses, // Once the original iteration values are set, append the innermost body. Stmt *Inner = Body; + auto MakeDimTileSize = [&SemaRef = this->SemaRef, &CopyTransformer, &Context, + SizesClause, CurScope](int I) -> Expr * { + Expr *DimTileSizeExpr = SizesClause->getSizesRefs()[I]; + return AssertSuccess(CopyTransformer.TransformExpr(DimTileSizeExpr)); + }; + // Create tile loops from the inside to the outside. 
for (int I = NumLoops - 1; I >= 0; --I) { OMPLoopBasedDirective::HelperExprs &LoopHelper = LoopHelpers[I]; Expr *NumIterations = LoopHelper.NumIterations; auto *OrigCntVar = cast<DeclRefExpr>(LoopHelper.Counters[0]); - QualType CntTy = OrigCntVar->getType(); - Expr *DimTileSize = SizesClause->getSizesRefs()[I]; - Scope *CurScope = SemaRef.getCurScope(); - - // Commonly used variables. - DeclRefExpr *TileIV = buildDeclRefExpr(SemaRef, TileIndVars[I], CntTy, - OrigCntVar->getExprLoc()); - DeclRefExpr *FloorIV = buildDeclRefExpr(SemaRef, FloorIndVars[I], CntTy, - OrigCntVar->getExprLoc()); + QualType IVTy = NumIterations->getType(); + + // Commonly used variables. One of the constraints of an AST is that every + // node object must appear at most once, hence we define lamdas that create + // a new AST node at every use. + auto MakeTileIVRef = [&SemaRef = this->SemaRef, &TileIndVars, I, IVTy, + OrigCntVar]() { + return buildDeclRefExpr(SemaRef, TileIndVars[I], IVTy, + OrigCntVar->getExprLoc()); + }; + auto MakeFloorIVRef = [&SemaRef = this->SemaRef, &FloorIndVars, I, IVTy, + OrigCntVar]() { + return buildDeclRefExpr(SemaRef, FloorIndVars[I], IVTy, + OrigCntVar->getExprLoc()); + }; // For init-statement: auto .tile.iv = .floor.iv - SemaRef.AddInitializerToDecl(TileIndVars[I], - SemaRef.DefaultLvalueConversion(FloorIV).get(), - /*DirectInit=*/false); + SemaRef.AddInitializerToDecl( + TileIndVars[I], SemaRef.DefaultLvalueConversion(MakeFloorIVRef()).get(), + /*DirectInit=*/false); Decl *CounterDecl = TileIndVars[I]; StmtResult InitStmt = new (Context) DeclStmt(DeclGroupRef::Create(Context, &CounterDecl, 1), @@ -15220,10 +15235,11 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses, if (!InitStmt.isUsable()) return StmtError(); - // For cond-expression: .tile.iv < min(.floor.iv + DimTileSize, - // NumIterations) - ExprResult EndOfTile = SemaRef.BuildBinOp( - CurScope, LoopHelper.Cond->getExprLoc(), BO_Add, FloorIV, DimTileSize); + // For 
cond-expression: + // .tile.iv < min(.floor.iv + DimTileSize, NumIterations) + ExprResult EndOfTile = + SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_Add, + MakeFloorIVRef(), MakeDimTileSize(I)); if (!EndOfTile.isUsable()) return StmtError(); ExprResult IsPartialTile = @@ -15238,25 +15254,28 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses, return StmtError(); ExprResult CondExpr = SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, - TileIV, MinTileAndIterSpace.get()); + MakeTileIVRef(), MinTileAndIterSpace.get()); if (!CondExpr.isUsable()) return StmtError(); // For incr-statement: ++.tile.iv ExprResult IncrStmt = SemaRef.BuildUnaryOp( - CurScope, LoopHelper.Inc->getExprLoc(), UO_PreInc, TileIV); + CurScope, LoopHelper.Inc->getExprLoc(), UO_PreInc, MakeTileIVRef()); if (!IncrStmt.isUsable()) return StmtError(); // Statements to set the original iteration variable's value from the // logical iteration number. // Generated for loop is: + // \code // Original_for_init; - // for (auto .tile.iv = .floor.iv; .tile.iv < min(.floor.iv + DimTileSize, - // NumIterations); ++.tile.iv) { + // for (auto .tile.iv = .floor.iv; + // .tile.iv < min(.floor.iv + DimTileSize, NumIterations); + // ++.tile.iv) { // Original_Body; // Original_counter_update; // } + // \endcode // FIXME: If the innermost body is an loop itself, inserting these // statements stops it being recognized as a perfectly nested loop (e.g. // for applying tiling again). 
If this is the case, sink the expressions @@ -15277,13 +15296,14 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses, auto &LoopHelper = LoopHelpers[I]; Expr *NumIterations = LoopHelper.NumIterations; DeclRefExpr *OrigCntVar = cast<DeclRefExpr>(LoopHelper.Counters[0]); - QualType CntTy = OrigCntVar->getType(); - Expr *DimTileSize = SizesClause->getSizesRefs()[I]; - Scope *CurScope = SemaRef.getCurScope(); + QualType IVTy = NumIterations->getType(); // Commonly used variables. - DeclRefExpr *FloorIV = buildDeclRefExpr(SemaRef, FloorIndVars[I], CntTy, - OrigCntVar->getExprLoc()); + auto MakeFloorIVRef = [&SemaRef = this->SemaRef, &FloorIndVars, I, IVTy, + OrigCntVar]() { + return buildDeclRefExpr(SemaRef, FloorIndVars[I], IVTy, + OrigCntVar->getExprLoc()); + }; // For init-statement: auto .floor.iv = 0 SemaRef.AddInitializerToDecl( @@ -15298,15 +15318,16 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses, return StmtError(); // For cond-expression: .floor.iv < NumIterations - ExprResult CondExpr = SemaRef.BuildBinOp( - CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, FloorIV, NumIterations); + ExprResult CondExpr = + SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, + MakeFloorIVRef(), NumIterations); if (!CondExpr.isUsable()) return StmtError(); // For incr-statement: .floor.iv += DimTileSize ExprResult IncrStmt = SemaRef.BuildBinOp(CurScope, LoopHelper.Inc->getExprLoc(), BO_AddAssign, - FloorIV, DimTileSize); + MakeFloorIVRef(), MakeDimTileSize(I)); if (!IncrStmt.isUsable()) return StmtError(); diff --git a/clang/test/OpenMP/tile_codegen.cpp b/clang/test/OpenMP/tile_codegen.cpp index 76cf2d8f1992d..45f4cf0a44a6b 100644 --- a/clang/test/OpenMP/tile_codegen.cpp +++ b/clang/test/OpenMP/tile_codegen.cpp @@ -98,7 +98,7 @@ extern "C" void tfoo7() { // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SC1Ev -// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr 
#[[ATTR2:[0-9]+]] comdat align 2 { +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 @@ -108,7 +108,7 @@ extern "C" void tfoo7() { // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SC2Ev -// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca ptr, align 8 @@ -219,7 +219,7 @@ extern "C" void tfoo7() { // CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: [[ADD7:%.*]] = add i32 [[TMP11]], 1 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], 5 +// CHECK1-NEXT: [[ADD8:%.*]] = add i32 [[TMP12]], 5 // CHECK1-NEXT: [[CMP9:%.*]] = icmp ult i32 [[ADD7]], [[ADD8]] // CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: @@ -228,7 +228,7 @@ extern "C" void tfoo7() { // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP14]], 5 +// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP14]], 5 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[ADD10]], [[COND_TRUE]] ], [ [[ADD11]], [[COND_FALSE]] ] @@ -246,14 +246,14 @@ extern "C" void tfoo7() { // CHECK1-NEXT: br label [[FOR_INC:%.*]] // CHECK1: for.inc: // CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: 
[[INC:%.*]] = add i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4 // CHECK1-NEXT: br label [[FOR_COND6]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: for.end: // CHECK1-NEXT: br label [[FOR_INC15:%.*]] // CHECK1: for.inc15: // CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 5 +// CHECK1-NEXT: [[ADD16:%.*]] = add i32 [[TMP20]], 5 // CHECK1-NEXT: store i32 [[ADD16]], ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: for.end17: @@ -885,7 +885,7 @@ extern "C" void tfoo7() { // // // CHECK1-LABEL: define {{[^@]+}}@foo6.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -1031,7 +1031,7 @@ extern "C" void tfoo7() { // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: [[ADD7:%.*]] = add i32 [[TMP9]], 1 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 5 +// CHECK1-NEXT: [[ADD8:%.*]] = add i32 [[TMP10]], 5 // CHECK1-NEXT: [[CMP9:%.*]] = icmp ult i32 [[ADD7]], [[ADD8]] // CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: @@ -1040,7 +1040,7 @@ extern "C" void tfoo7() { // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP12]], 5 +// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP12]], 5 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: // 
CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[ADD10]], [[COND_TRUE]] ], [ [[ADD11]], [[COND_FALSE]] ] @@ -1057,14 +1057,14 @@ extern "C" void tfoo7() { // CHECK1-NEXT: br label [[FOR_INC:%.*]] // CHECK1: for.inc: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP16]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4 // CHECK1-NEXT: br label [[FOR_COND6]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: for.end: // CHECK1-NEXT: br label [[FOR_INC15:%.*]] // CHECK1: for.inc15: // CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP17]], 5 +// CHECK1-NEXT: [[ADD16:%.*]] = add i32 [[TMP17]], 5 // CHECK1-NEXT: store i32 [[ADD16]], ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: for.end17: @@ -1159,13 +1159,13 @@ extern "C" void tfoo7() { // // // CHECK2-LABEL: define {{[^@]+}}@body -// CHECK2-SAME: (...) #[[ATTR2:[0-9]+]] { +// CHECK2-SAME: (...) 
#[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@foo1 -// CHECK2-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR2]] { +// CHECK2-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 @@ -1213,7 +1213,7 @@ extern "C" void tfoo7() { // CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: [[ADD7:%.*]] = add i32 [[TMP11]], 1 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], 5 +// CHECK2-NEXT: [[ADD8:%.*]] = add i32 [[TMP12]], 5 // CHECK2-NEXT: [[CMP9:%.*]] = icmp ult i32 [[ADD7]], [[ADD8]] // CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: @@ -1222,7 +1222,7 @@ extern "C" void tfoo7() { // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP14]], 5 +// CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[TMP14]], 5 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: // CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[ADD10]], [[COND_TRUE]] ], [ [[ADD11]], [[COND_FALSE]] ] @@ -1240,14 +1240,14 @@ extern "C" void tfoo7() { // CHECK2-NEXT: br label [[FOR_INC:%.*]] // CHECK2: for.inc: // CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP19]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4 // CHECK2-NEXT: br label [[FOR_COND6]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK2: for.end: // CHECK2-NEXT: br label [[FOR_INC15:%.*]] // CHECK2: for.inc15: // CHECK2-NEXT: [[TMP20:%.*]] = load 
i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 5 +// CHECK2-NEXT: [[ADD16:%.*]] = add i32 [[TMP20]], 5 // CHECK2-NEXT: store i32 [[ADD16]], ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK2: for.end17: @@ -1255,7 +1255,7 @@ extern "C" void tfoo7() { // // // CHECK2-LABEL: define {{[^@]+}}@foo2 -// CHECK2-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR2]] { +// CHECK2-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 @@ -1368,7 +1368,7 @@ extern "C" void tfoo7() { // // // CHECK2-LABEL: define {{[^@]+}}@foo3 -// CHECK2-SAME: () #[[ATTR2]] { +// CHECK2-SAME: () #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -1510,7 +1510,7 @@ extern "C" void tfoo7() { // // // CHECK2-LABEL: define {{[^@]+}}@foo4 -// CHECK2-SAME: () #[[ATTR2]] { +// CHECK2-SAME: () #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -1663,7 +1663,7 @@ extern "C" void tfoo7() { // // // CHECK2-LABEL: define {{[^@]+}}@foo5 -// CHECK2-SAME: () #[[ATTR2]] { +// CHECK2-SAME: () #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -1872,14 +1872,14 @@ extern "C" void tfoo7() { // // // CHECK2-LABEL: define {{[^@]+}}@foo6 -// CHECK2-SAME: () #[[ATTR2]] { +// CHECK2-SAME: () #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @foo6.omp_outlined) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@foo6.omp_outlined -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -1975,14 +1975,14 @@ extern "C" void tfoo7() { // // // CHECK2-LABEL: define {{[^@]+}}@tfoo7 -// CHECK2-SAME: () #[[ATTR2]] { +// CHECK2-SAME: () #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: call void @_Z4foo7IiTnT_Li3ETnS0_Li5EEvS0_S0_(i32 noundef 0, i32 noundef 42) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@_Z4foo7IiTnT_Li3ETnS0_Li5EEvS0_S0_ -// CHECK2-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]]) #[[ATTR2]] comdat { +// CHECK2-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]]) #[[ATTR1]] comdat { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 @@ -2025,7 +2025,7 @@ extern "C" void tfoo7() { // CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: [[ADD7:%.*]] = add i32 [[TMP9]], 1 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 5 +// CHECK2-NEXT: [[ADD8:%.*]] = add i32 [[TMP10]], 5 // CHECK2-NEXT: [[CMP9:%.*]] = icmp ult i32 [[ADD7]], [[ADD8]] // CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: @@ -2034,7 +2034,7 @@ extern "C" void tfoo7() { // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add 
nsw i32 [[TMP12]], 5 +// CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[TMP12]], 5 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: // CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[ADD10]], [[COND_TRUE]] ], [ [[ADD11]], [[COND_FALSE]] ] @@ -2051,14 +2051,14 @@ extern "C" void tfoo7() { // CHECK2-NEXT: br label [[FOR_INC:%.*]] // CHECK2: for.inc: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP16]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4 // CHECK2-NEXT: br label [[FOR_COND6]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK2: for.end: // CHECK2-NEXT: br label [[FOR_INC15:%.*]] // CHECK2: for.inc15: // CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP17]], 5 +// CHECK2-NEXT: [[ADD16:%.*]] = add i32 [[TMP17]], 5 // CHECK2-NEXT: store i32 [[ADD16]], ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK2: for.end17: diff --git a/clang/test/OpenMP/tile_codegen_for_dependent.cpp b/clang/test/OpenMP/tile_codegen_for_dependent.cpp index 93c51c9165a47..a95bb919ac41b 100644 --- a/clang/test/OpenMP/tile_codegen_for_dependent.cpp +++ b/clang/test/OpenMP/tile_codegen_for_dependent.cpp @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 4 // Check code generation // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR @@ -17,172 +18,6 @@ extern "C" void body(...) 
{} -// IR-LABEL: @func( -// IR-NEXT: [[ENTRY:.*]]: -// IR-NEXT: %[[START_ADDR:.+]] = alloca i32, align 4 -// IR-NEXT: %[[END_ADDR:.+]] = alloca i32, align 4 -// IR-NEXT: %[[STEP_ADDR:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTOMP_IV:.+]] = alloca i32, align 4 -// IR-NEXT: %[[TMP:.+]] = alloca i32, align 4 -// IR-NEXT: %[[I:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_1:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_2:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_3:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_6:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_8:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTFLOOR_0_IV_I:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTOMP_LB:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTOMP_UB:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTOMP_STRIDE:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTFLOOR_0_IV_I12:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTTILE_0_IV_I:.+]] = alloca i32, align 4 -// IR-NEXT: %[[TMP0:.+]] = call i32 @__kmpc_global_thread_num(ptr @2) -// IR-NEXT: store i32 %[[START:.+]], ptr %[[START_ADDR]], align 4 -// IR-NEXT: store i32 %[[END:.+]], ptr %[[END_ADDR]], align 4 -// IR-NEXT: store i32 %[[STEP:.+]], ptr %[[STEP_ADDR]], align 4 -// IR-NEXT: %[[TMP1:.+]] = load i32, ptr %[[START_ADDR]], align 4 -// IR-NEXT: store i32 %[[TMP1]], ptr %[[I]], align 4 -// IR-NEXT: %[[TMP2:.+]] = load i32, ptr %[[START_ADDR]], align 4 -// IR-NEXT: store i32 %[[TMP2]], ptr %[[DOTCAPTURE_EXPR_]], align 4 -// IR-NEXT: %[[TMP3:.+]] = load i32, ptr %[[END_ADDR]], align 4 -// IR-NEXT: store i32 %[[TMP3]], ptr %[[DOTCAPTURE_EXPR_1]], align 4 -// IR-NEXT: %[[TMP4:.+]] = load i32, ptr %[[STEP_ADDR]], align 4 -// IR-NEXT: store i32 %[[TMP4]], ptr %[[DOTCAPTURE_EXPR_2]], align 4 -// IR-NEXT: %[[TMP5:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_1]], align 4 -// IR-NEXT: 
%[[TMP6:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_]], align 4 -// IR-NEXT: %[[SUB:.+]] = sub i32 %[[TMP5]], %[[TMP6]] -// IR-NEXT: %[[SUB4:.+]] = sub i32 %[[SUB]], 1 -// IR-NEXT: %[[TMP7:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4 -// IR-NEXT: %[[ADD:.+]] = add i32 %[[SUB4]], %[[TMP7]] -// IR-NEXT: %[[TMP8:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4 -// IR-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP8]] -// IR-NEXT: %[[SUB5:.+]] = sub i32 %[[DIV]], 1 -// IR-NEXT: store i32 %[[SUB5]], ptr %[[DOTCAPTURE_EXPR_3]], align 4 -// IR-NEXT: %[[TMP9:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_3]], align 4 -// IR-NEXT: %[[ADD7:.+]] = add i32 %[[TMP9]], 1 -// IR-NEXT: store i32 %[[ADD7]], ptr %[[DOTCAPTURE_EXPR_6]], align 4 -// IR-NEXT: %[[TMP10:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_6]], align 4 -// IR-NEXT: %[[SUB9:.+]] = sub i32 %[[TMP10]], -3 -// IR-NEXT: %[[DIV10:.+]] = udiv i32 %[[SUB9]], 4 -// IR-NEXT: %[[SUB11:.+]] = sub i32 %[[DIV10]], 1 -// IR-NEXT: store i32 %[[SUB11]], ptr %[[DOTCAPTURE_EXPR_8]], align 4 -// IR-NEXT: store i32 0, ptr %[[DOTFLOOR_0_IV_I]], align 4 -// IR-NEXT: %[[TMP11:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_6]], align 4 -// IR-NEXT: %[[CMP:.+]] = icmp ult i32 0, %[[TMP11]] -// IR-NEXT: br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]] -// IR-EMPTY: -// IR-NEXT: [[OMP_PRECOND_THEN]]: -// IR-NEXT: store i32 0, ptr %[[DOTOMP_LB]], align 4 -// IR-NEXT: %[[TMP12:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_8]], align 4 -// IR-NEXT: store i32 %[[TMP12]], ptr %[[DOTOMP_UB]], align 4 -// IR-NEXT: store i32 1, ptr %[[DOTOMP_STRIDE]], align 4 -// IR-NEXT: store i32 0, ptr %[[DOTOMP_IS_LAST]], align 4 -// IR-NEXT: call void @__kmpc_for_static_init_4u(ptr @1, i32 %[[TMP0]], i32 34, ptr %[[DOTOMP_IS_LAST]], ptr %[[DOTOMP_LB]], ptr %[[DOTOMP_UB]], ptr %[[DOTOMP_STRIDE]], i32 1, i32 1) -// IR-NEXT: %[[TMP13:.+]] = load i32, ptr %[[DOTOMP_UB]], align 4 -// IR-NEXT: %[[TMP14:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_8]], align 4 
-// IR-NEXT: %[[CMP13:.+]] = icmp ugt i32 %[[TMP13]], %[[TMP14]] -// IR-NEXT: br i1 %[[CMP13]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// IR-EMPTY: -// IR-NEXT: [[COND_TRUE]]: -// IR-NEXT: %[[TMP15:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_8]], align 4 -// IR-NEXT: br label %[[COND_END:.+]] -// IR-EMPTY: -// IR-NEXT: [[COND_FALSE]]: -// IR-NEXT: %[[TMP16:.+]] = load i32, ptr %[[DOTOMP_UB]], align 4 -// IR-NEXT: br label %[[COND_END]] -// IR-EMPTY: -// IR-NEXT: [[COND_END]]: -// IR-NEXT: %[[COND:.+]] = phi i32 [ %[[TMP15]], %[[COND_TRUE]] ], [ %[[TMP16]], %[[COND_FALSE]] ] -// IR-NEXT: store i32 %[[COND]], ptr %[[DOTOMP_UB]], align 4 -// IR-NEXT: %[[TMP17:.+]] = load i32, ptr %[[DOTOMP_LB]], align 4 -// IR-NEXT: store i32 %[[TMP17]], ptr %[[DOTOMP_IV]], align 4 -// IR-NEXT: br label %[[OMP_INNER_FOR_COND:.+]] -// IR-EMPTY: -// IR-NEXT: [[OMP_INNER_FOR_COND]]: -// IR-NEXT: %[[TMP18:.+]] = load i32, ptr %[[DOTOMP_IV]], align 4 -// IR-NEXT: %[[TMP19:.+]] = load i32, ptr %[[DOTOMP_UB]], align 4 -// IR-NEXT: %[[ADD14:.+]] = add i32 %[[TMP19]], 1 -// IR-NEXT: %[[CMP15:.+]] = icmp ult i32 %[[TMP18]], %[[ADD14]] -// IR-NEXT: br i1 %[[CMP15]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]] -// IR-EMPTY: -// IR-NEXT: [[OMP_INNER_FOR_BODY]]: -// IR-NEXT: %[[TMP20:.+]] = load i32, ptr %[[DOTOMP_IV]], align 4 -// IR-NEXT: %[[MUL:.+]] = mul i32 %[[TMP20]], 4 -// IR-NEXT: %[[ADD16:.+]] = add i32 0, %[[MUL]] -// IR-NEXT: store i32 %[[ADD16]], ptr %[[DOTFLOOR_0_IV_I12]], align 4 -// IR-NEXT: %[[TMP21:.+]] = load i32, ptr %[[DOTFLOOR_0_IV_I12]], align 4 -// IR-NEXT: store i32 %[[TMP21]], ptr %[[DOTTILE_0_IV_I]], align 4 -// IR-NEXT: br label %[[FOR_COND:.+]] -// IR-EMPTY: -// IR-NEXT: [[FOR_COND]]: -// IR-NEXT: %[[TMP22:.+]] = load i32, ptr %[[DOTTILE_0_IV_I]], align 4 -// IR-NEXT: %[[TMP23:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_3]], align 4 -// IR-NEXT: %[[ADD17:.+]] = add i32 %[[TMP23]], 1 -// IR-NEXT: %[[TMP24:.+]] = load i32, ptr 
%[[DOTFLOOR_0_IV_I12]], align 4 -// IR-NEXT: %[[ADD18:.+]] = add nsw i32 %[[TMP24]], 4 -// IR-NEXT: %[[CMP19:.+]] = icmp ult i32 %[[ADD17]], %[[ADD18]] -// IR-NEXT: br i1 %[[CMP19]], label %[[COND_TRUE20:.+]], label %[[COND_FALSE22:.+]] -// IR-EMPTY: -// IR-NEXT: [[COND_TRUE20]]: -// IR-NEXT: %[[TMP25:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_3]], align 4 -// IR-NEXT: %[[ADD21:.+]] = add i32 %[[TMP25]], 1 -// IR-NEXT: br label %[[COND_END24:.+]] -// IR-EMPTY: -// IR-NEXT: [[COND_FALSE22]]: -// IR-NEXT: %[[TMP26:.+]] = load i32, ptr %[[DOTFLOOR_0_IV_I12]], align 4 -// IR-NEXT: %[[ADD23:.+]] = add nsw i32 %[[TMP26]], 4 -// IR-NEXT: br label %[[COND_END24]] -// IR-EMPTY: -// IR-NEXT: [[COND_END24]]: -// IR-NEXT: %[[COND25:.+]] = phi i32 [ %[[ADD21]], %[[COND_TRUE20]] ], [ %[[ADD23]], %[[COND_FALSE22]] ] -// IR-NEXT: %[[CMP26:.+]] = icmp ult i32 %[[TMP22]], %[[COND25]] -// IR-NEXT: br i1 %[[CMP26]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]] -// IR-EMPTY: -// IR-NEXT: [[FOR_BODY]]: -// IR-NEXT: %[[TMP27:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_]], align 4 -// IR-NEXT: %[[TMP28:.+]] = load i32, ptr %[[DOTTILE_0_IV_I]], align 4 -// IR-NEXT: %[[TMP29:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4 -// IR-NEXT: %[[MUL27:.+]] = mul i32 %[[TMP28]], %[[TMP29]] -// IR-NEXT: %[[ADD28:.+]] = add i32 %[[TMP27]], %[[MUL27]] -// IR-NEXT: store i32 %[[ADD28]], ptr %[[I]], align 4 -// IR-NEXT: %[[TMP30:.+]] = load i32, ptr %[[START_ADDR]], align 4 -// IR-NEXT: %[[TMP31:.+]] = load i32, ptr %[[END_ADDR]], align 4 -// IR-NEXT: %[[TMP32:.+]] = load i32, ptr %[[STEP_ADDR]], align 4 -// IR-NEXT: %[[TMP33:.+]] = load i32, ptr %[[I]], align 4 -// IR-NEXT: call void (...) 
@body(i32 noundef %[[TMP30]], i32 noundef %[[TMP31]], i32 noundef %[[TMP32]], i32 noundef %[[TMP33]]) -// IR-NEXT: br label %[[FOR_INC:.+]] -// IR-EMPTY: -// IR-NEXT: [[FOR_INC]]: -// IR-NEXT: %[[TMP34:.+]] = load i32, ptr %[[DOTTILE_0_IV_I]], align 4 -// IR-NEXT: %[[INC:.+]] = add nsw i32 %[[TMP34]], 1 -// IR-NEXT: store i32 %[[INC]], ptr %[[DOTTILE_0_IV_I]], align 4 -// IR-NEXT: br label %[[FOR_COND]], !llvm.loop ![[LOOP2:[0-9]+]] -// IR-EMPTY: -// IR-NEXT: [[FOR_END]]: -// IR-NEXT: br label %[[OMP_BODY_CONTINUE:.+]] -// IR-EMPTY: -// IR-NEXT: [[OMP_BODY_CONTINUE]]: -// IR-NEXT: br label %[[OMP_INNER_FOR_INC:.+]] -// IR-EMPTY: -// IR-NEXT: [[OMP_INNER_FOR_INC]]: -// IR-NEXT: %[[TMP35:.+]] = load i32, ptr %[[DOTOMP_IV]], align 4 -// IR-NEXT: %[[ADD29:.+]] = add i32 %[[TMP35]], 1 -// IR-NEXT: store i32 %[[ADD29]], ptr %[[DOTOMP_IV]], align 4 -// IR-NEXT: br label %[[OMP_INNER_FOR_COND]] -// IR-EMPTY: -// IR-NEXT: [[OMP_INNER_FOR_END]]: -// IR-NEXT: br label %[[OMP_LOOP_EXIT:.+]] -// IR-EMPTY: -// IR-NEXT: [[OMP_LOOP_EXIT]]: -// IR-NEXT: call void @__kmpc_for_static_fini(ptr @1, i32 %[[TMP0]]) -// IR-NEXT: br label %[[OMP_PRECOND_END]] -// IR-EMPTY: -// IR-NEXT: [[OMP_PRECOND_END]]: -// IR-NEXT: call void @__kmpc_barrier(ptr @3, i32 %[[TMP0]]) -// IR-NEXT: ret void -// IR-NEXT: } extern "C" void func(int start, int end, int step) { #pragma omp for #pragma omp tile sizes(4) @@ -191,3 +26,162 @@ extern "C" void func(int start, int end, int step) { } #endif /* HEADER */ +// IR-LABEL: define dso_local void @body( +// IR-SAME: ...) 
#[[ATTR0:[0-9]+]] { +// IR-NEXT: entry: +// IR-NEXT: ret void +// +// +// IR-LABEL: define dso_local void @func( +// IR-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] { +// IR-NEXT: entry: +// IR-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_0_IV_I11:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// IR-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// IR-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// IR-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4 +// IR-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP1]], ptr [[I]], align 4 +// IR-NEXT: [[TMP2:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[TMP3:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[STEP_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP4]], ptr [[DOTNEW_STEP]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB:%.*]] = sub i32 [[TMP5]], [[TMP6]] +// IR-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// IR-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP7]] +// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// IR-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP8]] +// IR-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// IR-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: [[ADD6:%.*]] = add i32 [[TMP9]], 1 +// IR-NEXT: store i32 [[ADD6]], ptr [[DOTCAPTURE_EXPR_5]], align 4 +// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 +// IR-NEXT: [[SUB8:%.*]] = sub i32 [[TMP10]], -3 +// IR-NEXT: [[DIV9:%.*]] = udiv i32 [[SUB8]], 4 +// IR-NEXT: [[SUB10:%.*]] = sub i32 [[DIV9]], 1 +// IR-NEXT: store i32 [[SUB10]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// IR-NEXT: store i32 0, ptr [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP11]] +// IR-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR: omp.precond.then: +// IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// IR-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: [[TMP13:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// IR-NEXT: [[CMP12:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// IR-NEXT: br i1 [[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[ADD13:%.*]] = add i32 [[TMP19]], 1 +// IR-NEXT: [[CMP14:%.*]] = icmp ult i32 [[TMP18]], [[ADD13]] +// IR-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 4 +// IR-NEXT: [[ADD15:%.*]] = add i32 0, [[MUL]] +// IR-NEXT: store i32 [[ADD15]], ptr [[DOTFLOOR_0_IV_I11]], align 4 +// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I11]], align 4 +// IR-NEXT: store i32 [[TMP21]], ptr [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND:%.*]] +// IR: for.cond: +// IR-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: [[ADD16:%.*]] = add i32 [[TMP23]], 1 +// IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I11]], align 4 +// IR-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 
4 +// IR-NEXT: [[CMP18:%.*]] = icmp ult i32 [[ADD16]], [[ADD17]] +// IR-NEXT: br i1 [[CMP18]], label [[COND_TRUE19:%.*]], label [[COND_FALSE21:%.*]] +// IR: cond.true19: +// IR-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: [[ADD20:%.*]] = add i32 [[TMP25]], 1 +// IR-NEXT: br label [[COND_END23:%.*]] +// IR: cond.false21: +// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I11]], align 4 +// IR-NEXT: [[ADD22:%.*]] = add i32 [[TMP26]], 4 +// IR-NEXT: br label [[COND_END23]] +// IR: cond.end23: +// IR-NEXT: [[COND24:%.*]] = phi i32 [ [[ADD20]], [[COND_TRUE19]] ], [ [[ADD22]], [[COND_FALSE21]] ] +// IR-NEXT: [[CMP25:%.*]] = icmp ult i32 [[TMP22]], [[COND24]] +// IR-NEXT: br i1 [[CMP25]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// IR: for.body: +// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// IR-NEXT: [[MUL26:%.*]] = mul i32 [[TMP28]], [[TMP29]] +// IR-NEXT: [[ADD27:%.*]] = add i32 [[TMP27]], [[MUL26]] +// IR-NEXT: store i32 [[ADD27]], ptr [[I]], align 4 +// IR-NEXT: [[TMP30:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// IR-NEXT: [[TMP31:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// IR-NEXT: [[TMP32:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// IR-NEXT: [[TMP33:%.*]] = load i32, ptr [[I]], align 4 +// IR-NEXT: call void (...) 
@body(i32 noundef [[TMP30]], i32 noundef [[TMP31]], i32 noundef [[TMP32]], i32 noundef [[TMP33]]) +// IR-NEXT: br label [[FOR_INC:%.*]] +// IR: for.inc: +// IR-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[INC:%.*]] = add i32 [[TMP34]], 1 +// IR-NEXT: store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// IR: for.end: +// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR: omp.body.continue: +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[ADD28:%.*]] = add i32 [[TMP35]], 1 +// IR-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// IR-NEXT: br label [[OMP_PRECOND_END]] +// IR: omp.precond.end: +// IR-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) +// IR-NEXT: ret void +// +//. +// IR: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} +// IR: [[META4]] = !{!"llvm.loop.mustprogress"} +//. diff --git a/clang/test/OpenMP/tile_codegen_tile_for.cpp b/clang/test/OpenMP/tile_codegen_tile_for.cpp index d0fb89398c241..8b5756677e843 100644 --- a/clang/test/OpenMP/tile_codegen_tile_for.cpp +++ b/clang/test/OpenMP/tile_codegen_tile_for.cpp @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 4 // Check code generation // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR @@ -16,227 +17,6 @@ extern "C" void body(...) 
{} -// IR-LABEL: @func( -// IR-NEXT: [[ENTRY:.*]]: -// IR-NEXT: %[[START_ADDR:.+]] = alloca i32, align 4 -// IR-NEXT: %[[END_ADDR:.+]] = alloca i32, align 4 -// IR-NEXT: %[[STEP_ADDR:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTOMP_IV:.+]] = alloca i32, align 4 -// IR-NEXT: %[[TMP:.+]] = alloca i32, align 4 -// IR-NEXT: %[[I:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_1:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_2:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_3:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTFLOOR_0_IV_I:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_6:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_8:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_12:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_14:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTFLOOR_0_IV__FLOOR_0_IV_I:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTOMP_LB:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTOMP_UB:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTOMP_STRIDE:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTFLOOR_0_IV__FLOOR_0_IV_I18:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTTILE_0_IV__FLOOR_0_IV_I:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTTILE_0_IV_I:.+]] = alloca i32, align 4 -// IR-NEXT: %[[TMP0:.+]] = call i32 @__kmpc_global_thread_num(ptr @2) -// IR-NEXT: store i32 %[[START:.+]], ptr %[[START_ADDR]], align 4 -// IR-NEXT: store i32 %[[END:.+]], ptr %[[END_ADDR]], align 4 -// IR-NEXT: store i32 %[[STEP:.+]], ptr %[[STEP_ADDR]], align 4 -// IR-NEXT: %[[TMP1:.+]] = load i32, ptr %[[START_ADDR]], align 4 -// IR-NEXT: store i32 %[[TMP1]], ptr %[[I]], align 4 -// IR-NEXT: %[[TMP2:.+]] = load i32, ptr %[[START_ADDR]], align 4 -// IR-NEXT: store i32 %[[TMP2]], ptr %[[DOTCAPTURE_EXPR_]], align 4 -// IR-NEXT: %[[TMP3:.+]] = load i32, ptr %[[END_ADDR]], align 4 -// IR-NEXT: 
store i32 %[[TMP3]], ptr %[[DOTCAPTURE_EXPR_1]], align 4 -// IR-NEXT: %[[TMP4:.+]] = load i32, ptr %[[STEP_ADDR]], align 4 -// IR-NEXT: store i32 %[[TMP4]], ptr %[[DOTCAPTURE_EXPR_2]], align 4 -// IR-NEXT: %[[TMP5:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_1]], align 4 -// IR-NEXT: %[[TMP6:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_]], align 4 -// IR-NEXT: %[[SUB:.+]] = sub i32 %[[TMP5]], %[[TMP6]] -// IR-NEXT: %[[SUB4:.+]] = sub i32 %[[SUB]], 1 -// IR-NEXT: %[[TMP7:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4 -// IR-NEXT: %[[ADD:.+]] = add i32 %[[SUB4]], %[[TMP7]] -// IR-NEXT: %[[TMP8:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4 -// IR-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP8]] -// IR-NEXT: %[[SUB5:.+]] = sub i32 %[[DIV]], 1 -// IR-NEXT: store i32 %[[SUB5]], ptr %[[DOTCAPTURE_EXPR_3]], align 4 -// IR-NEXT: store i32 0, ptr %[[DOTFLOOR_0_IV_I]], align 4 -// IR-NEXT: %[[TMP9:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_3]], align 4 -// IR-NEXT: %[[ADD7:.+]] = add i32 %[[TMP9]], 1 -// IR-NEXT: store i32 %[[ADD7]], ptr %[[DOTCAPTURE_EXPR_6]], align 4 -// IR-NEXT: %[[TMP10:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_6]], align 4 -// IR-NEXT: %[[SUB9:.+]] = sub i32 %[[TMP10]], -3 -// IR-NEXT: %[[DIV10:.+]] = udiv i32 %[[SUB9]], 4 -// IR-NEXT: %[[SUB11:.+]] = sub i32 %[[DIV10]], 1 -// IR-NEXT: store i32 %[[SUB11]], ptr %[[DOTCAPTURE_EXPR_8]], align 4 -// IR-NEXT: %[[TMP11:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_8]], align 4 -// IR-NEXT: %[[ADD13:.+]] = add i32 %[[TMP11]], 1 -// IR-NEXT: store i32 %[[ADD13]], ptr %[[DOTCAPTURE_EXPR_12]], align 4 -// IR-NEXT: %[[TMP12:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_12]], align 4 -// IR-NEXT: %[[SUB15:.+]] = sub i32 %[[TMP12]], -2 -// IR-NEXT: %[[DIV16:.+]] = udiv i32 %[[SUB15]], 3 -// IR-NEXT: %[[SUB17:.+]] = sub i32 %[[DIV16]], 1 -// IR-NEXT: store i32 %[[SUB17]], ptr %[[DOTCAPTURE_EXPR_14]], align 4 -// IR-NEXT: store i32 0, ptr %[[DOTFLOOR_0_IV__FLOOR_0_IV_I]], align 4 -// IR-NEXT: %[[TMP13:.+]] = load i32, ptr 
%[[DOTCAPTURE_EXPR_12]], align 4 -// IR-NEXT: %[[CMP:.+]] = icmp ult i32 0, %[[TMP13]] -// IR-NEXT: br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]] -// IR-EMPTY: -// IR-NEXT: [[OMP_PRECOND_THEN]]: -// IR-NEXT: store i32 0, ptr %[[DOTOMP_LB]], align 4 -// IR-NEXT: %[[TMP14:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_14]], align 4 -// IR-NEXT: store i32 %[[TMP14]], ptr %[[DOTOMP_UB]], align 4 -// IR-NEXT: store i32 1, ptr %[[DOTOMP_STRIDE]], align 4 -// IR-NEXT: store i32 0, ptr %[[DOTOMP_IS_LAST]], align 4 -// IR-NEXT: call void @__kmpc_for_static_init_4u(ptr @1, i32 %[[TMP0]], i32 34, ptr %[[DOTOMP_IS_LAST]], ptr %[[DOTOMP_LB]], ptr %[[DOTOMP_UB]], ptr %[[DOTOMP_STRIDE]], i32 1, i32 1) -// IR-NEXT: %[[TMP15:.+]] = load i32, ptr %[[DOTOMP_UB]], align 4 -// IR-NEXT: %[[TMP16:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_14]], align 4 -// IR-NEXT: %[[CMP19:.+]] = icmp ugt i32 %[[TMP15]], %[[TMP16]] -// IR-NEXT: br i1 %[[CMP19]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// IR-EMPTY: -// IR-NEXT: [[COND_TRUE]]: -// IR-NEXT: %[[TMP17:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_14]], align 4 -// IR-NEXT: br label %[[COND_END:.+]] -// IR-EMPTY: -// IR-NEXT: [[COND_FALSE]]: -// IR-NEXT: %[[TMP18:.+]] = load i32, ptr %[[DOTOMP_UB]], align 4 -// IR-NEXT: br label %[[COND_END]] -// IR-EMPTY: -// IR-NEXT: [[COND_END]]: -// IR-NEXT: %[[COND:.+]] = phi i32 [ %[[TMP17]], %[[COND_TRUE]] ], [ %[[TMP18]], %[[COND_FALSE]] ] -// IR-NEXT: store i32 %[[COND]], ptr %[[DOTOMP_UB]], align 4 -// IR-NEXT: %[[TMP19:.+]] = load i32, ptr %[[DOTOMP_LB]], align 4 -// IR-NEXT: store i32 %[[TMP19]], ptr %[[DOTOMP_IV]], align 4 -// IR-NEXT: br label %[[OMP_INNER_FOR_COND:.+]] -// IR-EMPTY: -// IR-NEXT: [[OMP_INNER_FOR_COND]]: -// IR-NEXT: %[[TMP20:.+]] = load i32, ptr %[[DOTOMP_IV]], align 4 -// IR-NEXT: %[[TMP21:.+]] = load i32, ptr %[[DOTOMP_UB]], align 4 -// IR-NEXT: %[[ADD20:.+]] = add i32 %[[TMP21]], 1 -// IR-NEXT: %[[CMP21:.+]] = icmp ult i32 %[[TMP20]], %[[ADD20]] -// 
IR-NEXT: br i1 %[[CMP21]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]] -// IR-EMPTY: -// IR-NEXT: [[OMP_INNER_FOR_BODY]]: -// IR-NEXT: %[[TMP22:.+]] = load i32, ptr %[[DOTOMP_IV]], align 4 -// IR-NEXT: %[[MUL:.+]] = mul i32 %[[TMP22]], 3 -// IR-NEXT: %[[ADD22:.+]] = add i32 0, %[[MUL]] -// IR-NEXT: store i32 %[[ADD22]], ptr %[[DOTFLOOR_0_IV__FLOOR_0_IV_I18]], align 4 -// IR-NEXT: %[[TMP23:.+]] = load i32, ptr %[[DOTFLOOR_0_IV__FLOOR_0_IV_I18]], align 4 -// IR-NEXT: store i32 %[[TMP23]], ptr %[[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4 -// IR-NEXT: br label %[[FOR_COND:.+]] -// IR-EMPTY: -// IR-NEXT: [[FOR_COND]]: -// IR-NEXT: %[[TMP24:.+]] = load i32, ptr %[[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4 -// IR-NEXT: %[[TMP25:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_8]], align 4 -// IR-NEXT: %[[ADD23:.+]] = add i32 %[[TMP25]], 1 -// IR-NEXT: %[[TMP26:.+]] = load i32, ptr %[[DOTFLOOR_0_IV__FLOOR_0_IV_I18]], align 4 -// IR-NEXT: %[[ADD24:.+]] = add i32 %[[TMP26]], 3 -// IR-NEXT: %[[CMP25:.+]] = icmp ult i32 %[[ADD23]], %[[ADD24]] -// IR-NEXT: br i1 %[[CMP25]], label %[[COND_TRUE26:.+]], label %[[COND_FALSE28:.+]] -// IR-EMPTY: -// IR-NEXT: [[COND_TRUE26]]: -// IR-NEXT: %[[TMP27:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_8]], align 4 -// IR-NEXT: %[[ADD27:.+]] = add i32 %[[TMP27]], 1 -// IR-NEXT: br label %[[COND_END30:.+]] -// IR-EMPTY: -// IR-NEXT: [[COND_FALSE28]]: -// IR-NEXT: %[[TMP28:.+]] = load i32, ptr %[[DOTFLOOR_0_IV__FLOOR_0_IV_I18]], align 4 -// IR-NEXT: %[[ADD29:.+]] = add i32 %[[TMP28]], 3 -// IR-NEXT: br label %[[COND_END30]] -// IR-EMPTY: -// IR-NEXT: [[COND_END30]]: -// IR-NEXT: %[[COND31:.+]] = phi i32 [ %[[ADD27]], %[[COND_TRUE26]] ], [ %[[ADD29]], %[[COND_FALSE28]] ] -// IR-NEXT: %[[CMP32:.+]] = icmp ult i32 %[[TMP24]], %[[COND31]] -// IR-NEXT: br i1 %[[CMP32]], label %[[FOR_BODY:.+]], label %[[FOR_END51:.+]] -// IR-EMPTY: -// IR-NEXT: [[FOR_BODY]]: -// IR-NEXT: %[[TMP29:.+]] = load i32, ptr %[[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4 -// 
IR-NEXT: %[[MUL33:.+]] = mul i32 %[[TMP29]], 4 -// IR-NEXT: %[[ADD34:.+]] = add i32 0, %[[MUL33]] -// IR-NEXT: store i32 %[[ADD34]], ptr %[[DOTFLOOR_0_IV_I]], align 4 -// IR-NEXT: %[[TMP30:.+]] = load i32, ptr %[[DOTFLOOR_0_IV_I]], align 4 -// IR-NEXT: store i32 %[[TMP30]], ptr %[[DOTTILE_0_IV_I]], align 4 -// IR-NEXT: br label %[[FOR_COND35:.+]] -// IR-EMPTY: -// IR-NEXT: [[FOR_COND35]]: -// IR-NEXT: %[[TMP31:.+]] = load i32, ptr %[[DOTTILE_0_IV_I]], align 4 -// IR-NEXT: %[[TMP32:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_3]], align 4 -// IR-NEXT: %[[ADD36:.+]] = add i32 %[[TMP32]], 1 -// IR-NEXT: %[[TMP33:.+]] = load i32, ptr %[[DOTFLOOR_0_IV_I]], align 4 -// IR-NEXT: %[[ADD37:.+]] = add nsw i32 %[[TMP33]], 4 -// IR-NEXT: %[[CMP38:.+]] = icmp ult i32 %[[ADD36]], %[[ADD37]] -// IR-NEXT: br i1 %[[CMP38]], label %[[COND_TRUE39:.+]], label %[[COND_FALSE41:.+]] -// IR-EMPTY: -// IR-NEXT: [[COND_TRUE39]]: -// IR-NEXT: %[[TMP34:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_3]], align 4 -// IR-NEXT: %[[ADD40:.+]] = add i32 %[[TMP34]], 1 -// IR-NEXT: br label %[[COND_END43:.+]] -// IR-EMPTY: -// IR-NEXT: [[COND_FALSE41]]: -// IR-NEXT: %[[TMP35:.+]] = load i32, ptr %[[DOTFLOOR_0_IV_I]], align 4 -// IR-NEXT: %[[ADD42:.+]] = add nsw i32 %[[TMP35]], 4 -// IR-NEXT: br label %[[COND_END43]] -// IR-EMPTY: -// IR-NEXT: [[COND_END43]]: -// IR-NEXT: %[[COND44:.+]] = phi i32 [ %[[ADD40]], %[[COND_TRUE39]] ], [ %[[ADD42]], %[[COND_FALSE41]] ] -// IR-NEXT: %[[CMP45:.+]] = icmp ult i32 %[[TMP31]], %[[COND44]] -// IR-NEXT: br i1 %[[CMP45]], label %[[FOR_BODY46:.+]], label %[[FOR_END:.+]] -// IR-EMPTY: -// IR-NEXT: [[FOR_BODY46]]: -// IR-NEXT: %[[TMP36:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_]], align 4 -// IR-NEXT: %[[TMP37:.+]] = load i32, ptr %[[DOTTILE_0_IV_I]], align 4 -// IR-NEXT: %[[TMP38:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4 -// IR-NEXT: %[[MUL47:.+]] = mul i32 %[[TMP37]], %[[TMP38]] -// IR-NEXT: %[[ADD48:.+]] = add i32 %[[TMP36]], %[[MUL47]] -// IR-NEXT: store i32 
%[[ADD48]], ptr %[[I]], align 4 -// IR-NEXT: %[[TMP39:.+]] = load i32, ptr %[[START_ADDR]], align 4 -// IR-NEXT: %[[TMP40:.+]] = load i32, ptr %[[END_ADDR]], align 4 -// IR-NEXT: %[[TMP41:.+]] = load i32, ptr %[[STEP_ADDR]], align 4 -// IR-NEXT: %[[TMP42:.+]] = load i32, ptr %[[I]], align 4 -// IR-NEXT: call void (...) @body(i32 noundef %[[TMP39]], i32 noundef %[[TMP40]], i32 noundef %[[TMP41]], i32 noundef %[[TMP42]]) -// IR-NEXT: br label %[[FOR_INC:.+]] -// IR-EMPTY: -// IR-NEXT: [[FOR_INC]]: -// IR-NEXT: %[[TMP43:.+]] = load i32, ptr %[[DOTTILE_0_IV_I]], align 4 -// IR-NEXT: %[[INC:.+]] = add nsw i32 %[[TMP43]], 1 -// IR-NEXT: store i32 %[[INC]], ptr %[[DOTTILE_0_IV_I]], align 4 -// IR-NEXT: br label %[[FOR_COND35]], !llvm.loop ![[LOOP2:[0-9]+]] -// IR-EMPTY: -// IR-NEXT: [[FOR_END]]: -// IR-NEXT: br label %[[FOR_INC49:.+]] -// IR-EMPTY: -// IR-NEXT: [[FOR_INC49]]: -// IR-NEXT: %[[TMP44:.+]] = load i32, ptr %[[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4 -// IR-NEXT: %[[INC50:.+]] = add i32 %[[TMP44]], 1 -// IR-NEXT: store i32 %[[INC50]], ptr %[[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4 -// IR-NEXT: br label %[[FOR_COND]], !llvm.loop ![[LOOP4:[0-9]+]] -// IR-EMPTY: -// IR-NEXT: [[FOR_END51]]: -// IR-NEXT: br label %[[OMP_BODY_CONTINUE:.+]] -// IR-EMPTY: -// IR-NEXT: [[OMP_BODY_CONTINUE]]: -// IR-NEXT: br label %[[OMP_INNER_FOR_INC:.+]] -// IR-EMPTY: -// IR-NEXT: [[OMP_INNER_FOR_INC]]: -// IR-NEXT: %[[TMP45:.+]] = load i32, ptr %[[DOTOMP_IV]], align 4 -// IR-NEXT: %[[ADD52:.+]] = add i32 %[[TMP45]], 1 -// IR-NEXT: store i32 %[[ADD52]], ptr %[[DOTOMP_IV]], align 4 -// IR-NEXT: br label %[[OMP_INNER_FOR_COND]] -// IR-EMPTY: -// IR-NEXT: [[OMP_INNER_FOR_END]]: -// IR-NEXT: br label %[[OMP_LOOP_EXIT:.+]] -// IR-EMPTY: -// IR-NEXT: [[OMP_LOOP_EXIT]]: -// IR-NEXT: call void @__kmpc_for_static_fini(ptr @1, i32 %[[TMP0]]) -// IR-NEXT: br label %[[OMP_PRECOND_END]] -// IR-EMPTY: -// IR-NEXT: [[OMP_PRECOND_END]]: -// IR-NEXT: call void @__kmpc_barrier(ptr @3, i32 %[[TMP0]]) -// 
IR-NEXT: ret void -// IR-NEXT: } extern "C" void func(int start, int end, int step) { #pragma omp for #pragma omp tile sizes(3) @@ -246,8 +26,211 @@ extern "C" void func(int start, int end, int step) { } #endif /* HEADER */ -// IR: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// IR: ![[META1:[0-9]+]] = !{!"{{[^"]*}}"} -// IR: ![[LOOP2]] = distinct !{![[LOOP2]], ![[LOOPPROP3:[0-9]+]]} -// IR: ![[LOOPPROP3]] = !{!"llvm.loop.mustprogress"} -// IR: ![[LOOP4]] = distinct !{![[LOOP4]], ![[LOOPPROP3]]} +// IR-LABEL: define dso_local void @body( +// IR-SAME: ...) #[[ATTR0:[0-9]+]] { +// IR-NEXT: entry: +// IR-NEXT: ret void +// +// +// IR-LABEL: define dso_local void @func( +// IR-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] { +// IR-NEXT: entry: +// IR-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_13:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_0_IV__FLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTFLOOR_0_IV__FLOOR_0_IV_I17:%.*]] = 
alloca i32, align 4 +// IR-NEXT: [[DOTTILE_0_IV__FLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// IR-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// IR-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// IR-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4 +// IR-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP1]], ptr [[I]], align 4 +// IR-NEXT: [[TMP2:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[TMP3:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP4]], ptr [[DOTNEW_STEP]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB:%.*]] = sub i32 [[TMP5]], [[TMP6]] +// IR-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// IR-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP7]] +// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// IR-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP8]] +// IR-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// IR-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: store i32 0, ptr [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: [[ADD6:%.*]] = add i32 [[TMP9]], 1 +// IR-NEXT: store i32 [[ADD6]], ptr [[DOTCAPTURE_EXPR_5]], align 4 +// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 +// IR-NEXT: [[SUB8:%.*]] = sub i32 [[TMP10]], -3 +// IR-NEXT: [[DIV9:%.*]] = udiv i32 [[SUB8]], 4 +// IR-NEXT: [[SUB10:%.*]] = sub i32 [[DIV9]], 1 
+// IR-NEXT: store i32 [[SUB10]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// IR-NEXT: [[ADD12:%.*]] = add i32 [[TMP11]], 1 +// IR-NEXT: store i32 [[ADD12]], ptr [[DOTCAPTURE_EXPR_11]], align 4 +// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_11]], align 4 +// IR-NEXT: [[SUB14:%.*]] = sub i32 [[TMP12]], -2 +// IR-NEXT: [[DIV15:%.*]] = udiv i32 [[SUB14]], 3 +// IR-NEXT: [[SUB16:%.*]] = sub i32 [[DIV15]], 1 +// IR-NEXT: store i32 [[SUB16]], ptr [[DOTCAPTURE_EXPR_13]], align 4 +// IR-NEXT: store i32 0, ptr [[DOTFLOOR_0_IV__FLOOR_0_IV_I]], align 4 +// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_11]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP13]] +// IR-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR: omp.precond.then: +// IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_13]], align 4 +// IR-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_13]], align 4 +// IR-NEXT: [[CMP18:%.*]] = icmp ugt i32 [[TMP15]], [[TMP16]] +// IR-NEXT: br i1 [[CMP18]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_13]], align 4 +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ 
[[TMP18]], [[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[ADD19:%.*]] = add i32 [[TMP21]], 1 +// IR-NEXT: [[CMP20:%.*]] = icmp ult i32 [[TMP20]], [[ADD19]] +// IR-NEXT: br i1 [[CMP20]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[MUL:%.*]] = mul i32 [[TMP22]], 3 +// IR-NEXT: [[ADD21:%.*]] = add i32 0, [[MUL]] +// IR-NEXT: store i32 [[ADD21]], ptr [[DOTFLOOR_0_IV__FLOOR_0_IV_I17]], align 4 +// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTFLOOR_0_IV__FLOOR_0_IV_I17]], align 4 +// IR-NEXT: store i32 [[TMP23]], ptr [[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND:%.*]] +// IR: for.cond: +// IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4 +// IR-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// IR-NEXT: [[ADD22:%.*]] = add i32 [[TMP25]], 1 +// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTFLOOR_0_IV__FLOOR_0_IV_I17]], align 4 +// IR-NEXT: [[ADD23:%.*]] = add i32 [[TMP26]], 3 +// IR-NEXT: [[CMP24:%.*]] = icmp ult i32 [[ADD22]], [[ADD23]] +// IR-NEXT: br i1 [[CMP24]], label [[COND_TRUE25:%.*]], label [[COND_FALSE27:%.*]] +// IR: cond.true25: +// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// IR-NEXT: [[ADD26:%.*]] = add i32 [[TMP27]], 1 +// IR-NEXT: br label [[COND_END29:%.*]] +// IR: cond.false27: +// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTFLOOR_0_IV__FLOOR_0_IV_I17]], align 4 +// IR-NEXT: [[ADD28:%.*]] = add i32 [[TMP28]], 3 +// IR-NEXT: br label [[COND_END29]] +// 
IR: cond.end29: +// IR-NEXT: [[COND30:%.*]] = phi i32 [ [[ADD26]], [[COND_TRUE25]] ], [ [[ADD28]], [[COND_FALSE27]] ] +// IR-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP24]], [[COND30]] +// IR-NEXT: br i1 [[CMP31]], label [[FOR_BODY:%.*]], label [[FOR_END50:%.*]] +// IR: for.body: +// IR-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4 +// IR-NEXT: [[MUL32:%.*]] = mul i32 [[TMP29]], 4 +// IR-NEXT: [[ADD33:%.*]] = add i32 0, [[MUL32]] +// IR-NEXT: store i32 [[ADD33]], ptr [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: store i32 [[TMP30]], ptr [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND34:%.*]] +// IR: for.cond34: +// IR-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: [[ADD35:%.*]] = add i32 [[TMP32]], 1 +// IR-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD36:%.*]] = add i32 [[TMP33]], 4 +// IR-NEXT: [[CMP37:%.*]] = icmp ult i32 [[ADD35]], [[ADD36]] +// IR-NEXT: br i1 [[CMP37]], label [[COND_TRUE38:%.*]], label [[COND_FALSE40:%.*]] +// IR: cond.true38: +// IR-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: [[ADD39:%.*]] = add i32 [[TMP34]], 1 +// IR-NEXT: br label [[COND_END42:%.*]] +// IR: cond.false40: +// IR-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: [[ADD41:%.*]] = add i32 [[TMP35]], 4 +// IR-NEXT: br label [[COND_END42]] +// IR: cond.end42: +// IR-NEXT: [[COND43:%.*]] = phi i32 [ [[ADD39]], [[COND_TRUE38]] ], [ [[ADD41]], [[COND_FALSE40]] ] +// IR-NEXT: [[CMP44:%.*]] = icmp ult i32 [[TMP31]], [[COND43]] +// IR-NEXT: br i1 [[CMP44]], label [[FOR_BODY45:%.*]], label [[FOR_END:%.*]] +// IR: for.body45: +// IR-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: 
[[TMP38:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// IR-NEXT: [[MUL46:%.*]] = mul i32 [[TMP37]], [[TMP38]] +// IR-NEXT: [[ADD47:%.*]] = add i32 [[TMP36]], [[MUL46]] +// IR-NEXT: store i32 [[ADD47]], ptr [[I]], align 4 +// IR-NEXT: [[TMP39:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// IR-NEXT: [[TMP40:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// IR-NEXT: [[TMP41:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// IR-NEXT: [[TMP42:%.*]] = load i32, ptr [[I]], align 4 +// IR-NEXT: call void (...) @body(i32 noundef [[TMP39]], i32 noundef [[TMP40]], i32 noundef [[TMP41]], i32 noundef [[TMP42]]) +// IR-NEXT: br label [[FOR_INC:%.*]] +// IR: for.inc: +// IR-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: [[INC:%.*]] = add i32 [[TMP43]], 1 +// IR-NEXT: store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND34]], !llvm.loop [[LOOP3:![0-9]+]] +// IR: for.end: +// IR-NEXT: br label [[FOR_INC48:%.*]] +// IR: for.inc48: +// IR-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4 +// IR-NEXT: [[INC49:%.*]] = add i32 [[TMP44]], 1 +// IR-NEXT: store i32 [[INC49]], ptr [[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// IR: for.end50: +// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR: omp.body.continue: +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[ADD51:%.*]] = add i32 [[TMP45]], 1 +// IR-NEXT: store i32 [[ADD51]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// IR-NEXT: br label [[OMP_PRECOND_END]] +// IR: omp.precond.end: +// IR-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) +// IR-NEXT: ret void +// +//. 
+// IR: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} +// IR: [[META4]] = !{!"llvm.loop.mustprogress"} +// IR: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]} +//. _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits