gtbercea created this revision.
gtbercea added reviewers: ABataev, caomhin.
Herald added subscribers: cfe-commits, guansong, jholewinski.

This patch adds a new code generation path for loop-bound-sharing directives
containing distribute parallel for. The new code generation scheme applies to
chunked schedules on the distribute and parallel for directives. The scheme
simplifies the generated code by eliminating the need for an outer for loop
over chunks for both the distribute and the parallel for directives. For
distribute it applies to chunks of any size, whereas for parallel for it
applies only when the chunk size is 1.


Repository:
  rC Clang

https://reviews.llvm.org/D53448

Files:
  include/clang/AST/StmtOpenMP.h
  include/clang/Basic/OpenMPKinds.h
  lib/AST/StmtOpenMP.cpp
  lib/CodeGen/CGOpenMPRuntime.cpp
  lib/CodeGen/CGOpenMPRuntime.h
  lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
  lib/CodeGen/CGStmtOpenMP.cpp
  lib/Sema/SemaOpenMP.cpp
  lib/Serialization/ASTReaderStmt.cpp
  lib/Serialization/ASTWriterStmt.cpp
  test/OpenMP/distribute_parallel_for_codegen.cpp
  test/OpenMP/distribute_parallel_for_simd_codegen.cpp

Index: test/OpenMP/distribute_parallel_for_simd_codegen.cpp
===================================================================
--- test/OpenMP/distribute_parallel_for_simd_codegen.cpp
+++ test/OpenMP/distribute_parallel_for_simd_codegen.cpp
@@ -406,18 +406,16 @@
       a[i] = b[i] + c[i];
       // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]](
       // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
+      // LAMBDA-DAG: [[OMP_CAPT_EXPR:%.capture_expr.1]] = alloca
       // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
       // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
       // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
 
-      // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-      // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
 
-      // LAMBDA: [[DIST_OUTER_LOOP_HEADER]]:
       // check EUB for distribute
       // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-      // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
+      // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
       // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
       // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
       // LAMBDA-DAG: [[EUB_TRUE]]:
@@ -436,18 +434,9 @@
 
       // check exit condition
       // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-      // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
+      // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_CAPT_EXPR]],
       // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-      // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-      // LAMBDA: [[DIST_OUTER_LOOP_BODY]]:
-      // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-      // LAMBDA: [[DIST_INNER_LOOP_HEADER]]:
-      // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-      // LAMBDA-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-      // LAMBDA: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-      // LAMBDA: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+      // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
 
       // check that PrevLB and PrevUB are passed to the 'for'
       // LAMBDA: [[DIST_INNER_LOOP_BODY]]:
@@ -466,25 +455,39 @@
       // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
       // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
       // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-      // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER]]
-
-      // LAMBDA: [[DIST_INNER_LOOP_END]]:
-      // LAMBDA: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-      // LAMBDA: [[DIST_OUTER_LOOP_INC]]:
-      // check NextLB and NextUB
       // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
       // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
       // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
       // LAMBDA: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
       // LAMBDA-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
       // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
       // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
       // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-      // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER]]
 
-      // outer loop exit
-      // LAMBDA: [[DIST_OUTER_LOOP_END]]:
+      // Update UB
+      // LAMBDA-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+      // LAMBDA: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_CAPT_EXPR]],
+      // LAMBDA-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+      // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+      // LAMBDA-DAG: [[EUB_TRUE_1]]:
+      // LAMBDA: [[NUM_IT_3:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
+      // LAMBDA: br label %[[EUB_END_1:.+]]
+      // LAMBDA-DAG: [[EUB_FALSE_1]]:
+      // LAMBDA: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+      // LAMBDA: br label %[[EUB_END_1]]
+      // LAMBDA-DAG: [[EUB_END_1]]:
+      // LAMBDA-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+      // LAMBDA: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+      // Store LB in IV
+      // LAMBDA-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+      // LAMBDA: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+      // LAMBDA: [[DIST_INNER_LOOP_END]]:
+      // LAMBDA: br label %[[LOOP_EXIT:.+]]
+
+      // loop exit
+      // LAMBDA: [[LOOP_EXIT]]:
       // LAMBDA-DAG: call void @__kmpc_for_static_fini(
       // LAMBDA: ret
 
@@ -1154,18 +1157,17 @@
     a[i] = b[i] + c[i];
     // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
     // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
+    // CHECK-DAG: [[OMP_CAPT_EXPR:%.capture_expr.1]] = alloca
     // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
     // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
     // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
 
     // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-    // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
 
-    // CHECK: [[DIST_OUTER_LOOP_HEADER]]:
     // check EUB for distribute
     // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-    // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+    // CHECK: [[NUM_IT_1:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
     // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
     // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
     // CHECK-DAG: [[EUB_TRUE]]:
@@ -1184,18 +1186,9 @@
 
     // check exit condition
     // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-    // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
+    // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_CAPT_EXPR]],
     // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-    // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-    // CHECK: [[DIST_OUTER_LOOP_BODY]]:
-    // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-    // CHECK: [[DIST_INNER_LOOP_HEADER]]:
-    // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-    // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-    // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-    // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+    // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
 
     // check that PrevLB and PrevUB are passed to the 'for'
     // CHECK: [[DIST_INNER_LOOP_BODY]]:
@@ -1214,25 +1207,39 @@
     // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
     // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
     // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-    // CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
-
-    // CHECK: [[DIST_INNER_LOOP_END]]:
-    // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-    // CHECK: [[DIST_OUTER_LOOP_INC]]:
-    // check NextLB and NextUB
     // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
     // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
     // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
     // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
     // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
     // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
     // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
     // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-    // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
 
-    // outer loop exit
-    // CHECK: [[DIST_OUTER_LOOP_END]]:
+    // Update UB
+    // CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+    // CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_CAPT_EXPR]],
+    // CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+    // CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+    // CHECK-DAG: [[EUB_TRUE_1]]:
+    // CHECK: [[NUM_IT_3:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
+    // CHECK: br label %[[EUB_END_1:.+]]
+    // CHECK-DAG: [[EUB_FALSE_1]]:
+    // CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+    // CHECK: br label %[[EUB_END_1]]
+    // CHECK-DAG: [[EUB_END_1]]:
+    // CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+    // CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+    // Store LB in IV
+    // CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+    // CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+    // CHECK: [[DIST_INNER_LOOP_END]]:
+    // CHECK: br label %[[LOOP_EXIT:.+]]
+
+    // loop exit
+    // CHECK: [[LOOP_EXIT]]:
     // CHECK-DAG: call void @__kmpc_for_static_fini(
     // CHECK: ret
 
@@ -1867,18 +1874,17 @@
 
 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
+// CHECK-DAG: [[OMP_CAPT_EXPR:%.capture_expr.1]] = alloca
 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
 
 // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-// CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
 
-// CHECK: [[DIST_OUTER_LOOP_HEADER]]:
 // check EUB for distribute
 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-// CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+// CHECK: [[NUM_IT_1:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
 // CHECK-DAG: [[EUB_TRUE]]:
@@ -1897,18 +1903,9 @@
 
 // check exit condition
 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
+// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_CAPT_EXPR]],
 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-// CHECK: [[DIST_OUTER_LOOP_BODY]]:
-// CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-// CHECK: [[DIST_INNER_LOOP_HEADER]]:
-// CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-// CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-// CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-// CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
 
 // check that PrevLB and PrevUB are passed to the 'for'
 // CHECK: [[DIST_INNER_LOOP_BODY]]:
@@ -1927,25 +1924,39 @@
 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-// CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
-
-// CHECK: [[DIST_INNER_LOOP_END]]:
-// CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-// CHECK: [[DIST_OUTER_LOOP_INC]]:
-// check NextLB and NextUB
 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
 // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
 // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-// CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
 
-// outer loop exit
-// CHECK: [[DIST_OUTER_LOOP_END]]:
+// Update UB
+// CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+// CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_CAPT_EXPR]],
+// CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+// CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+// CHECK-DAG: [[EUB_TRUE_1]]:
+// CHECK: [[NUM_IT_3:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
+// CHECK: br label %[[EUB_END_1:.+]]
+// CHECK-DAG: [[EUB_FALSE_1]]:
+// CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+// CHECK: br label %[[EUB_END_1]]
+// CHECK-DAG: [[EUB_END_1]]:
+// CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+// CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+// Store LB in IV
+// CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+// CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+// CHECK: [[DIST_INNER_LOOP_END]]:
+// CHECK: br label %[[LOOP_EXIT:.+]]
+
+// loop exit
+// CHECK: [[LOOP_EXIT]]:
 // CHECK-DAG: call void @__kmpc_for_static_fini(
 // CHECK: ret
 
Index: test/OpenMP/distribute_parallel_for_codegen.cpp
===================================================================
--- test/OpenMP/distribute_parallel_for_codegen.cpp
+++ test/OpenMP/distribute_parallel_for_codegen.cpp
@@ -407,18 +407,16 @@
       a[i] = b[i] + c[i];
       // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]](
       // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
+      // LAMBDA-DAG: [[OMP_CAPT_EXPR:%.capture_expr.1]] = alloca
       // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
       // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
       // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
 
-      // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-      // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
 
-      // LAMBDA: [[DIST_OUTER_LOOP_HEADER]]:
       // check EUB for distribute
       // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-      // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
+      // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
       // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
       // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
       // LAMBDA-DAG: [[EUB_TRUE]]:
@@ -437,18 +435,9 @@
 
       // check exit condition
       // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-      // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
+      // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_CAPT_EXPR]],
       // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-      // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-      // LAMBDA: [[DIST_OUTER_LOOP_BODY]]:
-      // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-      // LAMBDA: [[DIST_INNER_LOOP_HEADER]]:
-      // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-      // LAMBDA-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-      // LAMBDA: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-      // LAMBDA: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+      // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
 
       // check that PrevLB and PrevUB are passed to the 'for'
       // LAMBDA: [[DIST_INNER_LOOP_BODY]]:
@@ -467,25 +456,39 @@
       // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
       // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
       // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-      // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER]]
-
-      // LAMBDA: [[DIST_INNER_LOOP_END]]:
-      // LAMBDA: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-      // LAMBDA: [[DIST_OUTER_LOOP_INC]]:
-      // check NextLB and NextUB
       // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
       // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
       // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
       // LAMBDA: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
       // LAMBDA-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
       // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
       // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
       // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-      // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER]]
 
-      // outer loop exit
-      // LAMBDA: [[DIST_OUTER_LOOP_END]]:
+      // Update UB
+      // LAMBDA-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+      // LAMBDA: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_CAPT_EXPR]],
+      // LAMBDA-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+      // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+      // LAMBDA-DAG: [[EUB_TRUE_1]]:
+      // LAMBDA: [[NUM_IT_3:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
+      // LAMBDA: br label %[[EUB_END_1:.+]]
+      // LAMBDA-DAG: [[EUB_FALSE_1]]:
+      // LAMBDA: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+      // LAMBDA: br label %[[EUB_END_1]]
+      // LAMBDA-DAG: [[EUB_END_1]]:
+      // LAMBDA-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+      // LAMBDA: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+      // Store LB in IV
+      // LAMBDA-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+      // LAMBDA: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+      // LAMBDA: [[DIST_INNER_LOOP_END]]:
+      // LAMBDA: br label %[[LOOP_EXIT:.+]]
+
+      // loop exit
+      // LAMBDA: [[LOOP_EXIT]]:
       // LAMBDA-DAG: call void @__kmpc_for_static_fini(
       // LAMBDA: ret
 
@@ -1155,18 +1158,17 @@
     a[i] = b[i] + c[i];
     // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
     // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
+    // CHECK-DAG: [[OMP_CAPT_EXPR:%.capture_expr.1]] = alloca
     // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
     // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
     // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
 
     // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-    // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
 
-    // CHECK: [[DIST_OUTER_LOOP_HEADER]]:
     // check EUB for distribute
     // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-    // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+    // CHECK: [[NUM_IT_1:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
     // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
     // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
     // CHECK-DAG: [[EUB_TRUE]]:
@@ -1185,18 +1187,9 @@
 
     // check exit condition
     // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-    // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
+    // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_CAPT_EXPR]],
     // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-    // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-    // CHECK: [[DIST_OUTER_LOOP_BODY]]:
-    // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-    // CHECK: [[DIST_INNER_LOOP_HEADER]]:
-    // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-    // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-    // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-    // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+    // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
 
     // check that PrevLB and PrevUB are passed to the 'for'
     // CHECK: [[DIST_INNER_LOOP_BODY]]:
@@ -1215,25 +1208,39 @@
     // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
     // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
     // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-    // CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
-
-    // CHECK: [[DIST_INNER_LOOP_END]]:
-    // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-    // CHECK: [[DIST_OUTER_LOOP_INC]]:
-    // check NextLB and NextUB
     // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
     // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
     // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
     // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
     // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
     // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
     // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
     // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-    // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
 
-    // outer loop exit
-    // CHECK: [[DIST_OUTER_LOOP_END]]:
+    // Update UB
+    // CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+    // CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_CAPT_EXPR]],
+    // CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+    // CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+    // CHECK-DAG: [[EUB_TRUE_1]]:
+    // CHECK: [[NUM_IT_3:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
+    // CHECK: br label %[[EUB_END_1:.+]]
+    // CHECK-DAG: [[EUB_FALSE_1]]:
+    // CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+    // CHECK: br label %[[EUB_END_1]]
+    // CHECK-DAG: [[EUB_END_1]]:
+    // CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+    // CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+    // Store LB in IV
+    // CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+    // CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+    // CHECK: [[DIST_INNER_LOOP_END]]:
+    // CHECK: br label %[[LOOP_EXIT:.+]]
+
+    // loop exit
+    // CHECK: [[LOOP_EXIT]]:
     // CHECK-DAG: call void @__kmpc_for_static_fini(
     // CHECK: ret
 
@@ -1868,18 +1875,17 @@
 
 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
+// CHECK-DAG: [[OMP_CAPT_EXPR:%.capture_expr.1]] = alloca
 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
 
 // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-// CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
 
-// CHECK: [[DIST_OUTER_LOOP_HEADER]]:
 // check EUB for distribute
 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-// CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+// CHECK: [[NUM_IT_1:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
 // CHECK-DAG: [[EUB_TRUE]]:
@@ -1898,18 +1904,9 @@
 
 // check exit condition
 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
+// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_CAPT_EXPR]],
 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-// CHECK: [[DIST_OUTER_LOOP_BODY]]:
-// CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-// CHECK: [[DIST_INNER_LOOP_HEADER]]:
-// CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-// CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-// CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-// CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
 
 // check that PrevLB and PrevUB are passed to the 'for'
 // CHECK: [[DIST_INNER_LOOP_BODY]]:
@@ -1928,25 +1925,39 @@
 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-// CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
-
-// CHECK: [[DIST_INNER_LOOP_END]]:
-// CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-// CHECK: [[DIST_OUTER_LOOP_INC]]:
-// check NextLB and NextUB
 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
 // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
 // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-// CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
 
-// outer loop exit
-// CHECK: [[DIST_OUTER_LOOP_END]]:
+// Update UB
+// CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+// CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_CAPT_EXPR]],
+// CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+// CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+// CHECK-DAG: [[EUB_TRUE_1]]:
+// CHECK: [[NUM_IT_3:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
+// CHECK: br label %[[EUB_END_1:.+]]
+// CHECK-DAG: [[EUB_FALSE_1]]:
+// CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+// CHECK: br label %[[EUB_END_1]]
+// CHECK-DAG: [[EUB_END_1]]:
+// CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+// CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+// Store LB in IV
+// CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+// CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+// CHECK: [[DIST_INNER_LOOP_END]]:
+// CHECK: br label %[[LOOP_EXIT:.+]]
+
+// loop exit
+// CHECK: [[LOOP_EXIT]]:
 // CHECK-DAG: call void @__kmpc_for_static_fini(
 // CHECK: ret
 
Index: lib/Serialization/ASTWriterStmt.cpp
===================================================================
--- lib/Serialization/ASTWriterStmt.cpp
+++ lib/Serialization/ASTWriterStmt.cpp
@@ -1854,6 +1854,8 @@
     Record.AddStmt(D->getCombinedCond());
     Record.AddStmt(D->getCombinedNextLowerBound());
     Record.AddStmt(D->getCombinedNextUpperBound());
+    Record.AddStmt(D->getCombinedDistCond());
+    Record.AddStmt(D->getCombinedParForInDistCond());
   }
   for (auto I : D->counters()) {
     Record.AddStmt(I);
Index: lib/Serialization/ASTReaderStmt.cpp
===================================================================
--- lib/Serialization/ASTReaderStmt.cpp
+++ lib/Serialization/ASTReaderStmt.cpp
@@ -1856,6 +1856,8 @@
     D->setCombinedCond(Record.readSubExpr());
     D->setCombinedNextLowerBound(Record.readSubExpr());
     D->setCombinedNextUpperBound(Record.readSubExpr());
+    D->setCombinedDistCond(Record.readSubExpr());
+    D->setCombinedParForInDistCond(Record.readSubExpr());
   }
   SmallVector<Expr *, 4> Sub;
   unsigned CollapsedNum = D->getCollapsedNumber();
Index: lib/Sema/SemaOpenMP.cpp
===================================================================
--- lib/Sema/SemaOpenMP.cpp
+++ lib/Sema/SemaOpenMP.cpp
@@ -354,7 +354,7 @@
       return OMPD_unknown;
     return std::next(Stack.back().first.rbegin())->Directive;
   }
-  
+
   /// Add requires decl to internal vector
   void addRequiresDecl(OMPRequiresDecl *RD) {
     RequiresDecls.push_back(RD);
@@ -381,7 +381,7 @@
     }
     return IsDuplicate;
   }
-  
+
   /// Set default data sharing attribute to none.
   void setDefaultDSANone(SourceLocation Loc) {
     assert(!isStackEmpty());
@@ -5201,6 +5201,12 @@
           ? SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), UB.get())
           : SemaRef.BuildBinOp(CurScope, CondLoc, BO_LT, IV.get(),
                                NumIterations.get());
+  ExprResult CombDistCond;
+  if (isOpenMPLoopBoundSharingDirective(DKind)) {
+    CombDistCond =
+        SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), LastIteration.get());
+  }
+
   ExprResult CombCond;
   if (isOpenMPLoopBoundSharingDirective(DKind)) {
     CombCond =
@@ -5275,7 +5281,7 @@
   // on PrevUB instead of NumIterations - used to implement 'for' when found
   // in combination with 'distribute', like in 'distribute parallel for'
   SourceLocation DistIncLoc = AStmt->getBeginLoc();
-  ExprResult DistCond, DistInc, PrevEUB;
+  ExprResult DistCond, DistInc, PrevEUB, ParForInDistCond;
   if (isOpenMPLoopBoundSharingDirective(DKind)) {
     DistCond = SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), UB.get());
     assert(DistCond.isUsable() && "distribute cond expr was not built");
@@ -5298,6 +5304,11 @@
     PrevEUB = SemaRef.BuildBinOp(CurScope, DistIncLoc, BO_Assign, UB.get(),
                                  CondOp.get());
     PrevEUB = SemaRef.ActOnFinishFullExpr(PrevEUB.get());
+
+    // Build IV <= PrevUB to be used by 'parallel for' when it is combined
+    // with a distribute directive and uses schedule(static, 1).
+    ParForInDistCond =
+        SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), PrevUB.get());
   }
 
   // Build updates and final values of the loop counters.
@@ -5421,6 +5432,8 @@
   Built.DistCombinedFields.Cond = CombCond.get();
   Built.DistCombinedFields.NLB = CombNextLB.get();
   Built.DistCombinedFields.NUB = CombNextUB.get();
+  Built.DistCombinedFields.DistCond = CombDistCond.get();
+  Built.DistCombinedFields.ParForInDistCond = ParForInDistCond.get();
 
   return NestedLoopCount;
 }
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -2006,7 +2006,7 @@
   RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
 
   // for combined 'distribute' and 'for' the increment expression of distribute
-  // is store in DistInc. For 'distribute' alone, it is in Inc.
+  // is stored in DistInc. For 'distribute' alone, it is in Inc.
   Expr *IncExpr;
   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
     IncExpr = S.getDistInc();
@@ -2357,6 +2357,42 @@
                                                          S.getDirectiveKind());
         };
         OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
+      } else if (RT.isStaticChunked(ScheduleKind.Schedule,
+                                    /* Chunked */ Chunk != nullptr) &&
+                 ScheduleKind.HasChunkOne &&
+                 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) {
+        if (isOpenMPSimdDirective(S.getDirectiveKind()))
+          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
+        CGOpenMPRuntime::StaticRTInput StaticInit(
+            IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
+            UB.getAddress(), ST.getAddress(), Chunk);
+        RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
+                             ScheduleKind, StaticInit);
+        JumpDest LoopExit =
+            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
+        // IV = LB;
+        EmitIgnoredExpr(S.getInit());
+
+        // Generate the following loop:
+        //
+        // while (IV <= PrevUB) {
+        //   BODY;
+        //   IV += ST;
+        // }
+        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCombinedParForInDistCond(),
+                         S.getDistInc(),
+                         [&S, LoopExit](CodeGenFunction &CGF) {
+                           CGF.EmitOMPLoopBody(S, LoopExit);
+                           CGF.EmitStopPoint(&S);
+                         },
+                         [&](CodeGenFunction &) {});
+        EmitBlock(LoopExit.getBlock());
+        // Tell the runtime we are done.
+        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
+                                                         S.getDirectiveKind());
+        };
+        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
       } else {
         const bool IsMonotonic =
             Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
@@ -3370,7 +3406,7 @@
                 ? S.getCombinedCond()
                 : S.getCond();
 
-        // for distribute alone,  codegen
+        // for distribute alone, codegen
         // while (idx <= UB) { BODY; ++idx; }
         // when combined with 'for' (e.g. as in 'distribute parallel for')
         // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
@@ -3382,6 +3418,53 @@
         EmitBlock(LoopExit.getBlock());
         // Tell the runtime we are done.
         RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind());
+      } else if (RT.isStaticChunked(ScheduleKind,
+                                    /* Chunked */ Chunk != nullptr) &&
+                 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) {
+        // Generate a single loop when distribute is in combination with
+        // other worksharing pragmas like parallel for.
+        if (isOpenMPSimdDirective(S.getDirectiveKind()))
+          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
+        CGOpenMPRuntime::StaticRTInput StaticInit(
+            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(), LB.getAddress(),
+            UB.getAddress(), ST.getAddress(), Chunk);
+        RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
+                                    StaticInit);
+
+        JumpDest LoopExit =
+            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
+        // UB = min(UB, GlobalUB);
+        EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
+
+        // IV = LB;
+        EmitIgnoredExpr(S.getCombinedInit());
+
+        // IV <= GlobalUB;
+        const Expr *Cond = S.getCombinedDistCond();
+
+        // Generate the following loop:
+        //
+        // while (IV <= GlobalUB) {
+        //   <CodeGen rest of pragma>(LB, UB);
+        //   LB += ST;
+        //   UB += ST;
+        //   UB = min(UB, GlobalUB);
+        //   IV = LB;
+        // }
+        //
+        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
+                         [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
+                           CodeGenLoop(CGF, S, LoopExit);
+                         },
+                         [&S](CodeGenFunction &CGF) {
+                           CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
+                           CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
+                           CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
+                           CGF.EmitIgnoredExpr(S.getCombinedInit());
+                         });
+        EmitBlock(LoopExit.getBlock());
+        // Tell the runtime we are done.
+        RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind());
       } else {
         // Emit the outer loop, which requests its work chunk [LB..UB] from
         // runtime and runs the inner loop to process it.
Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -4249,6 +4249,7 @@
     OpenMPScheduleTy &ScheduleKind,
     llvm::Value *&Chunk) const {
   ScheduleKind.Schedule = OMPC_SCHEDULE_static;
+  ScheduleKind.HasChunkOne = true;
   Chunk = CGF.Builder.getIntN(CGF.getContext().getTypeSize(
       S.getIterationVariable()->getType()), 1);
 }
Index: lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.h
+++ lib/CodeGen/CGOpenMPRuntime.h
@@ -890,6 +890,20 @@
   virtual bool isStaticNonchunked(OpenMPDistScheduleClauseKind ScheduleKind,
                                   bool Chunked) const;
 
+  /// Check if the specified \a ScheduleKind is static chunked.
+  /// \param ScheduleKind Schedule kind specified in the 'schedule' clause.
+  /// \param Chunked True if chunk is specified in the clause.
+  ///
+  virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
+                               bool Chunked) const;
+
+  /// Check if the specified \a ScheduleKind is static chunked.
+  /// \param ScheduleKind Schedule kind specified in the 'dist_schedule' clause.
+  /// \param Chunked True if chunk is specified in the clause.
+  ///
+  virtual bool isStaticChunked(OpenMPDistScheduleClauseKind ScheduleKind,
+                               bool Chunked) const;
+
   /// Check if the specified \a ScheduleKind is dynamic.
   /// This kind of worksharing directive is emitted without outer loop.
   /// \param ScheduleKind Schedule Kind specified in the 'schedule' clause.
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -3292,6 +3292,18 @@
   return Schedule == OMP_dist_sch_static;
 }
 
+bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
+                                      bool Chunked) const {
+  OpenMPSchedType Schedule =
+      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
+  return Schedule == OMP_sch_static_chunked;
+}
+
+bool CGOpenMPRuntime::isStaticChunked(
+    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
+  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
+  return Schedule == OMP_dist_sch_static_chunked;
+}
 
 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
   OpenMPSchedType Schedule =
Index: lib/AST/StmtOpenMP.cpp
===================================================================
--- lib/AST/StmtOpenMP.cpp
+++ lib/AST/StmtOpenMP.cpp
@@ -1079,6 +1079,8 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   Dir->HasCancel = HasCancel;
   return Dir;
 }
@@ -1145,6 +1147,8 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   return Dir;
 }
 
@@ -1457,6 +1461,8 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   return Dir;
 }
 
@@ -1524,6 +1530,8 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   Dir->HasCancel = HasCancel;
   return Dir;
 }
@@ -1670,6 +1678,8 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   Dir->HasCancel = HasCancel;
   return Dir;
 }
@@ -1741,6 +1751,8 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   return Dir;
 }
 
Index: include/clang/Basic/OpenMPKinds.h
===================================================================
--- include/clang/Basic/OpenMPKinds.h
+++ include/clang/Basic/OpenMPKinds.h
@@ -125,6 +125,7 @@
   OpenMPScheduleClauseKind Schedule = OMPC_SCHEDULE_unknown;
   OpenMPScheduleClauseModifier M1 = OMPC_SCHEDULE_MODIFIER_unknown;
   OpenMPScheduleClauseModifier M2 = OMPC_SCHEDULE_MODIFIER_unknown;
+  bool HasChunkOne = false;
 };
 
 OpenMPDirectiveKind getOpenMPDirectiveKind(llvm::StringRef Str);
Index: include/clang/AST/StmtOpenMP.h
===================================================================
--- include/clang/AST/StmtOpenMP.h
+++ include/clang/AST/StmtOpenMP.h
@@ -392,9 +392,11 @@
     CombinedConditionOffset = 25,
     CombinedNextLowerBoundOffset = 26,
     CombinedNextUpperBoundOffset = 27,
+    CombinedDistConditionOffset = 28,
+    CombinedParForInDistConditionOffset = 29,
     // Offset to the end (and start of the following counters/updates/finals
     // arrays) for combined distribute loop directives.
-    CombinedDistributeEnd = 28,
+    CombinedDistributeEnd = 30,
   };
 
   /// Get the counters storage.
@@ -605,6 +607,17 @@
            "expected loop bound sharing directive");
     *std::next(child_begin(), CombinedNextUpperBoundOffset) = CombNUB;
   }
+  void setCombinedDistCond(Expr *CombDistCond) {
+    assert(isOpenMPLoopBoundSharingDirective(getDirectiveKind()) &&
+           "expected loop bound distribute sharing directive");
+    *std::next(child_begin(), CombinedDistConditionOffset) = CombDistCond;
+  }
+  void setCombinedParForInDistCond(Expr *CombParForInDistCond) {
+    assert(isOpenMPLoopBoundSharingDirective(getDirectiveKind()) &&
+           "expected loop bound distribute sharing directive");
+    *std::next(child_begin(),
+               CombinedParForInDistConditionOffset) = CombParForInDistCond;
+  }
   void setCounters(ArrayRef<Expr *> A);
   void setPrivateCounters(ArrayRef<Expr *> A);
   void setInits(ArrayRef<Expr *> A);
@@ -637,6 +650,13 @@
     /// Update of UpperBound for statically scheduled omp loops for
     /// outer loop in combined constructs (e.g. 'distribute parallel for')
     Expr *NUB;
+    /// Distribute loop condition used when composing 'omp distribute'
+    /// with 'omp for' in the same construct when schedule is chunked.
+    Expr *DistCond;
+    /// 'omp parallel for' loop condition used when composed with
+    /// 'omp distribute' in the same construct and when schedule is
+    /// chunked and the chunk size is 1.
+    Expr *ParForInDistCond;
   };
 
   /// The expressions built for the OpenMP loop CodeGen for the
@@ -754,6 +774,8 @@
       DistCombinedFields.Cond = nullptr;
       DistCombinedFields.NLB = nullptr;
       DistCombinedFields.NUB = nullptr;
+      DistCombinedFields.DistCond = nullptr;
+      DistCombinedFields.ParForInDistCond = nullptr;
     }
   };
 
@@ -922,6 +944,18 @@
     return const_cast<Expr *>(reinterpret_cast<const Expr *>(
         *std::next(child_begin(), CombinedNextUpperBoundOffset)));
   }
+  Expr *getCombinedDistCond() const {
+    assert(isOpenMPLoopBoundSharingDirective(getDirectiveKind()) &&
+           "expected loop bound distribute sharing directive");
+    return const_cast<Expr *>(reinterpret_cast<const Expr *>(
+        *std::next(child_begin(), CombinedDistConditionOffset)));
+  }
+  Expr *getCombinedParForInDistCond() const {
+    assert(isOpenMPLoopBoundSharingDirective(getDirectiveKind()) &&
+           "expected loop bound distribute sharing directive");
+    return const_cast<Expr *>(reinterpret_cast<const Expr *>(
+        *std::next(child_begin(), CombinedParForInDistConditionOffset)));
+  }
   const Stmt *getBody() const {
     // This relies on the loop form is already checked by Sema.
     const Stmt *Body =
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to