https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/104748
>From a0f2307ce374355449877178cea70049b9e861d5 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov <ivanov.i...@m.titech.ac.jp> Date: Sun, 4 Aug 2024 17:33:52 +0900 Subject: [PATCH] Add workshare loop wrapper lowerings Bufferize test Bufferize test Bufferize test Add test for should use workshare lowering Add integration test for workshare One more integration test Add test for cfg workshare bufferization Fix tests Test coverage for all changes Integration tests bufferize fix --- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 4 +- .../Transforms/OptimizedBufferization.cpp | 10 +- flang/test/HLFIR/bufferize-workshare.fir | 57 ++++++ .../OpenMP/workshare-array-array-assign.f90 | 34 ++++ .../Integration/OpenMP/workshare-axpy.f90 | 57 ++++++ .../OpenMP/workshare-scalar-array-assign.f90 | 45 +++++ .../OpenMP/workshare-scalar-array-mul.f90 | 65 +++++++ .../OpenMP/should-use-workshare-lowering.mlir | 162 ++++++++++++++++++ 8 files changed, 430 insertions(+), 4 deletions(-) create mode 100644 flang/test/HLFIR/bufferize-workshare.fir create mode 100644 flang/test/Integration/OpenMP/workshare-array-array-assign.f90 create mode 100644 flang/test/Integration/OpenMP/workshare-axpy.f90 create mode 100644 flang/test/Integration/OpenMP/workshare-scalar-array-assign.f90 create mode 100644 flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90 create mode 100644 flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp index 07794828fce267..1848dbe2c7a2c2 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp @@ -26,6 +26,7 @@ #include "flang/Optimizer/HLFIR/HLFIRDialect.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/HLFIR/Passes.h" +#include "flang/Optimizer/OpenMP/Passes.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/Dominance.h" #include "mlir/IR/PatternMatch.h" @@ -792,7 +793,8 @@ struct ElementalOpConversion // Generate a loop nest looping around the fir.elemental shape and clone // fir.elemental region inside the inner loop. hlfir::LoopNest loopNest = - hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered()); + hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(), + flangomp::shouldUseWorkshareLowering(elemental)); auto insPt = builder.saveInsertionPoint(); builder.setInsertionPointToStart(loopNest.body); auto yield = hlfir::inlineElementalOp(loc, builder, elemental, diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp index 166649d955dabd..a0160b233e3cd1 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp @@ -20,6 +20,7 @@ #include "flang/Optimizer/HLFIR/HLFIRDialect.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/HLFIR/Passes.h" +#include "flang/Optimizer/OpenMP/Passes.h" #include "flang/Optimizer/Transforms/Utils.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/IR/Dominance.h" @@ -482,7 +483,8 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite( // Generate a loop nest looping around the hlfir.elemental shape and clone // hlfir.elemental region inside the inner loop hlfir::LoopNest loopNest = - hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered()); + hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(), + flangomp::shouldUseWorkshareLowering(elemental)); builder.setInsertionPointToStart(loopNest.body); auto yield = hlfir::inlineElementalOp(loc, builder, elemental, loopNest.oneBasedIndices); @@ -553,7 +555,8 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite( llvm::SmallVector<mlir::Value> extents = hlfir::getIndexExtents(loc, builder, shape); hlfir::LoopNest loopNest = - hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true); + hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true, + flangomp::shouldUseWorkshareLowering(assign)); builder.setInsertionPointToStart(loopNest.body); auto arrayElement = hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices); @@ -651,7 +654,8 @@ llvm::LogicalResult VariableAssignBufferization::matchAndRewrite( llvm::SmallVector<mlir::Value> extents = hlfir::getIndexExtents(loc, builder, shape); hlfir::LoopNest loopNest = - hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true); + hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true, + flangomp::shouldUseWorkshareLowering(assign)); builder.setInsertionPointToStart(loopNest.body); auto rhsArrayElement = hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices); diff --git a/flang/test/HLFIR/bufferize-workshare.fir b/flang/test/HLFIR/bufferize-workshare.fir new file mode 100644 index 00000000000000..af5abb381937ec --- /dev/null +++ b/flang/test/HLFIR/bufferize-workshare.fir @@ -0,0 +1,57 @@ +// RUN: fir-opt --bufferize-hlfir %s | FileCheck %s + +// CHECK-LABEL: func.func @simple( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<42xi32>>) { +// CHECK: omp.parallel { +// CHECK: omp.workshare { +// CHECK: %[[VAL_1:.*]] = arith.constant 42 : index +// CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32 +// CHECK: %[[VAL_3:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_3]]) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>) +// CHECK: %[[VAL_5:.*]] = fir.allocmem !fir.array<42xi32> {bindc_name = ".tmp.array", uniq_name = ""} +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]](%[[VAL_3]]) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>) +// CHECK: %[[VAL_7:.*]] = arith.constant true +// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index +// CHECK: omp.workshare.loop_wrapper { +// CHECK: omp.loop_nest (%[[VAL_9:.*]]) : index = (%[[VAL_8]]) to (%[[VAL_1]]) inclusive step (%[[VAL_8]]) { +// CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_9]]) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32> +// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32> +// CHECK: %[[VAL_12:.*]] = arith.subi %[[VAL_11]], %[[VAL_2]] : i32 +// CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_9]]) : (!fir.heap<!fir.array<42xi32>>, index) -> !fir.ref<i32> +// CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_13]] temporary_lhs : i32, !fir.ref<i32> +// CHECK: omp.yield +// CHECK: } +// CHECK: } +// CHECK: %[[VAL_14:.*]] = fir.undefined tuple<!fir.heap<!fir.array<42xi32>>, i1> +// CHECK: %[[VAL_15:.*]] = fir.insert_value %[[VAL_14]], %[[VAL_7]], [1 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, i1) -> tuple<!fir.heap<!fir.array<42xi32>>, i1> +// CHECK: %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_6]]#0, [0 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, !fir.heap<!fir.array<42xi32>>) -> tuple<!fir.heap<!fir.array<42xi32>>, i1> +// CHECK: hlfir.assign %[[VAL_6]]#0 to %[[VAL_4]]#0 : !fir.heap<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>> +// CHECK: fir.freemem %[[VAL_6]]#0 : !fir.heap<!fir.array<42xi32>> +// CHECK: omp.terminator +// CHECK: } +// CHECK: omp.terminator +// CHECK: } +// CHECK: return +// CHECK: } +func.func @simple(%arg: !fir.ref<!fir.array<42xi32>>) { + omp.parallel { + omp.workshare { + %c42 = arith.constant 42 : index + %c1_i32 = arith.constant 1 : i32 + %shape = fir.shape %c42 : (index) -> !fir.shape<1> + %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>) + %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> { + ^bb0(%i: index): + %ref = hlfir.designate %array#0 (%i) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32> + %val = fir.load %ref : !fir.ref<i32> + %sub = arith.subi %val, %c1_i32 : i32 + hlfir.yield_element %sub : i32 + } + hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>> + hlfir.destroy %elemental : !hlfir.expr<42xi32> + omp.terminator + } + omp.terminator + } + return +} diff --git a/flang/test/Integration/OpenMP/workshare-array-array-assign.f90 b/flang/test/Integration/OpenMP/workshare-array-array-assign.f90 new file mode 100644 index 00000000000000..e9ec5d9175beb5 --- /dev/null +++ b/flang/test/Integration/OpenMP/workshare-array-array-assign.f90 @@ -0,0 +1,34 @@ +!===----------------------------------------------------------------------===! +! This directory can be used to add Integration tests involving multiple +! stages of the compiler (for eg. from Fortran to LLVM IR). It should not +! contain executable tests. We should only add tests here sparingly and only +! if there is no other way to test. Repeat this message in each test that is +! added to this directory and sub-directories. +!===----------------------------------------------------------------------===! + +!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR +!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR + +subroutine sb1(x, y) + integer :: x(:) + integer :: y(:) + !$omp parallel workshare + x = y + !$omp end parallel workshare +end subroutine + +! HLFIR: omp.parallel { +! HLFIR: omp.workshare { +! HLFIR: hlfir.assign +! HLFIR: omp.terminator +! HLFIR: } +! HLFIR: omp.terminator +! HLFIR: } + +! FIR: omp.parallel { +! FIR: omp.wsloop nowait { +! FIR: omp.loop_nest +! FIR: } +! FIR: omp.barrier +! FIR: omp.terminator +! FIR: } diff --git a/flang/test/Integration/OpenMP/workshare-axpy.f90 b/flang/test/Integration/OpenMP/workshare-axpy.f90 new file mode 100644 index 00000000000000..0c4524f8552906 --- /dev/null +++ b/flang/test/Integration/OpenMP/workshare-axpy.f90 @@ -0,0 +1,57 @@ +!===----------------------------------------------------------------------===! +! This directory can be used to add Integration tests involving multiple +! stages of the compiler (for eg. from Fortran to LLVM IR). It should not +! contain executable tests. We should only add tests here sparingly and only +! if there is no other way to test. Repeat this message in each test that is +! added to this directory and sub-directories. +!===----------------------------------------------------------------------===! + +!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR +!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR + +subroutine sb1(a, x, y, z) + integer :: a + integer :: x(:) + integer :: y(:) + integer :: z(:) + !$omp parallel workshare + z = a * x + y + !$omp end parallel workshare +end subroutine + +! HLFIR: func.func @_QPsb1 +! HLFIR: omp.parallel { +! HLFIR: omp.workshare { +! HLFIR: hlfir.elemental {{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> { +! HLFIR: hlfir.elemental {{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> { +! HLFIR: hlfir.assign +! HLFIR: hlfir.destroy +! HLFIR: hlfir.destroy +! HLFIR-NOT: omp.barrier +! HLFIR: omp.terminator +! HLFIR: } +! HLFIR-NOT: omp.barrier +! HLFIR: omp.terminator +! HLFIR: } +! HLFIR: return +! HLFIR: } +! HLFIR:} + + +! FIR: func.func private @_workshare_copy_heap_Uxi32(%{{[a-z0-9]+}}: !fir.ref<!fir.heap<!fir.array<?xi32>>>, %{{[a-z0-9]+}}: !fir.ref<!fir.heap<!fir.array<?xi32>>> +! FIR: func.func private @_workshare_copy_i32(%{{[a-z0-9]+}}: !fir.ref<i32>, %{{[a-z0-9]+}}: !fir.ref<i32> + +! FIR: func.func @_QPsb1 +! FIR: omp.parallel { +! FIR: omp.single copyprivate(%9 -> @_workshare_copy_i32 : !fir.ref<i32>, %10 -> @_workshare_copy_heap_Uxi32 : !fir.ref<!fir.heap<!fir.array<?xi32>>>) { +! FIR: fir.allocmem +! FIR: omp.wsloop { +! FIR: omp.loop_nest +! FIR: omp.single nowait { +! FIR: fir.call @_FortranAAssign +! FIR: fir.freemem +! FIR: omp.terminator +! FIR: } +! FIR: omp.barrier +! FIR: omp.terminator +! FIR: } diff --git a/flang/test/Integration/OpenMP/workshare-scalar-array-assign.f90 b/flang/test/Integration/OpenMP/workshare-scalar-array-assign.f90 new file mode 100644 index 00000000000000..6c180cd639997c --- /dev/null +++ b/flang/test/Integration/OpenMP/workshare-scalar-array-assign.f90 @@ -0,0 +1,45 @@ +!===----------------------------------------------------------------------===! +! This directory can be used to add Integration tests involving multiple +! stages of the compiler (for eg. from Fortran to LLVM IR). It should not +! contain executable tests. We should only add tests here sparingly and only +! if there is no other way to test. Repeat this message in each test that is +! added to this directory and sub-directories. +!===----------------------------------------------------------------------===! + +!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR +!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR + +subroutine sb1(a, x) + integer :: a + integer :: x(:) + !$omp parallel workshare + x = a + !$omp end parallel workshare +end subroutine + +! HLFIR: omp.parallel { +! HLFIR: omp.workshare { +! HLFIR: %[[SCALAR:.*]] = fir.load %1#0 : !fir.ref<i32> +! HLFIR: hlfir.assign %[[SCALAR]] to +! HLFIR: omp.terminator +! HLFIR: } +! HLFIR: omp.terminator +! HLFIR: } + +! FIR: omp.parallel { +! FIR: %[[SCALAR_ALLOCA:.*]] = fir.alloca i32 +! FIR: omp.single copyprivate(%[[SCALAR_ALLOCA]] -> @_workshare_copy_i32 : !fir.ref<i32>) { +! FIR: %[[SCALAR_LOAD:.*]] = fir.load %{{.*}} : !fir.ref<i32> +! FIR: fir.store %[[SCALAR_LOAD]] to %[[SCALAR_ALLOCA]] : !fir.ref<i32> +! FIR: omp.terminator +! FIR: } +! FIR: %[[SCALAR_RELOAD:.*]] = fir.load %[[SCALAR_ALLOCA]] : !fir.ref<i32> +! FIR: %6:3 = fir.box_dims %3, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index) +! FIR: omp.wsloop nowait { +! FIR: omp.loop_nest (%arg2) : index = (%c1) to (%6#1) inclusive step (%c1) { +! FIR: fir.store %[[SCALAR_RELOAD]] +! FIR: omp.yield +! FIR: } +! FIR: } +! FIR: omp.barrier +! FIR: omp.terminator diff --git a/flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90 b/flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90 new file mode 100644 index 00000000000000..9b8ef66b48f47d --- /dev/null +++ b/flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90 @@ -0,0 +1,65 @@ +!===----------------------------------------------------------------------===! +! This directory can be used to add Integration tests involving multiple +! stages of the compiler (for eg. from Fortran to LLVM IR). It should not +! contain executable tests. We should only add tests here sparingly and only +! if there is no other way to test. Repeat this message in each test that is +! added to this directory and sub-directories. +!===----------------------------------------------------------------------===! + +!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR-O3 +!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR-O3 + +!RUN: %flang_fc1 -emit-hlfir -fopenmp -O0 %s -o - | FileCheck %s --check-prefix HLFIR-O0 +!RUN: %flang_fc1 -emit-fir -fopenmp -O0 %s -o - | FileCheck %s --check-prefix FIR-O0 + +program test + real :: arr_01(10) + !$omp parallel workshare + arr_01 = arr_01*2 + !$omp end parallel workshare +end program + +! HLFIR-O3: omp.parallel { +! HLFIR-O3: omp.workshare { +! HLFIR-O3: hlfir.elemental +! HLFIR-O3: hlfir.assign +! HLFIR-O3: hlfir.destroy +! HLFIR-O3: omp.terminator +! HLFIR-O3: omp.terminator + +! FIR-O3: omp.parallel { +! FIR-O3: omp.wsloop nowait { +! FIR-O3: omp.loop_nest +! FIR-O3: omp.barrier +! FIR-O3: omp.terminator + +! HLFIR-O0: omp.parallel { +! HLFIR-O0: omp.workshare { +! HLFIR-O0: hlfir.elemental +! HLFIR-O0: hlfir.assign +! HLFIR-O0: hlfir.destroy +! HLFIR-O0: omp.terminator +! HLFIR-O0: omp.terminator + +! Check the copyprivate copy function +! FIR-O0: func.func private @_workshare_copy_heap_{{.*}}(%[[DST:.*]]: {{.*}}, %[[SRC:.*]]: {{.*}}) +! FIR-O0: fir.load %[[SRC]] +! FIR-O0: fir.store {{.*}} to %[[DST]] + +! Check that we properly handle the temporary array +! FIR-O0: omp.parallel { +! FIR-O0: %[[CP:.*]] = fir.alloca !fir.heap<!fir.array<10xf32>> +! FIR-O0: omp.single copyprivate(%[[CP]] -> @_workshare_copy_heap_ +! FIR-O0: fir.allocmem +! FIR-O0: fir.store +! FIR-O0: omp.terminator +! FIR-O0: fir.load %[[CP]] +! FIR-O0: omp.wsloop { +! FIR-O0: omp.loop_nest +! FIR-O0: omp.yield +! FIR-O0: omp.single nowait { +! FIR-O0: fir.call @_FortranAAssign +! FIR-O0: fir.freemem +! FIR-O0: omp.terminator +! FIR-O0: omp.barrier +! FIR-O0: omp.terminator diff --git a/flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir b/flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir new file mode 100644 index 00000000000000..91b08123cce422 --- /dev/null +++ b/flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir @@ -0,0 +1,162 @@ +// RUN: fir-opt --bufferize-hlfir %s | FileCheck %s + +// Checks that we correctly identify when to use the lowering to +// omp.workshare.loop_wrapper + +// CHECK-LABEL: @should_parallelize_0 +// CHECK: omp.workshare.loop_wrapper +func.func @should_parallelize_0(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) { + omp.workshare { + %c42 = arith.constant 42 : index + %c1_i32 = arith.constant 1 : i32 + %shape = fir.shape %c42 : (index) -> !fir.shape<1> + %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>) + %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> { + ^bb0(%i: index): + hlfir.yield_element %c1_i32 : i32 + } + hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>> + hlfir.destroy %elemental : !hlfir.expr<42xi32> + omp.terminator + } + return +} + +// CHECK-LABEL: @should_parallelize_1 +// CHECK: omp.workshare.loop_wrapper +func.func @should_parallelize_1(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) { + omp.parallel { + omp.workshare { + %c42 = arith.constant 42 : index + %c1_i32 = arith.constant 1 : i32 + %shape = fir.shape %c42 : (index) -> !fir.shape<1> + %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>) + %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> { + ^bb0(%i: index): + hlfir.yield_element %c1_i32 : i32 + } + hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>> + hlfir.destroy %elemental : !hlfir.expr<42xi32> + omp.terminator + } + omp.terminator + } + return +} + + +// CHECK-LABEL: @should_not_parallelize_0 +// CHECK-NOT: omp.workshare.loop_wrapper +func.func @should_not_parallelize_0(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) { + omp.workshare { + omp.single { + %c42 = arith.constant 42 : index + %c1_i32 = arith.constant 1 : i32 + %shape = fir.shape %c42 : (index) -> !fir.shape<1> + %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>) + %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> { + ^bb0(%i: index): + hlfir.yield_element %c1_i32 : i32 + } + hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>> + hlfir.destroy %elemental : !hlfir.expr<42xi32> + omp.terminator + } + omp.terminator + } + return +} + +// CHECK-LABEL: @should_not_parallelize_1 +// CHECK-NOT: omp.workshare.loop_wrapper +func.func @should_not_parallelize_1(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) { + omp.workshare { + omp.critical { + %c42 = arith.constant 42 : index + %c1_i32 = arith.constant 1 : i32 + %shape = fir.shape %c42 : (index) -> !fir.shape<1> + %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>) + %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> { + ^bb0(%i: index): + hlfir.yield_element %c1_i32 : i32 + } + hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>> + hlfir.destroy %elemental : !hlfir.expr<42xi32> + omp.terminator + } + omp.terminator + } + return +} + +// CHECK-LABEL: @should_not_parallelize_2 +// CHECK-NOT: omp.workshare.loop_wrapper +func.func @should_not_parallelize_2(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) { + omp.workshare { + omp.parallel { + %c42 = arith.constant 42 : index + %c1_i32 = arith.constant 1 : i32 + %shape = fir.shape %c42 : (index) -> !fir.shape<1> + %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>) + %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> { + ^bb0(%i: index): + hlfir.yield_element %c1_i32 : i32 + } + hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>> + hlfir.destroy %elemental : !hlfir.expr<42xi32> + omp.terminator + } + omp.terminator + } + return +} + +// CHECK-LABEL: @should_not_parallelize_3 +// CHECK-NOT: omp.workshare.loop_wrapper +func.func @should_not_parallelize_3(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) { + omp.workshare { + omp.parallel { + omp.workshare { + omp.parallel { + %c42 = arith.constant 42 : index + %c1_i32 = arith.constant 1 : i32 + %shape = fir.shape %c42 : (index) -> !fir.shape<1> + %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>) + %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> { + ^bb0(%i: index): + hlfir.yield_element %c1_i32 : i32 + } + hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>> + hlfir.destroy %elemental : !hlfir.expr<42xi32> + omp.terminator + } + omp.terminator + } + omp.terminator + } + omp.terminator + } + return +} + +// CHECK-LABEL: @should_not_parallelize_4 +// CHECK-NOT: omp.workshare.loop_wrapper +func.func @should_not_parallelize_4(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) { + omp.workshare { + ^bb1: + %c42 = arith.constant 42 : index + %c1_i32 = arith.constant 1 : i32 + %shape = fir.shape %c42 : (index) -> !fir.shape<1> + %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>) + %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> { + ^bb0(%i: index): + hlfir.yield_element %c1_i32 : i32 + } + hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>> + hlfir.destroy %elemental : !hlfir.expr<42xi32> + cf.br ^bb2 + ^bb2: + omp.terminator + } + return +} _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits