https://github.com/vzakhari updated https://github.com/llvm/llvm-project/pull/134002
>From 5c83d849a7129840ad8d8accd83e72836d1b2697 Mon Sep 17 00:00:00 2001 From: Slava Zakharin <szakha...@nvidia.com> Date: Tue, 1 Apr 2025 15:55:30 -0700 Subject: [PATCH 1/3] [flang] Added driver options for arrays repacking. Added options: * -f[no-]repack-arrays * -f[no-]stack-repack-arrays * -frepack-arrays-contiguity=whole/innermost --- clang/include/clang/Driver/Options.td | 62 ++++++++++++++++++- clang/lib/Driver/ToolChains/Flang.cpp | 30 ++++++--- flang/docs/ArrayRepacking.md | 19 +++--- flang/include/flang/Lower/LoweringOptions.def | 5 ++ flang/lib/Frontend/CompilerInvocation.cpp | 14 +++++ flang/lib/Lower/ConvertVariable.cpp | 2 +- .../test/Driver/frepack-arrays-contiguity.f90 | 27 ++++++++ flang/test/Driver/frepack-arrays.f90 | 24 +++++++ flang/test/Driver/fstack-repack-arrays.f90 | 24 +++++++ flang/test/Lower/repack-arrays.f90 | 8 +-- flang/tools/bbc/bbc.cpp | 12 +++- 11 files changed, 199 insertions(+), 28 deletions(-) create mode 100644 flang/test/Driver/frepack-arrays-contiguity.f90 create mode 100644 flang/test/Driver/frepack-arrays.f90 create mode 100644 flang/test/Driver/fstack-repack-arrays.f90 diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index e69b804de63b5..43bcdc6d1111f 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6825,7 +6825,6 @@ defm real_8_real_10 : BooleanFFlag<"real-8-real-10">, Group<gfortran_Group>; defm real_8_real_16 : BooleanFFlag<"real-8-real-16">, Group<gfortran_Group>; defm real_8_real_4 : BooleanFFlag<"real-8-real-4">, Group<gfortran_Group>; defm recursive : BooleanFFlag<"recursive">, Group<gfortran_Group>; -defm repack_arrays : BooleanFFlag<"repack-arrays">, Group<gfortran_Group>; defm second_underscore : BooleanFFlag<"second-underscore">, Group<gfortran_Group>; defm sign_zero : BooleanFFlag<"sign-zero">, Group<gfortran_Group>; defm whole_file : BooleanFFlag<"whole-file">, Group<gfortran_Group>; @@ -6967,6 +6966,51 @@ defm unsigned 
: OptInFC1FFlag<"unsigned", "Enables UNSIGNED type">; def fno_automatic : Flag<["-"], "fno-automatic">, Group<f_Group>, HelpText<"Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE">; +defm repack_arrays + : BoolOptionWithoutMarshalling< + "f", "repack-arrays", PosFlag<SetTrue, [], [], "Pack">, + NegFlag<SetFalse, [], [], "Do not pack">, + BothFlags<[], [], + " non-contiguous assumed shape dummy arrays into " + "contiguous memory">>, + DocBrief<[{Create temporary copies of non-contiguous assumed shape dummy +arrays in subprogram prologues, and destroy them in subprotram epilogues. +The temporary copy is initialized with values from the original array +in the prologue, if needed. In the epilogue, the current values +in the temporary array are copied into the original array, if needed. + +Accessing the contiguous temporary in the program code may result +in faster execution compared to accessing elements of the original array, +when they are sparse in memory. At the same time, the overhead +of copying values between the original and the temporary arrays +may be significant, which may slow down some programs. + +Enabling array repacking may also change the behavior of certain +programs: +* The copy actions may introduce a data race in valid OpenACC/OpenMP programs. + For example, if different threads execute the same subprogram + with a non-contiguous assumed shape dummy array, and the different threads + access unrelated parts of the array, then the whole array copy + made in each thread will cause a data race. +* OpenACC/OpenMP offload programs may behave incorrectly with regards + to the device data environment, due to the fact that the original + array and the temporary may have different presence status on the device. +* ``IS_CONTIGUOUS`` intrinsic may return ``TRUE`` with the array repacking + enabled, whereas it would return ``FALSE`` with the repacking disabled. 
+* The result of ``LOC`` intrinsic applied to an actual argument associated + with a non-contiguous assumed shape dummy array, may be different + from the result of ``LOC`` applied to the dummy array.}]>; + +def frepack_arrays_contiguity_EQ + : Joined<["-"], "frepack-arrays-contiguity=">, + Group<f_Group>, + Values<"whole,innermost">, + HelpText< + "When -frepack-arrays is in effect, 'whole' enables " + "repacking for arrays that are non-contiguous in any dimension, " + "'innermost' enables repacking for arrays that are non-contiguous " + "in the innermost dimension (the default)">; + defm save_main_program : BoolOptionWithoutMarshalling<"f", "save-main-program", PosFlag<SetTrue, [], [], "Place all main program variables in static memory (otherwise scalars may be placed on the stack)">, @@ -6980,6 +7024,22 @@ defm loop_versioning : BoolOptionWithoutMarshalling<"f", "version-loops-for-stri PosFlag<SetTrue, [], [ClangOption], "Create unit-strided versions of loops">, NegFlag<SetFalse, [], [ClangOption], "Do not create unit-strided loops (default)">>; +defm stack_repack_arrays + : BoolOptionWithoutMarshalling< + "f", "stack-repack-arrays", + PosFlag<SetTrue, [], [], + "Attempt to allocate array temporaries created under " + "-frepack-arrays on the stack">, + NegFlag< + SetFalse, [], [], + "Allocate -frepack-arrays temporaries on the heap (default)">>, + DocBrief<[{Controls whether the array temporaries created under +**-frepack-arrays** are allocated on the stack or on the heap. + +By default, the heap is used. Allocations of the polymorphic types +are always done on the heap, though this may change in future releases. 
+ }]>; + def fhermetic_module_files : Flag<["-"], "fhermetic-module-files">, Group<f_Group>, HelpText<"Emit hermetic module files (no nested USE association)">; diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 8312234e33a64..96e2486da764c 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -157,16 +157,26 @@ void Flang::addCodegenOptions(const ArgList &Args, if (shouldLoopVersion(Args)) CmdArgs.push_back("-fversion-loops-for-stride"); - Args.addAllArgs(CmdArgs, - {options::OPT_fdo_concurrent_to_openmp_EQ, - options::OPT_flang_experimental_hlfir, - options::OPT_flang_deprecated_no_hlfir, - options::OPT_fno_ppc_native_vec_elem_order, - options::OPT_fppc_native_vec_elem_order, - options::OPT_finit_global_zero, - options::OPT_fno_init_global_zero, options::OPT_ftime_report, - options::OPT_ftime_report_EQ, options::OPT_funroll_loops, - options::OPT_fno_unroll_loops}); + for (const auto &arg : + Args.getAllArgValues(options::OPT_frepack_arrays_contiguity_EQ)) + if (arg.compare("whole") != 0 && arg.compare("innermost") != 0) { + getToolChain().getDriver().Diag(diag::err_drv_unsupported_option_argument) + << "-frepack-arrays-contiguity=" << arg; + } + + Args.addAllArgs( + CmdArgs, + {options::OPT_fdo_concurrent_to_openmp_EQ, + options::OPT_flang_experimental_hlfir, + options::OPT_flang_deprecated_no_hlfir, + options::OPT_fno_ppc_native_vec_elem_order, + options::OPT_fppc_native_vec_elem_order, options::OPT_finit_global_zero, + options::OPT_fno_init_global_zero, options::OPT_frepack_arrays, + options::OPT_fno_repack_arrays, + options::OPT_frepack_arrays_contiguity_EQ, + options::OPT_fstack_repack_arrays, options::OPT_fno_stack_repack_arrays, + options::OPT_ftime_report, options::OPT_ftime_report_EQ, + options::OPT_funroll_loops, options::OPT_fno_unroll_loops}); } void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const { diff --git a/flang/docs/ArrayRepacking.md 
b/flang/docs/ArrayRepacking.md index 87cfc5d1bb4bc..7de599f293e40 100755 --- a/flang/docs/ArrayRepacking.md +++ b/flang/docs/ArrayRepacking.md @@ -39,13 +39,13 @@ Having these results it seems reasonable to provide support for arrays repacking #### Facts and guesses about the implementation -The dynamic checks for continuity and the array copy code is located completely in the [runtime](https://github.com/gcc-mirror/gcc/blob/3e08a4ecea27c54fda90e8f58641b1986ad957e1/libgfortran/generated/in_pack_r8.c#L35), so the compiler inserts unconditional calls in the subprogram prologue/epilogue. +The dynamic checks for contiguity and the array copy code is located completely in the [runtime](https://github.com/gcc-mirror/gcc/blob/3e08a4ecea27c54fda90e8f58641b1986ad957e1/libgfortran/generated/in_pack_r8.c#L35), so the compiler inserts unconditional calls in the subprogram prologue/epilogue. It looks like `gfortran` ignores `intent(out)/intent(in)` which could have helped to avoid some of the `pack/unpack` overhead. It looks like the `pack`/`unpack` actions are inserted early in the compilation pipeline, and these extra calls affect behavior of the later optimization passes. For example, `Polyhedron/fatigue2` slows down by about 2x with `-frepack-arrays`: this slowdown is not caused by the `pack`/`unpack` overhead, but is a consequence of worse function inlining decisions made after the calls insertion. The benchmarks becomes even faster than the original version with `-frepack-arrays` and proper `-finline-limit=` settings, but it does not look like the benchmark contains code that would benefit from the array repacking. -It does not look like `gfortran` is able to eliminate the `pack`/`unpack` code after the function inlining, if the actual argument is statically known to be contiguous. So the overhead from the dynamic continuity checks is inevitable when `-frepack-arrays` is specified. 
+It does not look like `gfortran` is able to eliminate the `pack`/`unpack` code after the function inlining, if the actual argument is statically known to be contiguous. So the overhead from the dynamic contiguity checks is inevitable when `-frepack-arrays` is specified. It does not look like `gfortran` tries to optimize the insertion of `pack`/`unpack` code. For example, if a dummy array is only used under a condition within the subprogram, the repacking code might be inserted under the same condition to minimize the overhead on the unconditional path through the subprogram. @@ -59,7 +59,7 @@ It does not look like `gfortran` tries to optimize the insertion of `pack`/`unpa #### Facts and guesses about the implementation -The `pack` code is only generated if the actual argument may be non-contiguous in the innermost dimension, as determined statically, i.e. the compiler does not generate any dynamic continuity checks. For example: +The `pack` code is only generated if the actual argument may be non-contiguous in the innermost dimension, as determined statically, i.e. the compiler does not generate any dynamic contiguity checks. For example: ```Fortran interface @@ -132,8 +132,8 @@ So it does not seem practical/reasonable to enable the array repacking by defaul ### Performance 1. Minimize the overhead of array repacking, e.g. avoid copy-in/out whenever possible, execute copy-in/out only on the execution paths where the array is accessed. -2. Provide different modes of repacking depending on the "continuity" meaning, i.e. one - array is contiguous in the innermost dimension, two - array is contiguous in all dimensions. -3. Avoid generating repacking code, when the "continuity" can be statically proven (including after optimization passes like constant propagation, function inlining, etc.). +2. Provide different modes of repacking depending on the "contiguity" meaning, i.e. 
one - array is contiguous in the innermost dimension, two - array is contiguous in all dimensions. +3. Avoid generating repacking code, when the "contiguity" can be statically proven (including after optimization passes like constant propagation, function inlining, etc.). 4. Use a set of heuristics to avoid generating repacking code based on the array usage pattern, e.g. if an array is proven not to be used in an array expression or a loop, etc. 5. Use a set of heuristics to avoid repacking actions dynamically, e.g. based on the array size, element size, byte stride(s) of the [innermost] dimension(s), etc. 6. Minimize the impact of the IR changes, introduced by repacking, on the later optimization passes. @@ -156,7 +156,7 @@ Controlled by cli options, Lowering will generate a `fir.pack_array` operation i The new operations will hold all the information that customizes further handling of the `pack`/`unpack` actions, such as: * Optional array of attributes supporting an interface to generate a predicate that says if the repacking is safe in the current context. -* The continuity mode: `innermost` vs `whole`. +* The contiguity mode: `innermost` vs `whole`. * Attributes selecting the heuristics (both compiler and runtime ones) that may be applied to avoid `pack`/`unpack` actions. * Other attributes, like `stack` vs `heap` to manage the temporary allocation according to `-fstack-arrays`, etc. @@ -195,7 +195,7 @@ The operation creates a new `!fir.box/class<!fir.array<>>` value to represent ei Arguments: * `stack` - indicates if `-fstack-arrays` is in effect for compiling this function. -* `innermost` - tells that the repacking has to be done iff the array is not contiguous in the innermost dimension. This also describes what type of continuity can be expected from `%new_var`, i.e. `innermost` means that the resulting array is definitely contiguous in the innermost dimension, but may be non-contiguous in other dimensions (unless additional analysis proves otherwise). 
For 1-D arrays, `innermost` attribute is not valid. +* `innermost` - tells that the repacking has to be done iff the array is not contiguous in the innermost dimension. This also describes what type of contiguity can be expected from `%new_var`, i.e. `innermost` means that the resulting array is definitely contiguous in the innermost dimension, but may be non-contiguous in other dimensions (unless additional analysis proves otherwise). For 1-D arrays, `innermost` attribute is not valid. * `no_copy` - indicates that, in case a temporary array is created, `%var` to `%new_var` copy is not required (`intent(out)` dummy argument case). * `heuristics` * `loop-only` - `fir.pack_array` can be optimized away, if the array is not used in a loop. @@ -351,7 +351,7 @@ The `fir.pack_array`'s copy-in action cannot be skipped for `INTENT(OUT)` dummy #### Optional behavior -In case of the `whole` continuity mode or with 1-D array, Flang can propagate this information to `hlfir.declare` - this may improve optimizations down the road. This can be done iff the repacking has no dynamic constraints and/or heuristics. For example: +In case of the `whole` contiguity mode or with 1-D array, Flang can propagate this information to `hlfir.declare` - this may improve optimizations down the road. This can be done iff the repacking has no dynamic constraints and/or heuristics. For example: ``` %c0 = arith.constant 0 : index @@ -441,10 +441,11 @@ In cases where `fir.pack_array` is statically known to produce a copy that is co The following user options are proposed: * `-frepack-arrays` - the option forces Flang to repack a non-contiguous assumed-shape dummy array into a temporary contiguous memory, which may result in faster accesses of the array. 
The compiler will insert special code in subprogram prologue to allocate a temporary array and copy the original array into the temporary; in subprogram epilogue, it will insert a copy from the temporary array into the original array and deallocate the temporary. The overhead of the allocation/deallocation and the copies may be significant depending on the array size. The compiler will try to optimize the unnecessary/unprofitable repacking. +* `-fstack-repack-arrays` - attempt allocating the temporary arrays in stack memory. By default, they are allocated in heap memory (note that `-fstack-arrays` does not affect the allocation of the temporaries created for the arrays repacking). * `-frepack-arrays-opts=[none|loop-only]` - the option enables optimizations that may eliminate the array repacking code depending on the array usage pattern: * `none` - no optimizations. * `loop-only` - the array repacking code will be removed in any subprogram where the array is not used inside a loop or an array expression. -* `-frepack-arrays-continuity=[whole|innermost]`: +* `-frepack-arrays-contiguity=[whole|innermost]`: * `whole` - the option will repack arrays that are non-contiguous in any dimension (default). * `innermost` - the option will repack arrays that are non-contiguous in the innermost dimension. * `-frepack-arrays-max-size=<int>` - arrays bigger than the specified size will not be repacked. diff --git a/flang/include/flang/Lower/LoweringOptions.def b/flang/include/flang/Lower/LoweringOptions.def index 6735bea551414..d98823a0e3341 100644 --- a/flang/include/flang/Lower/LoweringOptions.def +++ b/flang/include/flang/Lower/LoweringOptions.def @@ -56,6 +56,11 @@ ENUM_LOWERINGOPT(StackArrays, unsigned, 1, 0) /// packed into contiguous memory. ENUM_LOWERINGOPT(RepackArrays, unsigned, 1, 0) +/// If true, the temporary arrays created under RepackArrays +/// control will be allocated in stack memory. If false, +/// they will be allocated in heap memory. 
+ENUM_LOWERINGOPT(StackRepackArrays, unsigned, 1, 0) + /// If true, the repacking (RepackArrays option above) /// will be done for arrays non-contiguous in any dimension, /// otherwise, it will be done only for arrays non-contiguous diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 466d939b7b840..20a4a0a6cf745 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -1476,6 +1476,19 @@ bool CompilerInvocation::createFromArgs( clang::driver::options::OPT_fno_realloc_lhs, true)) invoc.loweringOpts.setReallocateLHS(false); + invoc.loweringOpts.setRepackArrays( + args.hasFlag(clang::driver::options::OPT_frepack_arrays, + clang::driver::options::OPT_fno_repack_arrays, + /*default=*/false)); + invoc.loweringOpts.setStackRepackArrays( + args.hasFlag(clang::driver::options::OPT_fstack_repack_arrays, + clang::driver::options::OPT_fno_stack_repack_arrays, + /*default=*/false)); + if (auto *arg = args.getLastArg( + clang::driver::options::OPT_frepack_arrays_contiguity_EQ)) + invoc.loweringOpts.setRepackArraysWhole(arg->getValue() == + llvm::StringRef{"whole"}); + success &= parseFrontendArgs(invoc.getFrontendOpts(), args, diags); parseTargetArgs(invoc.getTargetOpts(), args); parsePreprocessorArgs(invoc.getPreprocessorOpts(), args); @@ -1715,6 +1728,7 @@ void CompilerInvocation::setLoweringOptions() { const Fortran::common::LangOptions &langOptions = getLangOpts(); loweringOpts.setIntegerWrapAround(langOptions.getSignedOverflowBehavior() == Fortran::common::LangOptions::SOB_Defined); + loweringOpts.setStackArrays(codegenOpts.StackArrays); Fortran::common::MathOptionsBase &mathOpts = loweringOpts.getMathOptions(); // TODO: when LangOptions are finalized, we can represent // the math related options using Fortran::commmon::MathOptionsBase, diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index 0b22b743edee9..366ff328bfa27 100644 --- 
a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -2630,7 +2630,7 @@ Fortran::lower::genPackArray(Fortran::lower::AbstractConverter &converter, }); fir::FirOpBuilder &builder = converter.getFirOpBuilder(); const mlir::Location loc = genLocation(converter, sym); - bool stackAlloc = opts.getStackArrays(); + bool stackAlloc = opts.getStackRepackArrays(); // 1D arrays must always use 'whole' mode. bool isInnermostMode = !opts.getRepackArraysWhole() && sym.Rank() > 1; // Avoid copy-in for 'intent(out)' variable, unless this is a dummy diff --git a/flang/test/Driver/frepack-arrays-contiguity.f90 b/flang/test/Driver/frepack-arrays-contiguity.f90 new file mode 100644 index 0000000000000..d642cdac598af --- /dev/null +++ b/flang/test/Driver/frepack-arrays-contiguity.f90 @@ -0,0 +1,27 @@ +! Test forwarding just the forwarding of -frepack-arrays-contiguity options: +! RUN: %flang -frepack-arrays-contiguity=whole %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=WHOLECMD %s +! RUN: %flang -frepack-arrays-contiguity=innermost %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=INNERMOSTCMD %s +! RUN: %flang -frepack-arrays-contiguity=innermost -frepack-arrays-contiguity=whole %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=WHOLECMD %s +! RUN: %flang -frepack-arrays-contiguity=whole -frepack-arrays-contiguity=innermost %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=INNERMOSTCMD %s + +! Test proper setting of the lowering options: +! RUN: %flang_fc1 -frepack-arrays -frepack-arrays-contiguity=whole %s -emit-hlfir -o - | FileCheck --check-prefix=WHOLE %s +! RUN: %flang_fc1 -frepack-arrays-contiguity=whole %s -emit-hlfir -o - | FileCheck --check-prefix=NOREPACK %s +! RUN: %flang_fc1 -frepack-arrays -frepack-arrays-contiguity=innermost %s -emit-hlfir -o - | FileCheck --check-prefix=INNERMOST %s +! RUN: %flang_fc1 -frepack-arrays-contiguity=innermost %s -emit-hlfir -o - | FileCheck --check-prefix=NOREPACK %s + +! 
Default setting is 'innermost': +! RUN: %flang_fc1 -frepack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=INNERMOST %s + +! WHOLECMD: "-fc1"{{.*}}"-frepack-arrays-contiguity=whole" +! INNERMOSTCMD: "-fc1"{{.*}}"-frepack-arrays-contiguity=innermost" + +subroutine test(x) + real :: x(:,:) + ! WHOLE: fir.pack_array{{.*}}whole + ! WHOLE: fir.unpack_array + ! INNERMOST: fir.pack_array{{.*}}innermost + ! INNERMOST: fir.unpack_array + ! NOREPACK-NOT: fir.pack_array + ! NOREPACK-NOT: fir.unpack_array +end subroutine diff --git a/flang/test/Driver/frepack-arrays.f90 b/flang/test/Driver/frepack-arrays.f90 new file mode 100644 index 0000000000000..ad96a14f74378 --- /dev/null +++ b/flang/test/Driver/frepack-arrays.f90 @@ -0,0 +1,24 @@ +! Test forwarding just the forwarding of -f[no-]repack-arrays options: +! RUN: %flang -frepack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=REPACKCMD %s +! RUN: %flang -fno-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=NOREPACKCMD %s +! RUN: %flang -frepack-arrays -fno-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=NOREPACKCMD %s +! RUN: %flang -fno-repack-arrays -frepack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=REPACKCMD %s + +! Test proper setting of the lowering options: +! RUN: %flang_fc1 -frepack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=REPACK %s +! RUN: %flang_fc1 -fno-repack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=NOREPACK %s +! RUN: %flang_fc1 -frepack-arrays -fno-repack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=NOREPACK %s +! RUN: %flang_fc1 -fno-repack-arrays -frepack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=REPACK %s + +! REPACKCMD: "-fc1"{{.*}}"-frepack-arrays" +! REPACKCMD-NOT: -fno-repack-arrays +! NOREPACKCMD: "-fc1"{{.*}}"-fno-repack-arrays" +! NOREPACKCMD-NOT: -frepack-arrays + +subroutine test(x) + real :: x(:) + ! REPACK: fir.pack_array + ! REPACK: fir.unpack_array + ! 
NOREPACK-NOT: fir.pack_array + ! NOREPACK-NOT: fir.unpack_array +end subroutine diff --git a/flang/test/Driver/fstack-repack-arrays.f90 b/flang/test/Driver/fstack-repack-arrays.f90 new file mode 100644 index 0000000000000..4fb5aa0c18032 --- /dev/null +++ b/flang/test/Driver/fstack-repack-arrays.f90 @@ -0,0 +1,24 @@ +! Test forwarding just the forwarding of -f[no-]stack-repack-arrays options: +! RUN: %flang -fstack-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=STACKCMD %s +! RUN: %flang -fno-stack-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=HEAPCMD %s +! RUN: %flang -fstack-repack-arrays -fno-stack-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=HEAPCMD %s +! RUN: %flang -fno-stack-repack-arrays -fstack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=HEAPCMD %s +! RUN: %flang -fno-stack-repack-arrays -fstack-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=STACKCMD %s + +! Test proper setting of the lowering options: +! RUN: %flang_fc1 -frepack-arrays -fstack-repack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=STACK %s +! RUN: %flang_fc1 -frepack-arrays -fno-stack-repack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=HEAP %s + +! Default setting is 'heap': +! RUN: %flang_fc1 -frepack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=HEAP %s + +! STACKCMD: "-fc1"{{.*}}"-fstack-repack-arrays" +! HEAPCMD: "-fc1"{{.*}}"-fno-stack-repack-arrays" + +subroutine test(x) + real :: x(:,:) + ! STACK: fir.pack_array{{.*}}stack + ! STACK: fir.unpack_array{{.*}}stack + ! HEAP: fir.pack_array{{.*}}heap + ! HEAP: fir.unpack_array{{.*}}heap +end subroutine diff --git a/flang/test/Lower/repack-arrays.f90 b/flang/test/Lower/repack-arrays.f90 index 19ea93a3521a3..ff89df82793a3 100644 --- a/flang/test/Lower/repack-arrays.f90 +++ b/flang/test/Lower/repack-arrays.f90 @@ -1,7 +1,7 @@ -! 
RUN: bbc -emit-hlfir -frepack-arrays -fstack-arrays -frepack-arrays-continuity-whole %s -o - -I nowhere | FileCheck --check-prefixes=ALL,STACK,WHOLE %s -! RUN: bbc -emit-hlfir -frepack-arrays -fstack-arrays=false -frepack-arrays-continuity-whole %s -o - -I nowhere | FileCheck --check-prefixes=ALL,HEAP,WHOLE %s -! RUN: bbc -emit-hlfir -frepack-arrays -fstack-arrays -frepack-arrays-continuity-whole=false %s -o - -I nowhere | FileCheck --check-prefixes=ALL,STACK,INNER %s -! RUN: bbc -emit-hlfir -frepack-arrays -fstack-arrays=false -frepack-arrays-continuity-whole=false %s -o - -I nowhere | FileCheck --check-prefixes=ALL,HEAP,INNER %s +! RUN: bbc -emit-hlfir -frepack-arrays -fstack-repack-arrays -frepack-arrays-continuity-whole %s -o - -I nowhere | FileCheck --check-prefixes=ALL,STACK,WHOLE %s +! RUN: bbc -emit-hlfir -frepack-arrays -fstack-repack-arrays=false -frepack-arrays-continuity-whole %s -o - -I nowhere | FileCheck --check-prefixes=ALL,HEAP,WHOLE %s +! RUN: bbc -emit-hlfir -frepack-arrays -fstack-repack-arrays -frepack-arrays-continuity-whole=false %s -o - -I nowhere | FileCheck --check-prefixes=ALL,STACK,INNER %s +! RUN: bbc -emit-hlfir -frepack-arrays -fstack-repack-arrays=false -frepack-arrays-continuity-whole=false %s -o - -I nowhere | FileCheck --check-prefixes=ALL,HEAP,INNER %s ! ALL-LABEL: func.func @_QPtest1( ! ALL-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x"}) { diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index 78ce510968ca5..434a9f3d1e986 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -251,15 +251,20 @@ static llvm::cl::opt<bool> "the LHS of the intrinsic assignment"), llvm::cl::init(true)); -// TODO: -fstack-arrays is currently only used for fir.pack_array, -// but it should probably be used for deciding how arrays/temporaries -// are allocated during lowering. 
+// TODO: -fstack-arrays is currently unused, but it should probably be used +// for deciding how arrays/temporaries are allocated during lowering. static llvm::cl::opt<bool> stackArrays("fstack-arrays", llvm::cl::desc("Allocate all arrays of unknown size and " "temporary arrays in stack memory"), llvm::cl::init(false)); +static llvm::cl::opt<bool> stackRepackArrays( + "fstack-repack-arrays", + llvm::cl::desc("Allocate temporary arrays for -frepack-arrays " + "in stack memory"), + llvm::cl::init(false)); + static llvm::cl::opt<bool> repackArrays("frepack-arrays", llvm::cl::desc("Pack non-contiguous assummed shape arrays " @@ -430,6 +435,7 @@ static llvm::LogicalResult convertFortranSourceToMLIR( loweringOptions.setInitGlobalZero(initGlobalZero); loweringOptions.setReallocateLHS(reallocateLHS); loweringOptions.setStackArrays(stackArrays); + loweringOptions.setStackRepackArrays(stackRepackArrays); loweringOptions.setRepackArrays(repackArrays); loweringOptions.setRepackArraysWhole(repackArraysWhole); std::vector<Fortran::lower::EnvironmentDefault> envDefaults = {}; >From 1b8f3502054beab0dabc9a40dbf90cbcefafb3fc Mon Sep 17 00:00:00 2001 From: Slava Zakharin <szakha...@nvidia.com> Date: Tue, 1 Apr 2025 16:31:10 -0700 Subject: [PATCH 2/3] Fixed sphinx build. --- clang/include/clang/Driver/Options.td | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 43bcdc6d1111f..65166adbf06c2 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6987,6 +6987,7 @@ may be significant, which may slow down some programs. Enabling array repacking may also change the behavior of certain programs: + * The copy actions may introduce a data race in valid OpenACC/OpenMP programs. 
For example, if different threads execute the same subprogram with a non-contiguous assumed shape dummy array, and the different threads >From ee7665d85ffc79b9da7666705bd8253b21742b39 Mon Sep 17 00:00:00 2001 From: Slava Zakharin <szakha...@nvidia.com> Date: Wed, 2 Apr 2025 11:32:35 -0700 Subject: [PATCH 3/3] Addressed review comments. --- clang/include/clang/Driver/Options.td | 4 ++-- flang/include/flang/Lower/LoweringOptions.def | 4 ---- flang/lib/Frontend/CompilerInvocation.cpp | 1 - flang/test/Driver/frepack-arrays-contiguity.f90 | 7 ++++++- flang/test/Driver/frepack-arrays.f90 | 2 +- flang/test/Driver/fstack-repack-arrays.f90 | 2 +- flang/tools/bbc/bbc.cpp | 9 --------- 7 files changed, 10 insertions(+), 19 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 65166adbf06c2..2ca5f99e4ca63 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6974,7 +6974,7 @@ defm repack_arrays " non-contiguous assumed shape dummy arrays into " "contiguous memory">>, DocBrief<[{Create temporary copies of non-contiguous assumed shape dummy -arrays in subprogram prologues, and destroy them in subprotram epilogues. +arrays in subprogram prologues, and destroy them in subprogram epilogues. The temporary copy is initialized with values from the original array in the prologue, if needed. In the epilogue, the current values in the temporary array are copied into the original array, if needed. @@ -7037,7 +7037,7 @@ defm stack_repack_arrays DocBrief<[{Controls whether the array temporaries created under **-frepack-arrays** are allocated on the stack or on the heap. -By default, the heap is used. Allocations of the polymorphic types +By default, the heap is used. Allocations of polymorphic types are always done on the heap, though this may change in future releases. 
}]>; diff --git a/flang/include/flang/Lower/LoweringOptions.def b/flang/include/flang/Lower/LoweringOptions.def index d98823a0e3341..b062ea1a805ac 100644 --- a/flang/include/flang/Lower/LoweringOptions.def +++ b/flang/include/flang/Lower/LoweringOptions.def @@ -48,10 +48,6 @@ ENUM_LOWERINGOPT(ReallocateLHS, unsigned, 1, 1) /// On by default. ENUM_LOWERINGOPT(InitGlobalZero, unsigned, 1, 1) -/// If true, the arrays of unknown size and array temporaries -/// are requested to be allocated in stack memory. -ENUM_LOWERINGOPT(StackArrays, unsigned, 1, 0) - /// If true, the dummy assumed shape arrays are conditionally /// packed into contiguous memory. ENUM_LOWERINGOPT(RepackArrays, unsigned, 1, 0) diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 20a4a0a6cf745..6f87a18d69c3d 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -1728,7 +1728,6 @@ void CompilerInvocation::setLoweringOptions() { const Fortran::common::LangOptions &langOptions = getLangOpts(); loweringOpts.setIntegerWrapAround(langOptions.getSignedOverflowBehavior() == Fortran::common::LangOptions::SOB_Defined); - loweringOpts.setStackArrays(codegenOpts.StackArrays); Fortran::common::MathOptionsBase &mathOpts = loweringOpts.getMathOptions(); // TODO: when LangOptions are finalized, we can represent // the math related options using Fortran::commmon::MathOptionsBase, diff --git a/flang/test/Driver/frepack-arrays-contiguity.f90 b/flang/test/Driver/frepack-arrays-contiguity.f90 index d642cdac598af..88e5af4129eda 100644 --- a/flang/test/Driver/frepack-arrays-contiguity.f90 +++ b/flang/test/Driver/frepack-arrays-contiguity.f90 @@ -1,8 +1,11 @@ -! Test forwarding just the forwarding of -frepack-arrays-contiguity options: +! Test forwarding of -frepack-arrays-contiguity options: ! RUN: %flang -frepack-arrays-contiguity=whole %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=WHOLECMD %s ! 
RUN: %flang -frepack-arrays-contiguity=innermost %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=INNERMOSTCMD %s ! RUN: %flang -frepack-arrays-contiguity=innermost -frepack-arrays-contiguity=whole %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=WHOLECMD %s ! RUN: %flang -frepack-arrays-contiguity=whole -frepack-arrays-contiguity=innermost %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=INNERMOSTCMD %s +! RUN: not %flang -frepack-arrays-contiguity= -frepack-arrays-contiguity=innermost %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=ERROR %s +! RUN: not %flang -frepack-arrays-contiguity=whole3 -frepack-arrays-contiguity=innermost %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=ERROR %s +! RUN: not %flang -frepack-arrays-contiguity=innermostg -frepack-arrays-contiguity=innermost %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=ERROR %s ! Test proper setting of the lowering options: ! RUN: %flang_fc1 -frepack-arrays -frepack-arrays-contiguity=whole %s -emit-hlfir -o - | FileCheck --check-prefix=WHOLE %s @@ -13,6 +16,8 @@ ! Default setting is 'innermost': ! RUN: %flang_fc1 -frepack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=INNERMOST %s +! ERROR: error: unsupported argument '{{.*}}' to option '-frepack-arrays-contiguity=' + ! WHOLECMD: "-fc1"{{.*}}"-frepack-arrays-contiguity=whole" ! INNERMOSTCMD: "-fc1"{{.*}}"-frepack-arrays-contiguity=innermost" diff --git a/flang/test/Driver/frepack-arrays.f90 b/flang/test/Driver/frepack-arrays.f90 index ad96a14f74378..0d1913d282446 100644 --- a/flang/test/Driver/frepack-arrays.f90 +++ b/flang/test/Driver/frepack-arrays.f90 @@ -1,4 +1,4 @@ -! Test forwarding just the forwarding of -f[no-]repack-arrays options: +! Test forwarding of -f[no-]repack-arrays options: ! RUN: %flang -frepack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=REPACKCMD %s ! RUN: %flang -fno-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=NOREPACKCMD %s ! 
RUN: %flang -frepack-arrays -fno-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=NOREPACKCMD %s diff --git a/flang/test/Driver/fstack-repack-arrays.f90 b/flang/test/Driver/fstack-repack-arrays.f90 index 4fb5aa0c18032..406228cfe6105 100644 --- a/flang/test/Driver/fstack-repack-arrays.f90 +++ b/flang/test/Driver/fstack-repack-arrays.f90 @@ -1,4 +1,4 @@ -! Test forwarding just the forwarding of -f[no-]stack-repack-arrays options: +! Test forwarding of -f[no-]stack-repack-arrays options: ! RUN: %flang -fstack-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=STACKCMD %s ! RUN: %flang -fno-stack-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=HEAPCMD %s ! RUN: %flang -fstack-repack-arrays -fno-stack-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=HEAPCMD %s diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index 434a9f3d1e986..c544008a24d56 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -251,14 +251,6 @@ static llvm::cl::opt<bool> "the LHS of the intrinsic assignment"), llvm::cl::init(true)); -// TODO: -fstack-arrays is currently unused, but it should probably be used -// for deciding how arrays/temporaries are allocated during lowering. 
-static llvm::cl::opt<bool> - stackArrays("fstack-arrays", - llvm::cl::desc("Allocate all arrays of unknown size and " - "temporary arrays in stack memory"), - llvm::cl::init(false)); - static llvm::cl::opt<bool> stackRepackArrays( "fstack-repack-arrays", llvm::cl::desc("Allocate temporary arrays for -frepack-arrays " @@ -434,7 +426,6 @@ static llvm::LogicalResult convertFortranSourceToMLIR( loweringOptions.setIntegerWrapAround(integerWrapAround); loweringOptions.setInitGlobalZero(initGlobalZero); loweringOptions.setReallocateLHS(reallocateLHS); - loweringOptions.setStackArrays(stackArrays); loweringOptions.setStackRepackArrays(stackRepackArrays); loweringOptions.setRepackArrays(repackArrays); loweringOptions.setRepackArraysWhole(repackArraysWhole); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits