https://github.com/vzakhari created https://github.com/llvm/llvm-project/pull/134002
Added options: * -f[no-]repack-arrays * -f[no-]stack-repack-arrays * -frepack-arrays-contiguity=whole/innermost >From b936044f9a77cb717d74248cad5021b5d997d407 Mon Sep 17 00:00:00 2001 From: Slava Zakharin <szakha...@nvidia.com> Date: Tue, 1 Apr 2025 15:55:30 -0700 Subject: [PATCH] [flang] Added driver options for arrays repacking. Added options: * -f[no-]repack-arrays * -f[no-]stack-repack-arrays * -frepack-arrays-contiguity=whole/innermost --- clang/include/clang/Driver/Options.td | 62 ++++++++++++++++++- clang/lib/Driver/ToolChains/Flang.cpp | 28 ++++++--- flang/docs/ArrayRepacking.md | 19 +++--- flang/include/flang/Lower/LoweringOptions.def | 5 ++ flang/lib/Frontend/CompilerInvocation.cpp | 14 +++++ flang/lib/Lower/ConvertVariable.cpp | 2 +- .../test/Driver/frepack-arrays-contiguity.f90 | 27 ++++++++ flang/test/Driver/frepack-arrays.f90 | 24 +++++++ flang/test/Driver/fstack-repack-arrays.f90 | 24 +++++++ flang/test/Lower/repack-arrays.f90 | 8 +-- flang/tools/bbc/bbc.cpp | 12 +++- 11 files changed, 198 insertions(+), 27 deletions(-) create mode 100644 flang/test/Driver/frepack-arrays-contiguity.f90 create mode 100644 flang/test/Driver/frepack-arrays.f90 create mode 100644 flang/test/Driver/fstack-repack-arrays.f90 diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 89cb03cc33b98..49a59c0417455 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6819,7 +6819,6 @@ defm real_8_real_10 : BooleanFFlag<"real-8-real-10">, Group<gfortran_Group>; defm real_8_real_16 : BooleanFFlag<"real-8-real-16">, Group<gfortran_Group>; defm real_8_real_4 : BooleanFFlag<"real-8-real-4">, Group<gfortran_Group>; defm recursive : BooleanFFlag<"recursive">, Group<gfortran_Group>; -defm repack_arrays : BooleanFFlag<"repack-arrays">, Group<gfortran_Group>; defm second_underscore : BooleanFFlag<"second-underscore">, Group<gfortran_Group>; defm sign_zero : BooleanFFlag<"sign-zero">, 
Group<gfortran_Group>; defm whole_file : BooleanFFlag<"whole-file">, Group<gfortran_Group>; @@ -6961,6 +6960,51 @@ defm unsigned : OptInFC1FFlag<"unsigned", "Enables UNSIGNED type">; def fno_automatic : Flag<["-"], "fno-automatic">, Group<f_Group>, HelpText<"Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE">; +defm repack_arrays + : BoolOptionWithoutMarshalling< + "f", "repack-arrays", PosFlag<SetTrue, [], [], "Pack">, + NegFlag<SetFalse, [], [], "Do not pack">, + BothFlags<[], [], + " non-contiguous assumed shape dummy arrays into " + "contiguous memory">>, + DocBrief<[{Create temporary copies of non-contiguous assumed shape dummy +arrays in subprogram prologues, and destroy them in subprogram epilogues. +The temporary copy is initialized with values from the original array +in the prologue, if needed. In the epilogue, the current values +in the temporary array are copied into the original array, if needed. + +Accessing the contiguous temporary in the program code may result +in faster execution compared to accessing elements of the original array, +when they are sparse in memory. At the same time, the overhead +of copying values between the original and the temporary arrays +may be significant, which may slow down some programs. + +Enabling array repacking may also change the behavior of certain +programs: +* The copy actions may introduce a data race in valid OpenACC/OpenMP programs. + For example, if different threads execute the same subprogram + with a non-contiguous assumed shape dummy array, and the different threads + access unrelated parts of the array, then the whole array copy + made in each thread will cause a data race. +* OpenACC/OpenMP offload programs may behave incorrectly with regard + to the device data environment, due to the fact that the original + array and the temporary may have different presence status on the device.
+* ``IS_CONTIGUOUS`` intrinsic may return ``TRUE`` with the array repacking + enabled, whereas it would return ``FALSE`` with the repacking disabled. +* The result of ``LOC`` intrinsic applied to an actual argument associated + with a non-contiguous assumed shape dummy array, may be different + from the result of ``LOC`` applied to the dummy array.}]>; + +def frepack_arrays_contiguity_EQ + : Joined<["-"], "frepack-arrays-contiguity=">, + Group<f_Group>, + Values<"whole,innermost">, + HelpText< + "When -frepack-arrays is in effect, 'whole' enables " + "repacking for arrays that are non-contiguous in any dimension, " + "'innermost' enables repacking for arrays that are non-contiguous " + "in the innermost dimension (the default)">; + defm save_main_program : BoolOptionWithoutMarshalling<"f", "save-main-program", PosFlag<SetTrue, [], [], "Place all main program variables in static memory (otherwise scalars may be placed on the stack)">, @@ -6974,6 +7018,22 @@ defm loop_versioning : BoolOptionWithoutMarshalling<"f", "version-loops-for-stri PosFlag<SetTrue, [], [ClangOption], "Create unit-strided versions of loops">, NegFlag<SetFalse, [], [ClangOption], "Do not create unit-strided loops (default)">>; +defm stack_repack_arrays + : BoolOptionWithoutMarshalling< + "f", "stack-repack-arrays", + PosFlag<SetTrue, [], [], + "Attempt to allocate array temporaries created under " + "-frepack-arrays on the stack">, + NegFlag< + SetFalse, [], [], + "Allocate -frepack-arrays temporaries on the heap (default)">>, + DocBrief<[{Controls whether the array temporaries created under +**-frepack-arrays** are allocated on the stack or on the heap. + +By default, the heap is used. Allocations of the polymorphic types +are always done on the heap, though this may change in future releases.
+ }]>; + def fhermetic_module_files : Flag<["-"], "fhermetic-module-files">, Group<f_Group>, HelpText<"Emit hermetic module files (no nested USE association)">; } // let Visibility = [FC1Option, FlangOption] diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index a44513a83a2d7..ef451eb618a05 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -157,15 +157,25 @@ void Flang::addCodegenOptions(const ArgList &Args, if (shouldLoopVersion(Args)) CmdArgs.push_back("-fversion-loops-for-stride"); - Args.addAllArgs(CmdArgs, - {options::OPT_flang_experimental_hlfir, - options::OPT_flang_deprecated_no_hlfir, - options::OPT_fno_ppc_native_vec_elem_order, - options::OPT_fppc_native_vec_elem_order, - options::OPT_finit_global_zero, - options::OPT_fno_init_global_zero, options::OPT_ftime_report, - options::OPT_ftime_report_EQ, options::OPT_funroll_loops, - options::OPT_fno_unroll_loops}); + for (const auto &arg : + Args.getAllArgValues(options::OPT_frepack_arrays_contiguity_EQ)) + if (arg.compare("whole") != 0 && arg.compare("innermost") != 0) { + getToolChain().getDriver().Diag(diag::err_drv_unsupported_option_argument) + << "-frepack-arrays-contiguity=" << arg; + } + + Args.addAllArgs( + CmdArgs, + {options::OPT_flang_experimental_hlfir, + options::OPT_flang_deprecated_no_hlfir, + options::OPT_fno_ppc_native_vec_elem_order, + options::OPT_fppc_native_vec_elem_order, options::OPT_finit_global_zero, + options::OPT_fno_init_global_zero, options::OPT_frepack_arrays, + options::OPT_fno_repack_arrays, + options::OPT_frepack_arrays_contiguity_EQ, + options::OPT_fstack_repack_arrays, options::OPT_fno_stack_repack_arrays, + options::OPT_ftime_report, options::OPT_ftime_report_EQ, + options::OPT_funroll_loops, options::OPT_fno_unroll_loops}); } void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const { diff --git a/flang/docs/ArrayRepacking.md b/flang/docs/ArrayRepacking.md index 
87cfc5d1bb4bc..7de599f293e40 100755 --- a/flang/docs/ArrayRepacking.md +++ b/flang/docs/ArrayRepacking.md @@ -39,13 +39,13 @@ Having these results it seems reasonable to provide support for arrays repacking #### Facts and guesses about the implementation -The dynamic checks for continuity and the array copy code is located completely in the [runtime](https://github.com/gcc-mirror/gcc/blob/3e08a4ecea27c54fda90e8f58641b1986ad957e1/libgfortran/generated/in_pack_r8.c#L35), so the compiler inserts unconditional calls in the subprogram prologue/epilogue. +The dynamic checks for contiguity and the array copy code is located completely in the [runtime](https://github.com/gcc-mirror/gcc/blob/3e08a4ecea27c54fda90e8f58641b1986ad957e1/libgfortran/generated/in_pack_r8.c#L35), so the compiler inserts unconditional calls in the subprogram prologue/epilogue. It looks like `gfortran` ignores `intent(out)/intent(in)` which could have helped to avoid some of the `pack/unpack` overhead. It looks like the `pack`/`unpack` actions are inserted early in the compilation pipeline, and these extra calls affect behavior of the later optimization passes. For example, `Polyhedron/fatigue2` slows down by about 2x with `-frepack-arrays`: this slowdown is not caused by the `pack`/`unpack` overhead, but is a consequence of worse function inlining decisions made after the calls insertion. The benchmarks becomes even faster than the original version with `-frepack-arrays` and proper `-finline-limit=` settings, but it does not look like the benchmark contains code that would benefit from the array repacking. -It does not look like `gfortran` is able to eliminate the `pack`/`unpack` code after the function inlining, if the actual argument is statically known to be contiguous. So the overhead from the dynamic continuity checks is inevitable when `-frepack-arrays` is specified. 
+It does not look like `gfortran` is able to eliminate the `pack`/`unpack` code after the function inlining, if the actual argument is statically known to be contiguous. So the overhead from the dynamic contiguity checks is inevitable when `-frepack-arrays` is specified. It does not look like `gfortran` tries to optimize the insertion of `pack`/`unpack` code. For example, if a dummy array is only used under a condition within the subprogram, the repacking code might be inserted under the same condition to minimize the overhead on the unconditional path through the subprogram. @@ -59,7 +59,7 @@ It does not look like `gfortran` tries to optimize the insertion of `pack`/`unpa #### Facts and guesses about the implementation -The `pack` code is only generated if the actual argument may be non-contiguous in the innermost dimension, as determined statically, i.e. the compiler does not generate any dynamic continuity checks. For example: +The `pack` code is only generated if the actual argument may be non-contiguous in the innermost dimension, as determined statically, i.e. the compiler does not generate any dynamic contiguity checks. For example: ```Fortran interface @@ -132,8 +132,8 @@ So it does not seem practical/reasonable to enable the array repacking by defaul ### Performance 1. Minimize the overhead of array repacking, e.g. avoid copy-in/out whenever possible, execute copy-in/out only on the execution paths where the array is accessed. -2. Provide different modes of repacking depending on the "continuity" meaning, i.e. one - array is contiguous in the innermost dimension, two - array is contiguous in all dimensions. -3. Avoid generating repacking code, when the "continuity" can be statically proven (including after optimization passes like constant propagation, function inlining, etc.). +2. Provide different modes of repacking depending on the "contiguity" meaning, i.e. 
one - array is contiguous in the innermost dimension, two - array is contiguous in all dimensions. +3. Avoid generating repacking code, when the "contiguity" can be statically proven (including after optimization passes like constant propagation, function inlining, etc.). 4. Use a set of heuristics to avoid generating repacking code based on the array usage pattern, e.g. if an array is proven not to be used in an array expression or a loop, etc. 5. Use a set of heuristics to avoid repacking actions dynamically, e.g. based on the array size, element size, byte stride(s) of the [innermost] dimension(s), etc. 6. Minimize the impact of the IR changes, introduced by repacking, on the later optimization passes. @@ -156,7 +156,7 @@ Controlled by cli options, Lowering will generate a `fir.pack_array` operation i The new operations will hold all the information that customizes further handling of the `pack`/`unpack` actions, such as: * Optional array of attributes supporting an interface to generate a predicate that says if the repacking is safe in the current context. -* The continuity mode: `innermost` vs `whole`. +* The contiguity mode: `innermost` vs `whole`. * Attributes selecting the heuristics (both compiler and runtime ones) that may be applied to avoid `pack`/`unpack` actions. * Other attributes, like `stack` vs `heap` to manage the temporary allocation according to `-fstack-arrays`, etc. @@ -195,7 +195,7 @@ The operation creates a new `!fir.box/class<!fir.array<>>` value to represent ei Arguments: * `stack` - indicates if `-fstack-arrays` is in effect for compiling this function. -* `innermost` - tells that the repacking has to be done iff the array is not contiguous in the innermost dimension. This also describes what type of continuity can be expected from `%new_var`, i.e. `innermost` means that the resulting array is definitely contiguous in the innermost dimension, but may be non-contiguous in other dimensions (unless additional analysis proves otherwise). 
For 1-D arrays, `innermost` attribute is not valid. +* `innermost` - tells that the repacking has to be done iff the array is not contiguous in the innermost dimension. This also describes what type of contiguity can be expected from `%new_var`, i.e. `innermost` means that the resulting array is definitely contiguous in the innermost dimension, but may be non-contiguous in other dimensions (unless additional analysis proves otherwise). For 1-D arrays, `innermost` attribute is not valid. * `no_copy` - indicates that, in case a temporary array is created, `%var` to `%new_var` copy is not required (`intent(out)` dummy argument case). * `heuristics` * `loop-only` - `fir.pack_array` can be optimized away, if the array is not used in a loop. @@ -351,7 +351,7 @@ The `fir.pack_array`'s copy-in action cannot be skipped for `INTENT(OUT)` dummy #### Optional behavior -In case of the `whole` continuity mode or with 1-D array, Flang can propagate this information to `hlfir.declare` - this may improve optimizations down the road. This can be done iff the repacking has no dynamic constraints and/or heuristics. For example: +In case of the `whole` contiguity mode or with 1-D array, Flang can propagate this information to `hlfir.declare` - this may improve optimizations down the road. This can be done iff the repacking has no dynamic constraints and/or heuristics. For example: ``` %c0 = arith.constant 0 : index @@ -441,10 +441,11 @@ In cases where `fir.pack_array` is statically known to produce a copy that is co The following user options are proposed: * `-frepack-arrays` - the option forces Flang to repack a non-contiguous assumed-shape dummy array into a temporary contiguous memory, which may result in faster accesses of the array. 
The compiler will insert special code in subprogram prologue to allocate a temporary array and copy the original array into the temporary; in subprogram epilogue, it will insert a copy from the temporary array into the original array and deallocate the temporary. The overhead of the allocation/deallocation and the copies may be significant depending on the array size. The compiler will try to optimize the unnecessary/unprofitable repacking. +* `-fstack-repack-arrays` - attempt allocating the temporary arrays in stack memory. By default, they are allocated in heap memory (note that `-fstack-arrays` does not affect the allocation of the temporaries created for the arrays repacking). * `-frepack-arrays-opts=[none|loop-only]` - the option enables optimizations that may eliminate the array repacking code depending on the array usage pattern: * `none` - no optimizations. * `loop-only` - the array repacking code will be removed in any subprogram where the array is not used inside a loop or an array expression. -* `-frepack-arrays-continuity=[whole|innermost]`: +* `-frepack-arrays-contiguity=[whole|innermost]`: * `whole` - the option will repack arrays that are non-contiguous in any dimension (default). * `innermost` - the option will repack arrays that are non-contiguous in the innermost dimension. * `-frepack-arrays-max-size=<int>` - arrays bigger than the specified size will not be repacked. diff --git a/flang/include/flang/Lower/LoweringOptions.def b/flang/include/flang/Lower/LoweringOptions.def index 6735bea551414..d98823a0e3341 100644 --- a/flang/include/flang/Lower/LoweringOptions.def +++ b/flang/include/flang/Lower/LoweringOptions.def @@ -56,6 +56,11 @@ ENUM_LOWERINGOPT(StackArrays, unsigned, 1, 0) /// packed into contiguous memory. ENUM_LOWERINGOPT(RepackArrays, unsigned, 1, 0) +/// If true, the temporary arrays created under RepackArrays +/// control will be allocated in stack memory. If false, +/// they will be allocated in heap memory. 
+ENUM_LOWERINGOPT(StackRepackArrays, unsigned, 1, 0) + /// If true, the repacking (RepackArrays option above) /// will be done for arrays non-contiguous in any dimension, /// otherwise, it will be done only for arrays non-contiguous diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 229695b18d278..4c3b63f07b190 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -1448,6 +1448,19 @@ bool CompilerInvocation::createFromArgs( clang::driver::options::OPT_fno_realloc_lhs, true)) invoc.loweringOpts.setReallocateLHS(false); + invoc.loweringOpts.setRepackArrays( + args.hasFlag(clang::driver::options::OPT_frepack_arrays, + clang::driver::options::OPT_fno_repack_arrays, + /*default=*/false)); + invoc.loweringOpts.setStackRepackArrays( + args.hasFlag(clang::driver::options::OPT_fstack_repack_arrays, + clang::driver::options::OPT_fno_stack_repack_arrays, + /*default=*/false)); + if (auto *arg = args.getLastArg( + clang::driver::options::OPT_frepack_arrays_contiguity_EQ)) + invoc.loweringOpts.setRepackArraysWhole(arg->getValue() == + llvm::StringRef{"whole"}); + success &= parseFrontendArgs(invoc.getFrontendOpts(), args, diags); parseTargetArgs(invoc.getTargetOpts(), args); parsePreprocessorArgs(invoc.getPreprocessorOpts(), args); @@ -1687,6 +1700,7 @@ void CompilerInvocation::setLoweringOptions() { const Fortran::common::LangOptions &langOptions = getLangOpts(); loweringOpts.setIntegerWrapAround(langOptions.getSignedOverflowBehavior() == Fortran::common::LangOptions::SOB_Defined); + loweringOpts.setStackArrays(codegenOpts.StackArrays); Fortran::common::MathOptionsBase &mathOpts = loweringOpts.getMathOptions(); // TODO: when LangOptions are finalized, we can represent // the math related options using Fortran::commmon::MathOptionsBase, diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index 0b22b743edee9..366ff328bfa27 100644 --- 
a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -2630,7 +2630,7 @@ Fortran::lower::genPackArray(Fortran::lower::AbstractConverter &converter, }); fir::FirOpBuilder &builder = converter.getFirOpBuilder(); const mlir::Location loc = genLocation(converter, sym); - bool stackAlloc = opts.getStackArrays(); + bool stackAlloc = opts.getStackRepackArrays(); // 1D arrays must always use 'whole' mode. bool isInnermostMode = !opts.getRepackArraysWhole() && sym.Rank() > 1; // Avoid copy-in for 'intent(out)' variable, unless this is a dummy diff --git a/flang/test/Driver/frepack-arrays-contiguity.f90 b/flang/test/Driver/frepack-arrays-contiguity.f90 new file mode 100644 index 0000000000000..d642cdac598af --- /dev/null +++ b/flang/test/Driver/frepack-arrays-contiguity.f90 @@ -0,0 +1,27 @@ +! Test just the forwarding of -frepack-arrays-contiguity options: +! RUN: %flang -frepack-arrays-contiguity=whole %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=WHOLECMD %s +! RUN: %flang -frepack-arrays-contiguity=innermost %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=INNERMOSTCMD %s +! RUN: %flang -frepack-arrays-contiguity=innermost -frepack-arrays-contiguity=whole %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=WHOLECMD %s +! RUN: %flang -frepack-arrays-contiguity=whole -frepack-arrays-contiguity=innermost %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=INNERMOSTCMD %s + +! Test proper setting of the lowering options: +! RUN: %flang_fc1 -frepack-arrays -frepack-arrays-contiguity=whole %s -emit-hlfir -o - | FileCheck --check-prefix=WHOLE %s +! RUN: %flang_fc1 -frepack-arrays-contiguity=whole %s -emit-hlfir -o - | FileCheck --check-prefix=NOREPACK %s +! RUN: %flang_fc1 -frepack-arrays -frepack-arrays-contiguity=innermost %s -emit-hlfir -o - | FileCheck --check-prefix=INNERMOST %s +! RUN: %flang_fc1 -frepack-arrays-contiguity=innermost %s -emit-hlfir -o - | FileCheck --check-prefix=NOREPACK %s + +!
Default setting is 'innermost': +! RUN: %flang_fc1 -frepack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=INNERMOST %s + +! WHOLECMD: "-fc1"{{.*}}"-frepack-arrays-contiguity=whole" +! INNERMOSTCMD: "-fc1"{{.*}}"-frepack-arrays-contiguity=innermost" + +subroutine test(x) + real :: x(:,:) + ! WHOLE: fir.pack_array{{.*}}whole + ! WHOLE: fir.unpack_array + ! INNERMOST: fir.pack_array{{.*}}innermost + ! INNERMOST: fir.unpack_array + ! NOREPACK-NOT: fir.pack_array + ! NOREPACK-NOT: fir.unpack_array +end subroutine diff --git a/flang/test/Driver/frepack-arrays.f90 b/flang/test/Driver/frepack-arrays.f90 new file mode 100644 index 0000000000000..ad96a14f74378 --- /dev/null +++ b/flang/test/Driver/frepack-arrays.f90 @@ -0,0 +1,24 @@ +! Test just the forwarding of -f[no-]repack-arrays options: +! RUN: %flang -frepack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=REPACKCMD %s +! RUN: %flang -fno-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=NOREPACKCMD %s +! RUN: %flang -frepack-arrays -fno-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=NOREPACKCMD %s +! RUN: %flang -fno-repack-arrays -frepack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=REPACKCMD %s + +! Test proper setting of the lowering options: +! RUN: %flang_fc1 -frepack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=REPACK %s +! RUN: %flang_fc1 -fno-repack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=NOREPACK %s +! RUN: %flang_fc1 -frepack-arrays -fno-repack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=NOREPACK %s +! RUN: %flang_fc1 -fno-repack-arrays -frepack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=REPACK %s + +! REPACKCMD: "-fc1"{{.*}}"-frepack-arrays" +! REPACKCMD-NOT: -fno-repack-arrays +! NOREPACKCMD: "-fc1"{{.*}}"-fno-repack-arrays" +! NOREPACKCMD-NOT: -frepack-arrays + +subroutine test(x) + real :: x(:) + ! REPACK: fir.pack_array + ! REPACK: fir.unpack_array + !
NOREPACK-NOT: fir.pack_array + ! NOREPACK-NOT: fir.unpack_array +end subroutine diff --git a/flang/test/Driver/fstack-repack-arrays.f90 b/flang/test/Driver/fstack-repack-arrays.f90 new file mode 100644 index 0000000000000..4fb5aa0c18032 --- /dev/null +++ b/flang/test/Driver/fstack-repack-arrays.f90 @@ -0,0 +1,24 @@ +! Test just the forwarding of -f[no-]stack-repack-arrays options: +! RUN: %flang -fstack-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=STACKCMD %s +! RUN: %flang -fno-stack-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=HEAPCMD %s +! RUN: %flang -fstack-repack-arrays -fno-stack-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=HEAPCMD %s +! RUN: %flang -fno-stack-repack-arrays -fstack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=HEAPCMD %s +! RUN: %flang -fno-stack-repack-arrays -fstack-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=STACKCMD %s + +! Test proper setting of the lowering options: +! RUN: %flang_fc1 -frepack-arrays -fstack-repack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=STACK %s +! RUN: %flang_fc1 -frepack-arrays -fno-stack-repack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=HEAP %s + +! Default setting is 'heap': +! RUN: %flang_fc1 -frepack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=HEAP %s + +! STACKCMD: "-fc1"{{.*}}"-fstack-repack-arrays" +! HEAPCMD: "-fc1"{{.*}}"-fno-stack-repack-arrays" + +subroutine test(x) + real :: x(:,:) + ! STACK: fir.pack_array{{.*}}stack + ! STACK: fir.unpack_array{{.*}}stack + ! HEAP: fir.pack_array{{.*}}heap + ! HEAP: fir.unpack_array{{.*}}heap +end subroutine diff --git a/flang/test/Lower/repack-arrays.f90 b/flang/test/Lower/repack-arrays.f90 index 19ea93a3521a3..ff89df82793a3 100644 --- a/flang/test/Lower/repack-arrays.f90 +++ b/flang/test/Lower/repack-arrays.f90 @@ -1,7 +1,7 @@ -!
RUN: bbc -emit-hlfir -frepack-arrays -fstack-arrays -frepack-arrays-continuity-whole %s -o - -I nowhere | FileCheck --check-prefixes=ALL,STACK,WHOLE %s -! RUN: bbc -emit-hlfir -frepack-arrays -fstack-arrays=false -frepack-arrays-continuity-whole %s -o - -I nowhere | FileCheck --check-prefixes=ALL,HEAP,WHOLE %s -! RUN: bbc -emit-hlfir -frepack-arrays -fstack-arrays -frepack-arrays-continuity-whole=false %s -o - -I nowhere | FileCheck --check-prefixes=ALL,STACK,INNER %s -! RUN: bbc -emit-hlfir -frepack-arrays -fstack-arrays=false -frepack-arrays-continuity-whole=false %s -o - -I nowhere | FileCheck --check-prefixes=ALL,HEAP,INNER %s +! RUN: bbc -emit-hlfir -frepack-arrays -fstack-repack-arrays -frepack-arrays-continuity-whole %s -o - -I nowhere | FileCheck --check-prefixes=ALL,STACK,WHOLE %s +! RUN: bbc -emit-hlfir -frepack-arrays -fstack-repack-arrays=false -frepack-arrays-continuity-whole %s -o - -I nowhere | FileCheck --check-prefixes=ALL,HEAP,WHOLE %s +! RUN: bbc -emit-hlfir -frepack-arrays -fstack-repack-arrays -frepack-arrays-continuity-whole=false %s -o - -I nowhere | FileCheck --check-prefixes=ALL,STACK,INNER %s +! RUN: bbc -emit-hlfir -frepack-arrays -fstack-repack-arrays=false -frepack-arrays-continuity-whole=false %s -o - -I nowhere | FileCheck --check-prefixes=ALL,HEAP,INNER %s ! ALL-LABEL: func.func @_QPtest1( ! ALL-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x"}) { diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index 2cc75b7aa4e87..4cf9074fc93c4 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -245,15 +245,20 @@ static llvm::cl::opt<bool> "the LHS of the intrinsic assignment"), llvm::cl::init(true)); -// TODO: -fstack-arrays is currently only used for fir.pack_array, -// but it should probably be used for deciding how arrays/temporaries -// are allocated during lowering. 
+// TODO: -fstack-arrays is currently unused, but it should probably be used +// for deciding how arrays/temporaries are allocated during lowering. static llvm::cl::opt<bool> stackArrays("fstack-arrays", llvm::cl::desc("Allocate all arrays of unknown size and " "temporary arrays in stack memory"), llvm::cl::init(false)); +static llvm::cl::opt<bool> stackRepackArrays( + "fstack-repack-arrays", + llvm::cl::desc("Allocate temporary arrays for -frepack-arrays " + "in stack memory"), + llvm::cl::init(false)); + static llvm::cl::opt<bool> repackArrays("frepack-arrays", llvm::cl::desc("Pack non-contiguous assummed shape arrays " @@ -412,6 +417,7 @@ static llvm::LogicalResult convertFortranSourceToMLIR( loweringOptions.setInitGlobalZero(initGlobalZero); loweringOptions.setReallocateLHS(reallocateLHS); loweringOptions.setStackArrays(stackArrays); + loweringOptions.setStackRepackArrays(stackRepackArrays); loweringOptions.setRepackArrays(repackArrays); loweringOptions.setRepackArraysWhole(repackArraysWhole); std::vector<Fortran::lower::EnvironmentDefault> envDefaults = {}; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits